dataset save raw file
This commit is contained in:
35
client/src/pages/api/admin/initv43.ts
Normal file
35
client/src/pages/api/admin/initv43.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { authUser } from '@/service/utils/auth';
|
||||
import { PgClient } from '@/service/pg';
|
||||
import { PgTrainingTableName } from '@/constants/plugin';
|
||||
|
||||
/**
 * Admin initialization route (initv43): ensures the training table has a
 * `file_id` column.
 *
 * Flow:
 *  1. `authUser` is called with `authRoot: true` (presumably restricting the
 *     route to the root user — confirm against `@/service/utils/auth`).
 *  2. `information_schema.columns` is queried for a `file_id` column on the
 *     training table in the `public` schema.
 *  3. If the column is found, a notice string is returned; otherwise the
 *     column is added and the raw query result is returned as `data`.
 *
 * Any error thrown along the way is reported through `jsonRes` with code 500.
 *
 * @param req - Incoming Next.js API request, forwarded to `authUser`.
 * @param res - Next.js API response, written to via `jsonRes`.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    await authUser({ req, authRoot: true });

    // The table name comes from a project constant, not from request input.
    const { rowCount } = await PgClient.query(`SELECT 1
      FROM information_schema.columns
      WHERE table_schema = 'public'
      AND table_name = '${PgTrainingTableName}'
      AND column_name = 'file_id'`);

    // Defensive: treat a missing rowCount as "no rows found".
    if ((rowCount ?? 0) > 0) {
      return jsonRes(res, {
        data: '已经存在file_id字段'
      });
    }

    // IF NOT EXISTS keeps the migration idempotent: if two requests both pass
    // the existence check above, the second ALTER no longer fails with a
    // "column already exists" error.
    jsonRes(res, {
      data: await PgClient.query(
        `ALTER TABLE ${PgTrainingTableName} ADD COLUMN IF NOT EXISTS file_id VARCHAR(100)`
      )
    });
  } catch (error) {
    jsonRes(res, {
      code: 500,
      error
    });
  }
}
|
||||
@@ -9,12 +9,11 @@ import { startQueue } from '@/service/utils/tools';
|
||||
import { PgClient } from '@/service/pg';
|
||||
import { modelToolMap } from '@/utils/plugin';
|
||||
import { getVectorModel } from '@/service/utils/data';
|
||||
|
||||
export type DateItemType = { a: string; q: string; source?: string };
|
||||
import { DatasetItemType } from '@/types/plugin';
|
||||
|
||||
export type Props = {
|
||||
kbId: string;
|
||||
data: DateItemType[];
|
||||
data: DatasetItemType[];
|
||||
mode: `${TrainingModeEnum}`;
|
||||
prompt?: string;
|
||||
};
|
||||
@@ -95,7 +94,7 @@ export async function pushDataToKb({
|
||||
|
||||
// 过滤重复的 qa 内容
|
||||
const set = new Set();
|
||||
const filterData: DateItemType[] = [];
|
||||
const filterData: DatasetItemType[] = [];
|
||||
|
||||
data.forEach((item) => {
|
||||
if (!item.q) return;
|
||||
@@ -120,13 +119,10 @@ export async function pushDataToKb({
|
||||
// 数据库去重
|
||||
const insertData = (
|
||||
await Promise.allSettled(
|
||||
filterData.map(async ({ q, a = '', source }) => {
|
||||
filterData.map(async (data) => {
|
||||
let { q, a } = data;
|
||||
if (mode !== TrainingModeEnum.index) {
|
||||
return Promise.resolve({
|
||||
q,
|
||||
a,
|
||||
source
|
||||
});
|
||||
return Promise.resolve(data);
|
||||
}
|
||||
|
||||
if (!q) {
|
||||
@@ -152,23 +148,17 @@ export async function pushDataToKb({
|
||||
console.log(error);
|
||||
error;
|
||||
}
|
||||
return Promise.resolve({
|
||||
q,
|
||||
a,
|
||||
source
|
||||
});
|
||||
return Promise.resolve(data);
|
||||
})
|
||||
)
|
||||
)
|
||||
.filter((item) => item.status === 'fulfilled')
|
||||
.map<DateItemType>((item: any) => item.value);
|
||||
.map<DatasetItemType>((item: any) => item.value);
|
||||
|
||||
// 插入记录
|
||||
const insertRes = await TrainingData.insertMany(
|
||||
insertData.map((item) => ({
|
||||
q: item.q,
|
||||
a: item.a,
|
||||
source: item.source,
|
||||
...item,
|
||||
userId,
|
||||
kbId,
|
||||
mode,
|
||||
|
||||
@@ -41,7 +41,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
||||
const response: any = await PgClient.query(
|
||||
`BEGIN;
|
||||
SET LOCAL ivfflat.probes = ${global.systemEnv.pgIvfflatProbe || 10};
|
||||
select id,q,a,source,(vector <#> '[${
|
||||
select id, q, a, source, file_id, (vector <#> '[${
|
||||
vectors[0]
|
||||
}]') * -1 AS score from ${PgTrainingTableName} where kb_id='${kbId}' AND user_id='${userId}' order by vector <#> '[${
|
||||
vectors[0]
|
||||
@@ -49,7 +49,9 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
||||
COMMIT;`
|
||||
);
|
||||
|
||||
jsonRes<Response>(res, { data: response?.[2]?.rows || [] });
|
||||
jsonRes<Response>(res, {
|
||||
data: response?.[2]?.rows || []
|
||||
});
|
||||
} catch (err) {
|
||||
console.log(err);
|
||||
jsonRes(res, {
|
||||
|
||||
@@ -3,6 +3,7 @@ import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { GridFSStorage } from '@/service/lib/gridfs';
|
||||
import { authFileToken } from './readUrl';
|
||||
import jschardet from 'jschardet';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
@@ -12,6 +13,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
|
||||
const { fileId, userId } = await authFileToken(token);
|
||||
|
||||
if (!fileId) {
|
||||
throw new Error('fileId is empty');
|
||||
}
|
||||
|
||||
const gridFs = new GridFSStorage('dataset', userId);
|
||||
|
||||
const [file, buffer] = await Promise.all([
|
||||
@@ -19,9 +24,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
gridFs.download(fileId)
|
||||
]);
|
||||
|
||||
res.setHeader('encoding', file.encoding);
|
||||
const encoding = jschardet.detect(buffer)?.encoding;
|
||||
|
||||
res.setHeader('encoding', encoding);
|
||||
res.setHeader('Content-Type', file.contentType);
|
||||
res.setHeader('Cache-Control', 'public, max-age=3600');
|
||||
res.setHeader('Content-Disposition', `inline; filename="${encodeURIComponent(file.filename)}"`);
|
||||
|
||||
res.end(buffer);
|
||||
} catch (error) {
|
||||
|
||||
@@ -28,9 +28,10 @@ class UploadModel {
|
||||
limits: {
|
||||
fieldSize: maxSize
|
||||
},
|
||||
preservePath: true,
|
||||
storage: multer.diskStorage({
|
||||
filename: (_req, file, cb) => {
|
||||
const { ext } = path.parse(file.originalname);
|
||||
const { ext } = path.parse(decodeURIComponent(file.originalname));
|
||||
cb(null, nanoid() + ext);
|
||||
}
|
||||
})
|
||||
@@ -44,8 +45,13 @@ class UploadModel {
|
||||
return reject(error);
|
||||
}
|
||||
|
||||
// @ts-ignore
|
||||
resolve({ files: req.files });
|
||||
resolve({
|
||||
// @ts-ignore
|
||||
files: req.files?.map((file) => ({
|
||||
...file,
|
||||
originalname: decodeURIComponent(file.originalname)
|
||||
}))
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -56,9 +62,9 @@ const upload = new UploadModel();
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
const { userId } = await authUser({ req });
|
||||
const { userId } = await authUser({ req, authToken: true });
|
||||
|
||||
const { files } = await upload.doUpload(req, res);
|
||||
const { files = [] } = await upload.doUpload(req, res);
|
||||
|
||||
const gridFs = new GridFSStorage('dataset', userId);
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
const where: any = [['user_id', userId], 'AND', ['id', dataId]];
|
||||
|
||||
const searchRes = await PgClient.select<KbDataItemType>(PgTrainingTableName, {
|
||||
fields: ['kb_id', 'id', 'q', 'a', 'source'],
|
||||
fields: ['kb_id', 'id', 'q', 'a', 'source', 'file_id'],
|
||||
where,
|
||||
limit: 1
|
||||
});
|
||||
|
||||
@@ -43,7 +43,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
|
||||
const [searchRes, total] = await Promise.all([
|
||||
PgClient.select<KbDataItemType>(PgTrainingTableName, {
|
||||
fields: ['id', 'q', 'a', 'source'],
|
||||
fields: ['id', 'q', 'a', 'source', 'file_id'],
|
||||
where,
|
||||
order: [{ field: 'id', mode: 'DESC' }],
|
||||
limit: pageSize,
|
||||
|
||||
@@ -8,10 +8,11 @@ import { insertKbItem, PgClient } from '@/service/pg';
|
||||
import { modelToolMap } from '@/utils/plugin';
|
||||
import { getVectorModel } from '@/service/utils/data';
|
||||
import { getVector } from '@/pages/api/openapi/plugin/vector';
|
||||
import { DatasetItemType } from '@/types/plugin';
|
||||
|
||||
export type Props = {
|
||||
kbId: string;
|
||||
data: { a: string; q: string; source?: string };
|
||||
data: DatasetItemType;
|
||||
};
|
||||
|
||||
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
|
||||
@@ -198,8 +198,7 @@ const DataCard = ({ kbId }: { kbId: string }) => {
|
||||
onClick={() =>
|
||||
setEditInputData({
|
||||
dataId: item.id,
|
||||
q: item.q,
|
||||
a: item.a
|
||||
...item
|
||||
})
|
||||
}
|
||||
>
|
||||
|
||||
@@ -109,10 +109,9 @@ const ChunkImport = ({ kbId }: { kbId: string }) => {
|
||||
return {
|
||||
...file,
|
||||
tokens: splitRes.tokens,
|
||||
chunks: splitRes.chunks.map((chunk) => ({
|
||||
q: chunk,
|
||||
a: '',
|
||||
source: file.filename
|
||||
chunks: file.chunks.map((chunk, i) => ({
|
||||
...chunk,
|
||||
q: splitRes.chunks[i]
|
||||
}))
|
||||
};
|
||||
})
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
import React, { useState, useCallback, useMemo } from 'react';
|
||||
import React, { useState, useMemo } from 'react';
|
||||
import { Box, Flex, Button, useTheme, Image } from '@chakra-ui/react';
|
||||
import { useToast } from '@/hooks/useToast';
|
||||
import { useConfirm } from '@/hooks/useConfirm';
|
||||
import { useMutation } from '@tanstack/react-query';
|
||||
import { postKbDataFromList } from '@/api/plugins/kb';
|
||||
import { getErrText } from '@/utils/tools';
|
||||
import { vectorModelList } from '@/store/static';
|
||||
import MyIcon from '@/components/Icon';
|
||||
import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete';
|
||||
import { TrainingModeEnum } from '@/constants/plugin';
|
||||
|
||||
@@ -2,7 +2,13 @@ import MyIcon from '@/components/Icon';
|
||||
import { useLoading } from '@/hooks/useLoading';
|
||||
import { useSelectFile } from '@/hooks/useSelectFile';
|
||||
import { useToast } from '@/hooks/useToast';
|
||||
import { fileDownload, readCsvContent, simpleText, splitText2Chunks } from '@/utils/file';
|
||||
import {
|
||||
fileDownload,
|
||||
readCsvContent,
|
||||
simpleText,
|
||||
splitText2Chunks,
|
||||
uploadFiles
|
||||
} from '@/utils/file';
|
||||
import { Box, Flex, useDisclosure, type BoxProps } from '@chakra-ui/react';
|
||||
import { fileImgs } from '@/constants/common';
|
||||
import { DragEvent, useCallback, useState } from 'react';
|
||||
@@ -11,7 +17,8 @@ import { readTxtContent, readPdfContent, readDocContent } from '@/utils/file';
|
||||
import { customAlphabet } from 'nanoid';
|
||||
import dynamic from 'next/dynamic';
|
||||
import MyTooltip from '@/components/MyTooltip';
|
||||
import { FetchResultItem } from '@/types/plugin';
|
||||
import { FetchResultItem, DatasetItemType } from '@/types/plugin';
|
||||
import { getErrText } from '@/utils/tools';
|
||||
|
||||
const UrlFetchModal = dynamic(() => import('./UrlFetchModal'));
|
||||
const CreateFileModal = dynamic(() => import('./CreateFileModal'));
|
||||
@@ -22,7 +29,7 @@ const csvTemplate = `question,answer,source\n"什么是 laf","laf 是一个云
|
||||
export type FileItemType = {
|
||||
id: string;
|
||||
filename: string;
|
||||
chunks: { q: string; a: string; source?: string }[];
|
||||
chunks: DatasetItemType[];
|
||||
text: string;
|
||||
icon: string;
|
||||
tokens: number;
|
||||
@@ -58,7 +65,7 @@ const FileSelect = ({
|
||||
});
|
||||
|
||||
const [isDragging, setIsDragging] = useState(false);
|
||||
const [selecting, setSelecting] = useState(false);
|
||||
const [selectingText, setSelectingText] = useState<string>();
|
||||
|
||||
const {
|
||||
isOpen: isOpenUrlFetch,
|
||||
@@ -73,7 +80,6 @@ const FileSelect = ({
|
||||
|
||||
const onSelectFile = useCallback(
|
||||
async (files: File[]) => {
|
||||
setSelecting(true);
|
||||
try {
|
||||
// Parse file by file
|
||||
const chunkFiles: FileItemType[] = [];
|
||||
@@ -88,19 +94,31 @@ const FileSelect = ({
|
||||
continue;
|
||||
}
|
||||
|
||||
let text = await (async () => {
|
||||
switch (extension) {
|
||||
case 'txt':
|
||||
case 'md':
|
||||
return readTxtContent(file);
|
||||
case 'pdf':
|
||||
return readPdfContent(file);
|
||||
case 'doc':
|
||||
case 'docx':
|
||||
return readDocContent(file);
|
||||
}
|
||||
return '';
|
||||
})();
|
||||
// parse and upload files
|
||||
let [text, filesId] = await Promise.all([
|
||||
(async () => {
|
||||
switch (extension) {
|
||||
case 'txt':
|
||||
case 'md':
|
||||
return readTxtContent(file);
|
||||
case 'pdf':
|
||||
return readPdfContent(file);
|
||||
case 'doc':
|
||||
case 'docx':
|
||||
return readDocContent(file);
|
||||
}
|
||||
return '';
|
||||
})(),
|
||||
uploadFiles(files, (percent) => {
|
||||
if (percent < 100) {
|
||||
setSelectingText(
|
||||
t('file.Uploading', { name: file.name.slice(0, 20), percent }) || ''
|
||||
);
|
||||
} else {
|
||||
setSelectingText(t('file.Parse', { name: file.name.slice(0, 20) }) || '');
|
||||
}
|
||||
})
|
||||
]);
|
||||
|
||||
if (text) {
|
||||
text = simpleText(text);
|
||||
@@ -117,7 +135,8 @@ const FileSelect = ({
|
||||
chunks: splitRes.chunks.map((chunk) => ({
|
||||
q: chunk,
|
||||
a: '',
|
||||
source: file.name
|
||||
source: file.name,
|
||||
file_id: filesId[0]
|
||||
}))
|
||||
};
|
||||
chunkFiles.unshift(fileItem);
|
||||
@@ -139,7 +158,8 @@ const FileSelect = ({
|
||||
chunks: data.map((item) => ({
|
||||
q: item[0],
|
||||
a: item[1],
|
||||
source: item[2] || file.name
|
||||
source: item[2] || file.name,
|
||||
file_id: filesId[0]
|
||||
}))
|
||||
};
|
||||
|
||||
@@ -150,13 +170,13 @@ const FileSelect = ({
|
||||
} catch (error: any) {
|
||||
console.log(error);
|
||||
toast({
|
||||
title: typeof error === 'string' ? error : '解析文件失败',
|
||||
title: getErrText(error, '解析文件失败'),
|
||||
status: 'error'
|
||||
});
|
||||
}
|
||||
setSelecting(false);
|
||||
setSelectingText(undefined);
|
||||
},
|
||||
[chunkLen, onPushFiles, toast]
|
||||
[chunkLen, onPushFiles, t, toast]
|
||||
);
|
||||
const onUrlFetch = useCallback(
|
||||
(e: FetchResultItem[]) => {
|
||||
@@ -353,7 +373,9 @@ const FileSelect = ({
|
||||
{t('file.Click to download CSV template')}
|
||||
</Box>
|
||||
)}
|
||||
<FileSelectLoading loading={selecting} fixed={false} />
|
||||
{selectingText !== undefined && (
|
||||
<FileSelectLoading loading text={selectingText} fixed={false} />
|
||||
)}
|
||||
<File onSelect={onSelectFile} />
|
||||
{isOpenUrlFetch && <UrlFetchModal onClose={onCloseUrlFetch} onSuccess={onUrlFetch} />}
|
||||
{isOpenCreateFile && <CreateFileModal onClose={onCloseCreateFile} onSuccess={onCreateFile} />}
|
||||
|
||||
@@ -97,10 +97,9 @@ const QAImport = ({ kbId }: { kbId: string }) => {
|
||||
return {
|
||||
...file,
|
||||
tokens: splitRes.tokens,
|
||||
chunks: splitRes.chunks.map((chunk) => ({
|
||||
q: chunk,
|
||||
a: '',
|
||||
source: file.filename
|
||||
chunks: file.chunks.map((chunk, i) => ({
|
||||
...chunk,
|
||||
q: splitRes.chunks[i]
|
||||
}))
|
||||
};
|
||||
})
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import React, { useState, useCallback } from 'react';
|
||||
import { Box, Flex, Button, Textarea, IconButton } from '@chakra-ui/react';
|
||||
import { Box, Flex, Button, Textarea, IconButton, BoxProps } from '@chakra-ui/react';
|
||||
import { useForm } from 'react-hook-form';
|
||||
import { insertData2Kb, putKbDataById, delOneKbDataByDataId } from '@/api/plugins/kb';
|
||||
import { getFileViewUrl } from '@/api/system';
|
||||
import { useToast } from '@/hooks/useToast';
|
||||
import { getErrText } from '@/utils/tools';
|
||||
import MyIcon from '@/components/Icon';
|
||||
@@ -10,8 +11,10 @@ import MyTooltip from '@/components/MyTooltip';
|
||||
import { QuestionOutlineIcon } from '@chakra-ui/icons';
|
||||
import { useUserStore } from '@/store/user';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import { DatasetItemType } from '@/types/plugin';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
export type FormData = { dataId?: string; a: string; q: string; source?: string };
|
||||
export type FormData = { dataId?: string } & DatasetItemType;
|
||||
|
||||
const InputDataModal = ({
|
||||
onClose,
|
||||
@@ -29,12 +32,13 @@ const InputDataModal = ({
|
||||
kbId: string;
|
||||
defaultValues?: FormData;
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const [loading, setLoading] = useState(false);
|
||||
const { toast } = useToast();
|
||||
|
||||
const { kbDetail, getKbDetail } = useUserStore();
|
||||
|
||||
const { register, handleSubmit, reset } = useForm<FormData>({
|
||||
const { getValues, register, handleSubmit, reset } = useForm<FormData>({
|
||||
defaultValues
|
||||
});
|
||||
|
||||
@@ -183,7 +187,16 @@ const InputDataModal = ({
|
||||
</Box>
|
||||
</Box>
|
||||
|
||||
<Flex px={6} pt={2} pb={4} alignItems={'center'}>
|
||||
<Flex px={6} pt={['34px', 2]} pb={4} alignItems={'center'} position={'relative'}>
|
||||
<RawFileText
|
||||
fileId={getValues('file_id')}
|
||||
filename={getValues('source')}
|
||||
position={'absolute'}
|
||||
left={'50%'}
|
||||
top={['16px', '50%']}
|
||||
transform={'translate(-50%,-50%)'}
|
||||
/>
|
||||
|
||||
<Box flex={1}>
|
||||
{defaultValues.dataId && onDelete && (
|
||||
<IconButton
|
||||
@@ -217,15 +230,17 @@ const InputDataModal = ({
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
<Button variant={'base'} mr={3} isLoading={loading} onClick={onClose}>
|
||||
取消
|
||||
</Button>
|
||||
<Button
|
||||
isLoading={loading}
|
||||
onClick={handleSubmit(defaultValues.dataId ? updateData : sureImportData)}
|
||||
>
|
||||
{defaultValues.dataId ? '确认变更' : '确认导入'}
|
||||
</Button>
|
||||
<Box>
|
||||
<Button variant={'base'} mr={3} isLoading={loading} onClick={onClose}>
|
||||
取消
|
||||
</Button>
|
||||
<Button
|
||||
isLoading={loading}
|
||||
onClick={handleSubmit(defaultValues.dataId ? updateData : sureImportData)}
|
||||
>
|
||||
{defaultValues.dataId ? '确认变更' : '确认导入'}
|
||||
</Button>
|
||||
</Box>
|
||||
</Flex>
|
||||
</Flex>
|
||||
</MyModal>
|
||||
@@ -233,3 +248,44 @@ const InputDataModal = ({
|
||||
};
|
||||
|
||||
export default InputDataModal;
|
||||
|
||||
/** Props for `RawFileText`: every Chakra `BoxProps` plus an optional file name and file id. */
interface RawFileTextProps extends BoxProps {
  filename?: string;
  fileId?: string;
}

/**
 * Shows `filename` inside an inline-block `Box` wrapped in a tooltip.
 *
 * When `fileId` is truthy the text is made clickable: clicking requests a
 * view URL through `getFileViewUrl`, prefixes it with `location.origin` and
 * opens it in a new tab. If that fails, an error toast is shown instead.
 * Without a `fileId` the name is rendered as plain text and the tooltip label
 * is empty.
 *
 * Remaining props are spread onto the `Box` last, so callers can override the
 * defaults set here.
 */
export function RawFileText({ fileId, filename = '', ...props }: RawFileTextProps) {
  const { t } = useTranslation();
  const { toast } = useToast();

  // Tooltip text is only meaningful when there is a file to open.
  const tooltipLabel = fileId ? t('file.Click to view file') || '' : '';

  // Link-like styling and the click handler exist only when a file id is present.
  const linkProps = !!fileId
    ? {
        cursor: 'pointer',
        textDecoration: ['underline', 'none'],
        _hover: {
          textDecoration: 'underline'
        },
        onClick: async () => {
          try {
            const viewUrl = await getFileViewUrl(fileId);
            window.open(`${location.origin}${viewUrl}`, '_blank');
          } catch (error) {
            toast({
              title: getErrText(error, '获取文件地址失败'),
              status: 'error'
            });
          }
        }
      }
    : {};

  return (
    <MyTooltip label={tooltipLabel} shouldWrapChildren={false}>
      <Box color={'myGray.600'} display={'inline-block'} {...linkProps} {...props}>
        {filename}
      </Box>
    </MyTooltip>
  );
}
|
||||
|
||||
@@ -207,8 +207,7 @@ const Test = ({ kbId }: { kbId: string }) => {
|
||||
|
||||
setEditData({
|
||||
dataId: data.id,
|
||||
q: data.q,
|
||||
a: data.a
|
||||
...data
|
||||
});
|
||||
} catch (err) {
|
||||
toast({
|
||||
|
||||
Reference in New Issue
Block a user