feat: 训练数据管理
This commit is contained in:
@@ -38,8 +38,9 @@ export default function App({ Component, pageProps }: AppProps) {
|
||||
/>
|
||||
<link rel="icon" href="/favicon.ico" />
|
||||
</Head>
|
||||
<Script src="/iconfont.js" strategy="afterInteractive"></Script>
|
||||
<Script src="/qrcode.min.js" strategy="afterInteractive"></Script>
|
||||
<Script src="/js/iconfont.js" strategy="afterInteractive"></Script>
|
||||
<Script src="/js/qrcode.min.js" strategy="afterInteractive"></Script>
|
||||
<Script src="/js/pdf.js" strategy="afterInteractive"></Script>
|
||||
<QueryClientProvider client={queryClient}>
|
||||
<ChakraProvider theme={theme}>
|
||||
<ColorModeScript initialColorMode={theme.config.initialColorMode} />
|
||||
|
||||
88
src/pages/api/data/getDataList.ts
Normal file
88
src/pages/api/data/getDataList.ts
Normal file
@@ -0,0 +1,88 @@
|
||||
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase, Data, DataItem } from '@/service/mongo';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
import type { DataSchema } from '@/types/mongoSchema';
|
||||
import type { DataListItem } from '@/types/data';
|
||||
import type { PagingData } from '@/types';
|
||||
import mongoose from 'mongoose';
|
||||
|
||||
/**
 * Returns one page of the authenticated user's data sets, newest first.
 *
 * Query parameters:
 *  - `pageNum`:  1-based page index (defaults to 1).
 *  - `pageSize`: number of rows per page (defaults to 10).
 *
 * Every returned row carries two counters computed from the `dataitems`
 * collection: `totalData` (all items of the data set) and `trainingData`
 * (items whose `status` equals 1). `total` is the number of data sets owned by
 * the user, so the client can tell when it has reached the last page.
 *
 * Any failure (missing credential, rejected token, database error) is reported
 * through `jsonRes` with code 500.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    const { authorization } = req.headers;
    const query = req.query as { pageNum?: string; pageSize?: string };

    // `$skip` / `$limit` reject NaN, fractional and non-positive values, so
    // anything that is not a positive integer falls back to the default.
    const toPositiveInt = (raw: string | undefined, fallback: number): number => {
      const parsed = Math.floor(Number(raw));
      return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
    };
    const pageNum = toPositiveInt(query.pageNum, 1);
    const pageSize = toPositiveInt(query.pageSize, 10);

    if (!authorization) {
      throw new Error('缺少登录凭证');
    }

    const userId = await authToken(authorization);

    await connectToDatabase();

    // Only the caller's own data sets are visible.
    const ownedByUser = { userId: new mongoose.Types.ObjectId(userId) };

    // The page query and the overall count are independent, so run them together.
    const [datalist, total] = await Promise.all([
      Data.aggregate<DataListItem>([
        { $match: ownedByUser },
        // Newest data sets first.
        { $sort: { createTime: -1 } },
        // Skip the rows that belong to earlier pages.
        { $skip: (pageNum - 1) * pageSize },
        // Keep a single page worth of rows.
        { $limit: pageSize },
        {
          // Attach the data items of each data set so they can be counted below.
          $lookup: {
            from: 'dataitems',
            localField: '_id',
            foreignField: 'dataId',
            as: 'items'
          }
        },
        {
          $addFields: {
            // Total number of data items in the set.
            totalData: { $size: '$items' },
            // Number of data items whose status is 1.
            trainingData: {
              $size: {
                $filter: {
                  input: '$items',
                  as: 'item',
                  cond: { $eq: ['$$item.status', 1] }
                }
              }
            }
          }
        },
        // The joined items were only needed for counting; do not return them.
        { $project: { items: 0 } }
      ]),
      Data.countDocuments(ownedByUser)
    ]);

    jsonRes<PagingData<DataListItem>>(res, {
      data: {
        pageNum,
        pageSize,
        data: datalist,
        total
      }
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}
|
||||
33
src/pages/api/data/postData.ts
Normal file
33
src/pages/api/data/postData.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase, Data } from '@/service/mongo';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
|
||||
/**
 * Creates a new, empty data set owned by the authenticated user.
 *
 * Query parameters:
 *  - `name`: display name of the data set (required).
 *
 * Responds with the `_id` of the created `Data` document. Any failure (missing
 * name, missing or rejected credential, database error) is reported through
 * `jsonRes` with code 500.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    const { name } = req.query as { name: string };
    if (!name) {
      throw new Error('参数错误');
    }

    // Reject unauthenticated callers before touching the database, mirroring
    // the check performed by the data list endpoint.
    const { authorization } = req.headers;
    if (!authorization) {
      throw new Error('缺少登录凭证');
    }

    const userId = await authToken(authorization);

    await connectToDatabase();

    // Create the data set document.
    const data = await Data.create({
      userId,
      name
    });

    jsonRes(res, {
      data: data._id
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}
|
||||
@@ -4,11 +4,10 @@ import { connectToDatabase, Data, DataItem } from '@/service/mongo';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
import { generateQA } from '@/service/events/generateQA';
|
||||
|
||||
/* 定时删除那些不活跃的内容 */
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
try {
|
||||
let { text, name } = req.body as { text: string; name: string };
|
||||
if (!text || !name) {
|
||||
let { text, dataId } = req.body as { text: string; dataId: string };
|
||||
if (!text || !dataId) {
|
||||
throw new Error('参数错误');
|
||||
}
|
||||
text = text.replace(/\n+/g, '\n');
|
||||
@@ -18,28 +17,16 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
|
||||
const userId = await authToken(authorization);
|
||||
|
||||
// 生成 data 父级
|
||||
const data = await Data.create({
|
||||
userId,
|
||||
name
|
||||
});
|
||||
|
||||
const dataItems: any[] = [];
|
||||
|
||||
// 格式化文本长度
|
||||
for (let i = 0; i <= text.length / 1000; i++) {
|
||||
const dataItem = {
|
||||
dataItems.push({
|
||||
temperature: 0,
|
||||
userId,
|
||||
dataId: data._id,
|
||||
dataId,
|
||||
text: text.slice(i * 1000, (i + 1) * 1000),
|
||||
status: 1
|
||||
};
|
||||
|
||||
[0, 0.2, 0.4, 0.6, 0.8, 1.0].forEach((temperature) => {
|
||||
dataItems.push({
|
||||
temperature,
|
||||
...dataItem
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -58,8 +45,3 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查文本是否按格式返回
|
||||
*/
|
||||
function splitText(text: string) {}
|
||||
|
||||
65
src/pages/data/components/CreateDataModal.tsx
Normal file
65
src/pages/data/components/CreateDataModal.tsx
Normal file
@@ -0,0 +1,65 @@
|
||||
import React, { useState } from 'react';
|
||||
import {
|
||||
Modal,
|
||||
ModalOverlay,
|
||||
ModalContent,
|
||||
ModalHeader,
|
||||
ModalFooter,
|
||||
ModalBody,
|
||||
ModalCloseButton,
|
||||
Button,
|
||||
Input
|
||||
} from '@chakra-ui/react';
|
||||
import { postData } from '@/api/data';
|
||||
import { useMutation } from '@tanstack/react-query';
|
||||
|
||||
/** Props accepted by the create-data-set dialog. */
type CreateDataModalProps = {
  /** Called when the dialog should be dismissed. */
  onClose: () => void;
  /** Called after the data set has been created, before the dialog closes. */
  onSuccess: () => void;
};

/**
 * Always-open modal asking for a data set name.
 *
 * Confirming sends the name through `postData`; once that resolves the parent
 * is notified via `onSuccess` and the dialog is closed via `onClose`. The
 * confirm button stays disabled while the name is empty.
 */
const CreateDataModal = (props: CreateDataModalProps) => {
  const { onClose, onSuccess } = props;
  const [datasetName, setDatasetName] = useState('');

  const creation = useMutation({
    mutationFn: (name: string) => postData(name),
    onSuccess() {
      onSuccess();
      onClose();
    }
  });

  const handleNameChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    setDatasetName(e.target.value);
  };
  const handleConfirm = () => {
    creation.mutate(datasetName);
  };

  return (
    <Modal isOpen={true} onClose={onClose}>
      <ModalOverlay />
      <ModalContent>
        <ModalHeader>创建数据集</ModalHeader>
        <ModalCloseButton />

        <ModalBody display="flex">
          <Input value={datasetName} onChange={handleNameChange} placeholder="数据集名称" />
        </ModalBody>

        <ModalFooter>
          <Button colorScheme="gray" onClick={onClose}>
            取消
          </Button>
          <Button
            ml={3}
            isDisabled={datasetName.length === 0}
            isLoading={creation.isLoading}
            onClick={handleConfirm}
          >
            确认
          </Button>
        </ModalFooter>
      </ModalContent>
    </Modal>
  );
};
|
||||
|
||||
export default CreateDataModal;
|
||||
175
src/pages/data/components/ImportDataModal.tsx
Normal file
175
src/pages/data/components/ImportDataModal.tsx
Normal file
@@ -0,0 +1,175 @@
|
||||
import React, { useState, useCallback } from 'react';
|
||||
import {
|
||||
Modal,
|
||||
ModalOverlay,
|
||||
ModalContent,
|
||||
ModalHeader,
|
||||
ModalFooter,
|
||||
ModalBody,
|
||||
ModalCloseButton,
|
||||
Button,
|
||||
Input,
|
||||
Box,
|
||||
Flex,
|
||||
Textarea
|
||||
} from '@chakra-ui/react';
|
||||
import { useTabs } from '@/hooks/useTabs';
|
||||
import { useConfirm } from '@/hooks/useConfirm';
|
||||
import { useSelectFile } from '@/hooks/useSelectFile';
|
||||
import { readTxtContent, readPdfContent, readDocContent } from '@/utils/tools';
|
||||
import { postSplitData } from '@/api/data';
|
||||
import { useMutation } from '@tanstack/react-query';
|
||||
import { useToast } from '@/hooks/useToast';
|
||||
import { useLoading } from '@/hooks/useLoading';
|
||||
|
||||
/**
 * Always-open modal that collects raw text for a data set and submits it
 * through `postSplitData`.
 *
 * The text comes from one of three tabs: pasted text, uploaded files
 * (.txt / .doc / .docx / .pdf, concatenated with newlines) or a link. The link
 * tab has no input yet, so nothing can be submitted from it.
 *
 * @param dataId  id of the data set the text is imported into
 * @param onClose called when the dialog should be dismissed, including after a
 *                successful submission
 */
const ImportDataModal = ({ dataId, onClose }: { dataId: string; onClose: () => void }) => {
  const { openConfirm, ConfirmChild } = useConfirm({
    content: '确认提交生成任务?该任务无法终止!'
  });
  const { toast } = useToast();
  const { setIsLoading, Loading } = useLoading();
  const { File, onOpen } = useSelectFile({ fileType: '.txt,.doc,.docx,.pdf', multiple: true });
  const { tabs, activeTab, setActiveTab } = useTabs({
    tabs: [
      { id: 'text', label: '文本' },
      { id: 'doc', label: '文件' },
      { id: 'url', label: '链接' }
    ]
  });

  const [textInput, setTextInput] = useState('');
  const [fileText, setFileText] = useState('');

  // Text that would be submitted for the tab currently shown. Deriving it in
  // one place keeps the submit button state and the request body in agreement.
  const pendingText = activeTab === 'text' ? textInput : activeTab === 'doc' ? fileText : '';

  const { mutate: handleClickSubmit, isLoading } = useMutation({
    mutationFn: async () => {
      // Resolving without sending anything would be reported as a success, so
      // an empty submission has to reject instead.
      if (!pendingText) {
        throw new Error('请先输入文本或选择文件');
      }
      return postSplitData(dataId, pendingText);
    },
    onSuccess() {
      toast({
        title: '任务提交成功',
        status: 'success'
      });
      onClose();
    },
    onError(err: any) {
      toast({
        title: err?.message || '提交任务异常',
        status: 'error'
      });
    }
  });

  /** Reads every selected file and stores the combined text in `fileText`. */
  const onSelectFile = useCallback(
    async (e: File[]) => {
      setIsLoading(true);
      try {
        const contents = await Promise.all(
          e.map((file) => {
            // A name without a dot yields the whole name, which matches no
            // known extension and therefore contributes no text.
            const extension = file?.name?.split('.').pop()?.toLowerCase();
            switch (extension) {
              case 'txt':
                return readTxtContent(file);
              case 'pdf':
                return readPdfContent(file);
              case 'docx':
              case 'doc':
                return readDocContent(file);
              default:
                return '';
            }
          })
        );
        setFileText(contents.join('\n'));
      } catch (error: unknown) {
        console.log(error);
        toast({
          title: typeof error === 'string' ? error : '解析文件失败',
          status: 'error'
        });
      } finally {
        // Always clear the spinner, whether parsing succeeded or failed.
        setIsLoading(false);
      }
    },
    [setIsLoading, toast]
  );

  return (
    <Modal isOpen={true} onClose={onClose}>
      <ModalOverlay />
      <ModalContent position={'relative'} maxW={['90vw', '800px']}>
        <ModalHeader>导入数据,生成QA</ModalHeader>
        <ModalCloseButton />

        <ModalBody display={'flex'}>
          {/* Tab selector */}
          <Box>
            {tabs.map((item) => (
              <Button
                key={item.id}
                display={'block'}
                variant={activeTab === item.id ? 'solid' : 'outline'}
                _notLast={{
                  mb: 3
                }}
                onClick={() => setActiveTab(item.id)}
              >
                {item.label}
              </Button>
            ))}
          </Box>

          {/* Input area of the active tab */}
          <Box flex={'1 0 0'} w={0} ml={3} minH={'200px'}>
            {activeTab === 'text' && (
              <Textarea
                h={'100%'}
                maxLength={-1}
                value={textInput}
                placeholder={'请粘贴或输入需要处理的文本'}
                onChange={(e) => setTextInput(e.target.value)}
              />
            )}
            {activeTab === 'doc' && (
              <Flex
                flexDirection={'column'}
                h={'100%'}
                alignItems={'center'}
                justifyContent={'center'}
                border={'1px solid '}
                borderColor={'blackAlpha.200'}
                borderRadius={'md'}
              >
                <Button onClick={onOpen}>选择文件</Button>
                {fileText && <Box mt={2}>一共 {fileText.length} 个字</Box>}
              </Flex>
            )}
          </Box>
        </ModalBody>

        <ModalFooter>
          <Button colorScheme={'gray'} onClick={onClose}>
            取消
          </Button>
          <Button
            ml={3}
            isLoading={isLoading}
            isDisabled={!pendingText}
            onClick={openConfirm(handleClickSubmit)}
          >
            确认
          </Button>
        </ModalFooter>
        <Loading />
      </ModalContent>

      <ConfirmChild />
      <File onSelect={onSelectFile} />
    </Modal>
  );
};
|
||||
|
||||
export default ImportDataModal;
|
||||
111
src/pages/data/list.tsx
Normal file
111
src/pages/data/list.tsx
Normal file
@@ -0,0 +1,111 @@
|
||||
import React, { useState } from 'react';
|
||||
import {
|
||||
Card,
|
||||
Box,
|
||||
Flex,
|
||||
Button,
|
||||
Table,
|
||||
Thead,
|
||||
Tbody,
|
||||
Tr,
|
||||
Th,
|
||||
Td,
|
||||
TableContainer,
|
||||
useDisclosure
|
||||
} from '@chakra-ui/react';
|
||||
import { getDataList } from '@/api/data';
|
||||
import { usePaging } from '@/hooks/usePaging';
|
||||
import type { DataListItem } from '@/types/data';
|
||||
import ScrollData from '@/components/ScrollData';
|
||||
import dayjs from 'dayjs';
|
||||
import dynamic from 'next/dynamic';
|
||||
|
||||
const CreateDataModal = dynamic(() => import('./components/CreateDataModal'));
|
||||
const ImportDataModal = dynamic(() => import('./components/ImportDataModal'));
|
||||
|
||||
/**
 * Data set management page.
 *
 * Shows a header card with a "create data set" button and a scrollable table
 * of the user's data sets loaded page by page through `usePaging` (20 rows per
 * page). Each row offers an import action that opens `ImportDataModal` for that
 * data set; creating a data set reloads the list from the first page.
 */
const DataList = () => {
  const paging = usePaging<DataListItem>({
    api: getDataList,
    pageSize: 20
  });
  const { setPageNum, pageNum, getData } = paging;
  const dataList = paging.data;

  // Id of the data set whose import dialog is open; undefined when closed.
  const [importDataId, setImportDataId] = useState<string>();

  const createModal = useDisclosure();

  const loadNextPage = () => setPageNum(pageNum + 1);
  const closeImportModal = () => setImportDataId(undefined);
  const reloadFromFirstPage = () => getData(1, true);

  return (
    <Box display={['block', 'flex']} flexDirection="column" h="100%">
      <Card px={6} py={4}>
        <Flex>
          <Box flex={1} mr={1}>
            <Box fontSize="xl" fontWeight="bold">
              对话数据管理
            </Box>
            <Box fontSize="xs" color="blackAlpha.600">
              允许你将任意文本数据拆分成 QA 的形式。你可以使用这些 QA 去微调你的对话模型。
            </Box>
          </Box>
          <Button variant="outline" onClick={createModal.onOpen}>
            创建数据集
          </Button>
        </Flex>
      </Card>
      {/* Data table */}
      <Card mt={3} flex="1 0 0" h={['auto', '0']} px={6} py={4}>
        <ScrollData h="100%" nextPage={loadNextPage}>
          <TableContainer>
            <Table>
              <Thead>
                <Tr>
                  <Th>集合名</Th>
                  <Th>创建时间</Th>
                  <Th>训练中 / 总数据</Th>
                  <Th></Th>
                </Tr>
              </Thead>
              <Tbody>
                {dataList.map((row) => (
                  <Tr key={row._id}>
                    <Td>{row.name}</Td>
                    <Td>{dayjs(row.createTime).format('YYYY/MM/DD HH:mm')}</Td>
                    <Td>
                      {row.trainingData} / {row.totalData}
                    </Td>
                    <Td>
                      <Button
                        size="sm"
                        variant="outline"
                        mr={2}
                        onClick={() => setImportDataId(row._id)}
                      >
                        导入
                      </Button>
                      <Button size="sm">导出</Button>
                    </Td>
                  </Tr>
                ))}
              </Tbody>
            </Table>
          </TableContainer>
        </ScrollData>
      </Card>

      {importDataId && <ImportDataModal dataId={importDataId} onClose={closeImportModal} />}
      {createModal.isOpen && (
        <CreateDataModal onClose={createModal.onClose} onSuccess={reloadFromFirstPage} />
      )}
    </Box>
  );
};
|
||||
|
||||
export default DataList;
|
||||
@@ -291,15 +291,7 @@ const ModelDetail = ({ modelId }: { modelId: string }) => {
|
||||
</Box>
|
||||
<Flex mt={5} alignItems={'center'}>
|
||||
<Box flex={'0 0 80px'}>删除模型:</Box>
|
||||
<Button
|
||||
colorScheme={'red'}
|
||||
size={'sm'}
|
||||
onClick={() => {
|
||||
openConfirm(() => {
|
||||
handleDelModel();
|
||||
});
|
||||
}}
|
||||
>
|
||||
<Button colorScheme={'red'} size={'sm'} onClick={openConfirm(handleDelModel)}>
|
||||
删除模型
|
||||
</Button>
|
||||
</Flex>
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
import React from 'react';
|
||||
import { Card, Box, Flex, Button } from '@chakra-ui/react';
|
||||
|
||||
/**
 * Static header card for the training-data page: a title plus two action
 * buttons that have no handlers attached. The data table itself is not
 * implemented here.
 */
const TrainDataList = () => (
  <>
    <Card px={6} py={4}>
      <Flex alignItems="center">
        <Box fontSize="xl" fontWeight="bold" flex={1}>
          训练数据管理
        </Box>
        <Button variant="outline" mr={6}>
          导入数据
        </Button>
        <Button>插入一条数据</Button>
      </Flex>
    </Card>
    {/* Data table */}
  </>
);
|
||||
|
||||
export default TrainDataList;
|
||||
Reference in New Issue
Block a user