feat: 数据集管理
This commit is contained in:
@@ -3,6 +3,7 @@ import { RequestPaging } from '../types/index';
|
||||
import { Obj2Query } from '@/utils/tools';
|
||||
import type { DataListItem } from '@/types/data';
|
||||
import type { PagingData } from '../types/index';
|
||||
import { DataItemSchema } from '@/types/mongoSchema';
|
||||
|
||||
export const getDataList = (data: RequestPaging) =>
|
||||
GET<PagingData<DataListItem>>(`/data/getDataList?${Obj2Query(data)}`);
|
||||
@@ -16,3 +17,9 @@ export const updateDataName = (dataId: string, name: string) =>
|
||||
PUT(`/data/putDataName?dataId=${dataId}&name=${name}`);
|
||||
|
||||
export const delData = (dataId: string) => DELETE(`/data/delData?dataId=${dataId}`);
|
||||
|
||||
type GetDataItemsProps = RequestPaging & {
|
||||
dataId: string;
|
||||
};
|
||||
/**
 * Requests one page of items belonging to a dataset.
 *
 * @param data - Paging options plus the `dataId` of the dataset; serialized
 *   into the query string via `Obj2Query`.
 * @returns The result of `GET` against `/data/getDataItems`, typed as a page
 *   of `DataItemSchema` entries.
 */
export const getDataItems = (data: GetDataItemsProps) => {
  const queryString = Obj2Query(data);
  return GET<PagingData<DataItemSchema>>(`/data/getDataItems?${queryString}`);
};
|
||||
|
||||
@@ -30,7 +30,7 @@ const navbarList = [
|
||||
label: '数据',
|
||||
icon: 'icon-datafull',
|
||||
link: '/data/list',
|
||||
activeLink: ['/data/list']
|
||||
activeLink: ['/data/list', '/data/detail']
|
||||
},
|
||||
{
|
||||
label: '账号',
|
||||
|
||||
50
src/pages/api/data/getDataItems.ts
Normal file
50
src/pages/api/data/getDataItems.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase, DataItem } from '@/service/mongo';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
|
||||
/**
 * Converts a raw query-string value into a positive integer.
 *
 * @param raw - Value taken from `req.query` (may be missing, a string, or an array).
 * @param fallback - Value returned when `raw` is not a positive integer.
 * @returns The parsed positive integer, or `fallback`.
 */
const toPositiveInt = (raw: unknown, fallback: number): number => {
  const parsed = Number(raw);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
};

/**
 * API route: returns one page of items (with `status: 0`) for a dataset.
 *
 * Query parameters:
 * - `dataId`   (required) id of the dataset whose items are listed.
 * - `pageNum`  (optional) 1-based page index, defaults to 1.
 * - `pageSize` (optional) items per page, defaults to 10.
 *
 * Responds through `jsonRes` with `{ pageNum, pageSize, data, total }`, or
 * with `code: 500` and the caught error when anything throws.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    const { dataId } = req.query;
    // Invalid paging values (NaN, zero, negative, fractional) fall back to the
    // defaults instead of being forwarded to `.skip()` / `.limit()`.
    const pageNum = toPositiveInt(req.query.pageNum, 1);
    const pageSize = toPositiveInt(req.query.pageSize, 10);

    // Query values can be arrays when a parameter is repeated; only a single
    // non-empty string is accepted as the dataset id.
    if (!dataId || typeof dataId !== 'string') {
      throw new Error('参数错误');
    }

    await connectToDatabase();

    const { authorization } = req.headers;

    // NOTE(review): the value resolved by authToken is discarded and the query
    // below is not scoped to the caller. If authToken resolves a user id, the
    // filter should presumably include it so users cannot read other users'
    // datasets — confirm against authToken's contract.
    await authToken(authorization);

    // Shared by the page query and the total count so they cannot drift apart.
    const filter = { dataId, status: 0 } as const;

    // The page and the total are independent, so fetch them concurrently.
    const [dataItems, total] = await Promise.all([
      DataItem.find(filter)
        .sort({ time: -1 }) // descending by creation time
        .skip((pageNum - 1) * pageSize)
        .limit(pageSize),
      DataItem.countDocuments(filter)
    ]);

    jsonRes(res, {
      data: {
        pageNum,
        pageSize,
        data: dataItems,
        total
      }
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}
|
||||
@@ -24,7 +24,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
|
||||
await connectToDatabase();
|
||||
|
||||
// 根据 id 获取用户账单
|
||||
const datalist = await Data.aggregate<DataListItem>([
|
||||
{
|
||||
$match: {
|
||||
|
||||
@@ -22,18 +22,20 @@ import { useMutation } from '@tanstack/react-query';
|
||||
import { useToast } from '@/hooks/useToast';
|
||||
import { useLoading } from '@/hooks/useLoading';
|
||||
|
||||
const fileExtension = '.txt,.doc,.docx,.pdf,.md';
|
||||
|
||||
const ImportDataModal = ({ dataId, onClose }: { dataId: string; onClose: () => void }) => {
|
||||
const { openConfirm, ConfirmChild } = useConfirm({
|
||||
content: '确认提交生成任务?该任务无法终止!'
|
||||
});
|
||||
const { toast } = useToast();
|
||||
const { setIsLoading, Loading } = useLoading();
|
||||
const { File, onOpen } = useSelectFile({ fileType: '.txt,.doc,.docx,.pdf', multiple: true });
|
||||
const { File, onOpen } = useSelectFile({ fileType: fileExtension, multiple: true });
|
||||
const { tabs, activeTab, setActiveTab } = useTabs({
|
||||
tabs: [
|
||||
{ id: 'text', label: '文本' },
|
||||
{ id: 'doc', label: '文件' },
|
||||
{ id: 'url', label: '链接' }
|
||||
{ id: 'doc', label: '文件' }
|
||||
// { id: 'url', label: '链接' }
|
||||
]
|
||||
});
|
||||
|
||||
@@ -76,14 +78,18 @@ const ImportDataModal = ({ dataId, onClose }: { dataId: string; onClose: () => v
|
||||
e.map((file) => {
|
||||
// @ts-ignore
|
||||
const extension = file?.name?.split('.').pop().toLowerCase();
|
||||
if (extension === 'txt') {
|
||||
return readTxtContent(file);
|
||||
} else if (extension === 'pdf') {
|
||||
return readPdfContent(file);
|
||||
} else if (extension === 'docx' || extension === 'doc') {
|
||||
return readDocContent(file);
|
||||
switch (extension) {
|
||||
case 'txt':
|
||||
case 'md':
|
||||
return readTxtContent(file);
|
||||
case 'pdf':
|
||||
return readPdfContent(file);
|
||||
case 'doc':
|
||||
case 'docx':
|
||||
return readDocContent(file);
|
||||
default:
|
||||
return '';
|
||||
}
|
||||
return '';
|
||||
})
|
||||
)
|
||||
).join('\n');
|
||||
@@ -137,6 +143,7 @@ const ImportDataModal = ({ dataId, onClose }: { dataId: string; onClose: () => v
|
||||
{activeTab === 'doc' && (
|
||||
<Flex
|
||||
flexDirection={'column'}
|
||||
p={2}
|
||||
h={'100%'}
|
||||
alignItems={'center'}
|
||||
justifyContent={'center'}
|
||||
@@ -145,7 +152,23 @@ const ImportDataModal = ({ dataId, onClose }: { dataId: string; onClose: () => v
|
||||
borderRadius={'md'}
|
||||
>
|
||||
<Button onClick={onOpen}>选择文件</Button>
|
||||
{fileText && <Box mt={2}>一共 {fileText.length} 个字</Box>}
|
||||
<Box mt={2}>支持 {fileExtension} 文件</Box>
|
||||
{fileText && (
|
||||
<>
|
||||
<Box mt={2}>一共 {fileText.length} 个字</Box>
|
||||
<Box
|
||||
maxH={'300px'}
|
||||
w={'100%'}
|
||||
overflow={'auto'}
|
||||
p={2}
|
||||
backgroundColor={'blackAlpha.50'}
|
||||
whiteSpace={'pre'}
|
||||
fontSize={'xs'}
|
||||
>
|
||||
{fileText}
|
||||
</Box>
|
||||
</>
|
||||
)}
|
||||
</Flex>
|
||||
)}
|
||||
</Box>
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
import React from 'react';
|
||||
|
||||
const DataDetail = ({ dataId }: { dataId: string }) => {
|
||||
return <div>DataDetail</div>;
|
||||
};
|
||||
|
||||
export default DataDetail;
|
||||
|
||||
export async function getServerSideProps(context: any) {
|
||||
const dataId = context.query?.dataId || '';
|
||||
|
||||
return {
|
||||
props: { dataId }
|
||||
};
|
||||
}
|
||||
61
src/pages/data/detail.tsx
Normal file
61
src/pages/data/detail.tsx
Normal file
@@ -0,0 +1,61 @@
|
||||
import React from 'react';
|
||||
import { Box, Card, Table, Thead, Tbody, Tr, Th, Td, TableContainer } from '@chakra-ui/react';
|
||||
import ScrollData from '@/components/ScrollData';
|
||||
import { getDataItems } from '@/api/data';
|
||||
import { usePaging } from '@/hooks/usePaging';
|
||||
import type { DataItemSchema } from '@/types/mongoSchema';
|
||||
|
||||
/**
 * Dataset detail page: shows the Q/A pairs stored in each item's `result`
 * list for the dataset identified by `dataId`, under a heading built from
 * `dataName`.
 *
 * Items come from `usePaging` backed by `getDataItems` (page size 10); the
 * paging callbacks and flags are handed to `ScrollData`.
 */
const DataDetail = ({ dataName, dataId }: { dataName: string; dataId: string }) => {
  const paging = usePaging<DataItemSchema>({
    api: getDataItems,
    pageSize: 10,
    params: { dataId }
  });
  const { nextPage, isLoadAll, requesting, data: dataItems } = paging;

  // One wrapper per data item, each containing that item's Q/A pairs.
  const qaGroups = dataItems.map((item) => {
    const pairs = item.result.map((result, i) => (
      <Box key={i} mb={3}>
        <Box fontWeight={'bold'}>Q: {result.q}</Box>
        <Box>A: {result.a}</Box>
      </Box>
    ));

    return <Box key={item._id}>{pairs}</Box>;
  });

  return (
    <Card py={4} h={'100%'} display={'flex'} flexDirection={'column'}>
      <Box px={6} fontSize={'xl'} fontWeight={'bold'}>
        {dataName} 拆分结果
      </Box>
      <ScrollData
        flex={'1 0 0'}
        h={0}
        px={6}
        mt={3}
        isLoadAll={isLoadAll}
        requesting={requesting}
        nextPage={nextPage}
        fontSize={'xs'}
      >
        {qaGroups}
      </ScrollData>
    </Card>
  );
};
|
||||
|
||||
export default DataDetail;
|
||||
|
||||
/**
 * Maps the `dataName` and `dataId` query parameters onto page props.
 *
 * A repeated parameter (e.g. `?dataId=a&dataId=b`) is parsed as an array; the
 * first entry is used so the props are always plain strings. Missing or empty
 * values become `''`.
 *
 * @param context - Request context; only its `query` map is read.
 * @returns `{ props: { dataName, dataId } }` with both values as strings.
 */
export async function getServerSideProps(context: {
  query?: Record<string, string | string[] | undefined>;
}) {
  // Collapse "string | string[] | undefined" to a single string.
  const firstValue = (value: string | string[] | undefined): string =>
    (Array.isArray(value) ? value[0] : value) || '';

  return {
    props: {
      dataName: firstValue(context.query?.dataName),
      dataId: firstValue(context.query?.dataId)
    }
  };
}
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { useState } from 'react';
|
||||
import React, { useState, useCallback } from 'react';
|
||||
import {
|
||||
Card,
|
||||
Box,
|
||||
@@ -12,21 +12,30 @@ import {
|
||||
Td,
|
||||
TableContainer,
|
||||
useDisclosure,
|
||||
Input
|
||||
Input,
|
||||
Menu,
|
||||
MenuButton,
|
||||
MenuList,
|
||||
MenuItem
|
||||
} from '@chakra-ui/react';
|
||||
import { getDataList, updateDataName, delData } from '@/api/data';
|
||||
import { getDataList, updateDataName, delData, getDataItems } from '@/api/data';
|
||||
import { usePaging } from '@/hooks/usePaging';
|
||||
import type { DataListItem } from '@/types/data';
|
||||
import ScrollData from '@/components/ScrollData';
|
||||
import dayjs from 'dayjs';
|
||||
import dynamic from 'next/dynamic';
|
||||
import { useRouter } from 'next/router';
|
||||
import { useConfirm } from '@/hooks/useConfirm';
|
||||
import { useRequest } from '@/hooks/useRequest';
|
||||
import { DataItemSchema } from '@/types/mongoSchema';
|
||||
|
||||
const CreateDataModal = dynamic(() => import('./components/CreateDataModal'));
|
||||
const ImportDataModal = dynamic(() => import('./components/ImportDataModal'));
|
||||
|
||||
export type ExportDataType = 'jsonl';
|
||||
|
||||
const DataList = () => {
|
||||
const router = useRouter();
|
||||
const {
|
||||
nextPage,
|
||||
isLoadAll,
|
||||
@@ -58,13 +67,51 @@ const DataList = () => {
|
||||
}
|
||||
});
|
||||
|
||||
const { mutate: handleExportData, isLoading: isExporting } = useRequest({
|
||||
mutationFn: async ({ data, type }: { data: DataListItem; type: ExportDataType }) => ({
|
||||
type,
|
||||
data: await getDataItems({ dataId: data._id, pageNum: 1, pageSize: data.totalData }).then(
|
||||
(res) => res.data
|
||||
)
|
||||
}),
|
||||
successToast: '导出数据集成功',
|
||||
errorToast: '导出数据集异常',
|
||||
onSuccess(res: { type: ExportDataType; data: DataItemSchema[] }) {
|
||||
// 合并数据
|
||||
const data = res.data.map((item) => item.result).flat();
|
||||
let text = '';
|
||||
// 生成 jsonl
|
||||
data.forEach((item) => {
|
||||
const result = JSON.stringify({
|
||||
prompt: `${item.q.toLocaleLowerCase()}</s>`,
|
||||
completion: ` ${item.a}</s>`
|
||||
});
|
||||
text += `${result}\n`;
|
||||
});
|
||||
// 去掉最后一个 \n
|
||||
text = text.substring(0, text.length - 1);
|
||||
// 导出为文件
|
||||
const blob = new Blob([text], { type: 'application/json;charset=utf-8' });
|
||||
|
||||
// 创建下载链接
|
||||
const downloadLink = document.createElement('a');
|
||||
downloadLink.href = window.URL.createObjectURL(blob);
|
||||
downloadLink.download = 'file.jsonl';
|
||||
|
||||
// 添加链接到页面并触发下载
|
||||
document.body.appendChild(downloadLink);
|
||||
downloadLink.click();
|
||||
document.body.removeChild(downloadLink);
|
||||
}
|
||||
});
|
||||
|
||||
return (
|
||||
<Box display={['block', 'flex']} flexDirection={'column'} h={'100%'}>
|
||||
<Card px={6} py={4}>
|
||||
<Flex>
|
||||
<Box flex={1} mr={1}>
|
||||
<Box fontSize={'xl'} fontWeight={'bold'}>
|
||||
对话数据管理
|
||||
训练数据管理
|
||||
</Box>
|
||||
<Box fontSize={'xs'} color={'blackAlpha.600'}>
|
||||
允许你将任意文本数据拆分成 QA 的形式。你可以使用这些 QA 去微调你的对话模型。
|
||||
@@ -99,6 +146,7 @@ const DataList = () => {
|
||||
<Tr key={item._id}>
|
||||
<Td>
|
||||
<Input
|
||||
minW={'150px'}
|
||||
placeholder="请输入数据集名称"
|
||||
defaultValue={item.name}
|
||||
size={'sm'}
|
||||
@@ -118,7 +166,9 @@ const DataList = () => {
|
||||
variant={'outline'}
|
||||
colorScheme={'gray'}
|
||||
mr={2}
|
||||
onClick={() => setImportDataId(item._id)}
|
||||
onClick={() =>
|
||||
router.push(`/data/detail?dataId=${item._id}&dataName=${item.name}`)
|
||||
}
|
||||
>
|
||||
详细
|
||||
</Button>
|
||||
@@ -130,9 +180,17 @@ const DataList = () => {
|
||||
>
|
||||
导入
|
||||
</Button>
|
||||
<Button mr={2} size={'sm'}>
|
||||
导出
|
||||
</Button>
|
||||
<Menu>
|
||||
<MenuButton as={Button} mr={2} size={'sm'}>
|
||||
导出
|
||||
</MenuButton>
|
||||
<MenuList>
|
||||
<MenuItem onClick={() => handleExportData({ data: item, type: 'jsonl' })}>
|
||||
jsonl
|
||||
</MenuItem>
|
||||
</MenuList>
|
||||
</Menu>
|
||||
|
||||
<Button
|
||||
size={'sm'}
|
||||
colorScheme={'red'}
|
||||
|
||||
@@ -264,10 +264,10 @@ const ModelDetail = ({ modelId }: { modelId: string }) => {
|
||||
onClick={() => {
|
||||
SelectFileDom.current?.click();
|
||||
}}
|
||||
title={!canTrain ? '' : '模型不支持微调'}
|
||||
title={!canTrain ? '模型不支持微调' : ''}
|
||||
isDisabled={!canTrain}
|
||||
>
|
||||
上传微调数据集
|
||||
上传数据集
|
||||
</Button>
|
||||
<Flex
|
||||
as={'a'}
|
||||
@@ -283,16 +283,30 @@ const ModelDetail = ({ modelId }: { modelId: string }) => {
|
||||
</Flex>
|
||||
</Flex>
|
||||
{/* 提示 */}
|
||||
<Box mt={3} py={3} color={'blackAlpha.500'}>
|
||||
<Box mt={3} py={3} color={'blackAlpha.600'}>
|
||||
<Box as={'li'} lineHeight={1.9}>
|
||||
可以使用
|
||||
<Box
|
||||
as={'span'}
|
||||
fontWeight={'bold'}
|
||||
textDecoration={'underline'}
|
||||
color={'blackAlpha.800'}
|
||||
mx={2}
|
||||
cursor={'pointer'}
|
||||
onClick={() => router.push('/data/list')}
|
||||
>
|
||||
数据拆分
|
||||
</Box>
|
||||
功能,从任意文本中提取数据集。
|
||||
</Box>
|
||||
<Box as={'li'} lineHeight={1.9}>
|
||||
每行包括一个 prompt 和一个 completion
|
||||
</Box>
|
||||
<Box as={'li'} lineHeight={1.9}>
|
||||
prompt 必须以 \n\n###\n\n 结尾,且尽量保障每个 prompt
|
||||
内容不都是同一个标点结尾,可以加一个空格打断相同性,
|
||||
prompt 必须以 {'</s>'} 结尾
|
||||
</Box>
|
||||
<Box as={'li'} lineHeight={1.9}>
|
||||
completion 开头必须有一个空格,末尾必须以 ### 结尾,同样的不要都是同一个标点结尾。
|
||||
completion 开头必须有一个空格,必须以 {'</s>'} 结尾
|
||||
</Box>
|
||||
</Box>
|
||||
<Flex mt={5} alignItems={'center'}>
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { Schema, model, models } from 'mongoose';
|
||||
import { Schema, model, models, Model } from 'mongoose';
|
||||
import { AuthCodeSchema as AuthCodeType } from '@/types/mongoSchema';
|
||||
|
||||
const AuthCodeSchema = new Schema({
|
||||
email: {
|
||||
@@ -21,4 +22,5 @@ const AuthCodeSchema = new Schema({
|
||||
}
|
||||
});
|
||||
|
||||
export const AuthCode = models['auth_code'] || model('auth_code', AuthCodeSchema);
|
||||
export const AuthCode: Model<AuthCodeType> =
|
||||
models['auth_code'] || model('auth_code', AuthCodeSchema);
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { Schema, model, models } from 'mongoose';
|
||||
import { Schema, model, models, Model } from 'mongoose';
|
||||
import { modelList } from '@/constants/model';
|
||||
import { BillSchema as BillType } from '@/types/mongoSchema';
|
||||
|
||||
const BillSchema = new Schema({
|
||||
userId: {
|
||||
@@ -42,4 +43,4 @@ const BillSchema = new Schema({
|
||||
}
|
||||
});
|
||||
|
||||
export const Bill = models['bill'] || model('bill', BillSchema);
|
||||
export const Bill: Model<BillType> = models['bill'] || model('bill', BillSchema);
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { Schema, model, models } from 'mongoose';
|
||||
import { Schema, model, models, Model } from 'mongoose';
|
||||
import { ChatSchema as ChatType } from '@/types/mongoSchema';
|
||||
|
||||
const ChatSchema = new Schema({
|
||||
userId: {
|
||||
@@ -47,4 +48,4 @@ const ChatSchema = new Schema({
|
||||
}
|
||||
});
|
||||
|
||||
export const Chat = models['chat'] || model('chat', ChatSchema);
|
||||
export const Chat: Model<ChatType> = models['chat'] || model('chat', ChatSchema);
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { Schema, model, models } from 'mongoose';
|
||||
import { Schema, model, models, Model } from 'mongoose';
|
||||
import { DataItemSchema as Datatype } from '@/types/mongoSchema';
|
||||
|
||||
const DataSchema = new Schema({
|
||||
userId: {
|
||||
@@ -20,4 +21,4 @@ const DataSchema = new Schema({
|
||||
}
|
||||
});
|
||||
|
||||
export const Data = models['data'] || model('data', DataSchema);
|
||||
export const Data: Model<Datatype> = models['data'] || model('data', DataSchema);
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { Schema, model, models } from 'mongoose';
|
||||
import type { DataItemSchema as DataItemType } from '@/types/mongoSchema';
|
||||
import { Schema, model, models, Model } from 'mongoose';
|
||||
|
||||
const DataItemSchema = new Schema({
|
||||
userId: {
|
||||
@@ -45,4 +46,5 @@ const DataItemSchema = new Schema({
|
||||
}
|
||||
});
|
||||
|
||||
export const DataItem = models['dataItem'] || model('dataItem', DataItemSchema);
|
||||
export const DataItem: Model<DataItemType> =
|
||||
models['dataItem'] || model('dataItem', DataItemSchema);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { Schema, model, models } from 'mongoose';
|
||||
|
||||
import { Schema, model, models, Model as MongoModel } from 'mongoose';
|
||||
import { ModelSchema as ModelType } from '@/types/mongoSchema';
|
||||
const ModelSchema = new Schema({
|
||||
userId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
@@ -95,4 +95,4 @@ const ModelSchema = new Schema({
|
||||
}
|
||||
});
|
||||
|
||||
export const Model = models['model'] || model('model', ModelSchema);
|
||||
export const Model: MongoModel<ModelType> = models['model'] || model('model', ModelSchema);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { Schema, model, models } from 'mongoose';
|
||||
|
||||
import { Schema, model, models, Model } from 'mongoose';
|
||||
import { PaySchema as PayType } from '@/types/mongoSchema';
|
||||
const PaySchema = new Schema({
|
||||
userId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
@@ -26,4 +26,4 @@ const PaySchema = new Schema({
|
||||
}
|
||||
});
|
||||
|
||||
export const Pay = models['pay'] || model('pay', PaySchema);
|
||||
export const Pay: Model<PayType> = models['pay'] || model('pay', PaySchema);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { Schema, model, models } from 'mongoose';
|
||||
|
||||
import { Schema, model, models, Model } from 'mongoose';
|
||||
import { TrainingSchema as TrainingType } from '@/types/mongoSchema';
|
||||
const TrainingSChema = new Schema({
|
||||
serviceName: {
|
||||
// 模型厂商名
|
||||
@@ -25,4 +25,5 @@ const TrainingSChema = new Schema({
|
||||
}
|
||||
});
|
||||
|
||||
export const Training = models['training'] || model('training', TrainingSChema);
|
||||
export const Training: Model<TrainingType> =
|
||||
models['training'] || model('training', TrainingSChema);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { Schema, model, models } from 'mongoose';
|
||||
import { Schema, model, models, Model } from 'mongoose';
|
||||
import { hashPassword } from '@/service/utils/tools';
|
||||
import { PRICE_SCALE } from '@/constants/common';
|
||||
|
||||
import { UserModelSchema } from '@/types/mongoSchema';
|
||||
const UserSchema = new Schema({
|
||||
email: {
|
||||
type: String,
|
||||
@@ -38,4 +38,4 @@ const UserSchema = new Schema({
|
||||
}
|
||||
});
|
||||
|
||||
export const User = models['user'] || model('user', UserSchema);
|
||||
export const User: Model<UserModelSchema> = models['user'] || model('user', UserSchema);
|
||||
|
||||
2
src/types/index.d.ts
vendored
2
src/types/index.d.ts
vendored
@@ -16,4 +16,4 @@ export type PagingData<T> = {
|
||||
total;
|
||||
};
|
||||
|
||||
export type RequestPaging = { pageNum: number; pageSize: number };
|
||||
/**
 * Paging parameters sent with list requests. Extra keys are permitted so
 * endpoint-specific filters can travel alongside `pageNum` / `pageSize`.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- extra request params are intentionally untyped
export type RequestPaging = { pageNum: number; pageSize: number; [key: string]: any };
|
||||
|
||||
Reference in New Issue
Block a user