feat: 摘要拆分

This commit is contained in:
archer
2023-03-26 22:09:59 +08:00
parent 888642f154
commit 3e4487ad9a
20 changed files with 397 additions and 83 deletions

View File

@@ -2,11 +2,12 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, Data } from '@/service/mongo';
import { authToken } from '@/service/utils/tools';
import type { DataType } from '@/types/data';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
let { name } = req.query as { name: string };
if (!name) {
let { name, type } = req.body as { name: string; type: DataType };
if (!name || !type) {
throw new Error('参数错误');
}
await connectToDatabase();
@@ -18,7 +19,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
// 生成 data 集合
const data = await Data.create({
userId,
name
name,
type
});
jsonRes(res, {

View File

@@ -1,9 +1,11 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, Data, DataItem } from '@/service/mongo';
import { connectToDatabase, DataItem, Data } from '@/service/mongo';
import { authToken } from '@/service/utils/tools';
import { generateQA } from '@/service/events/generateQA';
import { generateAbstract } from '@/service/events/generateAbstract';
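/* Split the submitted text into 1000-character chunks, store them as data items tagged with the dataset's type, then trigger QA and abstract generation */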
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
let { text, dataId } = req.body as { text: string; dataId: string };
@@ -17,14 +19,20 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
const userId = await authToken(authorization);
const DataRecord = await Data.findById(dataId);
if (!DataRecord) {
throw new Error('找不到数据集');
}
const dataItems: any[] = [];
// 格式化文本长度
// 每 1000 字符一组
for (let i = 0; i <= text.length / 1000; i++) {
dataItems.push({
temperature: 0,
userId,
dataId,
type: DataRecord.type,
text: text.slice(i * 1000, (i + 1) * 1000),
status: 1
});
@@ -33,10 +41,15 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
// 批量插入数据
await DataItem.insertMany(dataItems);
generateQA();
try {
generateQA();
generateAbstract();
} catch (error) {
error;
}
jsonRes(res, {
data: dataItems.length
data: ''
});
} catch (err) {
jsonRes(res, {

View File

@@ -13,14 +13,15 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
// await DataItem.updateMany(
// {},
// {
// times: 2
// type: 'QA'
// // times: 2
// }
// );
await Data.updateMany(
{},
{
isDeleted: false
type: 'QA'
}
);

View File

@@ -8,10 +8,21 @@ import {
ModalBody,
ModalCloseButton,
Button,
Input
Input,
Select,
FormControl,
FormErrorMessage
} from '@chakra-ui/react';
import { postData } from '@/api/data';
import { useMutation } from '@tanstack/react-query';
import { useForm, SubmitHandler } from 'react-hook-form';
import { DataType } from '@/types/data';
import { DataTypeTextMap } from '@/constants/data';
/**
 * Values of the create-dataset form; the submitted object is passed as-is to `postData`.
 */
export interface CreateDataProps {
/** Dataset name; the form registers this field as required. */
name: string;
/** Dataset type chosen in the form's select, whose options are the keys of `DataTypeTextMap`. */
type: DataType;
}
const CreateDataModal = ({
onClose,
@@ -21,9 +32,20 @@ const CreateDataModal = ({
onSuccess: () => void;
}) => {
const [inputVal, setInputVal] = useState('');
const {
getValues,
register,
handleSubmit,
formState: { errors }
} = useForm<CreateDataProps>({
defaultValues: {
name: '',
type: 'abstract'
}
});
const { isLoading, mutate } = useMutation({
mutationFn: (name: string) => postData(name),
mutationFn: (e: CreateDataProps) => postData(e),
onSuccess() {
onSuccess();
onClose();
@@ -37,23 +59,33 @@ const CreateDataModal = ({
<ModalHeader></ModalHeader>
<ModalCloseButton />
<ModalBody display={'flex'}>
<Input
value={inputVal}
onChange={(e) => setInputVal(e.target.value)}
placeholder={'数据集名称'}
></Input>
<ModalBody>
<FormControl mb={8} isInvalid={!!errors.name}>
<Input
placeholder="数据集名称"
{...register('name', {
required: '数据集名称不能为空'
})}
/>
<FormErrorMessage position={'absolute'} fontSize="xs">
{!!errors.name && errors.name.message}
</FormErrorMessage>
</FormControl>
<FormControl>
<Select placeholder="数据集类型" {...register('type', {})}>
{Object.entries(DataTypeTextMap).map(([key, value]) => (
<option key={key} value={key}>
{value}
</option>
))}
</Select>
</FormControl>
</ModalBody>
<ModalFooter>
<Button colorScheme={'gray'} onClick={onClose}>
</Button>
<Button
ml={3}
isDisabled={inputVal === ''}
isLoading={isLoading}
onClick={() => mutate(inputVal)}
>
<Button ml={3} isLoading={isLoading} onClick={handleSubmit(mutate as any)}>
</Button>
</ModalFooter>

View File

@@ -22,6 +22,7 @@ import { useToast } from '@/hooks/useToast';
import { useLoading } from '@/hooks/useLoading';
import { formatPrice } from '@/utils/user';
import { modelList, ChatModelNameEnum } from '@/constants/model';
import { encode, decode } from 'gpt-token-utils';
const fileExtension = '.txt,.doc,.docx,.pdf,.md';
@@ -106,6 +107,7 @@ const ImportDataModal = ({
.join('\n')
.replace(/\n+/g, '\n');
setFileText(fileTexts);
console.log(encode(fileTexts));
} catch (error: any) {
console.log(error);
toast({
@@ -161,7 +163,9 @@ const ImportDataModal = ({
placeholder={'请粘贴或输入需要处理的文本'}
onChange={(e) => setTextInput(e.target.value)}
/>
<Box mt={2}> {textInput.length} </Box>
<Box mt={2}>
{textInput.length} {encode(textInput).length} tokens
</Box>
</>
)}
{activeTab === 'doc' && (
@@ -174,12 +178,15 @@ const ImportDataModal = ({
border={'1px solid '}
borderColor={'blackAlpha.200'}
borderRadius={'md'}
fontSize={'sm'}
>
<Button onClick={onOpen}></Button>
<Box mt={2}> {fileExtension} </Box>
{fileText && (
<>
<Box mt={2}> {fileText.length} </Box>
<Box mt={2}>
{fileText.length} {encode(fileText).length} tokens
</Box>
<Box
maxH={'300px'}
w={'100%'}

View File

@@ -22,7 +22,7 @@ const DataDetail = ({ dataName, dataId }: { dataName: string; dataId: string })
return (
<Card py={4} h={'100%'} display={'flex'} flexDirection={'column'}>
<Box px={6} fontSize={'xl'} fontWeight={'bold'}>
{dataName}
{dataName}
</Box>
<ScrollData
flex={'1 0 0'}
@@ -38,8 +38,13 @@ const DataDetail = ({ dataName, dataId }: { dataName: string; dataId: string })
<Box key={item._id}>
{item.result.map((result, i) => (
<Box key={i} mb={3}>
<Box fontWeight={'bold'}>Q: {result.q}</Box>
<Box>A: {result.a}</Box>
{item.type === 'QA' && (
<>
<Box fontWeight={'bold'}>Q: {result.q}</Box>
<Box>A: {result.a}</Box>
</>
)}
{item.type === 'abstract' && <Box fontSize={'sm'}>{result.abstract}</Box>}
</Box>
))}
</Box>

View File

@@ -28,13 +28,14 @@ import { useRouter } from 'next/router';
import { useConfirm } from '@/hooks/useConfirm';
import { useRequest } from '@/hooks/useRequest';
import { DataItemSchema } from '@/types/mongoSchema';
import { DataTypeTextMap } from '@/constants/data';
import { customAlphabet } from 'nanoid';
const nanoid = customAlphabet('.,', 1);
const CreateDataModal = dynamic(() => import('./components/CreateDataModal'));
const ImportDataModal = dynamic(() => import('./components/ImportDataModal'));
export type ExportDataType = 'jsonl';
export type ExportDataType = 'jsonl' | 'txt';
const DataList = () => {
const router = useRouter();
@@ -84,21 +85,26 @@ const DataList = () => {
let text = '';
// Build the export text: one JSON line per Q/A pair for jsonl, or one abstract per line for txt
data.forEach((item) => {
const result = JSON.stringify({
prompt: `${item.q.toLocaleLowerCase()}${nanoid()}</s>`,
completion: ` ${item.a}###`
});
text += `${result}\n`;
if (res.type === 'jsonl' && item.q && item.a) {
const result = JSON.stringify({
prompt: `${item.q.toLocaleLowerCase()}${nanoid()}</s>`,
completion: ` ${item.a}###`
});
text += `${result}\n`;
} else if (res.type === 'txt' && item.abstract) {
text += `${item.abstract}\n`;
}
});
// 去掉最后一个 \n
text = text.substring(0, text.length - 1);
// 导出为文件
const blob = new Blob([text], { type: 'application/json;charset=utf-8' });
// 创建下载链接
const downloadLink = document.createElement('a');
downloadLink.href = window.URL.createObjectURL(blob);
downloadLink.download = 'file.jsonl';
downloadLink.download = `data.${res.type}`;
// 添加链接到页面并触发下载
document.body.appendChild(downloadLink);
@@ -138,6 +144,7 @@ const DataList = () => {
<Thead>
<Tr>
<Th></Th>
<Th></Th>
<Th></Th>
<Th> / </Th>
<Th></Th>
@@ -158,6 +165,7 @@ const DataList = () => {
}}
/>
</Td>
<Td>{DataTypeTextMap[item.type || 'QA']}</Td>
<Td>{dayjs(item.createTime).format('YYYY/MM/DD HH:mm')}</Td>
<Td>
{item.trainingData} / {item.totalData}
@@ -187,9 +195,18 @@ const DataList = () => {
</MenuButton>
<MenuList>
<MenuItem onClick={() => handleExportData({ data: item, type: 'jsonl' })}>
jsonl
</MenuItem>
{item.type === 'QA' && (
<MenuItem
onClick={() => handleExportData({ data: item, type: 'jsonl' })}
>
jsonl
</MenuItem>
)}
{item.type === 'abstract' && (
<MenuItem onClick={() => handleExportData({ data: item, type: 'txt' })}>
txt
</MenuItem>
)}
</MenuList>
</Menu>

View File

@@ -97,7 +97,7 @@ const ModelEditForm = ({ formHooks }: { formHooks: UseFormReturn<ModelSchema> })
<Box mb={1}></Box>
<Textarea
rows={6}
maxLength={500}
maxLength={-1}
{...register('systemPrompt')}
placeholder={
'模型默认的 prompt 词,通过调整该内容,可以生成一个限定范围的模型。\n\n注意改功能会影响对话的整体朝向'