perf: code

This commit is contained in:
archer
2023-05-27 15:18:10 +08:00
parent 4f0bd677f2
commit a287ace126
23 changed files with 82 additions and 118 deletions

View File

@@ -3,7 +3,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { authUser } from '@/service/utils/auth';
import { connectToDatabase, TrainingData } from '@/service/mongo';
import { TrainingTypeEnum } from '@/constants/plugin';
import { TrainingModeEnum } from '@/constants/plugin';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
@@ -23,8 +23,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
jsonRes(res, {
data: {
qaListLen: result.find((item) => item._id === TrainingTypeEnum.qa)?.count || 0,
vectorListLen: result.find((item) => item._id === TrainingTypeEnum.index)?.count || 0
qaListLen: result.find((item) => item._id === TrainingModeEnum.qa)?.count || 0,
vectorListLen: result.find((item) => item._id === TrainingModeEnum.index)?.count || 0
}
});
} catch (error) {

View File

@@ -10,7 +10,6 @@ import { authModel } from '@/service/utils/auth';
import { ChatModelMap } from '@/constants/model';
import { ChatRoleEnum } from '@/constants/chat';
import { openaiEmbedding } from '../plugin/openaiEmbedding';
import { ModelDataStatusEnum } from '@/constants/model';
import { modelToolMap } from '@/utils/plugin';
/**
 * Row shape used when selecting quote candidates from the `modelData` table
 * (passed as the generic argument to `PgClient.select` in this module).
 *
 * NOTE(review): `q` / `a` are presumably the question and answer text, and
 * `isEdit` is not among the columns requested by the visible select
 * (`id`, `q`, `a`) — confirm where it gets populated.
 */
export type QuoteItemType = {
  id: string;
  q: string;
  a: string;
  isEdit: boolean;
};
@@ -102,8 +101,6 @@ export async function appKbSearch({
PgClient.select<QuoteItemType>('modelData', {
fields: ['id', 'q', 'a'],
where: [
['status', ModelDataStatusEnum.ready],
'AND',
`kb_id IN (${model.chat.relatedKbs.map((item) => `'${item}'`).join(',')})`,
'AND',
`vector <=> '[${promptVector}]' < ${similarity}`

View File

@@ -5,13 +5,13 @@ import { connectToDatabase, TrainingData } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { authKb } from '@/service/utils/auth';
import { withNextCors } from '@/service/utils/tools';
import { TrainingTypeEnum } from '@/constants/plugin';
import { TrainingModeEnum } from '@/constants/plugin';
import { startQueue } from '@/service/utils/tools';
export type Props = {
kbId: string;
data: { a: KbDataItemType['a']; q: KbDataItemType['q'] }[];
mode: `${TrainingTypeEnum}`;
mode: `${TrainingModeEnum}`;
prompt?: string;
};
@@ -60,6 +60,39 @@ export async function pushDataToKb({
return {};
}
// 去重
// 过滤重复的 qa 内容
// const searchRes = await Promise.allSettled(
// dataItems.map(async ({ q, a = '' }) => {
// if (!q) {
// return Promise.reject('q为空');
// }
// q = q.replace(/\\n/g, '\n');
// a = a.replace(/\\n/g, '\n');
// // Exactly the same data, not push
// try {
// const count = await PgClient.count('modelData', {
// where: [['user_id', userId], 'AND', ['kb_id', kbId], 'AND', ['q', q], 'AND', ['a', a]]
// });
// if (count > 0) {
// return Promise.reject('已经存在');
// }
// } catch (error) {
// error;
// }
// return Promise.resolve({
// q,
// a
// });
// })
// );
// const filterData = searchRes
// .filter((item) => item.status === 'fulfilled')
// .map<{ q: string; a: string }>((item: any) => item.value);
// 插入记录
await TrainingData.insertMany(
data.map((item) => ({

View File

@@ -1,8 +1,6 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { authUser } from '@/service/utils/auth';
import { ModelDataStatusEnum } from '@/constants/model';
import { generateVector } from '@/service/events/generateVector';
import { PgClient } from '@/service/pg';
import { withNextCors } from '@/service/utils/tools';
import { openaiEmbedding } from '../plugin/openaiEmbedding';

View File

@@ -22,7 +22,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const where: any = [['user_id', userId], 'AND', ['id', dataId]];
const searchRes = await PgClient.select<PgKBDataItemType>('modelData', {
fields: ['id', 'q', 'a', 'status'],
fields: ['id', 'q', 'a'],
where,
limit: 1
});

View File

@@ -35,7 +35,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
];
const searchRes = await PgClient.select<PgKBDataItemType>('modelData', {
fields: ['id', 'q', 'a', 'status'],
fields: ['id', 'q', 'a'],
where,
order: [{ field: 'id', mode: 'DESC' }],
limit: pageSize,

View File

@@ -2,10 +2,9 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, TrainingData } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { generateQA } from '@/service/events/generateQA';
import { generateVector } from '@/service/events/generateVector';
import { TrainingTypeEnum } from '@/constants/plugin';
import { TrainingModeEnum } from '@/constants/plugin';
import { Types } from 'mongoose';
import { startQueue } from '@/service/utils/tools';
/* 拆分数据成QA */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
@@ -36,23 +35,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
jsonRes(res, {
data: {
qaListLen: result.find((item) => item._id === TrainingTypeEnum.qa)?.count || 0,
vectorListLen: result.find((item) => item._id === TrainingTypeEnum.index)?.count || 0
qaListLen: result.find((item) => item._id === TrainingModeEnum.qa)?.count || 0,
vectorListLen: result.find((item) => item._id === TrainingModeEnum.index)?.count || 0
}
});
if (init) {
const list = await TrainingData.find(
{
userId,
kbId
},
'_id'
).limit(10);
list.forEach((item) => {
generateQA();
generateVector();
});
startQueue();
}
} catch (err) {
jsonRes(res, {

View File

@@ -22,7 +22,6 @@ import {
import { QuestionOutlineIcon } from '@chakra-ui/icons';
import type { BoxProps } from '@chakra-ui/react';
import type { KbDataItemType } from '@/types/plugin';
import { ModelDataStatusMap } from '@/constants/model';
import { usePagination } from '@/hooks/usePagination';
import {
getKbDataList,
@@ -92,7 +91,7 @@ const DataCard = ({ kbId }: { kbId: string }) => {
} = useDisclosure();
const { data: { qaListLen = 0, vectorListLen = 0 } = {}, refetch } = useQuery(
['getModelSplitDataList'],
['getModelSplitDataList', kbId],
() => getTrainingData({ kbId, init: false }),
{
onError(err) {
@@ -240,7 +239,6 @@ const DataCard = ({ kbId }: { kbId: string }) => {
</Tooltip>
</Th>
<Th></Th>
<Th></Th>
<Th></Th>
</Tr>
</Thead>
@@ -253,7 +251,6 @@ const DataCard = ({ kbId }: { kbId: string }) => {
<Td>
<Box {...tdStyles.current}>{item.a || '-'}</Box>
</Td>
<Td>{ModelDataStatusMap[item.status]}</Td>
<Td>
<IconButton
mr={5}

View File

@@ -56,13 +56,13 @@ const Detail = ({ kbId }: { kbId: string }) => {
}
},
onError(err: any) {
loadKbList(true);
setLastKbId('');
router.replace(`/kb`);
toast({
title: getErrText(err, '获取知识库异常'),
status: 'error'
});
loadKbList(true);
setLastKbId('');
router.replace(`/kb?kbId=${myKbList[0]?._id || ''}`);
}
});

View File

@@ -13,7 +13,8 @@ import {
import { useForm } from 'react-hook-form';
import { postKbDataFromList, putKbDataById } from '@/api/plugins/kb';
import { useToast } from '@/hooks/useToast';
import { TrainingTypeEnum } from '@/constants/plugin';
import { TrainingModeEnum } from '@/constants/plugin';
import { getErrText } from '@/utils/tools';
/**
 * Values carried by the data-input form: the `a` and `q` text fields plus an
 * optional `dataId`.
 *
 * NOTE(review): presumably `dataId` is only set when an existing entry is
 * being edited (this file imports both `postKbDataFromList` and
 * `putKbDataById`) — confirm against the submit handler.
 */
export type FormData = {
  dataId?: string;
  a: string;
  q: string;
};
@@ -61,7 +62,7 @@ const InputDataModal = ({
q: e.q
}
],
mode: TrainingTypeEnum.index
mode: TrainingModeEnum.index
});
toast({
@@ -75,10 +76,9 @@ const InputDataModal = ({
onSuccess();
} catch (err: any) {
toast({
title: err?.message || '出现了点意外~',
title: getErrText(err, '出现了点意外~'),
status: 'error'
});
console.log(err);
}
setLoading(false);
},

View File

@@ -19,7 +19,8 @@ import { postKbDataFromList } from '@/api/plugins/kb';
import Markdown from '@/components/Markdown';
import { useMarkdown } from '@/hooks/useMarkdown';
import { fileDownload } from '@/utils/file';
import { TrainingTypeEnum } from '@/constants/plugin';
import { TrainingModeEnum } from '@/constants/plugin';
import { getErrText } from '@/utils/tools';
/**
 * Sample CSV content: a `question,answer` header followed by two quoted
 * example rows, joined with newline characters.
 *
 * NOTE(review): presumably offered to users as a downloadable template
 * (this file imports `fileDownload`) — confirm at the call site.
 */
const csvTemplate = [
  'question,answer',
  '"什么是 laf","laf 是一个云函数开发平台……"',
  '"什么是 sealos","Sealos 是以 kubernetes 为内核的云操作系统发行版,可以……"'
].join('\n');
@@ -56,9 +57,8 @@ const SelectJsonModal = ({
}))
);
} catch (error: any) {
console.log(error);
toast({
title: error?.message || 'csv 文件格式有误',
title: getErrText(error, 'csv 文件格式有误'),
status: 'error'
});
}
@@ -74,7 +74,7 @@ const SelectJsonModal = ({
const res = await postKbDataFromList({
kbId,
data: fileData,
mode: TrainingTypeEnum.index
mode: TrainingModeEnum.index
});
toast({

View File

@@ -20,19 +20,19 @@ import { useMutation } from '@tanstack/react-query';
import { postKbDataFromList } from '@/api/plugins/kb';
import Radio from '@/components/Radio';
import { splitText_token } from '@/utils/file';
import { TrainingTypeEnum } from '@/constants/plugin';
import { TrainingModeEnum } from '@/constants/plugin';
import { getErrText } from '@/utils/tools';
/**
 * Comma-separated list of file extensions, passed as `fileType` to
 * `useSelectFile` in this component.
 */
const fileExtension = ['.txt', '.doc', '.docx', '.pdf', '.md'].join(',');
const modeMap = {
qa: {
[TrainingModeEnum.qa]: {
maxLen: 2800,
slideLen: 800,
price: 4,
isPrompt: true
},
index: {
[TrainingModeEnum.index]: {
maxLen: 800,
slideLen: 300,
price: 0.4,
@@ -53,7 +53,7 @@ const SelectFileModal = ({
const { toast } = useToast();
const [prompt, setPrompt] = useState('');
const { File, onOpen } = useSelectFile({ fileType: fileExtension, multiple: true });
const [mode, setMode] = useState<`${TrainingTypeEnum}`>(TrainingTypeEnum.index);
const [mode, setMode] = useState<`${TrainingModeEnum}`>(TrainingModeEnum.index);
const [fileTextArr, setFileTextArr] = useState<string[]>(['']);
const [splitRes, setSplitRes] = useState<{ tokens: number; chunks: string[] }>({
tokens: 0,
@@ -122,9 +122,9 @@ const SelectFileModal = ({
onClose();
onSuccess();
},
onError() {
onError(err) {
toast({
title: '导入文件失败',
title: getErrText(err, '导入文件失败'),
status: 'error'
});
}