Files
FastGPT/projects/app/src/service/events/generateQA.ts
Archer 443ad37b6a sub plan page (#885)
* perf: insert mongo dataset data session

* perf: dataset data index

* remove delay

* rename bill schema

* rename bill record

* perf: bill table

* perf: prompt

* perf: sub plan

* change the usage count

* feat: usage bill

* publish usages

* doc

* 新增团队聊天功能 (#20)

* perf: doc

* feat 添加标签部分

feat 信息团队标签配置

feat 新增团队同步管理

feat team分享页面

feat 完成team分享页面

feat 实现模糊搜索

style 格式化

fix 修复模糊匹配

style 样式修改

fix 团队标签功能修复

* fix 修复鉴权功能

* merge 合并代码

* fix 修复引用错误

* fix 修复pr问题

* fix 修复ts格式问题

---------

Co-authored-by: archer <545436317@qq.com>
Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>

* update extra plan

* fix: ts

* format

* perf: bill field

* feat: standard plan

* fix: ts

* feat 个人账号页面修改 (#22)

* feat 添加标签部分

feat 信息团队标签配置

feat 新增团队同步管理

feat team分享页面

feat 完成team分享页面

feat 实现模糊搜索

style 格式化

fix 修复模糊匹配

style 样式修改

fix 团队标签功能修复

* fix 修复鉴权功能

* merge 合并代码

* fix 修复引用错误

* fix 修复pr问题

* fix 修复ts格式问题

* feat 修改个人账号页

---------

Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>

* fix chunk index; error page text

* feat: dataset process Integral prediction

* feat: stand plan field

* feat: sub plan limit

* perf: index

* query extension

* perf: share link push app name

* perf: plan point unit

* perf: get sub plan

* perf: account page

---------

Co-authored-by: yst <77910600+yu-and-liu@users.noreply.github.com>
Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>
2024-02-23 17:47:34 +08:00

259 lines
6.8 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { pushQAUsage } from '@/service/support/wallet/usage/push';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { sendOneInform } from '../support/user/inform/api';
import { getAIApi } from '@fastgpt/service/core/ai/config';
import type { ChatMessageItemType } from '@fastgpt/global/core/ai/type.d';
import { addLog } from '@fastgpt/service/common/system/log';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { Prompt_AgentQA } from '@/global/core/prompt/agent';
import { getErrText } from '@fastgpt/global/common/error/utils';
import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api.d';
import { lockTrainingDataByTeamId } from '@fastgpt/service/core/dataset/training/controller';
import { pushDataToTrainingQueue } from '@/service/core/dataset/data/controller';
import { getLLMModel } from '../core/ai/model';
import { checkTeamAIPoints } from '../support/permission/teamLimit';
import { TeamErrEnum } from '@fastgpt/global/common/error/code/team';
/**
 * Release one QA worker slot.
 *
 * Decrements `global.qaQueueLen`, clamping it at 0 so it can never go negative.
 *
 * @returns `true` when no QA worker slot remains in use after the decrement.
 */
const reduceQueue = () => {
  global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0;

  // Report on the QA counter that was just decremented, not on the vector queue's counter.
  return global.qaQueueLen === 0;
};
/**
 * QA training worker: claims one queued QA-mode training record, asks the LLM to
 * produce Q/A pairs for its text, pushes the pairs to the chunk training queue,
 * deletes the processed record, and then schedules itself for the next record.
 *
 * Flow:
 * 1. Bail out if `global.qaQueueLen` already reached `global.systemEnv.qaMaxProcess`;
 *    otherwise take a slot.
 * 2. Claim a record whose `lockTime` is at least 6 minutes old by setting `lockTime` to now.
 * 3. Verify the team's AI points; when they are insufficient, notify the member and
 *    lock the team's training data before moving on.
 * 4. Call the chat-completion API, parse the answer with `formatSplitText`, push the
 *    result, delete the record and record usage.
 *
 * Every exit path releases the slot through `reduceQueue()`.
 *
 * @returns A promise that resolves once this invocation has finished its own record
 *          (follow-up invocations may still be running).
 */
export async function generateQA(): Promise<any> {
  if (global.qaQueueLen >= global.systemEnv.qaMaxProcess) return;
  global.qaQueueLen++;

  // get training data
  const {
    data,
    text,
    done = false,
    error = false
  } = await (async () => {
    try {
      // Atomically claim a record: only rows whose lock is older than 6 minutes qualify,
      // and claiming refreshes the lock so concurrent workers skip it.
      const data = await MongoDatasetTraining.findOneAndUpdate(
        {
          lockTime: { $lte: new Date(Date.now() - 6 * 60 * 1000) },
          mode: TrainingModeEnum.qa
        },
        {
          lockTime: new Date()
        }
      )
        .select({
          _id: 1,
          userId: 1,
          teamId: 1,
          tmbId: 1,
          datasetId: 1,
          collectionId: 1,
          q: 1,
          model: 1,
          chunkIndex: 1,
          billId: 1,
          prompt: 1
        })
        .lean();

      // task preemption
      if (!data) {
        return {
          done: true
        };
      }
      return {
        data,
        text: data.q
      };
    } catch (error) {
      console.log(`Get Training Data error`, error);
      return {
        error: true
      };
    }
  })();

  // A failed fetch also leaves `data` undefined, so this check has to run before the
  // `!data` guard below; otherwise the retry is unreachable and the failure is
  // reported as "Task Done". Retry after a short pause to avoid spinning while the
  // database is unavailable.
  if (error) {
    reduceQueue();
    setTimeout(() => {
      generateQA();
    }, 1000);
    return;
  }

  if (done || !data) {
    if (reduceQueue()) {
      console.log(`【QA】Task Done`);
    }
    return;
  }

  // auth balance
  try {
    await checkTeamAIPoints(data.teamId);
  } catch (error: any) {
    if (error?.statusText === TeamErrEnum.aiPointsNotEnough) {
      // send inform and lock data
      try {
        const informTask = sendOneInform({
          type: 'system',
          title: '文本训练任务中止',
          content:
            '该团队账号的AI积分不足文本训练任务中止重新充值后将会继续。暂停的任务将在 7 天后被删除。',
          tmbId: data.tmbId
        });
        console.log('余额不足暂停【QA】生成任务');
        const lockTask = lockTrainingDataByTeamId(data.teamId);

        // Wait for both so that (a) asynchronous failures are observed here instead of
        // escaping as unhandled rejections and (b) the team's rows are locked before
        // the next generateQA() call tries to claim another one.
        const outcomes = await Promise.allSettled([informTask, lockTask]);
        for (const outcome of outcomes) {
          if (outcome.status === 'rejected') {
            addLog.error(
              getErrText(outcome.reason, 'QA: failed to notify or lock after AI points ran out')
            );
          }
        }
      } catch (informErr) {
        // Best effort: a synchronous failure here must not stop the worker loop.
        addLog.error(getErrText(informErr, 'QA: failed to notify or lock after AI points ran out'));
      }
    }
    reduceQueue();
    return generateQA();
  }

  try {
    const startTime = Date.now();
    const model = getLLMModel(data.model)?.model;
    // NOTE: the line break inside this template literal is part of the prompt.
    const prompt = `${data.prompt || Prompt_AgentQA.description}
${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;

    // request LLM to get QA
    const messages: ChatMessageItemType[] = [
      {
        role: 'user',
        content: prompt
      }
    ];
    const ai = getAIApi({
      timeout: 600000
    });
    const chatResponse = await ai.chat.completions.create({
      model,
      temperature: 0.3,
      messages,
      stream: false
    });
    // `choices` may be present but empty; guard the element access as well.
    const answer = chatResponse.choices?.[0]?.message?.content || '';

    const qaArr = formatSplitText(answer, text); // formatted Q/A pairs

    // get vector and insert
    const { insertLen } = await pushDataToTrainingQueue({
      teamId: data.teamId,
      tmbId: data.tmbId,
      collectionId: data.collectionId,
      trainingMode: TrainingModeEnum.chunk,
      data: qaArr.map((item) => ({
        ...item,
        chunkIndex: data.chunkIndex
      })),
      billId: data.billId
    });

    // delete data from training
    await MongoDatasetTraining.findByIdAndDelete(data._id);

    addLog.info(`QA Training Finish`, {
      time: `${(Date.now() - startTime) / 1000}s`,
      splitLength: qaArr.length,
      usage: chatResponse.usage
    });

    // add bill
    if (insertLen > 0) {
      pushQAUsage({
        teamId: data.teamId,
        tmbId: data.tmbId,
        charsLength: `${prompt}${answer}`.length,
        billId: data.billId,
        model
      });
    } else {
      addLog.info(`QA result 0:`, { answer });
    }

    reduceQueue();
    generateQA();
  } catch (err: any) {
    reduceQueue();

    // log
    if (err?.response) {
      addLog.info('openai error: 生成QA错误', {
        status: err.response?.status,
        statusText: err.response?.statusText,
        data: err.response?.data
      });
    } else {
      console.log(err);
      addLog.error(getErrText(err, '生成 QA 错误'));
    }

    // message error or openai account error
    if (
      err?.message === 'invalid message format' ||
      err?.response?.data?.error?.type === 'invalid_request_error' ||
      err?.code === 500
    ) {
      addLog.info('invalid message format', {
        text
      });
      try {
        // Park the record far in the future so no worker claims it again.
        await MongoDatasetTraining.findByIdAndUpdate(data._id, {
          lockTime: new Date('2998/5/5')
        });
      } catch (lockErr) {
        // Best effort: the claim lock set earlier still delays the next attempt.
        addLog.error(getErrText(lockErr, 'QA: failed to lock invalid training data'));
      }
      return generateQA();
    }

    // Any other failure: try again after a short pause.
    setTimeout(() => {
      generateQA();
    }, 1000);
  }
}
/**
 * Parse an LLM answer into question/answer items.
 *
 * The answer is expected to contain entries shaped like `Q<n>: question` followed by
 * `A<n>: answer`. Each entry with a non-empty question becomes one item whose default
 * index text is the question, a line break, and the trimmed answer with the whitespace
 * after each line break collapsed.
 *
 * When nothing matches, `rawText` is split into chunks (`chunkLen: 512`) and every
 * chunk is returned as a question with an empty answer.
 *
 * @param text - Raw answer returned by the model.
 * @param rawText - Original source text, used only for the fallback split.
 * @returns The parsed (or fallback) items.
 */
function formatSplitText(text: string, rawText: string) {
  text = text.replace(/\\n/g, '\n'); // turn literal "\n" escape sequences into real line breaks

  // The answer group is lazy and ends at the next `Q<n>:` marker or at end of input.
  // The lookahead requires the whole marker so that a capital "Q" inside an answer
  // (e.g. "SQL", "FAQ") does not truncate it.
  const regex = /Q\d+:(\s*)(.*)(\s*)A\d+:(\s*)([\s\S]*?)(?=Q\d+:|$)/g;
  const matches = text.matchAll(regex); // every Q/A entry found in the answer
  const result: PushDatasetDataChunkProps[] = []; // accumulated items

  for (const match of matches) {
    const q = match[2] || '';
    const a = match[5] || '';
    if (q) {
      result.push({
        q,
        a,
        indexes: [
          {
            defaultIndex: true,
            text: `${q}\n${a.trim().replace(/\n\s*/g, '\n')}`
          }
        ]
      });
    }
  }

  // empty result. direct split chunk
  if (result.length === 0) {
    const { chunks } = splitText2Chunks({ text: rawText, chunkLen: 512 });
    chunks.forEach((chunk) => {
      result.push({
        q: chunk,
        a: '',
        indexes: [
          {
            defaultIndex: true,
            text: chunk
          }
        ]
      });
    });
  }

  return result;
}