Files
FastGPT/projects/app/src/service/events/generateVector.ts
Archer 443ad37b6a sub plan page (#885)
* perf: insert mongo dataset data session

* perf: dataset data index

* remove delay

* rename bill schema

* rename bill record

* perf: bill table

* perf: prompt

* perf: sub plan

* change the usage count

* feat: usage bill

* publish usages

* doc

* 新增团队聊天功能 (#20)

* perf: doc

* feat 添加标签部分

feat 信息团队标签配置

feat 新增团队同步管理

feat team分享页面

feat 完成team分享页面

feat 实现模糊搜索

style 格式化

fix 修复模糊匹配

style 样式修改

fix 团队标签功能修复

* fix 修复鉴权功能

* merge 合并代码

* fix 修复引用错误

* fix 修复pr问题

* fix 修复ts格式问题

---------

Co-authored-by: archer <545436317@qq.com>
Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>

* update extra plan

* fix: ts

* format

* perf: bill field

* feat: standard plan

* fix: ts

* feat 个人账号页面修改 (#22)

* feat 添加标签部分

feat 信息团队标签配置

feat 新增团队同步管理

feat team分享页面

feat 完成team分享页面

feat 实现模糊搜索

style 格式化

fix 修复模糊匹配

style 样式修改

fix 团队标签功能修复

* fix 修复鉴权功能

* merge 合并代码

* fix 修复引用错误

* fix 修复pr问题

* fix 修复ts格式问题

* feat 修改个人账号页

---------

Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>

* fix chunk index; error page text

* feat: dataset process Integral prediction

* feat: stand plan field

* feat: sub plan limit

* perf: index

* query extension

* perf: share link push app name

* perf: plan point unit

* perf: get sub plan

* perf: account page

---------

Co-authored-by: yst <77910600+yu-and-liu@users.noreply.github.com>
Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>
2024-02-23 17:47:34 +08:00

193 lines
5.0 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { insertData2Dataset } from '@/service/core/dataset/data/controller';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { sendOneInform } from '../support/user/inform/api';
import { addLog } from '@fastgpt/service/common/system/log';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { checkTeamAIPoints } from '../support/permission/teamLimit';
import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
import { lockTrainingDataByTeamId } from '@fastgpt/service/core/dataset/training/controller';
import { TeamErrEnum } from '@fastgpt/global/common/error/code/team';
/**
 * Release one slot of the vector queue counter kept on `global`.
 *
 * The counter is decremented by one but never drops below zero.
 *
 * @returns `true` when the counter is zero after the update, i.e. no worker
 * slot is in use any more; `false` otherwise.
 */
const reduceQueue = () => {
  const pending = global.vectorQueueLen;

  // Anything that is not strictly positive collapses to zero.
  let remaining = 0;
  if (pending > 0) {
    remaining = pending - 1;
  }

  global.vectorQueueLen = remaining;
  return remaining === 0;
};
/**
 * Index (vector) generation queue worker. Every import starts its own worker.
 *
 * One invocation:
 *  1. takes a worker slot, unless `global.systemEnv.vectorMaxProcess` slots are
 *     already in use;
 *  2. claims one `chunk`-mode training record whose `lockTime` is at least one
 *     minute old (highest `weight` first) by stamping `lockTime` with "now";
 *  3. checks the team's AI points;
 *  4. inserts the record into the dataset, pushes the usage and deletes the
 *     training record;
 *  5. releases the slot and starts the next invocation.
 *
 * Failures never propagate to the caller: they are logged and the worker
 * either moves on to the next record or retries after a short delay.
 *
 * @returns A promise that settles once this invocation has finished its own
 * record; the resolved value carries no meaning.
 */
export async function generateVector(): Promise<any> {
  // Concurrency limit reached: let the running workers drain the queue.
  if (global.vectorQueueLen >= global.systemEnv.vectorMaxProcess) return;
  global.vectorQueueLen++;
  const start = Date.now();

  // get training data
  const {
    data,
    done = false,
    error = false
  } = await (async () => {
    try {
      const data = await MongoDatasetTraining.findOneAndUpdate(
        {
          // Only records that were not claimed during the last minute.
          lockTime: { $lte: new Date(Date.now() - 1 * 60 * 1000) },
          mode: TrainingModeEnum.chunk
        },
        {
          // Claim the record so that other workers skip it for a minute.
          lockTime: new Date()
        }
      )
        .sort({
          weight: -1
        })
        .select({
          _id: 1,
          userId: 1,
          teamId: 1,
          tmbId: 1,
          datasetId: 1,
          collectionId: 1,
          q: 1,
          a: 1,
          chunkIndex: 1,
          indexes: 1,
          model: 1,
          billId: 1
        })
        .lean();

      // task preemption: nothing left to claim
      if (!data) {
        return {
          done: true
        };
      }
      return {
        data
      };
    } catch (error) {
      console.log(`Get Training Data error`, error);
      return {
        error: true
      };
    }
  })();

  // A failed fetch also leaves `data` empty, so it has to be handled BEFORE the
  // "done" check; otherwise the retry is unreachable and the failure is
  // reported as a finished queue. Retry after the same 1s back-off used by the
  // embedding failure path so an unavailable database is not hammered.
  if (error) {
    reduceQueue();
    setTimeout(() => {
      generateVector();
    }, 1000);
    return;
  }

  if (done || !data) {
    if (reduceQueue()) {
      console.log(`【index】Task done`);
    }
    return;
  }

  // auth balance
  try {
    await checkTeamAIPoints(data.teamId);
  } catch (balanceErr: any) {
    if (balanceErr?.statusText === TeamErrEnum.aiPointsNotEnough) {
      // send inform and lock data
      try {
        // The notification stays fire-and-forget, but a rejection is logged
        // instead of being left unhandled (assumes sendOneInform may return a
        // promise; Promise.resolve makes this safe either way).
        Promise.resolve(
          sendOneInform({
            type: 'system',
            title: '文本训练任务中止',
            content:
              '该团队账号AI积分不足文本训练任务中止重新充值后将会继续。暂停的任务将在 7 天后被删除。',
            tmbId: data.tmbId
          })
        ).catch((informErr) => {
          addLog.error(getErrText(informErr, 'Send training abort inform error'));
        });
        console.log('余额不足,暂停【向量】生成任务');
        // Awaited so that, if the lock is asynchronous, it has landed before
        // the next invocation claims another record of the same team.
        await lockTrainingDataByTeamId(data.teamId);
      } catch (lockErr) {
        addLog.error(getErrText(lockErr, 'Lock training data by team error'));
      }
    }

    reduceQueue();
    return generateVector();
  }

  // create vector and insert
  try {
    // invalid data: an empty question cannot be indexed, drop the record
    if (!data.q.trim()) {
      await MongoDatasetTraining.findByIdAndDelete(data._id);
      reduceQueue();
      generateVector();
      return;
    }

    // insert data into the dataset (the original comment names "pg" as target)
    const { charsLength } = await insertData2Dataset({
      teamId: data.teamId,
      tmbId: data.tmbId,
      datasetId: data.datasetId,
      collectionId: data.collectionId,
      q: data.q,
      a: data.a || '',
      chunkIndex: data.chunkIndex,
      indexes: data.indexes,
      model: data.model
    });

    // push bill (not awaited in the original either)
    pushGenerateVectorUsage({
      teamId: data.teamId,
      tmbId: data.tmbId,
      charsLength,
      model: data.model,
      billId: data.billId
    });

    // delete data from training
    await MongoDatasetTraining.findByIdAndDelete(data._id);
    reduceQueue();
    generateVector();

    console.log(`embedding finished, time: ${Date.now() - start}ms`);
  } catch (err: any) {
    reduceQueue();

    // log
    if (err?.response) {
      addLog.info('openai error: 生成向量错误', {
        status: err.response?.status,
        // NOTE(review): key is misspelled ("stateusText"); kept unchanged in
        // case log consumers already rely on it.
        stateusText: err.response?.statusText,
        data: err.response?.data
      });
    } else {
      console.log(err);
      addLog.error(getErrText(err, '生成向量错误'));
    }

    // message error or openai account error
    if (
      err?.message === 'invalid message format' ||
      err?.response?.data?.error?.type === 'invalid_request_error' ||
      err?.code === 500
    ) {
      addLog.info('Lock training data');
      console.log(err?.code);
      console.log(err?.response?.data?.error?.type);
      console.log(err?.message);

      try {
        // A far-future lockTime keeps the record out of the `$lte` claim query
        // above, so the failing record is not retried.
        await MongoDatasetTraining.findByIdAndUpdate(data._id, {
          lockTime: new Date('2998/5/5')
        });
      } catch (lockErr) {
        addLog.error(getErrText(lockErr, 'Lock training data error'));
      }
      return generateVector();
    }

    // Any other failure: retry after a short pause.
    setTimeout(() => {
      generateVector();
    }, 1000);
  }
}