* perf: insert mongo dataset data session * perf: dataset data index * remove delay * rename bill schema * rename bill record * perf: bill table * perf: prompt * perf: sub plan * change the usage count * feat: usage bill * publish usages * doc * 新增团队聊天功能 (#20) * perf: doc * feat 添加标签部分 feat 信息团队标签配置 feat 新增团队同步管理 feat team分享页面 feat 完成team分享页面 feat 实现模糊搜索 style 格式化 fix 修复迷糊匹配 style 样式修改 fix 团队标签功能修复 * fix 修复鉴权功能 * merge 合并代码 * fix 修复引用错误 * fix 修复pr问题 * fix 修复ts格式问题 --------- Co-authored-by: archer <545436317@qq.com> Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com> * update extra plan * fix: ts * format * perf: bill field * feat: standard plan * fix: ts * feat 个人账号页面修改 (#22) * feat 添加标签部分 feat 信息团队标签配置 feat 新增团队同步管理 feat team分享页面 feat 完成team分享页面 feat 实现模糊搜索 style 格式化 fix 修复迷糊匹配 style 样式修改 fix 团队标签功能修复 * fix 修复鉴权功能 * merge 合并代码 * fix 修复引用错误 * fix 修复pr问题 * fix 修复ts格式问题 * feat 修改个人账号页 --------- Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com> * fix chunk index; error page text * feat: dataset process Integral prediction * feat: stand plan field * feat: sub plan limit * perf: index * query extension * perf: share link push app name * perf: plan point unit * perf: get sub plan * perf: account page --------- Co-authored-by: yst <77910600+yu-and-liu@users.noreply.github.com> Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>
193 lines
5.0 KiB
TypeScript
193 lines
5.0 KiB
TypeScript
import { insertData2Dataset } from '@/service/core/dataset/data/controller';
|
||
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
|
||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||
import { sendOneInform } from '../support/user/inform/api';
|
||
import { addLog } from '@fastgpt/service/common/system/log';
|
||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||
import { checkTeamAIPoints } from '../support/permission/teamLimit';
|
||
import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
|
||
import { lockTrainingDataByTeamId } from '@fastgpt/service/core/dataset/training/controller';
|
||
import { TeamErrEnum } from '@fastgpt/global/common/error/code/team';
|
||
|
||
/**
 * Release one slot of the global vector queue counter.
 *
 * The counter never drops below zero; any non-positive (or unset) value is
 * normalised to 0.
 *
 * @returns `true` when the counter is 0 after the release, i.e. no worker slot
 *          is occupied any more.
 */
const reduceQueue = () => {
  if (global.vectorQueueLen > 0) {
    global.vectorQueueLen -= 1;
  } else {
    global.vectorQueueLen = 0;
  }

  const queueIsEmpty = global.vectorQueueLen === 0;
  return queueIsEmpty;
};
|
||
|
||
/**
 * Index (vector) generation queue worker. Every import kicks off its own
 * independent worker run.
 *
 * One call occupies one slot of `global.vectorQueueLen` (capped by
 * `global.systemEnv.vectorMaxProcess`), claims a single `chunk`-mode training
 * record, inserts it into the dataset, pushes the usage record, deletes the
 * training record and then starts the next round. Every exit path releases the
 * slot through `reduceQueue()`.
 *
 * @returns A promise that settles when this round is over; the resolved value
 *          carries no meaning for callers.
 */
export async function generateVector(): Promise<any> {
  // All worker slots are taken: do nothing.
  if (global.vectorQueueLen >= global.systemEnv.vectorMaxProcess) return;
  global.vectorQueueLen++;

  const start = Date.now();

  // get training data
  const {
    data,
    dataItem,
    done = false,
    error = false
  } = await (async () => {
    try {
      // Claim one record whose lock is older than one minute and refresh its
      // lock in the same call; records with a higher `weight` are taken first.
      const data = await MongoDatasetTraining.findOneAndUpdate(
        {
          lockTime: { $lte: new Date(Date.now() - 1 * 60 * 1000) },
          mode: TrainingModeEnum.chunk
        },
        {
          lockTime: new Date()
        }
      )
        .sort({
          weight: -1
        })
        .select({
          _id: 1,
          userId: 1,
          teamId: 1,
          tmbId: 1,
          datasetId: 1,
          collectionId: 1,
          q: 1,
          a: 1,
          chunkIndex: 1,
          indexes: 1,
          model: 1,
          billId: 1
        })
        .lean();

      // task preemption: nothing left to claim
      if (!data) {
        return {
          done: true
        };
      }
      return {
        data,
        dataItem: {
          q: data.q,
          a: data.a || '',
          indexes: data.indexes
        }
      };
    } catch (error) {
      console.log(`Get Training Data error`, error);
      return {
        error: true
      };
    }
  })();

  // The failure flag has to be checked before the `!data` guard: a failed
  // lookup yields no `data` either and would otherwise be treated as
  // "queue finished", silently stopping the worker.
  if (error) {
    reduceQueue();
    // Retry with the same 1s back-off as the generic failure path at the end of
    // this function, so a failing database is not queried in a tight loop.
    setTimeout(() => {
      generateVector();
    }, 1000);
    return;
  }

  if (done || !data) {
    if (reduceQueue()) {
      console.log(`【index】Task done`);
    }
    return;
  }

  // auth balance
  try {
    await checkTeamAIPoints(data.teamId);
  } catch (error: any) {
    if (error?.statusText === TeamErrEnum.aiPointsNotEnough) {
      // send inform and lock data
      try {
        // Fire-and-forget notification. The rejection handler keeps a failed
        // inform from surfacing as an unhandled promise rejection.
        Promise.resolve(
          sendOneInform({
            type: 'system',
            title: '文本训练任务中止',
            content:
              '该团队账号AI积分不足,文本训练任务中止,重新充值后将会继续。暂停的任务将在 7 天后被删除。',
            tmbId: data.tmbId
          })
        ).catch((informErr: unknown) => {
          addLog.error(getErrText(informErr, 'Send training paused inform error'));
        });
        console.log('余额不足,暂停【向量】生成任务');
        // NOTE(review): presumably asynchronous — awaited so the team's records
        // are locked before the next round claims another one, and so a
        // rejection lands in the catch below.
        await lockTrainingDataByTeamId(data.teamId);
      } catch (lockErr) {
        // Best effort: log and carry on with the next record.
        addLog.error(getErrText(lockErr, 'Lock training data by team error'));
      }
    }

    reduceQueue();
    return generateVector();
  }

  // create vector and insert
  try {
    // invalid data: an empty question cannot be indexed, drop the record
    if (!data.q.trim()) {
      await MongoDatasetTraining.findByIdAndDelete(data._id);
      reduceQueue();
      generateVector();
      return;
    }

    // insert data to pg
    const { charsLength } = await insertData2Dataset({
      teamId: data.teamId,
      tmbId: data.tmbId,
      datasetId: data.datasetId,
      collectionId: data.collectionId,
      q: dataItem.q,
      a: dataItem.a,
      chunkIndex: data.chunkIndex,
      indexes: dataItem.indexes,
      model: data.model
    });

    // push bill
    pushGenerateVectorUsage({
      teamId: data.teamId,
      tmbId: data.tmbId,
      charsLength,
      model: data.model,
      billId: data.billId
    });

    // delete data from training
    await MongoDatasetTraining.findByIdAndDelete(data._id);
    reduceQueue();
    generateVector();

    console.log(`embedding finished, time: ${Date.now() - start}ms`);
  } catch (err: any) {
    reduceQueue();
    // log
    if (err?.response) {
      addLog.info('openai error: 生成向量错误', {
        status: err.response?.status,
        statusText: err.response?.statusText,
        data: err.response?.data
      });
    } else {
      console.log(err);
      addLog.error(getErrText(err, '生成向量错误'));
    }

    // message error or openai account error: retrying cannot succeed, so push
    // the lock far into the future to take the record out of circulation
    if (
      err?.message === 'invalid message format' ||
      err?.response?.data?.error?.type === 'invalid_request_error' ||
      err?.code === 500
    ) {
      addLog.info('Lock training data');
      console.log(err?.code);
      console.log(err?.response?.data?.error?.type);
      console.log(err?.message);

      try {
        await MongoDatasetTraining.findByIdAndUpdate(data._id, {
          lockTime: new Date('2998/5/5')
        });
      } catch (lockErr) {
        // Best effort: log and carry on with the next record.
        addLog.error(getErrText(lockErr, 'Lock training data error'));
      }
      return generateVector();
    }

    // Any other failure is treated as transient: try again after a short delay.
    setTimeout(() => {
      generateVector();
    }, 1000);
  }
}
|