Change embedding (#1463)
* rebuild embedding queue * dataset menu * feat: rebuild data api * feat: ui change embedding model * dataset ui * feat: rebuild index ui * rename collection
This commit is contained in:
@@ -0,0 +1,39 @@
|
||||
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
||||
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
|
||||
|
||||
type Props = {};
|
||||
|
||||
export type getDatasetTrainingQueueResponse = {
|
||||
rebuildingCount: number;
|
||||
trainingCount: number;
|
||||
};
|
||||
|
||||
async function handler(
|
||||
req: ApiRequestProps<any, { datasetId: string }>,
|
||||
res: ApiResponseType<any>
|
||||
): Promise<getDatasetTrainingQueueResponse> {
|
||||
const { datasetId } = req.query;
|
||||
|
||||
const { teamId } = await authDataset({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
datasetId,
|
||||
per: 'r'
|
||||
});
|
||||
|
||||
const [rebuildingCount, trainingCount] = await Promise.all([
|
||||
MongoDatasetData.countDocuments({ teamId, datasetId, rebuilding: true }),
|
||||
MongoDatasetTraining.countDocuments({ teamId, datasetId })
|
||||
]);
|
||||
|
||||
return {
|
||||
rebuildingCount,
|
||||
trainingCount
|
||||
};
|
||||
}
|
||||
|
||||
export default NextAPI(handler);
|
||||
@@ -0,0 +1,133 @@
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
|
||||
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
||||
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
|
||||
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
|
||||
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
|
||||
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
|
||||
|
||||
export type rebuildEmbeddingBody = {
|
||||
datasetId: string;
|
||||
vectorModel: string;
|
||||
};
|
||||
|
||||
export type Response = {};
|
||||
|
||||
async function handler(
|
||||
req: ApiRequestProps<rebuildEmbeddingBody>,
|
||||
res: ApiResponseType<any>
|
||||
): Promise<Response> {
|
||||
const { datasetId, vectorModel } = req.body;
|
||||
|
||||
const { teamId, tmbId, dataset } = await authDataset({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
datasetId,
|
||||
per: 'owner'
|
||||
});
|
||||
|
||||
// check vector model
|
||||
if (!vectorModel || dataset.vectorModel === vectorModel) {
|
||||
return Promise.reject('vectorModel 不合法');
|
||||
}
|
||||
|
||||
// check rebuilding or training
|
||||
const [rebuilding, training] = await Promise.all([
|
||||
MongoDatasetData.findOne({ teamId, datasetId, rebuilding: true }),
|
||||
MongoDatasetTraining.findOne({ teamId, datasetId })
|
||||
]);
|
||||
|
||||
if (rebuilding || training) {
|
||||
return Promise.reject('数据集正在训练或者重建中,请稍后再试');
|
||||
}
|
||||
|
||||
const { billId } = await createTrainingUsage({
|
||||
teamId,
|
||||
tmbId,
|
||||
appName: '切换索引模型',
|
||||
billSource: UsageSourceEnum.training,
|
||||
vectorModel: getVectorModel(dataset.vectorModel)?.name,
|
||||
agentModel: getLLMModel(dataset.agentModel)?.name
|
||||
});
|
||||
|
||||
// update vector model and dataset.data rebuild field
|
||||
await mongoSessionRun(async (session) => {
|
||||
await MongoDataset.findByIdAndUpdate(
|
||||
datasetId,
|
||||
{
|
||||
vectorModel
|
||||
},
|
||||
{ session }
|
||||
);
|
||||
await MongoDatasetData.updateMany(
|
||||
{
|
||||
teamId,
|
||||
datasetId
|
||||
},
|
||||
{
|
||||
$set: {
|
||||
rebuilding: true
|
||||
}
|
||||
},
|
||||
{
|
||||
session
|
||||
}
|
||||
);
|
||||
});
|
||||
|
||||
// get 10 init dataset.data
|
||||
const arr = new Array(10).fill(0);
|
||||
for await (const _ of arr) {
|
||||
await mongoSessionRun(async (session) => {
|
||||
const data = await MongoDatasetData.findOneAndUpdate(
|
||||
{
|
||||
teamId,
|
||||
datasetId,
|
||||
rebuilding: true
|
||||
},
|
||||
{
|
||||
$unset: {
|
||||
rebuilding: null
|
||||
},
|
||||
updateTime: new Date()
|
||||
},
|
||||
{
|
||||
session
|
||||
}
|
||||
).select({
|
||||
_id: 1,
|
||||
collectionId: 1
|
||||
});
|
||||
|
||||
if (data) {
|
||||
await MongoDatasetTraining.create(
|
||||
[
|
||||
{
|
||||
teamId,
|
||||
tmbId,
|
||||
datasetId,
|
||||
collectionId: data.collectionId,
|
||||
billId,
|
||||
mode: TrainingModeEnum.chunk,
|
||||
model: vectorModel,
|
||||
q: '1',
|
||||
dataId: data._id
|
||||
}
|
||||
],
|
||||
{
|
||||
session
|
||||
}
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
export default NextAPI(handler);
|
||||
Reference in New Issue
Block a user