Change embedding (#1463)
* rebuild embedding queue * dataset menu * feat: rebuild data api * feat: ui change embedding model * dataset ui * feat: rebuild index ui * rename collection
This commit is contained in:
178
projects/app/src/pages/api/admin/initv481.ts
Normal file
178
projects/app/src/pages/api/admin/initv481.ts
Normal file
@@ -0,0 +1,178 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { PgClient } from '@fastgpt/service/common/vectorStore/pg';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { PgDatasetTableName } from '@fastgpt/global/common/vectorStore/constants';
|
||||
import { connectionMongo } from '@fastgpt/service/common/mongo';
|
||||
import { addLog } from '@fastgpt/service/common/system/log';
|
||||
|
||||
/** Describes one legacy collection rename performed by this init script. */
type CollectionRename = {
  /** Existing collection name (legacy, dot-separated). */
  from: string;
  /** Collection name to rename to (underscore-separated). */
  to: string;
  /**
   * When true, the rename runs even if the target already holds documents;
   * the rename is issued with `dropTarget: true`, so the target is replaced.
   */
  overwriteTarget?: boolean;
};

/** Renames applied by the handler, in order. */
const collectionRenames: CollectionRename[] = [
  { from: 'dataset.trainings', to: 'dataset_trainings' },
  { from: 'dataset.collections', to: 'dataset_collections' },
  { from: 'dataset.datas', to: 'dataset_datas' },
  { from: 'app.versions', to: 'app_versions' },
  { from: 'buffer.rawtexts', to: 'buffer_rawtexts' },
  { from: 'buffer.tts', to: 'buffer_tts' },
  // team.members is renamed without checking whether the target has data.
  { from: 'team.members', to: 'team_members', overwriteTarget: true },
  { from: 'team.tags', to: 'team_tags' }
];

/**
 * Rename a single collection if it exists.
 *
 * Unless `overwriteTarget` is set, the rename is skipped (and a message is
 * logged) when the target collection already contains documents. Any error is
 * logged and swallowed so that one failing rename does not stop the others.
 */
async function renameCollection({
  from,
  to,
  overwriteTarget = false
}: CollectionRename): Promise<void> {
  try {
    const db = connectionMongo.connection.db;

    const existing = await db.listCollections({ name: from }).toArray();
    if (existing.length === 0) {
      // Nothing to migrate: the legacy collection does not exist.
      return;
    }

    if (!overwriteTarget && (await db.collection(to).countDocuments()) > 0) {
      console.log(`${to} 中有数据,无法自动将 ${from} 迁移到 ${to},请手动操作`);
      return;
    }

    await db.collection(from).rename(to, { dropTarget: true });
    console.log(`success rename ${from} -> ${to}`);
  } catch (error) {
    console.log(`error: rename ${from} -> ${to}`, error);
  }
}

/**
 * Admin init endpoint: renames legacy dot-separated MongoDB collections
 * (e.g. `dataset.trainings`) to their underscore-separated names
 * (e.g. `dataset_trainings`).
 *
 * Requires root authorization. Renames run one after another; individual
 * failures are logged and do not change the `success` response.
 */
async function handler(req: NextApiRequest, res: NextApiResponse) {
  await authCert({ req, authRoot: true });

  for (const rename of collectionRenames) {
    await renameCollection(rename);
  }

  jsonRes(res, {
    message: 'success'
  });
}
|
||||
|
||||
export default NextAPI(handler);
|
||||
@@ -0,0 +1,39 @@
|
||||
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
||||
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
|
||||
|
||||
/** Placeholder props type; it declares no fields and is not referenced by the visible code. */
type Props = {};
|
||||
|
||||
/** Queue sizes reported for a single dataset. */
export type getDatasetTrainingQueueResponse = {
  /** Number of training documents that belong to the dataset. */
  trainingCount: number;
  /** Number of dataset data documents still flagged with `rebuilding: true`. */
  rebuildingCount: number;
};
|
||||
|
||||
/**
 * Report how many records of a dataset are waiting to be rebuilt and how many
 * training tasks exist for it.
 *
 * The caller must have read ('r') permission on the dataset, authenticated
 * via token or API key.
 *
 * @param req - Request whose query carries `datasetId`.
 * @param res - Response object (not used directly here).
 * @returns The rebuilding and training counts for the dataset.
 */
async function handler(
  req: ApiRequestProps<any, { datasetId: string }>,
  res: ApiResponseType<any>
): Promise<getDatasetTrainingQueueResponse> {
  const datasetId = req.query.datasetId;

  const authResult = await authDataset({
    req,
    authToken: true,
    authApiKey: true,
    datasetId,
    per: 'r'
  });
  const teamId = authResult.teamId;

  // Both counts are independent, so they are fetched in parallel.
  const counts = await Promise.all([
    MongoDatasetData.countDocuments({ teamId, datasetId, rebuilding: true }),
    MongoDatasetTraining.countDocuments({ teamId, datasetId })
  ]);

  return {
    rebuildingCount: counts[0],
    trainingCount: counts[1]
  };
}
|
||||
|
||||
export default NextAPI(handler);
|
||||
@@ -0,0 +1,133 @@
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
|
||||
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
||||
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
|
||||
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
|
||||
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
|
||||
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
|
||||
|
||||
/** Request body for switching a dataset's vector model and rebuilding its index. */
export type rebuildEmbeddingBody = {
  /** Vector model to switch to; the handler rejects an empty value or the dataset's current model. */
  vectorModel: string;
  /** Id of the dataset whose embeddings should be rebuilt. */
  datasetId: string;
};
|
||||
|
||||
/** Response payload of the rebuild-embedding endpoint; it declares no fields. */
export type Response = {};
|
||||
|
||||
/** Maximum number of dataset data records this request moves into the training queue itself. */
const INITIAL_TRAINING_BATCH_SIZE = 10;

/**
 * Switch a dataset to a different vector model and start rebuilding its index.
 *
 * Steps:
 * 1. Authorize the caller as the dataset owner.
 * 2. Reject when the requested model is empty or equal to the current one, or
 *    when the dataset already has rebuilding data or training tasks.
 * 3. Create a training usage record.
 * 4. In one transaction, store the new vector model on the dataset and flag
 *    all of its data with `rebuilding: true`.
 * 5. Move up to INITIAL_TRAINING_BATCH_SIZE flagged records into the training
 *    queue, one transaction per record.
 *
 * @param req - Request whose body carries `datasetId` and `vectorModel`.
 * @param res - Response object (not used directly here).
 * @returns An empty object on success; rejects with a message string otherwise.
 */
async function handler(
  req: ApiRequestProps<rebuildEmbeddingBody>,
  res: ApiResponseType<any>
): Promise<Response> {
  const { datasetId, vectorModel } = req.body;

  const { teamId, tmbId, dataset } = await authDataset({
    req,
    authToken: true,
    authApiKey: true,
    datasetId,
    per: 'owner'
  });

  // The new vector model must be provided and differ from the current one.
  if (!vectorModel || dataset.vectorModel === vectorModel) {
    return Promise.reject('vectorModel 不合法');
  }

  // Refuse to start while the dataset is still rebuilding or training.
  const [rebuilding, training] = await Promise.all([
    MongoDatasetData.findOne({ teamId, datasetId, rebuilding: true }),
    MongoDatasetTraining.findOne({ teamId, datasetId })
  ]);

  if (rebuilding || training) {
    return Promise.reject('数据集正在训练或者重建中,请稍后再试');
  }

  // NOTE(review): the usage record is labelled with the dataset's current
  // (pre-switch) vector model, while the training tasks below use the
  // requested `vectorModel` — confirm this is intended.
  const { billId } = await createTrainingUsage({
    teamId,
    tmbId,
    appName: '切换索引模型',
    billSource: UsageSourceEnum.training,
    vectorModel: getVectorModel(dataset.vectorModel)?.name,
    agentModel: getLLMModel(dataset.agentModel)?.name
  });

  // Store the new vector model and flag every data record for rebuilding.
  await mongoSessionRun(async (session) => {
    await MongoDataset.findByIdAndUpdate(
      datasetId,
      {
        vectorModel
      },
      { session }
    );
    await MongoDatasetData.updateMany(
      {
        teamId,
        datasetId
      },
      {
        $set: {
          rebuilding: true
        }
      },
      {
        session
      }
    );
  });

  // Seed the training queue with the first few flagged records. Records that
  // keep `rebuilding: true` are presumably picked up later by the training
  // queue itself — TODO confirm against the queue consumer.
  let hasPending = true;
  for (let i = 0; i < INITIAL_TRAINING_BATCH_SIZE && hasPending; i++) {
    await mongoSessionRun(async (session) => {
      // Claim one flagged record by clearing its `rebuilding` flag.
      const data = await MongoDatasetData.findOneAndUpdate(
        {
          teamId,
          datasetId,
          rebuilding: true
        },
        {
          $unset: {
            rebuilding: null
          },
          updateTime: new Date()
        },
        {
          session
        }
      ).select({
        _id: 1,
        collectionId: 1
      });

      if (!data) {
        // No flagged record is left, so further iterations would be no-ops.
        hasPending = false;
        return;
      }

      await MongoDatasetTraining.create(
        [
          {
            teamId,
            tmbId,
            datasetId,
            collectionId: data.collectionId,
            billId,
            mode: TrainingModeEnum.chunk,
            model: vectorModel,
            // NOTE(review): '1' looks like a placeholder question text; the
            // task references the existing record through `dataId` — confirm.
            q: '1',
            dataId: data._id
          }
        ],
        {
          session
        }
      );
    });
  }

  return {};
}
|
||||
|
||||
export default NextAPI(handler);
|
||||
Reference in New Issue
Block a user