FastGPT/projects/app/src/pages/api/admin/initv4818.ts
Archer 5c9cd15d6f add env to check internal ip (#4187)
* fix: ts
* update jieba package
* add env to check internal ip
* package
* fix: jieba
* reset package
* update config
* fix: jieba package
* init shell
* init version
* change team reload
2025-03-17 18:21:27 +08:00

import { NextAPI } from '@/service/middleware/entry';
import { delay } from '@fastgpt/global/common/system/utils';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { jiebaSplit } from '@fastgpt/service/common/string/jieba/index';
import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTextSchema';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoUser } from '@fastgpt/service/support/user/schema';
import { MongoTeamMember } from '@fastgpt/service/support/user/team/teamMemberSchema';
import { NextApiRequest, NextApiResponse } from 'next';
/*
  Simple migration: upgrade directly to the latest image, which removes the index on MongoDatasetData, then run this script.
  Seamless migration:
  1. Run the 4.8.18-tmp image first. It keeps both the MongoDatasetData and MongoDatasetDataText collections (and their indexes); MongoDatasetData is still the one in effect, and data is written to both collections in sync.
  2. Run this upgrade script. Do not delete the data in MongoDatasetData.
  3. Switch to the official release image so that MongoDatasetDataText takes effect.
  4. Remove the index and the redundant fields from MongoDatasetData (deleted in 4.8.19).
  5. Move the avatar field from the User collection to the TeamMember collection.
*/
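
/*
  Example invocation (illustrative sketch, not part of the original script): the handler below
  is exposed at /api/admin/initv4818 and is guarded by authCert({ req, authRoot: true }). How
  the root credential is supplied depends on the deployment; the rootkey header here is an
  assumption.

    curl -X POST 'http://localhost:3000/api/admin/initv4818' \
      -H 'Content-Type: application/json' \
      -H 'rootkey: <ROOT_KEY>' \
      -d '{"batchSize": 500}'
*/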
let success = 0;

async function handler(req: NextApiRequest, res: NextApiResponse) {
  await authCert({ req, authRoot: true });

  const batchSize = req.body.batchSize || 500;

  success = 0;
  const start = Date.now();
  await initData(batchSize);
  // await restore();
  console.log('Init data time:', Date.now() - start);

  success = 0;
  // batchUpdateFields();

  return { success: true };
}

export default NextAPI(handler);

const restore = async () => {
  try {
    const data = await MongoDatasetData.findOne({ fullTextToken: { $exists: false } });
    if (!data) return;

    data.fullTextToken = await jiebaSplit({ text: `${data.q}\n${data.a}`.trim() });
    await data.save();

    success++;
    console.log('Success:', success);

    await restore();
  } catch (error) {
    console.log(error);
    await delay(500);
    await restore();
  }
};

const initData = async (batchSize: number) => {
  while (true) {
    try {
      // Find data that has not been initialized yet
      const dataList = await MongoDatasetData.find(
        {
          initFullText: { $exists: false }
        },
        '_id teamId datasetId collectionId fullTextToken'
      )
        .limit(batchSize)
        .lean();

      if (dataList.length === 0) break;

      try {
        await MongoDatasetDataText.insertMany(
          dataList.map((item) => ({
            teamId: item.teamId,
            datasetId: item.datasetId,
            collectionId: item.collectionId,
            dataId: item._id,
            fullTextToken: item.fullTextToken
          })),
          { ordered: false, lean: true }
        );
      } catch (error: any) {
        // ordered: false keeps inserting past duplicates; a duplicate key error only means
        // some rows were already migrated, so it is safe to ignore.
        if (error.code === 11000) {
          console.log('Duplicate key error');
        } else {
          throw error;
        }
      }

      // Mark the successfully inserted data as initialized on the source documents
      await MongoDatasetData.updateMany(
        { _id: { $in: dataList.map((item) => item._id) } },
        // FullText tmp
        // { $set: { initFullText: true } }
        { $set: { initFullText: true }, $unset: { fullTextToken: 1 } }
      );

      success += dataList.length;
      console.log('Success:', success);
      // await initData(batchSize);
    } catch (error: any) {
      console.log(error, '===');
      await delay(500);
      // await initData(batchSize);
    }
  }
};

// const batchUpdateFields = async (batchSize = 2000) => {
//   // Find documents that still have these fields
//   const documents = await MongoDatasetData.find({ initFullText: { $exists: true } }, '_id')
//     .limit(batchSize)
//     .lean();
//   if (documents.length === 0) return;
//
//   // Update in batches
//   await MongoDatasetData.updateMany(
//     { _id: { $in: documents.map((doc) => doc._id) } },
//     {
//       $unset: {
//         initFullText: 1
//         // fullTextToken: 1
//       }
//     }
//   );
//
//   success += documents.length;
//   console.log('Delete success:', success);
//
//   await batchUpdateFields(batchSize);
// };
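
// Verification sketch (added for illustration, not part of the original migration script):
// before running the 4.8.19 cleanup (step 4 above), you can check that every MongoDatasetData
// document has been marked as initialized and mirrored into MongoDatasetDataText. It only uses
// the models already imported in this file; countDocuments is a standard mongoose model method.
// const verifyMigration = async () => {
//   const pending = await MongoDatasetData.countDocuments({ initFullText: { $exists: false } });
//   const mirrored = await MongoDatasetDataText.countDocuments();
//   console.log('Pending MongoDatasetData:', pending, 'MongoDatasetDataText docs:', mirrored);
// };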