* feat: sync org from wecom, pref: member list pagination (#3549) * feat: sync org * chore: fe * chore: loading * chore: type * pref: team member list change to pagination. Edit a sort of list apis. * feat: member update avatar * chore: user avatar move to tmb * chore: init scripts move user avatar * chore: sourceMember * fix: list api sourceMember * fix: member sync * fix: pagination * chore: adjust code * chore: move changeOwner to pro * chore: init v4819 script * chore: adjust code * chore: UserBox * perf: scroll page code * perf: list data * docs:更新用户答疑 (#3576) * docs: add custom uid docs (#3572) * fix: pagination bug (#3577) * 4.8.19 test (#3584) * faet: dataset search filter * fix: scroll page * fix: collection list api old version (#3591) * fix: collection list api format * fix: type error of addSourceMemeber * fix: scroll fetch (#3592) * fix: yuque dataset file folder can enter (#3593) * perf: load members;perf: yuque load;fix: workflow llm params cannot close (#3594) * chat openapi doc * feat: dataset openapi doc * perf: load members * perf: member load code * perf: yuque load * fix: workflow llm params cannot close * fix: api dataset reference tag preview (#3600) * perf: doc * feat: chat page config * fix: http parse (#3634) * update doc * fix: http parse * fix code run node reset template (#3633) Co-authored-by: Archer <545436317@qq.com> * docs:faq (#3627) * docs:faq * docsFix * perf: sleep plugin * fix: selector --------- Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com> Co-authored-by: Jiangween <145003935+Jiangween@users.noreply.github.com> Co-authored-by: heheer <heheer@sealos.io>
138 lines
4.2 KiB
TypeScript
138 lines
4.2 KiB
TypeScript
import { NextAPI } from '@/service/middleware/entry';
|
||
import { delay } from '@fastgpt/global/common/system/utils';
|
||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||
import { jiebaSplit } from '@fastgpt/service/common/string/jieba';
|
||
import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTextSchema';
|
||
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||
import { MongoUser } from '@fastgpt/service/support/user/schema';
|
||
import { MongoTeamMember } from '@fastgpt/service/support/user/team/teamMemberSchema';
|
||
import { NextApiRequest, NextApiResponse } from 'next';
|
||
|
||
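/*
  Simple migration: upgrade straight to the latest image (this removes the indexes on MongoDatasetData), then run this script directly.

  Seamless migration:
    1. First run the 4.8.18-tmp version. Both the MongoDatasetData and MongoDatasetDataText collections (and their indexes) exist; MongoDatasetData is still the one in effect, and writes are synced to both collections.
    2. Run the upgrade script. Do not delete the data in MongoDatasetData.
    3. Switch to the official release image so that MongoDatasetDataText takes effect.
    4. Remove the indexes and redundant fields from MongoDatasetData (to be removed in 4819).
    5. Move the avatar field from the User collection to the TeamMember collection.
*/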
|
||
// Running count of processed documents: reset by `handler`, incremented by `restore` and `initData`.
let success = 0;
|
||
/**
 * API entry point for the migration script.
 *
 * Calls `authCert` with `authRoot: true` (presumably restricting the route to
 * the root user — confirm against `authCert`), resets the shared `success`
 * counter, runs `initData` and logs the elapsed time.
 *
 * @param req - Incoming request; `req.body.batchSize` optionally sets how many
 *   documents are processed per batch (defaults to 500).
 * @param res - Unused here; kept so the signature matches what `NextAPI` expects.
 * @returns `{ success: true }` once `initData` has finished.
 */
async function handler(req: NextApiRequest, res: NextApiResponse) {
  await authCert({ req, authRoot: true });

  // Normalise batchSize before it reaches `.limit()`: a missing body, a
  // non-numeric value, zero, a negative number or a fraction all fall back to
  // the default. `initData` retries forever on query errors, so an invalid
  // limit must never get that far.
  const requestedBatchSize = Number(req.body?.batchSize);
  const batchSize =
    Number.isInteger(requestedBatchSize) && requestedBatchSize > 0 ? requestedBatchSize : 500;

  success = 0;

  const start = Date.now();
  await initData(batchSize);
  // await restore();
  console.log('Init data time:', Date.now() - start);

  success = 0;

  // batchUpdateFields();

  return { success: true };
}
|
||
|
||
// Default export for this route: `handler` wrapped with `NextAPI` (imported from '@/service/middleware/entry').
export default NextAPI(handler);
|
||
|
||
/**
 * Re-populates `fullTextToken` on MongoDatasetData documents that lack it.
 *
 * Processes one document at a time: finds a document without `fullTextToken`,
 * sets the field to the result of `jiebaSplit` over the trimmed `q\na` text,
 * saves it and bumps the shared `success` counter. Resolves when no such
 * document remains.
 *
 * Written as a loop rather than self-recursion so that the chain of pending
 * promises does not grow with the number of documents (same approach as
 * `initData`). On error it logs, waits 500 ms and retries, as before.
 */
const restore = async () => {
  while (true) {
    try {
      const data = await MongoDatasetData.findOne({ fullTextToken: { $exists: false } });
      // Nothing left to restore.
      if (!data) return;

      data.fullTextToken = jiebaSplit({ text: `${data.q}\n${data.a}`.trim() });
      await data.save();

      success++;
      console.log('Success:', success);
    } catch (error) {
      // Best-effort: log, back off briefly, then try again on the next iteration.
      console.log(error);
      await delay(500);
    }
  }
};
|
||
|
||
/**
 * Copies full-text tokens from MongoDatasetData into MongoDatasetDataText,
 * one batch at a time, until no document is left without `initFullText`.
 *
 * For each batch it inserts one MongoDatasetDataText row per source document,
 * then flags the source documents with `initFullText: true` and unsets their
 * `fullTextToken`. Duplicate-key errors (code 11000) from the insert are
 * logged and ignored; any other error is logged, followed by a 500 ms pause,
 * and the loop runs again.
 *
 * @param batchSize - Maximum number of documents fetched per iteration.
 */
const initData = async (batchSize: number) => {
  for (;;) {
    try {
      // Fetch the next batch of records that have not been initialised yet.
      const pending = await MongoDatasetData.find(
        { initFullText: { $exists: false } },
        '_id teamId datasetId collectionId fullTextToken'
      )
        .limit(batchSize)
        .lean();

      // Every record has been handled: stop.
      if (pending.length === 0) return;

      const textRows = pending.map(({ _id, teamId, datasetId, collectionId, fullTextToken }) => ({
        teamId,
        datasetId,
        collectionId,
        dataId: _id,
        fullTextToken
      }));

      try {
        await MongoDatasetDataText.insertMany(textRows, { ordered: false, lean: true });
      } catch (insertError: any) {
        // Only duplicate-key failures are tolerated; everything else goes to the outer handler.
        if (insertError.code !== 11000) {
          throw insertError;
        }
        console.log('Duplicate key error');
      }

      // Mark the records of this batch as initialised.
      const handledIds = pending.map(({ _id }) => _id);
      await MongoDatasetData.updateMany(
        { _id: { $in: handledIds } },
        // FullText tmp
        // { $set: { initFullText: true } }
        { $set: { initFullText: true }, $unset: { fullTextToken: 1 } }
      );

      success += pending.length;
      console.log('Success:', success);
    } catch (error: any) {
      // Log, wait briefly, then let the loop retry.
      console.log(error, '===');
      await delay(500);
    }
  }
};
|
||
|
||
// const batchUpdateFields = async (batchSize = 2000) => {
|
||
// // Find documents that still have these fields
|
||
// const documents = await MongoDatasetData.find({ initFullText: { $exists: true } }, '_id')
|
||
// .limit(batchSize)
|
||
// .lean();
|
||
|
||
// if (documents.length === 0) return;
|
||
|
||
// // Update in batches
|
||
// await MongoDatasetData.updateMany(
|
||
// { _id: { $in: documents.map((doc) => doc._id) } },
|
||
// {
|
||
// $unset: {
|
||
// initFullText: 1
|
||
// // fullTextToken: 1
|
||
// }
|
||
// }
|
||
// );
|
||
|
||
// success += documents.length;
|
||
// console.log('Delete success:', success);
|
||
// await batchUpdateFields(batchSize);
|
||
// };
|