From 2bf1fce32a61f28e8b1ae8e3dbfaf837d1f3744e Mon Sep 17 00:00:00 2001 From: Archer <545436317@qq.com> Date: Sun, 5 Jan 2025 14:40:02 +0800 Subject: [PATCH] perf: file encoding;perf: leave team code;@c121914yu perf: full text search code (#3528) * perf: text encoding * perf: leave team code * perf: full text search code * fix: http status * perf: embedding search and vector avatar --- .../zh-cn/docs/development/upgrading/4818.md | 4 +- packages/global/common/file/tools.ts | 17 ++ .../service/common/file/gridfs/controller.ts | 6 +- .../common/middle/reqFrequencyLimit.ts | 1 - packages/service/common/response/index.ts | 3 +- .../core/dataset/data/dataTextSchema.ts | 4 +- packages/service/core/dataset/data/schema.ts | 17 +- .../service/core/dataset/search/controller.ts | 174 ++++++++++++++---- .../permission/memberGroup/controllers.ts | 22 ++- packages/service/support/permission/schema.ts | 1 + packages/service/support/user/schema.ts | 3 - .../support/user/team/teamMemberSchema.ts | 10 +- packages/service/worker/htmlStr2Md/utils.ts | 27 ++- .../worker/readFile/extension/rawText.ts | 6 +- packages/web/i18n/en/account_team.json | 26 +-- packages/web/i18n/zh-CN/account_team.json | 2 +- packages/web/i18n/zh-Hant/account_team.json | 2 +- projects/app/src/pages/account/team/index.tsx | 12 +- projects/app/src/pages/api/admin/initv4818.ts | 94 +++++++++- .../app/src/pages/api/common/file/upload.ts | 1 - .../dataset/collection/create/localFile.ts | 1 - .../src/pages/dataset/list/component/List.tsx | 5 +- .../service/core/dataset/data/controller.ts | 4 +- projects/app/src/web/support/user/team/api.ts | 3 +- 24 files changed, 345 insertions(+), 100 deletions(-) diff --git a/docSite/content/zh-cn/docs/development/upgrading/4818.md b/docSite/content/zh-cn/docs/development/upgrading/4818.md index c0e244562..55dc1ddc2 100644 --- a/docSite/content/zh-cn/docs/development/upgrading/4818.md +++ b/docSite/content/zh-cn/docs/development/upgrading/4818.md @@ -13,4 +13,6 @@ weight: 806 2. 新增 - 支持部门架构权限模式 3. 优化 - 图片上传安全校验。并增加头像图片唯一存储,确保不会累计存储。 4. 优化 - Mongo 全文索引表分离。 -5. 优化 - 知识库检索查询语句合并,同时减少查库数量。 \ No newline at end of file +5. 优化 - 知识库检索查询语句合并,同时减少查库数量。 +6. 优化 - 文件编码检测,减少 CSV 文件乱码概率。 +7. 修复 - HTML 文件上传,base64 图片无法自动转图片链接。 \ No newline at end of file diff --git a/packages/global/common/file/tools.ts b/packages/global/common/file/tools.ts index df0de53c0..4f75f4e5d 100644 --- a/packages/global/common/file/tools.ts +++ b/packages/global/common/file/tools.ts @@ -2,6 +2,7 @@ import { detect } from 'jschardet'; import { documentFileType, imageFileType } from './constants'; import { ChatFileTypeEnum } from '../../core/chat/constants'; import { UserChatItemValueItemType } from '../../core/chat/type'; +import * as fs from 'fs'; export const formatFileSize = (bytes: number): string => { if (bytes === 0) return '0 B'; @@ -16,6 +17,22 @@ export const formatFileSize = (bytes: number): string => { export const detectFileEncoding = (buffer: Buffer) => { return detect(buffer.slice(0, 200))?.encoding?.toLocaleLowerCase(); }; +export const detectFileEncodingByPath = async (path: string) => { + // Get 64KB file head + const MAX_BYTES = 64 * 1024; + const buffer = Buffer.alloc(MAX_BYTES); + + const fd = await fs.promises.open(path, 'r'); + try { + // Read file head + const { bytesRead } = await fd.read(buffer, 0, MAX_BYTES, 0); + const actualBuffer = buffer.slice(0, bytesRead); + + return detect(actualBuffer)?.encoding?.toLocaleLowerCase(); + } finally { + await fd.close(); + } +}; // Url => user upload file type export const parseUrlToFileType = (url: string): UserChatItemValueItemType['file'] | undefined => { diff --git a/packages/service/common/file/gridfs/controller.ts b/packages/service/common/file/gridfs/controller.ts index a5693e39b..af304714c 100644 --- a/packages/service/common/file/gridfs/controller.ts +++ b/packages/service/common/file/gridfs/controller.ts @@ -4,7 +4,7 @@ import fsp from 'fs/promises'; import fs from 'fs'; import { DatasetFileSchema } from '@fastgpt/global/core/dataset/type'; import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema'; -import { detectFileEncoding } from '@fastgpt/global/common/file/tools'; +import { detectFileEncoding, detectFileEncodingByPath } from '@fastgpt/global/common/file/tools'; import { CommonErrEnum } from '@fastgpt/global/common/error/code/common'; import { MongoRawTextBuffer } from '../../buffer/rawText/schema'; import { readRawContentByFileBuffer } from '../read/utils'; @@ -36,7 +36,6 @@ export async function uploadFile({ path, filename, contentType, - encoding, metadata = {} }: { bucketName: `${BucketNameEnum}`; @@ -45,7 +44,6 @@ export async function uploadFile({ path: string; filename: string; contentType?: string; - encoding: string; metadata?: Record; }) { if (!path) return Promise.reject(`filePath is empty`); @@ -59,7 +57,7 @@ export async function uploadFile({ // Add default metadata metadata.teamId = teamId; metadata.uid = uid; - metadata.encoding = encoding; + metadata.encoding = await detectFileEncodingByPath(path); // create a gridfs bucket const bucket = getGridBucket(bucketName); diff --git a/packages/service/common/middle/reqFrequencyLimit.ts b/packages/service/common/middle/reqFrequencyLimit.ts index f894d7416..de7249483 100644 --- a/packages/service/common/middle/reqFrequencyLimit.ts +++ b/packages/service/common/middle/reqFrequencyLimit.ts @@ -22,7 +22,6 @@ export function useReqFrequencyLimit(seconds: number, limit: number, force = fal expiredTime: addSeconds(new Date(), seconds) }); } catch (_) { - res.status(429); jsonRes(res, { code: 429, error: ERROR_ENUM.tooManyRequest diff --git a/packages/service/common/response/index.ts b/packages/service/common/response/index.ts index ee2d93d67..a6de2e38a 100644 --- a/packages/service/common/response/index.ts +++ b/packages/service/common/response/index.ts @@ -33,8 +33,7 @@ export const jsonRes = ( addLog.error(`Api response error: ${url}`, ERROR_RESPONSE[errResponseKey]); - res.status(ERROR_RESPONSE[errResponseKey].code); - return res.json(ERROR_RESPONSE[errResponseKey]); + return res.status(code).json(ERROR_RESPONSE[errResponseKey]); } // another error diff --git a/packages/service/core/dataset/data/dataTextSchema.ts b/packages/service/core/dataset/data/dataTextSchema.ts index 5332309d8..c7384392a 100644 --- a/packages/service/core/dataset/data/dataTextSchema.ts +++ b/packages/service/core/dataset/data/dataTextSchema.ts @@ -25,7 +25,7 @@ const DatasetDataTextSchema = new Schema({ required: true }, dataId: { - type: String, + type: Schema.Types.ObjectId, ref: DatasetDataCollectionName, required: true }, @@ -37,7 +37,7 @@ const DatasetDataTextSchema = new Schema({ try { DatasetDataTextSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' }); - DatasetDataTextSchema.index({ dataId: 'hashed' }); + DatasetDataTextSchema.index({ dataId: 1 }, { unique: true }); } catch (error) { console.log(error); } diff --git a/packages/service/core/dataset/data/schema.ts b/packages/service/core/dataset/data/schema.ts index 546b481e3..c21cf479f 100644 --- a/packages/service/core/dataset/data/schema.ts +++ b/packages/service/core/dataset/data/schema.ts @@ -39,10 +39,6 @@ const DatasetDataSchema = new Schema({ type: String, default: '' }, - fullTextToken: { - type: String, - default: '' - }, indexes: { type: [ { @@ -72,7 +68,13 @@ const DatasetDataSchema = new Schema({ default: 0 }, rebuilding: Boolean, - inited: Boolean + + // Abandon + fullTextToken: { + type: String, + default: '' + }, + initFullText: Boolean }); try { @@ -85,13 +87,14 @@ try { updateTime: -1 }); // full text index - DatasetDataSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' }); + // DatasetDataSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' }); // Recall vectors after data matching DatasetDataSchema.index({ teamId: 1, datasetId: 1, collectionId: 1, 'indexes.dataId': 1 }); DatasetDataSchema.index({ updateTime: 1 }); // rebuild data DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 }); - DatasetDataSchema.index({ inited: 'hashed' }); + + DatasetDataSchema.index({ initFullText: 1 }); } catch (error) { console.log(error); } diff --git a/packages/service/core/dataset/search/controller.ts b/packages/service/core/dataset/search/controller.ts index d6ec87464..45c5889bd 100644 --- a/packages/service/core/dataset/search/controller.ts +++ b/packages/service/core/dataset/search/controller.ts @@ -289,20 +289,22 @@ export async function searchDatasetData(props: SearchDatasetDataProps) { ).lean() ]); - const formatResult = dataList - .map((data, index) => { - const collection = collections.find((col) => String(col._id) === String(data.collectionId)); + const formatResult = results + .map((item, index) => { + const collection = collections.find((col) => String(col._id) === String(item.collectionId)); if (!collection) { - console.log('Collection is not found', data); + console.log('Collection is not found', item); + return; + } + const data = dataList.find((data) => + data.indexes.some((index) => index.dataId === item.id) + ); + if (!data) { + console.log('Data is not found', item); return; } - // add score to data(It's already sorted. The first one is the one with the most points) - const dataIdList = data.indexes.map((item) => item.dataId); - const maxScoreResult = results.find((item) => { - return dataIdList.includes(item.id); - }); - const score = maxScoreResult?.score || 0; + const score = item?.score || 0; const result: SearchDataResponseItemType = { id: String(data._id), @@ -320,8 +322,6 @@ export async function searchDatasetData(props: SearchDatasetDataProps) { }) .filter(Boolean) as SearchDataResponseItemType[]; - formatResult.sort((a, b) => b.score[0].value - a.score[0].value); - return { embeddingRecallResults: formatResult, tokens @@ -411,22 +411,6 @@ export async function searchDatasetData(props: SearchDatasetDataProps) { '_id name fileId rawLink externalFileId externalFileUrl', { ...readFromSecondary } ).lean(); - // const [dataList, collections] = await Promise.all([ - // MongoDatasetData.find( - // { - // _id: { $in: searchResults.map((item) => item.dataId) } - // }, - // '_id datasetId collectionId updateTime q a chunkIndex indexes', - // { ...readFromSecondary } - // ).lean(), - // MongoDatasetCollection.find( - // { - // _id: { $in: searchResults.map((item) => item.collectionId) } - // }, - // '_id name fileId rawLink externalFileId externalFileUrl', - // { ...readFromSecondary } - // ).lean() - // ]); return { fullTextRecallResults: searchResults @@ -439,9 +423,6 @@ export async function searchDatasetData(props: SearchDatasetDataProps) { return; } - // const score = - // searchResults.find((item) => String(item.dataId) === String(data._id))?.score || 0; - return { id: String(data._id), datasetId: String(data.datasetId), @@ -459,6 +440,135 @@ export async function searchDatasetData(props: SearchDatasetDataProps) { tokenLen: 0 }; }; + const fullTextRecall2 = async ({ + query, + limit, + filterCollectionIdList, + forbidCollectionIdList + }: { + query: string; + limit: number; + filterCollectionIdList?: string[]; + forbidCollectionIdList: string[]; + }): Promise<{ + fullTextRecallResults: SearchDataResponseItemType[]; + tokenLen: number; + }> => { + if (limit === 0) { + return { + fullTextRecallResults: [], + tokenLen: 0 + }; + } + + const searchResults = ( + await Promise.all( + datasetIds.map(async (id) => { + return MongoDatasetDataText.aggregate( + [ + { + $match: { + teamId: new Types.ObjectId(teamId), + datasetId: new Types.ObjectId(id), + $text: { $search: jiebaSplit({ text: query }) }, + ...(filterCollectionIdList + ? { + collectionId: { + $in: filterCollectionIdList.map((id) => new Types.ObjectId(id)) + } + } + : {}), + ...(forbidCollectionIdList && forbidCollectionIdList.length > 0 + ? { + collectionId: { + $nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id)) + } + } + : {}) + } + }, + { + $sort: { + score: { $meta: 'textScore' } + } + }, + { + $limit: limit + }, + { + $project: { + _id: 1, + collectionId: 1, + dataId: 1, + score: { $meta: 'textScore' } + } + } + ], + { + ...readFromSecondary + } + ); + }) + ) + ).flat() as (DatasetDataTextSchemaType & { score: number })[]; + + // Get data and collections + const [dataList, collections] = await Promise.all([ + MongoDatasetData.find( + { + _id: { $in: searchResults.map((item) => item.dataId) } + }, + '_id datasetId collectionId updateTime q a chunkIndex indexes', + { ...readFromSecondary } + ).lean(), + MongoDatasetCollection.find( + { + _id: { $in: searchResults.map((item) => item.collectionId) } + }, + '_id name fileId rawLink externalFileId externalFileUrl', + { ...readFromSecondary } + ).lean() + ]); + + return { + fullTextRecallResults: searchResults + .map((item, index) => { + const collection = collections.find( + (col) => String(col._id) === String(item.collectionId) + ); + if (!collection) { + console.log('Collection is not found', item); + return; + } + const data = dataList.find((data) => String(data._id) === String(item.dataId)); + if (!data) { + console.log('Data is not found', item); + return; + } + + return { + id: String(data._id), + datasetId: String(data.datasetId), + collectionId: String(data.collectionId), + updateTime: data.updateTime, + q: data.q, + a: data.a, + chunkIndex: data.chunkIndex, + indexes: data.indexes, + ...getCollectionSourceData(collection), + score: [ + { + type: SearchScoreTypeEnum.fullText, + value: item.score || 0, + index + } + ] + }; + }) + .filter(Boolean) as SearchDataResponseItemType[], + tokenLen: 0 + }; + }; const reRankSearchResult = async ({ data, query @@ -526,7 +636,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) { forbidCollectionIdList, filterCollectionIdList }), - fullTextRecall({ + fullTextRecall2({ query, limit: fullTextLimit, filterCollectionIdList, diff --git a/packages/service/support/permission/memberGroup/controllers.ts b/packages/service/support/permission/memberGroup/controllers.ts index fe7b59df4..c34d2b4b3 100644 --- a/packages/service/support/permission/memberGroup/controllers.ts +++ b/packages/service/support/permission/memberGroup/controllers.ts @@ -47,26 +47,32 @@ export const getTeamDefaultGroup = async ({ export const getGroupsByTmbId = async ({ tmbId, teamId, - role + role, + session }: { tmbId: string; teamId: string; role?: `${GroupMemberRole}`[]; + session?: ClientSession; }) => ( await Promise.all([ ( - await MongoGroupMemberModel.find({ - tmbId, - groupId: { - $exists: true + await MongoGroupMemberModel.find( + { + tmbId, + groupId: { + $exists: true + }, + ...(role ? { role: { $in: role } } : {}) }, - ...(role ? { role: { $in: role } } : {}) - }) + undefined, + { session } + ) .populate<{ group: MemberGroupSchemaType }>('group') .lean() ).map((item) => item.group), - role ? [] : getTeamDefaultGroup({ teamId }) + role ? [] : getTeamDefaultGroup({ teamId, session }) ]) ).flat(); diff --git a/packages/service/support/permission/schema.ts b/packages/service/support/permission/schema.ts index cca7b7c79..10b4a0cdb 100644 --- a/packages/service/support/permission/schema.ts +++ b/packages/service/support/permission/schema.ts @@ -115,6 +115,7 @@ try { } ); + // Delete tmb permission ResourcePermissionSchema.index({ resourceType: 1, teamId: 1, diff --git a/packages/service/support/user/schema.ts b/packages/service/support/user/schema.ts index c54245c9b..ee0763f60 100644 --- a/packages/service/support/user/schema.ts +++ b/packages/service/support/user/schema.ts @@ -78,9 +78,6 @@ const UserSchema = new Schema({ }); try { - // login - UserSchema.index({ username: 1 }); - // Admin charts UserSchema.index({ createTime: -1 }); } catch (error) { diff --git a/packages/service/support/user/team/teamMemberSchema.ts b/packages/service/support/user/team/teamMemberSchema.ts index c88eeb2a1..117b8e6fe 100644 --- a/packages/service/support/user/team/teamMemberSchema.ts +++ b/packages/service/support/user/team/teamMemberSchema.ts @@ -23,10 +23,6 @@ const TeamMemberSchema = new Schema({ type: String, default: 'Member' }, - role: { - type: String - // enum: Object.keys(TeamMemberRoleMap) // disable enum validation for old data - }, status: { type: String, enum: Object.keys(TeamMemberStatusMap) @@ -38,6 +34,12 @@ const TeamMemberSchema = new Schema({ defaultTeam: { type: Boolean, default: false + }, + + // Abandoned + role: { + type: String + // enum: Object.keys(TeamMemberRoleMap) // disable enum validation for old data } }); diff --git a/packages/service/worker/htmlStr2Md/utils.ts b/packages/service/worker/htmlStr2Md/utils.ts index 38112b92d..8384d005a 100644 --- a/packages/service/worker/htmlStr2Md/utils.ts +++ b/packages/service/worker/htmlStr2Md/utils.ts @@ -1,9 +1,27 @@ import TurndownService from 'turndown'; import { ImageType } from '../readFile/type'; import { matchMdImgTextAndUpload } from '@fastgpt/global/common/string/markdown'; +import { getNanoid } from '@fastgpt/global/common/string/tools'; // @ts-ignore const turndownPluginGfm = require('joplin-turndown-plugin-gfm'); +const processBase64Images = (htmlContent: string) => { + const base64Regex = /src="data:([^;]+);base64,([^"]+)"/g; + const images: ImageType[] = []; + + const processedHtml = htmlContent.replace(base64Regex, (match, mime, base64Data) => { + const uuid = `IMAGE_${getNanoid(12)}_IMAGE`; + images.push({ + uuid, + base64: base64Data, + mime + }); + return `src="${uuid}"`; + }); + + return { processedHtml, images }; +}; + export const html2md = ( html: string ): { @@ -25,11 +43,14 @@ export const html2md = ( turndownService.remove(['i', 'script', 'iframe', 'style']); turndownService.use(turndownPluginGfm.gfm); - const { text, imageList } = matchMdImgTextAndUpload(html); + // Base64 img to id, otherwise it will occupy memory when going to md + const { processedHtml, images } = processBase64Images(html); + const md = turndownService.turndown(processedHtml); + const { text, imageList } = matchMdImgTextAndUpload(md); return { - rawText: turndownService.turndown(text), - imageList + rawText: text, + imageList: [...images, ...imageList] }; } catch (error) { console.log('html 2 markdown error', error); diff --git a/packages/service/worker/readFile/extension/rawText.ts b/packages/service/worker/readFile/extension/rawText.ts index 15e0bed83..0f303f745 100644 --- a/packages/service/worker/readFile/extension/rawText.ts +++ b/packages/service/worker/readFile/extension/rawText.ts @@ -24,7 +24,11 @@ export const readFileRawText = ({ buffer, encoding }: ReadRawTextByBuffer): Read return buffer.toString(encoding as BufferEncoding); } - return iconv.decode(buffer, encoding); + if (encoding) { + return iconv.decode(buffer, encoding); + } + + return buffer.toString('utf-8'); } catch (error) { return buffer.toString('utf-8'); } diff --git a/packages/web/i18n/en/account_team.json b/packages/web/i18n/en/account_team.json index 65976bba3..8641c3c8f 100644 --- a/packages/web/i18n/en/account_team.json +++ b/packages/web/i18n/en/account_team.json @@ -1,31 +1,31 @@ { "action": "operate", "confirm_delete_group": "Confirm to delete group?", - "confirm_leave_team": "Confirmed to leave the team? \n \nAfter you log out, all your resources in the team (applications, knowledge bases, folders, managed groups, etc.) will be transferred to the team owner.", - "confirm_delete_org": "Confirm to delete organization?", "confirm_delete_member": "Confirm to delete member?", + "confirm_delete_org": "Confirm to delete organization?", + "confirm_leave_team": "Confirmed to leave the team? \nAfter exiting, all your resources in the team are transferred to the team owner.", "create_group": "Create group", - "delete": "delete", - "edit_info": "Edit information", - "group": "group", - "group_name": "Group name", - "org": "organization", - "org_name": "Organization name", - "org_description": "Organization description", "create_org": "Create organization", "create_sub_org": "Create sub-organization", - "edit_org_info": "Edit organization information", - "move_org": "Move organization", - "move_member": "Move member", + "delete": "delete", "delete_org": "Delete organization", - "remark": "remark", + "edit_info": "Edit information", + "edit_org_info": "Edit organization information", + "group": "group", + "group_name": "Group name", "label_sync": "Tag sync", "leave_team_failed": "Leaving the team exception", "manage_member": "Managing members", "member": "member", "member_group": "Belonging to member group", + "move_member": "Move member", + "move_org": "Move organization", + "org": "organization", + "org_description": "Organization description", + "org_name": "Organization name", "owner": "owner", "permission": "Permissions", + "remark": "remark", "remove_tip": "Confirm to remove {{username}} from the team?", "retain_admin_permissions": "Keep administrator rights", "search_member_group_name": "Search member/group name", diff --git a/packages/web/i18n/zh-CN/account_team.json b/packages/web/i18n/zh-CN/account_team.json index f9e67d3fe..e562ef7c9 100644 --- a/packages/web/i18n/zh-CN/account_team.json +++ b/packages/web/i18n/zh-CN/account_team.json @@ -3,7 +3,7 @@ "confirm_delete_group": "确认删除群组?", "confirm_delete_member": "确认删除成员?", "confirm_delete_org": "确认删除该部门?", - "confirm_leave_team": "确认离开该团队? \n 退出后,您在该团队所有的资源( 应用、知识库、文件夹、管理的群组等)均转让给团队所有者。", + "confirm_leave_team": "确认离开该团队? \n退出后,您在该团队所有的资源均转让给团队所有者。", "create_group": "创建群组", "create_org": "创建部门", "create_sub_org": "创建子部门", diff --git a/packages/web/i18n/zh-Hant/account_team.json b/packages/web/i18n/zh-Hant/account_team.json index 571d1c767..94a21818b 100644 --- a/packages/web/i18n/zh-Hant/account_team.json +++ b/packages/web/i18n/zh-Hant/account_team.json @@ -3,7 +3,7 @@ "confirm_delete_group": "確認刪除群組?", "confirm_delete_member": "確認刪除成員?", "confirm_delete_org": "確認刪除該部門?", - "confirm_leave_team": "確認離開該團隊? \n \n退出後,您在該團隊所有的資源( 應用程式、知識庫、資料夾、管理的群組等)均轉讓給團隊所有者。", + "confirm_leave_team": "確認離開該團隊? \n退出後,您在該團隊所有的資源轉讓給團隊所有者。", "create_group": "建立群組", "create_org": "創建部門", "create_sub_org": "創建子部門", diff --git a/projects/app/src/pages/account/team/index.tsx b/projects/app/src/pages/account/team/index.tsx index 0beb7c6eb..702183f0e 100644 --- a/projects/app/src/pages/account/team/index.tsx +++ b/projects/app/src/pages/account/team/index.tsx @@ -76,14 +76,13 @@ const Team = () => { onClose: onCloseManageGroupMember } = useDisclosure(); - const { runAsync: onLeaveTeam, loading: isLoadingLeaveTeam } = useRequest2( - async (teamId?: string) => { - if (!teamId) return; + const { runAsync: onLeaveTeam } = useRequest2( + async () => { const defaultTeam = myTeams.find((item) => item.defaultTeam) || myTeams[0]; // change to personal team // get members onSwitchTeam(defaultTeam.teamId); - return delLeaveTeam(teamId); + return delLeaveTeam(); }, { onSuccess() { @@ -242,10 +241,7 @@ const Team = () => { borderRadius={'md'} ml={3} leftIcon={} - isLoading={isLoadingLeaveTeam} - onClick={() => { - openLeaveConfirm(() => onLeaveTeam(userInfo?.team?.teamId))(); - }} + onClick={() => openLeaveConfirm(onLeaveTeam)()} > {t('account_team:user_team_leave_team')} diff --git a/projects/app/src/pages/api/admin/initv4818.ts b/projects/app/src/pages/api/admin/initv4818.ts index a4e14f85a..0374d93c2 100644 --- a/projects/app/src/pages/api/admin/initv4818.ts +++ b/projects/app/src/pages/api/admin/initv4818.ts @@ -1,14 +1,104 @@ import { NextAPI } from '@/service/middleware/entry'; +import { delay } from '@fastgpt/global/common/system/utils'; import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun'; +import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTextSchema'; +import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema'; import { authCert } from '@fastgpt/service/support/permission/auth/common'; -import { MongoUser } from '@fastgpt/service/support/user/schema'; -import { MongoTeam } from '@fastgpt/service/support/user/team/teamSchema'; import { NextApiRequest, NextApiResponse } from 'next'; +/* + 简单版迁移:直接升级到最新镜像,会去除 MongoDatasetData 里的索引。直接执行这个脚本。 + 无缝迁移: + 1. 先用 4.8.18-tmp 版本,会同时有 MongoDatasetData 和 MongoDatasetDataText 两个表和索引,依然是 MongoDatasetData 生效。会同步更新两张表数据。 + 2. 执行升级脚本,不要删除 MongoDatasetData 里的数据。 + 3. 切换正式版镜像,让 MongoDatasetDataText 生效。 + 4. 删除 MongoDatasetData 里的索引和多余字段。 +*/ +let success = 0; async function handler(req: NextApiRequest, res: NextApiResponse) { await authCert({ req, authRoot: true }); + const batchSize = req.body.batchSize || 500; + success = 0; + + const start = Date.now(); + await initData(batchSize); + console.log('Init data time:', Date.now() - start); + + success = 0; + await batchUpdateFields(); + return { success: true }; } export default NextAPI(handler); + +const initData = async (batchSize: number) => { + try { + // 找到没有初始化的数据 + const dataList = await MongoDatasetData.find( + { + initFullText: { $exists: false } + }, + '_id teamId datasetId collectionId fullTextToken' + ) + .limit(batchSize) + .lean(); + + if (dataList.length === 0) return; + + await mongoSessionRun(async (session) => { + // 插入新数据 + const result = await MongoDatasetDataText.insertMany( + dataList.map((item) => ({ + teamId: item.teamId, + datasetId: item.datasetId, + collectionId: item.collectionId, + dataId: item._id, + fullTextToken: item.fullTextToken + })), + { ordered: false, session, lean: true } + ); + // 把成功插入的新数据的 dataId 更新为已初始化 + await MongoDatasetData.updateMany( + { _id: { $in: result.map((item) => item.dataId) } }, + { $set: { initFullText: true }, $unset: { fullTextToken: 1 } }, + { session } + ); + + success += result.length; + + console.log('Success:', success); + }); + + await initData(batchSize); + } catch (error) { + console.log(error, '---'); + await delay(500); + await initData(batchSize); + } +}; + +const batchUpdateFields = async (batchSize = 2000) => { + // Find documents that still have these fields + const documents = await MongoDatasetData.find({ initFullText: { $exists: true } }, '_id') + .limit(batchSize) + .lean(); + + if (documents.length === 0) return; + + // Update in batches + await MongoDatasetData.updateMany( + { _id: { $in: documents.map((doc) => doc._id) } }, + { + $unset: { + initFullText: 1 + // fullTextToken: 1 + } + } + ); + + success += documents.length; + console.log('Delete success:', success); + await batchUpdateFields(batchSize); +}; diff --git a/projects/app/src/pages/api/common/file/upload.ts b/projects/app/src/pages/api/common/file/upload.ts index 7d29030d4..6d934d247 100644 --- a/projects/app/src/pages/api/common/file/upload.ts +++ b/projects/app/src/pages/api/common/file/upload.ts @@ -89,7 +89,6 @@ async function handler(req: NextApiRequest, res: NextApiResponse) { path: file.path, filename: file.originalname, contentType: file.mimetype, - encoding: file.encoding, metadata: metadata }); diff --git a/projects/app/src/pages/api/core/dataset/collection/create/localFile.ts b/projects/app/src/pages/api/core/dataset/collection/create/localFile.ts index 7b0f7daab..6f3876c2f 100644 --- a/projects/app/src/pages/api/core/dataset/collection/create/localFile.ts +++ b/projects/app/src/pages/api/core/dataset/collection/create/localFile.ts @@ -64,7 +64,6 @@ async function handler(req: NextApiRequest, res: NextApiResponse): CreateCo path: file.path, filename: file.originalname, contentType: file.mimetype, - encoding: file.encoding, metadata: fileMetadata }); diff --git a/projects/app/src/pages/dataset/list/component/List.tsx b/projects/app/src/pages/dataset/list/component/List.tsx index c2e5d5b73..4700ef19c 100644 --- a/projects/app/src/pages/dataset/list/component/List.tsx +++ b/projects/app/src/pages/dataset/list/component/List.tsx @@ -31,6 +31,7 @@ import { useTranslation } from 'next-i18next'; import { useUserStore } from '@/web/support/user/useUserStore'; import { useSystem } from '@fastgpt/web/hooks/useSystem'; import SideTag from './SideTag'; +import { getModelProvider } from '@fastgpt/global/core/ai/provider'; const EditResourceModal = dynamic(() => import('@/components/common/Modal/EditResourceModal')); @@ -156,6 +157,8 @@ function List() { > {formatDatasets.map((dataset, index) => { const owner = members.find((v) => v.tmbId === dataset.tmbId); + const vectorModelAvatar = getModelProvider(dataset.vectorModel.provider)?.avatar; + return ( {isPc && dataset.type !== DatasetTypeEnum.folder && ( - + {dataset.vectorModel.name} diff --git a/projects/app/src/service/core/dataset/data/controller.ts b/projects/app/src/service/core/dataset/data/controller.ts index 24c6b5926..abedf5817 100644 --- a/projects/app/src/service/core/dataset/data/controller.ts +++ b/projects/app/src/service/core/dataset/data/controller.ts @@ -89,7 +89,7 @@ export async function insertData2Dataset({ collectionId, q, a, - fullTextToken: jiebaSplit({ text: qaStr }), + // fullTextToken: jiebaSplit({ text: qaStr }), chunkIndex, indexes: indexes?.map((item, i) => ({ ...item, @@ -241,7 +241,7 @@ export async function updateData2Dataset({ // update mongo other data mongoData.q = q || mongoData.q; mongoData.a = a ?? mongoData.a; - mongoData.fullTextToken = jiebaSplit({ text: `${mongoData.q}\n${mongoData.a}`.trim() }); + // mongoData.fullTextToken = jiebaSplit({ text: `${mongoData.q}\n${mongoData.a}`.trim() }); // @ts-ignore mongoData.indexes = newIndexes; await mongoData.save({ session }); diff --git a/projects/app/src/web/support/user/team/api.ts b/projects/app/src/web/support/user/team/api.ts index 0ce236f8b..d5b3410f1 100644 --- a/projects/app/src/web/support/user/team/api.ts +++ b/projects/app/src/web/support/user/team/api.ts @@ -37,8 +37,7 @@ export const delRemoveMember = (tmbId: string) => DELETE(`/proApi/support/user/team/member/delete`, { tmbId }); export const updateInviteResult = (data: UpdateInviteProps) => PUT('/proApi/support/user/team/member/updateInvite', data); -export const delLeaveTeam = (teamId: string) => - DELETE('/proApi/support/user/team/member/leave', { teamId }); +export const delLeaveTeam = () => DELETE('/proApi/support/user/team/member/leave'); export const getTeamClbs = () => GET(`/proApi/support/user/team/collaborator/list`);