feat: model config required check;feat: dataset text model default setting (#3866)

* feat: model config required check

* feat: dataset text model default setting

* perf: collection list count

* fix: ts

* remove index count
This commit is contained in:
Archer
2025-02-24 19:55:49 +08:00
committed by GitHub
parent 3bfe802c48
commit 255764400f
32 changed files with 356 additions and 192 deletions

View File

@@ -17,6 +17,8 @@ type BaseModelItemType = {
isActive?: boolean;
isCustom?: boolean;
isDefault?: boolean;
isDefaultDatasetTextModel?: boolean;
isDefaultDatasetImageModel?: boolean;
// If has requestUrl, it will request the model directly
requestUrl?: string;

View File

@@ -192,6 +192,7 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & {
sourceId?: string;
file?: DatasetFileSchema;
permission: DatasetPermission;
indexAmount: number;
};
/* ================= data ===================== */

View File

@@ -1 +1,4 @@
export const FastGPTProUrl = process.env.PRO_URL ? `${process.env.PRO_URL}/api` : '';
export const isFastGPTMainService = !!process.env.PRO_URL;
// @ts-ignore
export const isFastGPTProService = () => !!global.systemConfig;

View File

@@ -21,6 +21,7 @@ export const recallFromVectorStore = Vector.embRecall;
export const getVectorDataByTime = Vector.getVectorDataByTime;
export const getVectorCountByTeamId = Vector.getVectorCountByTeamId;
export const getVectorCountByDatasetId = Vector.getVectorCountByDatasetId;
export const getVectorCountByCollectionId = Vector.getVectorCountByCollectionId;
export const insertDatasetDataVector = async ({
model,

View File

@@ -321,6 +321,23 @@ export class MilvusCtrl {
return total;
};
getVectorCountByCollectionId = async (
teamId: string,
datasetId: string,
collectionId: string
) => {
const client = await this.getClient();
const result = await client.query({
collection_name: DatasetVectorTableName,
output_fields: ['count(*)'],
filter: `(teamId == "${String(teamId)}") and (datasetId == "${String(datasetId)}") and (collectionId == "${String(collectionId)}")`
});
const total = result.data?.[0]?.['count(*)'] as number;
return total;
};
getVectorDataByTime = async (start: Date, end: Date) => {
const client = await this.getClient();

View File

@@ -240,6 +240,23 @@ export class PgVectorCtrl {
where: [['team_id', String(teamId)], 'and', ['dataset_id', String(datasetId)]]
});
return total;
};
getVectorCountByCollectionId = async (
teamId: string,
datasetId: string,
collectionId: string
) => {
const total = await PgClient.count(DatasetVectorTableName, {
where: [
['team_id', String(teamId)],
'and',
['dataset_id', String(datasetId)],
'and',
['collection_id', String(collectionId)]
]
});
return total;
};
}

View File

@@ -52,6 +52,12 @@ export const loadSystemModels = async (init = false) => {
if (model.isDefault) {
global.systemDefaultModel.llm = model;
}
if (model.isDefaultDatasetTextModel) {
global.systemDefaultModel.datasetTextLLM = model;
}
if (model.isDefaultDatasetImageModel) {
global.systemDefaultModel.datasetImageLLM = model;
}
} else if (model.type === ModelTypeEnum.embedding) {
global.embeddingModelMap.set(model.model, model);
global.embeddingModelMap.set(model.name, model);
@@ -134,6 +140,16 @@ export const loadSystemModels = async (init = false) => {
if (!global.systemDefaultModel.llm) {
global.systemDefaultModel.llm = Array.from(global.llmModelMap.values())[0];
}
if (!global.systemDefaultModel.datasetTextLLM) {
global.systemDefaultModel.datasetTextLLM = Array.from(global.llmModelMap.values()).find(
(item) => item.datasetProcess
);
}
if (!global.systemDefaultModel.datasetImageLLM) {
global.systemDefaultModel.datasetImageLLM = Array.from(global.llmModelMap.values()).find(
(item) => item.vision
);
}
if (!global.systemDefaultModel.embedding) {
global.systemDefaultModel.embedding = Array.from(global.embeddingModelMap.values())[0];
}

View File

@@ -22,6 +22,9 @@ export type SystemModelItemType =
export type SystemDefaultModelType = {
[ModelTypeEnum.llm]?: LLMModelItemType;
datasetTextLLM?: LLMModelItemType;
datasetImageLLM?: LLMModelItemType;
[ModelTypeEnum.embedding]?: EmbeddingModelItemType;
[ModelTypeEnum.tts]?: TTSModelType;
[ModelTypeEnum.stt]?: STTModelType;

View File

@@ -201,61 +201,6 @@ export async function searchDatasetData(
};
};
async function getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds
}: {
teamId: string;
datasetIds: string[];
parentCollectionIds: string[];
}): Promise<string[]> {
if (!parentCollectionIds.length) {
return [];
}
const collections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
_id: { $in: parentCollectionIds }
},
'_id type',
{
...readFromSecondary
}
).lean();
const resultIds = new Set(collections.map((item) => String(item._id)));
const folderIds = collections
.filter((item) => item.type === 'folder')
.map((item) => String(item._id));
// Get all child collection ids
if (folderIds.length) {
const childCollections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
parentId: { $in: folderIds }
},
'_id',
{
...readFromSecondary
}
).lean();
const childIds = await getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds: childCollections.map((item) => String(item._id))
});
childIds.forEach((id) => resultIds.add(id));
}
return Array.from(resultIds);
}
/*
Collection metadata filter
标签过滤:
@@ -263,6 +208,63 @@ export async function searchDatasetData(
2. and 标签和 null 不能共存,否则返回空数组
*/
const filterCollectionByMetadata = async (): Promise<string[] | undefined> => {
const getAllCollectionIds = async ({
parentCollectionIds
}: {
parentCollectionIds?: string[];
}): Promise<string[] | undefined> => {
if (!parentCollectionIds) return;
if (parentCollectionIds.length === 0) {
return [];
}
const collections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
_id: { $in: parentCollectionIds }
},
'_id type',
{
...readFromSecondary
}
).lean();
const resultIds = new Set<string>();
collections.forEach((item) => {
if (item.type !== 'folder') {
resultIds.add(String(item._id));
}
});
const folderIds = collections
.filter((item) => item.type === 'folder')
.map((item) => String(item._id));
// Get all child collection ids
if (folderIds.length) {
const childCollections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
parentId: { $in: folderIds }
},
'_id type',
{
...readFromSecondary
}
).lean();
const childIds = await getAllCollectionIds({
parentCollectionIds: childCollections.map((item) => String(item._id))
});
childIds?.forEach((id) => resultIds.add(id));
}
return Array.from(resultIds);
};
if (!collectionFilterMatch || !global.feConfigs.isPlus) return;
let tagCollectionIdList: string[] | undefined = undefined;
@@ -382,7 +384,7 @@ export async function searchDatasetData(
}
// Concat tag and time
const finalIds = (() => {
const collectionIds = (() => {
if (tagCollectionIdList && createTimeCollectionIdList) {
return tagCollectionIdList.filter((id) =>
(createTimeCollectionIdList as string[]).includes(id)
@@ -392,13 +394,9 @@ export async function searchDatasetData(
return tagCollectionIdList || createTimeCollectionIdList;
})();
return finalIds
? await getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds: finalIds
})
: undefined;
return await getAllCollectionIds({
parentCollectionIds: collectionIds
});
} catch (error) {}
};
const embeddingRecall = async ({

View File

@@ -8,12 +8,12 @@ import { i18nT } from '../../../../web/i18n/utils';
import { pushConcatBillTask, pushReduceTeamAiPointsTask } from './utils';
import { POST } from '../../../common/api/plusRequest';
import { FastGPTProUrl } from '../../../common/system/constants';
import { isFastGPTMainService } from '../../../common/system/constants';
export async function createUsage(data: CreateUsageProps) {
try {
// In FastGPT server
if (FastGPTProUrl) {
if (isFastGPTMainService) {
await POST('/support/wallet/usage/createUsage', data);
} else if (global.reduceAiPointsQueue) {
// In FastGPT pro server
@@ -31,7 +31,7 @@ export async function createUsage(data: CreateUsageProps) {
export async function concatUsage(data: ConcatUsageProps) {
try {
// In FastGPT server
if (FastGPTProUrl) {
if (isFastGPTMainService) {
await POST('/support/wallet/usage/concatUsage', data);
} else if (global.reduceAiPointsQueue) {
const {

View File

@@ -547,7 +547,6 @@
"core.dataset.data.Main Content": "Main Content",
"core.dataset.data.Search data placeholder": "Search Related Data",
"core.dataset.data.Too Long": "Total Length Exceeded",
"core.dataset.data.Total Amount": "{{total}} Groups",
"core.dataset.data.group": "Group",
"core.dataset.data.unit": "Items",
"core.dataset.embedding model tip": "The index model can convert natural language into vectors for semantic search.\nNote that different index models cannot be used together. Once an index model is selected, it cannot be changed.",
@@ -860,7 +859,6 @@
"dataset.collections.Collection Embedding": "{{total}} Indexes",
"dataset.collections.Confirm to delete the folder": "Confirm to Delete This Folder and All Its Contents?",
"dataset.collections.Create And Import": "Create/Import",
"dataset.collections.Data Amount": "Total Data",
"dataset.collections.Select Collection": "Select File",
"dataset.collections.Select One Collection To Store": "Select a File to Store",
"dataset.data.Can not edit": "No Edit Permission",
@@ -876,6 +874,7 @@
"dataset.dataset_name": "Dataset Name",
"dataset.deleteFolderTips": "Confirm to Delete This Folder and All Its Contained Datasets? Data Cannot Be Recovered After Deletion, Please Confirm!",
"dataset.test.noResult": "No Search Results",
"dataset_text_model_tip": "Used for text processing in the knowledge base preprocessing stage, such as automatic supplementary indexing, Q&A pair extraction.",
"deep_rag_search": "In-depth search",
"delete_api": "Are you sure you want to delete this API key? \nAfter deletion, the key will become invalid immediately and the corresponding conversation log will not be deleted. Please confirm!",
"embedding_model_not_config": "No index model is detected",

View File

@@ -7,6 +7,7 @@
"close_auto_sync": "Are you sure you want to turn off automatic sync?",
"collection.Create update time": "Creation/Update Time",
"collection.Training type": "Training",
"collection_data_count": "Data amount",
"collection_not_support_retraining": "This collection type does not support retuning parameters",
"collection_not_support_sync": "This collection does not support synchronization",
"collection_sync": "Sync data",
@@ -20,6 +21,7 @@
"custom_data_process_params": "Custom",
"custom_data_process_params_desc": "Customize data processing rules",
"data.ideal_chunk_length": "ideal block length",
"data_amount": "{{dataAmount}} Datas, {{indexAmount}} Indexes",
"data_process_params": "Params",
"data_process_setting": "Processing config",
"dataset.Unsupported operation": "dataset.Unsupported operation",

View File

@@ -550,7 +550,6 @@
"core.dataset.data.Main Content": "主要内容",
"core.dataset.data.Search data placeholder": "搜索相关数据",
"core.dataset.data.Too Long": "总长度超长了",
"core.dataset.data.Total Amount": "{{total}} 组",
"core.dataset.data.group": "组",
"core.dataset.data.unit": "条",
"core.dataset.embedding model tip": "索引模型可以将自然语言转成向量,用于进行语义检索。\n注意不同索引模型无法一起使用选择完索引模型后将无法修改。",
@@ -863,7 +862,6 @@
"dataset.collections.Collection Embedding": "{{total}} 组索引中",
"dataset.collections.Confirm to delete the folder": "确认删除该文件夹及里面所有内容?",
"dataset.collections.Create And Import": "新建/导入",
"dataset.collections.Data Amount": "数据总量",
"dataset.collections.Select Collection": "选择文件",
"dataset.collections.Select One Collection To Store": "选择一个文件进行存储",
"dataset.data.Can not edit": "无编辑权限",
@@ -879,6 +877,7 @@
"dataset.dataset_name": "知识库名称",
"dataset.deleteFolderTips": "确认删除该文件夹及其包含的所有知识库?删除后数据无法恢复,请确认!",
"dataset.test.noResult": "搜索结果为空",
"dataset_text_model_tip": "用于知识库预处理阶段的文本处理,例如自动补充索引、问答对提取。",
"deep_rag_search": "深度搜索",
"delete_api": "确认删除该API密钥删除后该密钥立即失效对应的对话日志不会删除请确认",
"embedding_model_not_config": "检测到没有可用的索引模型",
@@ -944,9 +943,9 @@
"model_moka": "Moka-AI",
"model_moonshot": "月之暗面",
"model_other": "其他",
"model_ppio": "PPIO 派欧云",
"model_qwen": "阿里千问",
"model_siliconflow": "硅基流动",
"model_ppio": "PPIO 派欧云",
"model_sparkdesk": "讯飞星火",
"model_stepfun": "阶跃星辰",
"model_yi": "零一万物",

View File

@@ -7,6 +7,7 @@
"close_auto_sync": "确认关闭自动同步功能?",
"collection.Create update time": "创建/更新时间",
"collection.Training type": "训练模式",
"collection_data_count": "数据量",
"collection_not_support_retraining": "该集合类型不支持重新调整参数",
"collection_not_support_sync": "该集合不支持同步",
"collection_sync": "立即同步",
@@ -20,6 +21,7 @@
"custom_data_process_params": "自定义",
"custom_data_process_params_desc": "自定义设置数据处理规则",
"data.ideal_chunk_length": "理想分块长度",
"data_amount": "{{dataAmount}} 组数据, {{indexAmount}} 组索引",
"data_process_params": "处理参数",
"data_process_setting": "数据处理配置",
"dataset.Unsupported operation": "操作不支持",

View File

@@ -546,7 +546,6 @@
"core.dataset.data.Main Content": "主要內容",
"core.dataset.data.Search data placeholder": "搜尋相關資料",
"core.dataset.data.Too Long": "總長度超出上限",
"core.dataset.data.Total Amount": "{{total}} 組",
"core.dataset.data.group": "組",
"core.dataset.data.unit": "筆",
"core.dataset.embedding model tip": "索引模型可以將自然語言轉換成向量,用於進行語意搜尋。\n注意不同索引模型無法一起使用。選擇索引模型後就無法修改。",
@@ -860,7 +859,6 @@
"dataset.collections.Collection Embedding": "{{total}} 個索引",
"dataset.collections.Confirm to delete the folder": "確認刪除此資料夾及其所有內容?",
"dataset.collections.Create And Import": "建立或匯入",
"dataset.collections.Data Amount": "資料總量",
"dataset.collections.Select Collection": "選擇檔案",
"dataset.collections.Select One Collection To Store": "選擇一個檔案進行儲存",
"dataset.data.Can not edit": "無編輯權限",
@@ -876,6 +874,7 @@
"dataset.dataset_name": "知識庫名稱",
"dataset.deleteFolderTips": "確認刪除此資料夾及其包含的所有知識庫?刪除後資料無法復原,請確認!",
"dataset.test.noResult": "搜尋結果為空",
"dataset_text_model_tip": "用於知識庫預處理階段的文本處理,例如自動補充索引、問答對提取。",
"deep_rag_search": "深度搜索",
"delete_api": "確認刪除此 API 金鑰?\n刪除後該金鑰將立即失效對應的對話記錄不會被刪除請確認",
"embedding_model_not_config": "檢測到沒有可用的索引模型",

View File

@@ -7,6 +7,7 @@
"close_auto_sync": "確認關閉自動同步功能?",
"collection.Create update time": "建立/更新時間",
"collection.Training type": "分段模式",
"collection_data_count": "數據量",
"collection_not_support_retraining": "此集合類型不支援重新調整參數",
"collection_not_support_sync": "該集合不支援同步",
"collection_sync": "立即同步",
@@ -20,6 +21,7 @@
"custom_data_process_params": "自訂",
"custom_data_process_params_desc": "自訂資料處理規則",
"data.ideal_chunk_length": "理想分塊長度",
"data_amount": "{{dataAmount}} 組數據, {{indexAmount}} 組索引",
"data_process_params": "處理參數",
"data_process_setting": "資料處理設定",
"dataset.Unsupported operation": "操作不支持",