feat: model config required check;feat: dataset text model default setting (#3866)
* feat: model config required check * feat: dataset text model default setting * perf: collection list count * fix: ts * remove index count
This commit is contained in:
2
packages/global/core/ai/model.d.ts
vendored
2
packages/global/core/ai/model.d.ts
vendored
@@ -17,6 +17,8 @@ type BaseModelItemType = {
|
||||
isActive?: boolean;
|
||||
isCustom?: boolean;
|
||||
isDefault?: boolean;
|
||||
isDefaultDatasetTextModel?: boolean;
|
||||
isDefaultDatasetImageModel?: boolean;
|
||||
|
||||
// If has requestUrl, it will request the model directly
|
||||
requestUrl?: string;
|
||||
|
||||
1
packages/global/core/dataset/type.d.ts
vendored
1
packages/global/core/dataset/type.d.ts
vendored
@@ -192,6 +192,7 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & {
|
||||
sourceId?: string;
|
||||
file?: DatasetFileSchema;
|
||||
permission: DatasetPermission;
|
||||
indexAmount: number;
|
||||
};
|
||||
|
||||
/* ================= data ===================== */
|
||||
|
||||
@@ -1 +1,4 @@
|
||||
/** API base URL derived from the `PRO_URL` environment variable; empty string when it is unset. */
export const FastGPTProUrl = process.env.PRO_URL ? `${process.env.PRO_URL}/api` : '';

/** True when the `PRO_URL` environment variable is set to a non-empty value. */
export const isFastGPTMainService = Boolean(process.env.PRO_URL);

/** Reports whether `global.systemConfig` currently holds a truthy value. */
export const isFastGPTProService = (): boolean => {
  // @ts-ignore
  return Boolean(global.systemConfig);
};
|
||||
|
||||
@@ -21,6 +21,7 @@ export const recallFromVectorStore = Vector.embRecall;
|
||||
export const getVectorDataByTime = Vector.getVectorDataByTime;
|
||||
export const getVectorCountByTeamId = Vector.getVectorCountByTeamId;
|
||||
export const getVectorCountByDatasetId = Vector.getVectorCountByDatasetId;
|
||||
export const getVectorCountByCollectionId = Vector.getVectorCountByCollectionId;
|
||||
|
||||
export const insertDatasetDataVector = async ({
|
||||
model,
|
||||
|
||||
@@ -321,6 +321,23 @@ export class MilvusCtrl {
|
||||
|
||||
return total;
|
||||
};
|
||||
/**
 * Count the vectors stored for a single collection.
 *
 * Issues a `count(*)` query against `DatasetVectorTableName`, filtered by
 * team id, dataset id and collection id.
 *
 * @param teamId - Team id used in the filter expression.
 * @param datasetId - Dataset id used in the filter expression.
 * @param collectionId - Collection id used in the filter expression.
 * @returns The count reported by the query, or 0 when the query yields no count row.
 */
getVectorCountByCollectionId = async (
  teamId: string,
  datasetId: string,
  collectionId: string
): Promise<number> => {
  const client = await this.getClient();

  const result = await client.query({
    collection_name: DatasetVectorTableName,
    output_fields: ['count(*)'],
    filter: `(teamId == "${String(teamId)}") and (datasetId == "${String(datasetId)}") and (collectionId == "${String(collectionId)}")`
  });

  // `data` can be missing or empty; fall back to 0 so callers never receive
  // `undefined` disguised as a number.
  const total = (result.data?.[0]?.['count(*)'] as number | undefined) ?? 0;

  return total;
};
|
||||
|
||||
getVectorDataByTime = async (start: Date, end: Date) => {
|
||||
const client = await this.getClient();
|
||||
|
||||
@@ -240,6 +240,23 @@ export class PgVectorCtrl {
|
||||
where: [['team_id', String(teamId)], 'and', ['dataset_id', String(datasetId)]]
|
||||
});
|
||||
|
||||
return total;
|
||||
};
|
||||
/**
 * Count the rows of `DatasetVectorTableName` that match a team, dataset and
 * collection id.
 *
 * @param teamId - Value matched against the `team_id` column.
 * @param datasetId - Value matched against the `dataset_id` column.
 * @param collectionId - Value matched against the `collection_id` column.
 * @returns Whatever `PgClient.count` resolves to for the combined condition.
 */
getVectorCountByCollectionId = async (
  teamId: string,
  datasetId: string,
  collectionId: string
) => {
  const teamKey = String(teamId);
  const datasetKey = String(datasetId);
  const collectionKey = String(collectionId);

  // All three conditions are AND-ed together.
  return await PgClient.count(DatasetVectorTableName, {
    where: [
      ['team_id', teamKey],
      'and',
      ['dataset_id', datasetKey],
      'and',
      ['collection_id', collectionKey]
    ]
  });
};
|
||||
}
|
||||
|
||||
@@ -52,6 +52,12 @@ export const loadSystemModels = async (init = false) => {
|
||||
if (model.isDefault) {
|
||||
global.systemDefaultModel.llm = model;
|
||||
}
|
||||
if (model.isDefaultDatasetTextModel) {
|
||||
global.systemDefaultModel.datasetTextLLM = model;
|
||||
}
|
||||
if (model.isDefaultDatasetImageModel) {
|
||||
global.systemDefaultModel.datasetImageLLM = model;
|
||||
}
|
||||
} else if (model.type === ModelTypeEnum.embedding) {
|
||||
global.embeddingModelMap.set(model.model, model);
|
||||
global.embeddingModelMap.set(model.name, model);
|
||||
@@ -134,6 +140,16 @@ export const loadSystemModels = async (init = false) => {
|
||||
if (!global.systemDefaultModel.llm) {
|
||||
global.systemDefaultModel.llm = Array.from(global.llmModelMap.values())[0];
|
||||
}
|
||||
if (!global.systemDefaultModel.datasetTextLLM) {
|
||||
global.systemDefaultModel.datasetTextLLM = Array.from(global.llmModelMap.values()).find(
|
||||
(item) => item.datasetProcess
|
||||
);
|
||||
}
|
||||
if (!global.systemDefaultModel.datasetImageLLM) {
|
||||
global.systemDefaultModel.datasetImageLLM = Array.from(global.llmModelMap.values()).find(
|
||||
(item) => item.vision
|
||||
);
|
||||
}
|
||||
if (!global.systemDefaultModel.embedding) {
|
||||
global.systemDefaultModel.embedding = Array.from(global.embeddingModelMap.values())[0];
|
||||
}
|
||||
|
||||
3
packages/service/core/ai/type.d.ts
vendored
3
packages/service/core/ai/type.d.ts
vendored
@@ -22,6 +22,9 @@ export type SystemModelItemType =
|
||||
|
||||
export type SystemDefaultModelType = {
|
||||
[ModelTypeEnum.llm]?: LLMModelItemType;
|
||||
datasetTextLLM?: LLMModelItemType;
|
||||
datasetImageLLM?: LLMModelItemType;
|
||||
|
||||
[ModelTypeEnum.embedding]?: EmbeddingModelItemType;
|
||||
[ModelTypeEnum.tts]?: TTSModelType;
|
||||
[ModelTypeEnum.stt]?: STTModelType;
|
||||
|
||||
@@ -201,61 +201,6 @@ export async function searchDatasetData(
|
||||
};
|
||||
};
|
||||
|
||||
/**
 * Expand a list of collection ids into those ids plus every collection nested
 * beneath any of them that is a folder.
 *
 * Folder ids themselves are kept in the result.
 *
 * @param teamId - Team id every looked-up collection must match.
 * @param datasetIds - Dataset ids the collections must belong to.
 * @param parentCollectionIds - Ids to start the expansion from.
 * @returns De-duplicated ids of the matched collections and their descendants;
 *          an empty array when `parentCollectionIds` is empty.
 */
async function getAllCollectionIds({
  teamId,
  datasetIds,
  parentCollectionIds
}: {
  teamId: string;
  datasetIds: string[];
  parentCollectionIds: string[];
}): Promise<string[]> {
  if (parentCollectionIds.length === 0) {
    return [];
  }

  const matched = await MongoDatasetCollection.find(
    {
      teamId,
      datasetId: { $in: datasetIds },
      _id: { $in: parentCollectionIds }
    },
    '_id type',
    {
      ...readFromSecondary
    }
  ).lean();

  // Every matched collection is part of the result; folders are additionally
  // remembered so their children can be expanded below.
  const collected = new Set<string>();
  const folderIds: string[] = [];
  for (const doc of matched) {
    const id = String(doc._id);
    collected.add(id);
    if (doc.type === 'folder') {
      folderIds.push(id);
    }
  }

  if (folderIds.length > 0) {
    const children = await MongoDatasetCollection.find(
      {
        teamId,
        datasetId: { $in: datasetIds },
        parentId: { $in: folderIds }
      },
      '_id',
      {
        ...readFromSecondary
      }
    ).lean();

    // Recurse so that folders nested inside folders are expanded as well.
    const descendantIds = await getAllCollectionIds({
      teamId,
      datasetIds,
      parentCollectionIds: children.map((child) => String(child._id))
    });

    for (const id of descendantIds) {
      collected.add(id);
    }
  }

  return Array.from(collected);
}
|
||||
/*
|
||||
Collection metadata filter
|
||||
标签过滤:
|
||||
@@ -263,6 +208,63 @@ export async function searchDatasetData(
|
||||
2. and 标签和 null 不能共存,否则返回空数组
|
||||
*/
|
||||
const filterCollectionByMetadata = async (): Promise<string[] | undefined> => {
|
||||
/**
 * Resolve collection ids into the ids of all non-folder collections they
 * cover, descending recursively into folders.
 *
 * Uses `teamId` and `datasetIds` from the enclosing scope. Folder ids are
 * never part of the result; only their (transitive) non-folder children are.
 *
 * @param parentCollectionIds - Ids to resolve. `undefined` means "no filter".
 * @returns `undefined` when no ids were given, `[]` for an empty list,
 *          otherwise the de-duplicated non-folder collection ids.
 */
const getAllCollectionIds = async ({
  parentCollectionIds
}: {
  parentCollectionIds?: string[];
}): Promise<string[] | undefined> => {
  if (!parentCollectionIds) return undefined;
  if (parentCollectionIds.length === 0) return [];

  const matched = await MongoDatasetCollection.find(
    {
      teamId,
      datasetId: { $in: datasetIds },
      _id: { $in: parentCollectionIds }
    },
    '_id type',
    {
      ...readFromSecondary
    }
  ).lean();

  // Split the matches in one pass: plain collections go straight into the
  // result, folders are queued for expansion.
  const leafIds = new Set<string>();
  const folderIds: string[] = [];
  for (const doc of matched) {
    const id = String(doc._id);
    if (doc.type === 'folder') {
      folderIds.push(id);
    } else {
      leafIds.add(id);
    }
  }

  if (folderIds.length === 0) {
    return Array.from(leafIds);
  }

  const children = await MongoDatasetCollection.find(
    {
      teamId,
      datasetId: { $in: datasetIds },
      parentId: { $in: folderIds }
    },
    '_id type',
    {
      ...readFromSecondary
    }
  ).lean();

  // Recurse on the children so nested folders are flattened too.
  const nestedIds = await getAllCollectionIds({
    parentCollectionIds: children.map((child) => String(child._id))
  });

  for (const id of nestedIds ?? []) {
    leafIds.add(id);
  }

  return Array.from(leafIds);
};
|
||||
|
||||
if (!collectionFilterMatch || !global.feConfigs.isPlus) return;
|
||||
|
||||
let tagCollectionIdList: string[] | undefined = undefined;
|
||||
@@ -382,7 +384,7 @@ export async function searchDatasetData(
|
||||
}
|
||||
|
||||
// Concat tag and time
|
||||
const finalIds = (() => {
|
||||
const collectionIds = (() => {
|
||||
if (tagCollectionIdList && createTimeCollectionIdList) {
|
||||
return tagCollectionIdList.filter((id) =>
|
||||
(createTimeCollectionIdList as string[]).includes(id)
|
||||
@@ -392,13 +394,9 @@ export async function searchDatasetData(
|
||||
return tagCollectionIdList || createTimeCollectionIdList;
|
||||
})();
|
||||
|
||||
return finalIds
|
||||
? await getAllCollectionIds({
|
||||
teamId,
|
||||
datasetIds,
|
||||
parentCollectionIds: finalIds
|
||||
})
|
||||
: undefined;
|
||||
return await getAllCollectionIds({
|
||||
parentCollectionIds: collectionIds
|
||||
});
|
||||
} catch (error) {}
|
||||
};
|
||||
const embeddingRecall = async ({
|
||||
|
||||
@@ -8,12 +8,12 @@ import { i18nT } from '../../../../web/i18n/utils';
|
||||
import { pushConcatBillTask, pushReduceTeamAiPointsTask } from './utils';
|
||||
|
||||
import { POST } from '../../../common/api/plusRequest';
|
||||
import { FastGPTProUrl } from '../../../common/system/constants';
|
||||
import { isFastGPTMainService } from '../../../common/system/constants';
|
||||
|
||||
export async function createUsage(data: CreateUsageProps) {
|
||||
try {
|
||||
// In FastGPT server
|
||||
if (FastGPTProUrl) {
|
||||
if (isFastGPTMainService) {
|
||||
await POST('/support/wallet/usage/createUsage', data);
|
||||
} else if (global.reduceAiPointsQueue) {
|
||||
// In FastGPT pro server
|
||||
@@ -31,7 +31,7 @@ export async function createUsage(data: CreateUsageProps) {
|
||||
export async function concatUsage(data: ConcatUsageProps) {
|
||||
try {
|
||||
// In FastGPT server
|
||||
if (FastGPTProUrl) {
|
||||
if (isFastGPTMainService) {
|
||||
await POST('/support/wallet/usage/concatUsage', data);
|
||||
} else if (global.reduceAiPointsQueue) {
|
||||
const {
|
||||
|
||||
@@ -547,7 +547,6 @@
|
||||
"core.dataset.data.Main Content": "Main Content",
|
||||
"core.dataset.data.Search data placeholder": "Search Related Data",
|
||||
"core.dataset.data.Too Long": "Total Length Exceeded",
|
||||
"core.dataset.data.Total Amount": "{{total}} Groups",
|
||||
"core.dataset.data.group": "Group",
|
||||
"core.dataset.data.unit": "Items",
|
||||
"core.dataset.embedding model tip": "The index model can convert natural language into vectors for semantic search.\nNote that different index models cannot be used together. Once an index model is selected, it cannot be changed.",
|
||||
@@ -860,7 +859,6 @@
|
||||
"dataset.collections.Collection Embedding": "{{total}} Indexes",
|
||||
"dataset.collections.Confirm to delete the folder": "Confirm to Delete This Folder and All Its Contents?",
|
||||
"dataset.collections.Create And Import": "Create/Import",
|
||||
"dataset.collections.Data Amount": "Total Data",
|
||||
"dataset.collections.Select Collection": "Select File",
|
||||
"dataset.collections.Select One Collection To Store": "Select a File to Store",
|
||||
"dataset.data.Can not edit": "No Edit Permission",
|
||||
@@ -876,6 +874,7 @@
|
||||
"dataset.dataset_name": "Dataset Name",
|
||||
"dataset.deleteFolderTips": "Confirm to Delete This Folder and All Its Contained Datasets? Data Cannot Be Recovered After Deletion, Please Confirm!",
|
||||
"dataset.test.noResult": "No Search Results",
|
||||
"dataset_text_model_tip": "Used for text processing in the knowledge base preprocessing stage, such as automatic supplementary indexing, Q&A pair extraction.",
|
||||
"deep_rag_search": "In-depth search",
|
||||
"delete_api": "Are you sure you want to delete this API key? \nAfter deletion, the key will become invalid immediately and the corresponding conversation log will not be deleted. Please confirm!",
|
||||
"embedding_model_not_config": "No index model is detected",
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
"close_auto_sync": "Are you sure you want to turn off automatic sync?",
|
||||
"collection.Create update time": "Creation/Update Time",
|
||||
"collection.Training type": "Training",
|
||||
"collection_data_count": "Data amount",
|
||||
"collection_not_support_retraining": "This collection type does not support retuning parameters",
|
||||
"collection_not_support_sync": "This collection does not support synchronization",
|
||||
"collection_sync": "Sync data",
|
||||
@@ -20,6 +21,7 @@
|
||||
"custom_data_process_params": "Custom",
|
||||
"custom_data_process_params_desc": "Customize data processing rules",
|
||||
"data.ideal_chunk_length": "ideal block length",
|
||||
"data_amount": "{{dataAmount}} Data, {{indexAmount}} Indexes",
|
||||
"data_process_params": "Params",
|
||||
"data_process_setting": "Processing config",
|
||||
"dataset.Unsupported operation": "Unsupported operation",
|
||||
|
||||
@@ -550,7 +550,6 @@
|
||||
"core.dataset.data.Main Content": "主要内容",
|
||||
"core.dataset.data.Search data placeholder": "搜索相关数据",
|
||||
"core.dataset.data.Too Long": "总长度超长了",
|
||||
"core.dataset.data.Total Amount": "{{total}} 组",
|
||||
"core.dataset.data.group": "组",
|
||||
"core.dataset.data.unit": "条",
|
||||
"core.dataset.embedding model tip": "索引模型可以将自然语言转成向量,用于进行语义检索。\n注意,不同索引模型无法一起使用,选择完索引模型后将无法修改。",
|
||||
@@ -863,7 +862,6 @@
|
||||
"dataset.collections.Collection Embedding": "{{total}} 组索引中",
|
||||
"dataset.collections.Confirm to delete the folder": "确认删除该文件夹及里面所有内容?",
|
||||
"dataset.collections.Create And Import": "新建/导入",
|
||||
"dataset.collections.Data Amount": "数据总量",
|
||||
"dataset.collections.Select Collection": "选择文件",
|
||||
"dataset.collections.Select One Collection To Store": "选择一个文件进行存储",
|
||||
"dataset.data.Can not edit": "无编辑权限",
|
||||
@@ -879,6 +877,7 @@
|
||||
"dataset.dataset_name": "知识库名称",
|
||||
"dataset.deleteFolderTips": "确认删除该文件夹及其包含的所有知识库?删除后数据无法恢复,请确认!",
|
||||
"dataset.test.noResult": "搜索结果为空",
|
||||
"dataset_text_model_tip": "用于知识库预处理阶段的文本处理,例如自动补充索引、问答对提取。",
|
||||
"deep_rag_search": "深度搜索",
|
||||
"delete_api": "确认删除该API密钥?删除后该密钥立即失效,对应的对话日志不会删除,请确认!",
|
||||
"embedding_model_not_config": "检测到没有可用的索引模型",
|
||||
@@ -944,9 +943,9 @@
|
||||
"model_moka": "Moka-AI",
|
||||
"model_moonshot": "月之暗面",
|
||||
"model_other": "其他",
|
||||
"model_ppio": "PPIO 派欧云",
|
||||
"model_qwen": "阿里千问",
|
||||
"model_siliconflow": "硅基流动",
|
||||
"model_ppio": "PPIO 派欧云",
|
||||
"model_sparkdesk": "讯飞星火",
|
||||
"model_stepfun": "阶跃星辰",
|
||||
"model_yi": "零一万物",
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
"close_auto_sync": "确认关闭自动同步功能?",
|
||||
"collection.Create update time": "创建/更新时间",
|
||||
"collection.Training type": "训练模式",
|
||||
"collection_data_count": "数据量",
|
||||
"collection_not_support_retraining": "该集合类型不支持重新调整参数",
|
||||
"collection_not_support_sync": "该集合不支持同步",
|
||||
"collection_sync": "立即同步",
|
||||
@@ -20,6 +21,7 @@
|
||||
"custom_data_process_params": "自定义",
|
||||
"custom_data_process_params_desc": "自定义设置数据处理规则",
|
||||
"data.ideal_chunk_length": "理想分块长度",
|
||||
"data_amount": "{{dataAmount}} 组数据, {{indexAmount}} 组索引",
|
||||
"data_process_params": "处理参数",
|
||||
"data_process_setting": "数据处理配置",
|
||||
"dataset.Unsupported operation": "操作不支持",
|
||||
|
||||
@@ -546,7 +546,6 @@
|
||||
"core.dataset.data.Main Content": "主要內容",
|
||||
"core.dataset.data.Search data placeholder": "搜尋相關資料",
|
||||
"core.dataset.data.Too Long": "總長度超出上限",
|
||||
"core.dataset.data.Total Amount": "{{total}} 組",
|
||||
"core.dataset.data.group": "組",
|
||||
"core.dataset.data.unit": "筆",
|
||||
"core.dataset.embedding model tip": "索引模型可以將自然語言轉換成向量,用於進行語意搜尋。\n注意,不同索引模型無法一起使用。選擇索引模型後就無法修改。",
|
||||
@@ -860,7 +859,6 @@
|
||||
"dataset.collections.Collection Embedding": "{{total}} 個索引",
|
||||
"dataset.collections.Confirm to delete the folder": "確認刪除此資料夾及其所有內容?",
|
||||
"dataset.collections.Create And Import": "建立或匯入",
|
||||
"dataset.collections.Data Amount": "資料總量",
|
||||
"dataset.collections.Select Collection": "選擇檔案",
|
||||
"dataset.collections.Select One Collection To Store": "選擇一個檔案進行儲存",
|
||||
"dataset.data.Can not edit": "無編輯權限",
|
||||
@@ -876,6 +874,7 @@
|
||||
"dataset.dataset_name": "知識庫名稱",
|
||||
"dataset.deleteFolderTips": "確認刪除此資料夾及其包含的所有知識庫?刪除後資料無法復原,請確認!",
|
||||
"dataset.test.noResult": "搜尋結果為空",
|
||||
"dataset_text_model_tip": "用於知識庫預處理階段的文本處理,例如自動補充索引、問答對提取。",
|
||||
"deep_rag_search": "深度搜索",
|
||||
"delete_api": "確認刪除此 API 金鑰?\n刪除後該金鑰將立即失效,對應的對話記錄不會被刪除,請確認!",
|
||||
"embedding_model_not_config": "檢測到沒有可用的索引模型",
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
"close_auto_sync": "確認關閉自動同步功能?",
|
||||
"collection.Create update time": "建立/更新時間",
|
||||
"collection.Training type": "分段模式",
|
||||
"collection_data_count": "數據量",
|
||||
"collection_not_support_retraining": "此集合類型不支援重新調整參數",
|
||||
"collection_not_support_sync": "該集合不支援同步",
|
||||
"collection_sync": "立即同步",
|
||||
@@ -20,6 +21,7 @@
|
||||
"custom_data_process_params": "自訂",
|
||||
"custom_data_process_params_desc": "自訂資料處理規則",
|
||||
"data.ideal_chunk_length": "理想分塊長度",
|
||||
"data_amount": "{{dataAmount}} 組數據, {{indexAmount}} 組索引",
|
||||
"data_process_params": "處理參數",
|
||||
"data_process_setting": "資料處理設定",
|
||||
"dataset.Unsupported operation": "操作不支持",
|
||||
|
||||
Reference in New Issue
Block a user