perf: api dataset code

This commit is contained in:
archer
2025-06-03 18:31:35 +08:00
parent 2507997d20
commit e32ca8a3e9
63 changed files with 347 additions and 530 deletions

View File

@@ -17,6 +17,9 @@ import type { ParentIdType } from '../../common/parentFolder/type';
/* ================= dataset ===================== */
export type DatasetUpdateBody = {
id: string;
apiDatasetServer?: DatasetSchemaType['apiDatasetServer'];
parentId?: ParentIdType;
name?: string;
avatar?: string;
@@ -28,9 +31,6 @@ export type DatasetUpdateBody = {
websiteConfig?: DatasetSchemaType['websiteConfig'];
externalReadUrl?: DatasetSchemaType['externalReadUrl'];
defaultPermission?: DatasetSchemaType['defaultPermission'];
apiServer?: DatasetSchemaType['apiServer'];
yuqueServer?: DatasetSchemaType['yuqueServer'];
feishuServer?: DatasetSchemaType['feishuServer'];
chunkSettings?: DatasetSchemaType['chunkSettings'];
// sync schedule

View File

@@ -1,5 +1,5 @@
import { RequireOnlyOne } from '../../common/type/utils';
import type { ParentIdType } from '../../common/parentFolder/type.d';
import { RequireOnlyOne } from '../../../common/type/utils';
import type { ParentIdType } from '../../../common/parentFolder/type';
export type APIFileItem = {
id: string;
@@ -28,6 +28,12 @@ export type YuqueServer = {
basePath?: string;
};
/**
 * Groups the server settings of the API-backed dataset sources into one object.
 * Every field is optional.
 */
export type ApiDatasetServerType = {
// api dataset
apiServer?: APIFileServer;
// feishu dataset
feishuServer?: FeishuServer;
// yuque dataset
yuqueServer?: YuqueServer;
};
// Api dataset api
export type APIFileListResponse = APIFileItem[];

View File

@@ -0,0 +1,31 @@
import type { ApiDatasetServerType } from './type';
/**
 * Builds a copy of the api dataset server config with the credential fields blanked.
 *
 * For each configured server only the fields listed below are copied, and the
 * credential field (`authorization`, `token`, `appSecret`) is replaced by an empty string.
 * Servers that are not configured are returned as `undefined` (the key is still present).
 *
 * @param apiDatasetServer - The stored server config; may be omitted.
 * @returns `undefined` when no config is given, otherwise the redacted config.
 */
export const filterApiDatasetServerPublicData = (apiDatasetServer?: ApiDatasetServerType) => {
  if (!apiDatasetServer) {
    return undefined;
  }

  const api = apiDatasetServer.apiServer;
  const yuque = apiDatasetServer.yuqueServer;
  const feishu = apiDatasetServer.feishuServer;

  // Custom API server: keep the address and base path, drop the authorization value.
  const publicApiServer = api
    ? { baseUrl: api.baseUrl, authorization: '', basePath: api.basePath }
    : undefined;

  // Yuque: keep the user id and base path, drop the token.
  const publicYuqueServer = yuque
    ? { userId: yuque.userId, token: '', basePath: yuque.basePath }
    : undefined;

  // Feishu: keep the app id and folder token, drop the app secret.
  const publicFeishuServer = feishu
    ? { appId: feishu.appId, appSecret: '', folderToken: feishu.folderToken }
    : undefined;

  return {
    apiServer: publicApiServer,
    yuqueServer: publicYuqueServer,
    feishuServer: publicFeishuServer
  };
};

View File

@@ -6,11 +6,51 @@ export enum DatasetTypeEnum {
dataset = 'dataset',
websiteDataset = 'websiteDataset', // deep link
externalFile = 'externalFile',
apiDataset = 'apiDataset',
feishu = 'feishu',
yuque = 'yuque'
}
export const DatasetTypeMap = {
/**
 * Display metadata for the API-backed dataset types (apiDataset, feishu, yuque):
 * icon name, label and collection label passed through i18nT, and a courseUrl path
 * under /docs/guide/knowledge_base/.
 */
// NOTE(review): the declared Record is keyed by every DatasetTypeEnum value, but only three
// keys are supplied below; the @ts-ignore presumably silences that mismatch. Confirm, and
// consider a narrower key type so the suppression can be removed.
// @ts-ignore
export const ApiDatasetTypeMap: Record<
`${DatasetTypeEnum}`,
{
icon: string;
label: any;
collectionLabel: string;
courseUrl?: string;
}
> = {
// api dataset
[DatasetTypeEnum.apiDataset]: {
icon: 'core/dataset/externalDatasetOutline',
label: i18nT('dataset:api_file'),
collectionLabel: i18nT('common:File'),
courseUrl: '/docs/guide/knowledge_base/api_dataset/'
},
// feishu dataset
[DatasetTypeEnum.feishu]: {
icon: 'core/dataset/feishuDatasetOutline',
label: i18nT('dataset:feishu_dataset'),
collectionLabel: i18nT('common:File'),
courseUrl: '/docs/guide/knowledge_base/lark_dataset/'
},
// yuque dataset
[DatasetTypeEnum.yuque]: {
icon: 'core/dataset/yuqueDatasetOutline',
label: i18nT('dataset:yuque_dataset'),
collectionLabel: i18nT('common:File'),
courseUrl: '/docs/guide/knowledge_base/yuque_dataset/'
}
};
export const DatasetTypeMap: Record<
`${DatasetTypeEnum}`,
{
icon: string;
label: any;
collectionLabel: string;
courseUrl?: string;
}
> = {
...ApiDatasetTypeMap,
[DatasetTypeEnum.folder]: {
icon: 'common/folderFill',
label: i18nT('dataset:folder_dataset'),
@@ -24,27 +64,13 @@ export const DatasetTypeMap = {
[DatasetTypeEnum.websiteDataset]: {
icon: 'core/dataset/websiteDatasetOutline',
label: i18nT('dataset:website_dataset'),
collectionLabel: i18nT('common:Website')
collectionLabel: i18nT('common:Website'),
courseUrl: '/docs/guide/knowledge_base/websync/'
},
[DatasetTypeEnum.externalFile]: {
icon: 'core/dataset/externalDatasetOutline',
label: i18nT('dataset:external_file'),
collectionLabel: i18nT('common:File')
},
[DatasetTypeEnum.apiDataset]: {
icon: 'core/dataset/externalDatasetOutline',
label: i18nT('dataset:api_file'),
collectionLabel: i18nT('common:File')
},
[DatasetTypeEnum.feishu]: {
icon: 'core/dataset/feishuDatasetOutline',
label: i18nT('dataset:feishu_dataset'),
collectionLabel: i18nT('common:File')
},
[DatasetTypeEnum.yuque]: {
icon: 'core/dataset/yuqueDatasetOutline',
label: i18nT('dataset:yuque_dataset'),
collectionLabel: i18nT('common:File')
}
};

View File

@@ -13,7 +13,12 @@ import type {
ChunkTriggerConfigTypeEnum
} from './constants';
import type { DatasetPermission } from '../../support/permission/dataset/controller';
import type { APIFileServer, FeishuServer, YuqueServer } from './apiDataset';
import type {
ApiDatasetServerType,
APIFileServer,
FeishuServer,
YuqueServer
} from './apiDataset/type';
import type { SourceMemberType } from 'support/user/type';
import type { DatasetDataIndexTypeEnum } from './data/constants';
import type { ParentIdType } from 'common/parentFolder/type';
@@ -73,14 +78,16 @@ export type DatasetSchemaType = {
chunkSettings?: ChunkSettingsType;
inheritPermission: boolean;
apiServer?: APIFileServer;
feishuServer?: FeishuServer;
yuqueServer?: YuqueServer;
apiDatasetServer?: ApiDatasetServerType;
// abandoned
autoSync?: boolean;
externalReadUrl?: string;
defaultPermission?: number;
apiServer?: APIFileServer;
feishuServer?: FeishuServer;
yuqueServer?: YuqueServer;
};
export type DatasetCollectionSchemaType = ChunkSettingsType & {

View File

@@ -1,5 +1,8 @@
import type { ApiDatasetDetailResponse } from '@fastgpt/global/core/dataset/apiDataset';
import { FeishuServer, YuqueServer } from '@fastgpt/global/core/dataset/apiDataset';
import type {
ApiDatasetDetailResponse,
FeishuServer,
YuqueServer
} from '@fastgpt/global/core/dataset/apiDataset/type';
import type {
DeepRagSearchProps,
SearchDatasetDataResponse

View File

@@ -3,12 +3,11 @@ import type {
ApiFileReadContentResponse,
APIFileReadResponse,
ApiDatasetDetailResponse,
APIFileServer,
APIFileItem
} from '@fastgpt/global/core/dataset/apiDataset';
APIFileServer
} from '@fastgpt/global/core/dataset/apiDataset/type';
import axios, { type Method } from 'axios';
import { addLog } from '../../../common/system/log';
import { readFileRawTextByUrl } from '../read';
import { addLog } from '../../../../common/system/log';
import { readFileRawTextByUrl } from '../../read';
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import { type RequireOnlyOne } from '@fastgpt/global/common/type/utils';

View File

@@ -3,10 +3,10 @@ import type {
ApiFileReadContentResponse,
ApiDatasetDetailResponse,
FeishuServer
} from '@fastgpt/global/core/dataset/apiDataset';
} from '@fastgpt/global/core/dataset/apiDataset/type';
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import axios, { type Method } from 'axios';
import { addLog } from '../../../common/system/log';
import { addLog } from '../../../../common/system/log';
type ResponseDataType = {
success: boolean;

View File

@@ -1,18 +1,10 @@
import type {
APIFileServer,
YuqueServer,
FeishuServer
} from '@fastgpt/global/core/dataset/apiDataset';
import { useApiDatasetRequest } from './api';
import { useYuqueDatasetRequest } from '../yuqueDataset/api';
import { useFeishuDatasetRequest } from '../feishuDataset/api';
import { useApiDatasetRequest } from './custom/api';
import { useYuqueDatasetRequest } from './yuqueDataset/api';
import { useFeishuDatasetRequest } from './feishuDataset/api';
import type { ApiDatasetServerType } from '@fastgpt/global/core/dataset/apiDataset/type';
export const getApiDatasetRequest = async (data: {
apiServer?: APIFileServer;
yuqueServer?: YuqueServer;
feishuServer?: FeishuServer;
}) => {
const { apiServer, yuqueServer, feishuServer } = data;
export const getApiDatasetRequest = async (apiDatasetServer?: ApiDatasetServerType) => {
const { apiServer, yuqueServer, feishuServer } = apiDatasetServer || {};
if (apiServer) {
return useApiDatasetRequest({ apiServer });

View File

@@ -3,9 +3,9 @@ import type {
ApiFileReadContentResponse,
YuqueServer,
ApiDatasetDetailResponse
} from '@fastgpt/global/core/dataset/apiDataset';
} from '@fastgpt/global/core/dataset/apiDataset/type';
import axios, { type Method } from 'axios';
import { addLog } from '../../../common/system/log';
import { addLog } from '../../../../common/system/log';
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
type ResponseDataType = {
@@ -105,7 +105,6 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
if (!parentId) {
if (yuqueServer.basePath) parentId = yuqueServer.basePath;
}
let files: APIFileItem[] = [];
if (!parentId) {

View File

@@ -157,9 +157,7 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => {
return {
type: DatasetSourceReadTypeEnum.apiFile,
sourceId,
apiServer: dataset.apiServer,
feishuServer: dataset.feishuServer,
yuqueServer: dataset.yuqueServer
apiDatasetServer: dataset.apiDatasetServer
};
})();

View File

@@ -9,13 +9,9 @@ import { type TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/st
import axios from 'axios';
import { readRawContentByFileBuffer } from '../../common/file/read/utils';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import {
type APIFileServer,
type FeishuServer,
type YuqueServer
} from '@fastgpt/global/core/dataset/apiDataset';
import { getApiDatasetRequest } from './apiDataset';
import Papa from 'papaparse';
import type { ApiDatasetServerType } from '@fastgpt/global/core/dataset/apiDataset/type';
export const readFileRawTextByUrl = async ({
teamId,
@@ -69,9 +65,7 @@ export const readDatasetSourceRawText = async ({
sourceId,
selector,
externalFileId,
apiServer,
feishuServer,
yuqueServer,
apiDatasetServer,
customPdfParse,
getFormatText
}: {
@@ -84,9 +78,7 @@ export const readDatasetSourceRawText = async ({
selector?: string; // link selector
externalFileId?: string; // external file dataset
apiServer?: APIFileServer; // api dataset
feishuServer?: FeishuServer; // feishu dataset
yuqueServer?: YuqueServer; // yuque dataset
apiDatasetServer?: ApiDatasetServerType; // api dataset
}): Promise<{
title?: string;
rawText: string;
@@ -128,9 +120,7 @@ export const readDatasetSourceRawText = async ({
};
} else if (type === DatasetSourceReadTypeEnum.apiFile) {
const { title, rawText } = await readApiServerFileContent({
apiServer,
feishuServer,
yuqueServer,
apiDatasetServer,
apiFileId: sourceId,
teamId,
tmbId
@@ -147,17 +137,13 @@ export const readDatasetSourceRawText = async ({
};
export const readApiServerFileContent = async ({
apiServer,
feishuServer,
yuqueServer,
apiDatasetServer,
apiFileId,
teamId,
tmbId,
customPdfParse
}: {
apiServer?: APIFileServer;
feishuServer?: FeishuServer;
yuqueServer?: YuqueServer;
apiDatasetServer?: ApiDatasetServerType;
apiFileId: string;
teamId: string;
tmbId: string;
@@ -166,13 +152,7 @@ export const readApiServerFileContent = async ({
title?: string;
rawText: string;
}> => {
return (
await getApiDatasetRequest({
apiServer,
yuqueServer,
feishuServer
})
).getFileContent({
return (await getApiDatasetRequest(apiDatasetServer)).getFileContent({
teamId,
tmbId,
apiFileId,

View File

@@ -127,14 +127,16 @@ const DatasetSchema = new Schema({
type: Boolean,
default: true
},
apiServer: Object,
feishuServer: Object,
yuqueServer: Object,
apiDatasetServer: Object,
// abandoned
autoSync: Boolean,
externalReadUrl: String,
defaultPermission: Number
defaultPermission: Number,
apiServer: Object,
feishuServer: Object,
yuqueServer: Object
});
try {