Add image index and pdf parse (#3956)

* feat: think tag parse

* feat: parse think tag test

* feat: pdf parse ux

* feat: doc2x parse

* perf: rewrite training mode setting

* feat: image parse queue

* perf: image index

* feat: image parse process

* feat: add init sh

* fix: ts
This commit is contained in:
Archer
2025-03-03 23:08:29 +08:00
committed by archer
parent 08b6f594df
commit adf5377ebe
106 changed files with 2337 additions and 1454 deletions

View File

@@ -215,7 +215,10 @@ export const getDatasetTrainingQueue = (datasetId: string) =>
});
export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
POST<PreviewChunksResponse>('/core/dataset/file/getPreviewChunks', data);
POST<PreviewChunksResponse>('/core/dataset/file/getPreviewChunks', data, {
maxQuantity: 1,
timeout: 600000
});
/* ================== read source ======================== */
export const getCollectionSource = (data: readCollectionSourceBody) =>

View File

@@ -1,8 +1,8 @@
import { defaultQAModels, defaultVectorModels } from '@fastgpt/global/core/ai/model';
import {
DatasetCollectionDataProcessModeEnum,
DatasetCollectionTypeEnum,
DatasetTypeEnum,
TrainingModeEnum
DatasetTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import type {
DatasetCollectionItemType,
@@ -25,6 +25,7 @@ export const defaultDatasetDetail: DatasetItemType = {
permission: new DatasetPermission(),
vectorModel: defaultVectorModels[0],
agentModel: defaultQAModels[0],
vlmModel: defaultQAModels[0],
inheritPermission: true
};
@@ -57,13 +58,13 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
sourceName: '',
sourceId: '',
createTime: new Date(),
trainingType: TrainingModeEnum.chunk,
trainingType: DatasetCollectionDataProcessModeEnum.chunk,
chunkSize: 0,
permission: new DatasetPermission(),
indexAmount: 0
};
export enum ImportProcessWayEnum {
export enum ChunkSettingModeEnum {
auto = 'auto',
custom = 'custom'
}

View File

@@ -18,6 +18,7 @@ import { DatasetItemType, DatasetTagType } from '@fastgpt/global/core/dataset/ty
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder/type';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { getWebLLMModel } from '@/web/common/system/utils';
type DatasetPageContextType = {
datasetId: string;
@@ -116,6 +117,8 @@ export const DatasetPageContextProvider = ({
setDatasetDetail((state) => ({
...state,
...data,
agentModel: getWebLLMModel(data.agentModel),
vlmModel: getWebLLMModel(data.vlmModel),
apiServer: data.apiServer
? {
baseUrl: data.apiServer.baseUrl,

View File

@@ -1,6 +1,6 @@
import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { ImportProcessWayEnum } from './constants';
import { ChunkSettingModeEnum } from './constants';
import { UseFormReturn } from 'react-hook-form';
import { APIFileItem } from '@fastgpt/global/core/dataset/apiDataset';
@@ -44,7 +44,7 @@ export type ImportSourceParamsType = UseFormReturn<
customSplitChar: string;
prompt: string;
mode: TrainingModeEnum;
way: ImportProcessWayEnum;
way: ChunkSettingModeEnum;
},
any
>;