Add image index and pdf parse (#3956)

* feat: think tag parse

* feat: parse think tag test

* feat: pdf parse ux

* feat: doc2x parse

* perf: rewrite training mode setting

* feat: image parse queue

* perf: image index

* feat: image parse process

* feat: add init sh

* fix: ts
This commit is contained in:
Archer
2025-03-03 23:08:29 +08:00
committed by archer
parent 08b6f594df
commit adf5377ebe
106 changed files with 2337 additions and 1454 deletions

View File

@@ -1,4 +1,3 @@
import type { PreviewContextProps } from '@/pages/api/common/file/previewContent';
import { GET, POST } from '@/web/common/api/request';
import type { UploadImgProps } from '@fastgpt/global/common/file/api.d';
import { AxiosProgressEvent } from 'axios';
@@ -19,11 +18,3 @@ export const postUploadFiles = (
'Content-Type': 'multipart/form-data; charset=utf-8'
}
});
/**
 * Fetch a preview of a file's parsed content from the server.
 * Uses a long (10-minute) timeout because parsing large files server-side can be slow.
 */
export const getPreviewFileContent = (data: PreviewContextProps) => {
  type PreviewFileContentResponse = {
    previewContent: string;
    totalLength: number;
  };
  const requestConfig = {
    timeout: 600000 // 10 minutes — file parsing may take a while
  };
  return POST<PreviewFileContentResponse>('/common/file/previewContent', data, requestConfig);
};

View File

@@ -53,6 +53,7 @@ type State = {
defaultModels: SystemDefaultModelType;
llmModelList: LLMModelItemType[];
datasetModelList: LLMModelItemType[];
getVllmModelList: () => LLMModelItemType[];
embeddingModelList: EmbeddingModelItemType[];
ttsModelList: TTSModelType[];
reRankModelList: ReRankModelItemType[];
@@ -134,6 +135,9 @@ export const useSystemStore = create<State>()(
ttsModelList: [],
reRankModelList: [],
sttModelList: [],
getVllmModelList: () => {
return get().llmModelList.filter((item) => item.vision);
},
initStaticData(res) {
set((state) => {
state.initDataBufferId = res.bufferId;

View File

@@ -215,7 +215,10 @@ export const getDatasetTrainingQueue = (datasetId: string) =>
});
export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
POST<PreviewChunksResponse>('/core/dataset/file/getPreviewChunks', data);
POST<PreviewChunksResponse>('/core/dataset/file/getPreviewChunks', data, {
maxQuantity: 1,
timeout: 600000
});
/* ================== read source ======================== */
export const getCollectionSource = (data: readCollectionSourceBody) =>

View File

@@ -1,8 +1,8 @@
import { defaultQAModels, defaultVectorModels } from '@fastgpt/global/core/ai/model';
import {
DatasetCollectionDataProcessModeEnum,
DatasetCollectionTypeEnum,
DatasetTypeEnum,
TrainingModeEnum
DatasetTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import type {
DatasetCollectionItemType,
@@ -25,6 +25,7 @@ export const defaultDatasetDetail: DatasetItemType = {
permission: new DatasetPermission(),
vectorModel: defaultVectorModels[0],
agentModel: defaultQAModels[0],
vlmModel: defaultQAModels[0],
inheritPermission: true
};
@@ -57,13 +58,13 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
sourceName: '',
sourceId: '',
createTime: new Date(),
trainingType: TrainingModeEnum.chunk,
trainingType: DatasetCollectionDataProcessModeEnum.chunk,
chunkSize: 0,
permission: new DatasetPermission(),
indexAmount: 0
};
export enum ImportProcessWayEnum {
export enum ChunkSettingModeEnum {
auto = 'auto',
custom = 'custom'
}

View File

@@ -18,6 +18,7 @@ import { DatasetItemType, DatasetTagType } from '@fastgpt/global/core/dataset/ty
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder/type';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { getWebLLMModel } from '@/web/common/system/utils';
type DatasetPageContextType = {
datasetId: string;
@@ -116,6 +117,8 @@ export const DatasetPageContextProvider = ({
setDatasetDetail((state) => ({
...state,
...data,
agentModel: getWebLLMModel(data.agentModel),
vlmModel: getWebLLMModel(data.vlmModel),
apiServer: data.apiServer
? {
baseUrl: data.apiServer.baseUrl,

View File

@@ -1,6 +1,6 @@
import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { ImportProcessWayEnum } from './constants';
import { ChunkSettingModeEnum } from './constants';
import { UseFormReturn } from 'react-hook-form';
import { APIFileItem } from '@fastgpt/global/core/dataset/apiDataset';
@@ -44,7 +44,7 @@ export type ImportSourceParamsType = UseFormReturn<
customSplitChar: string;
prompt: string;
mode: TrainingModeEnum;
way: ImportProcessWayEnum;
way: ChunkSettingModeEnum;
},
any
>;