Add image index and pdf parse (#3956)
* feat: think tag parse * feat: parse think tag test * feat: pdf parse ux * feat: doc2x parse * perf: rewrite training mode setting * feat: image parse queue * perf: image index * feat: image parse process * feat: add init sh * fix: ts
This commit is contained in:
@@ -1,4 +1,3 @@
|
||||
import type { PreviewContextProps } from '@/pages/api/common/file/previewContent';
|
||||
import { GET, POST } from '@/web/common/api/request';
|
||||
import type { UploadImgProps } from '@fastgpt/global/common/file/api.d';
|
||||
import { AxiosProgressEvent } from 'axios';
|
||||
@@ -19,11 +18,3 @@ export const postUploadFiles = (
|
||||
'Content-Type': 'multipart/form-data; charset=utf-8'
|
||||
}
|
||||
});
|
||||
|
||||
export const getPreviewFileContent = (data: PreviewContextProps) =>
|
||||
POST<{
|
||||
previewContent: string;
|
||||
totalLength: number;
|
||||
}>('/common/file/previewContent', data, {
|
||||
timeout: 600000
|
||||
});
|
||||
|
||||
@@ -53,6 +53,7 @@ type State = {
|
||||
defaultModels: SystemDefaultModelType;
|
||||
llmModelList: LLMModelItemType[];
|
||||
datasetModelList: LLMModelItemType[];
|
||||
getVllmModelList: () => LLMModelItemType[];
|
||||
embeddingModelList: EmbeddingModelItemType[];
|
||||
ttsModelList: TTSModelType[];
|
||||
reRankModelList: ReRankModelItemType[];
|
||||
@@ -134,6 +135,9 @@ export const useSystemStore = create<State>()(
|
||||
ttsModelList: [],
|
||||
reRankModelList: [],
|
||||
sttModelList: [],
|
||||
getVllmModelList: () => {
|
||||
return get().llmModelList.filter((item) => item.vision);
|
||||
},
|
||||
initStaticData(res) {
|
||||
set((state) => {
|
||||
state.initDataBufferId = res.bufferId;
|
||||
|
||||
@@ -215,7 +215,10 @@ export const getDatasetTrainingQueue = (datasetId: string) =>
|
||||
});
|
||||
|
||||
export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
|
||||
POST<PreviewChunksResponse>('/core/dataset/file/getPreviewChunks', data);
|
||||
POST<PreviewChunksResponse>('/core/dataset/file/getPreviewChunks', data, {
|
||||
maxQuantity: 1,
|
||||
timeout: 600000
|
||||
});
|
||||
|
||||
/* ================== read source ======================== */
|
||||
export const getCollectionSource = (data: readCollectionSourceBody) =>
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { defaultQAModels, defaultVectorModels } from '@fastgpt/global/core/ai/model';
|
||||
import {
|
||||
DatasetCollectionDataProcessModeEnum,
|
||||
DatasetCollectionTypeEnum,
|
||||
DatasetTypeEnum,
|
||||
TrainingModeEnum
|
||||
DatasetTypeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import type {
|
||||
DatasetCollectionItemType,
|
||||
@@ -25,6 +25,7 @@ export const defaultDatasetDetail: DatasetItemType = {
|
||||
permission: new DatasetPermission(),
|
||||
vectorModel: defaultVectorModels[0],
|
||||
agentModel: defaultQAModels[0],
|
||||
vlmModel: defaultQAModels[0],
|
||||
inheritPermission: true
|
||||
};
|
||||
|
||||
@@ -57,13 +58,13 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
|
||||
sourceName: '',
|
||||
sourceId: '',
|
||||
createTime: new Date(),
|
||||
trainingType: TrainingModeEnum.chunk,
|
||||
trainingType: DatasetCollectionDataProcessModeEnum.chunk,
|
||||
chunkSize: 0,
|
||||
permission: new DatasetPermission(),
|
||||
indexAmount: 0
|
||||
};
|
||||
|
||||
export enum ImportProcessWayEnum {
|
||||
export enum ChunkSettingModeEnum {
|
||||
auto = 'auto',
|
||||
custom = 'custom'
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ import { DatasetItemType, DatasetTagType } from '@fastgpt/global/core/dataset/ty
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder/type';
|
||||
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
import { getWebLLMModel } from '@/web/common/system/utils';
|
||||
|
||||
type DatasetPageContextType = {
|
||||
datasetId: string;
|
||||
@@ -116,6 +117,8 @@ export const DatasetPageContextProvider = ({
|
||||
setDatasetDetail((state) => ({
|
||||
...state,
|
||||
...data,
|
||||
agentModel: getWebLLMModel(data.agentModel),
|
||||
vlmModel: getWebLLMModel(data.vlmModel),
|
||||
apiServer: data.apiServer
|
||||
? {
|
||||
baseUrl: data.apiServer.baseUrl,
|
||||
|
||||
4
projects/app/src/web/core/dataset/type.d.ts
vendored
4
projects/app/src/web/core/dataset/type.d.ts
vendored
@@ -1,6 +1,6 @@
|
||||
import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { ImportProcessWayEnum } from './constants';
|
||||
import { ChunkSettingModeEnum } from './constants';
|
||||
import { UseFormReturn } from 'react-hook-form';
|
||||
import { APIFileItem } from '@fastgpt/global/core/dataset/apiDataset';
|
||||
|
||||
@@ -44,7 +44,7 @@ export type ImportSourceParamsType = UseFormReturn<
|
||||
customSplitChar: string;
|
||||
prompt: string;
|
||||
mode: TrainingModeEnum;
|
||||
way: ImportProcessWayEnum;
|
||||
way: ChunkSettingModeEnum;
|
||||
},
|
||||
any
|
||||
>;
|
||||
|
||||
Reference in New Issue
Block a user