Add image index and pdf parse (#3956)
* feat: think tag parse
* feat: parse think tag test
* feat: pdf parse ux
* feat: doc2x parse
* perf: rewrite training mode setting
* feat: image parse queue
* perf: image index
* feat: image parse process
* feat: add init sh
* fix: ts
This commit is contained in:
@@ -28,7 +28,7 @@ const APIDatasetCollection = () => {
|
||||
return (
|
||||
<>
|
||||
{activeStep === 0 && <CustomAPIFileInput />}
|
||||
{activeStep === 1 && <DataProcess showPreviewChunks={true} />}
|
||||
{activeStep === 1 && <DataProcess />}
|
||||
{activeStep === 2 && <Upload />}
|
||||
</>
|
||||
);
|
||||
@@ -272,7 +272,7 @@ const CustomAPIFileInput = () => {
|
||||
onClick={onclickNext}
|
||||
>
|
||||
{selectFiles.length > 0
|
||||
? `${t('common:core.dataset.import.Total files', { total: selectFiles.length })} | `
|
||||
? `${t('dataset:total_num_files', { total: selectFiles.length })} | `
|
||||
: ''}
|
||||
{t('common:common.Next Step')}
|
||||
</Button>
|
||||
|
||||
@@ -34,7 +34,7 @@ const ExternalFileCollection = () => {
|
||||
return (
|
||||
<>
|
||||
{activeStep === 0 && <CustomLinkInput />}
|
||||
{activeStep === 1 && <DataProcess showPreviewChunks={true} />}
|
||||
{activeStep === 1 && <DataProcess />}
|
||||
{activeStep === 2 && <Upload />}
|
||||
</>
|
||||
);
|
||||
|
||||
@@ -19,7 +19,7 @@ const CustomTet = () => {
|
||||
return (
|
||||
<>
|
||||
{activeStep === 0 && <CustomTextInput />}
|
||||
{activeStep === 1 && <DataProcess showPreviewChunks />}
|
||||
{activeStep === 1 && <DataProcess />}
|
||||
{activeStep === 2 && <Upload />}
|
||||
</>
|
||||
);
|
||||
|
||||
@@ -23,7 +23,7 @@ const LinkCollection = () => {
|
||||
return (
|
||||
<>
|
||||
{activeStep === 0 && <CustomLinkImport />}
|
||||
{activeStep === 1 && <DataProcess showPreviewChunks />}
|
||||
{activeStep === 1 && <DataProcess />}
|
||||
{activeStep === 2 && <Upload />}
|
||||
</>
|
||||
);
|
||||
|
||||
@@ -10,9 +10,8 @@ import { RenderUploadFiles } from '../components/RenderFiles';
|
||||
import { useContextSelector } from 'use-context-selector';
|
||||
import { DatasetImportContext } from '../Context';
|
||||
|
||||
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'), {
|
||||
loading: () => <Loading fixed={false} />
|
||||
});
|
||||
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'));
|
||||
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
|
||||
const Upload = dynamic(() => import('../commonProgress/Upload'));
|
||||
|
||||
const fileType = '.txt, .docx, .csv, .xlsx, .pdf, .md, .html, .pptx';
|
||||
@@ -23,8 +22,9 @@ const FileLocal = () => {
|
||||
return (
|
||||
<>
|
||||
{activeStep === 0 && <SelectFile />}
|
||||
{activeStep === 1 && <DataProcess showPreviewChunks />}
|
||||
{activeStep === 2 && <Upload />}
|
||||
{activeStep === 1 && <DataProcess />}
|
||||
{activeStep === 2 && <PreviewData />}
|
||||
{activeStep === 3 && <Upload />}
|
||||
</>
|
||||
);
|
||||
};
|
||||
@@ -64,12 +64,12 @@ const SelectFile = React.memo(function SelectFile() {
|
||||
/>
|
||||
|
||||
{/* render files */}
|
||||
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} showPreviewContent />
|
||||
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} />
|
||||
|
||||
<Box textAlign={'right'} mt={5}>
|
||||
<Button isDisabled={successFiles.length === 0 || uploading} onClick={onclickNext}>
|
||||
{selectFiles.length > 0
|
||||
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
|
||||
? `${t('dataset:total_num_files', { total: selectFiles.length })} | `
|
||||
: ''}
|
||||
{t('common:common.Next Step')}
|
||||
</Button>
|
||||
|
||||
@@ -8,10 +8,13 @@ import { useRouter } from 'next/router';
|
||||
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
import { getDatasetCollectionById } from '@/web/core/dataset/api';
|
||||
import MyBox from '@fastgpt/web/components/common/MyBox';
|
||||
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
|
||||
import { ChunkSettingModeEnum } from '@/web/core/dataset/constants';
|
||||
import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
|
||||
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
|
||||
import { Box } from '@chakra-ui/react';
|
||||
|
||||
const Upload = dynamic(() => import('../commonProgress/Upload'));
|
||||
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
|
||||
|
||||
const ReTraining = () => {
|
||||
const router = useRouter();
|
||||
@@ -20,6 +23,7 @@ const ReTraining = () => {
|
||||
collectionId: string;
|
||||
};
|
||||
|
||||
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
|
||||
const activeStep = useContextSelector(DatasetImportContext, (v) => v.activeStep);
|
||||
const setSources = useContextSelector(DatasetImportContext, (v) => v.setSources);
|
||||
const processParamsForm = useContextSelector(DatasetImportContext, (v) => v.processParamsForm);
|
||||
@@ -43,8 +47,12 @@ const ReTraining = () => {
|
||||
}
|
||||
]);
|
||||
processParamsForm.reset({
|
||||
mode: collection.trainingType,
|
||||
way: ImportProcessWayEnum.auto,
|
||||
customPdfParse: collection.customPdfParse,
|
||||
trainingType: collection.trainingType,
|
||||
imageIndex: collection.imageIndex,
|
||||
autoIndexes: collection.autoIndexes,
|
||||
|
||||
chunkSettingMode: ChunkSettingModeEnum.auto,
|
||||
embeddingChunkSize: collection.chunkSize,
|
||||
qaChunkSize: collection.chunkSize,
|
||||
customSplitChar: collection.chunkSplitter,
|
||||
@@ -55,9 +63,12 @@ const ReTraining = () => {
|
||||
});
|
||||
|
||||
return (
|
||||
<MyBox isLoading={loading} h={'100%'} overflow={'auto'}>
|
||||
{activeStep === 0 && <DataProcess showPreviewChunks={true} />}
|
||||
{activeStep === 1 && <Upload />}
|
||||
<MyBox isLoading={loading} h={'100%'}>
|
||||
<Box h={'100%'} overflow={'auto'}>
|
||||
{activeStep === 0 && <DataProcess />}
|
||||
{activeStep === 1 && <PreviewData />}
|
||||
{activeStep === 2 && <Upload />}
|
||||
</Box>
|
||||
</MyBox>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -21,7 +21,7 @@ const FileLocal = () => {
|
||||
return (
|
||||
<>
|
||||
{activeStep === 0 && <SelectFile />}
|
||||
{activeStep === 1 && <PreviewData showPreviewChunks />}
|
||||
{activeStep === 1 && <PreviewData />}
|
||||
{activeStep === 2 && <Upload />}
|
||||
</>
|
||||
);
|
||||
@@ -91,7 +91,7 @@ const SelectFile = React.memo(function SelectFile() {
|
||||
}}
|
||||
>
|
||||
{selectFiles.length > 0
|
||||
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
|
||||
? `${t('dataset:total_num_files', { total: selectFiles.length })} | `
|
||||
: ''}
|
||||
{t('common:common.Next Step')}
|
||||
</Button>
|
||||
|
||||
Reference in New Issue
Block a user