Add image index and pdf parse (#3956)

* feat: think tag parse

* feat: parse think tag test

* feat: pdf parse ux

* feat: doc2x parse

* perf: rewrite training mode setting

* feat: image parse queue

* perf: image index

* feat: image parse process

* feat: add init sh

* fix: ts
This commit is contained in:
Archer
2025-03-03 23:08:29 +08:00
committed by archer
parent 08b6f594df
commit adf5377ebe
106 changed files with 2337 additions and 1454 deletions

View File

@@ -1,102 +0,0 @@
import React, { useState } from 'react';
import { Box, Flex, Grid, IconButton } from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import MyMenu from '@fastgpt/web/components/common/MyMenu';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import dynamic from 'next/dynamic';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
const PreviewChunks = dynamic(() => import('./PreviewChunks'));
const Preview = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
const { t } = useTranslation();
const { sources } = useContextSelector(DatasetImportContext, (v) => v);
const [previewRawTextSource, setPreviewRawTextSource] = useState<ImportSourceItemType>();
const [previewChunkSource, setPreviewChunkSource] = useState<ImportSourceItemType>();
return (
<Box h={'100%'} w={'100%'} display={['block', 'flex']} flexDirection={'column'}>
<Flex alignItems={'center'}>
<MyIcon name={'core/dataset/fileCollection'} w={'20px'} />
<Box fontSize={'md'}>{t('common:core.dataset.import.Sources list')}</Box>
</Flex>
<Box mt={3} flex={'1 0 0'} h={['auto', 0]} width={'100%'} overflowY={'auto'}>
<Grid w={'100%'} gap={3} gridTemplateColumns={['1fr', '1fr', '1fr', '1fr', '1fr 1fr']}>
{sources.map((source) => (
<Flex
key={source.id}
bg={'white'}
p={4}
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
boxShadow={'2'}
alignItems={'center'}
>
<MyIcon name={source.icon as any} w={['1rem', '1.25rem']} />
<Box mx={1} flex={'1 0 0'} wordBreak={'break-all'} fontSize={'sm'}>
{source.sourceName}
</Box>
{showPreviewChunks && (
<Box fontSize={'xs'} color={'myGray.600'}>
<MyMenu
Button={
<IconButton
icon={<MyIcon name={'common/viewLight'} w={'14px'} p={2} />}
aria-label={''}
size={'sm'}
variant={'whitePrimary'}
/>
}
menuList={[
{
children: [
{
label: (
<Flex alignItems={'center'}>
<MyIcon name={'core/dataset/fileCollection'} w={'14px'} mr={2} />
{t('common:core.dataset.import.Preview raw text')}
</Flex>
),
onClick: () => setPreviewRawTextSource(source)
},
{
label: (
<Flex alignItems={'center'}>
<MyIcon name={'core/dataset/splitLight'} w={'14px'} mr={2} />
{t('common:core.dataset.import.Preview chunks')}
</Flex>
),
onClick: () => setPreviewChunkSource(source)
}
]
}
]}
/>
</Box>
)}
</Flex>
))}
</Grid>
</Box>
{!!previewRawTextSource && (
<PreviewRawText
previewSource={previewRawTextSource}
onClose={() => setPreviewRawTextSource(undefined)}
/>
)}
{!!previewChunkSource && (
<PreviewChunks
previewSource={previewChunkSource}
onClose={() => setPreviewChunkSource(undefined)}
/>
)}
</Box>
);
};
export default React.memo(Preview);

View File

@@ -1,78 +0,0 @@
import React from 'react';
import { Box } from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { getPreviewFileContent } from '@/web/common/file/api';
import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { getPreviewSourceReadType } from '../utils';
const PreviewRawText = ({
previewSource,
onClose
}: {
previewSource: ImportSourceItemType;
onClose: () => void;
}) => {
const { toast } = useToast();
const { importSource, processParamsForm } = useContextSelector(DatasetImportContext, (v) => v);
const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);
const { data, loading: isLoading } = useRequest2(
async () => {
if (importSource === ImportDataSourceEnum.fileCustom && previewSource.rawText) {
return {
previewContent: previewSource.rawText.slice(0, 3000)
};
}
return getPreviewFileContent({
datasetId,
type: getPreviewSourceReadType(previewSource),
sourceId:
previewSource.dbFileId ||
previewSource.link ||
previewSource.externalFileUrl ||
previewSource.apiFileId ||
'',
isQAImport: importSource === ImportDataSourceEnum.csvTable,
selector: processParamsForm.getValues('webSelector'),
externalFileId: previewSource.externalFileId
});
},
{
refreshDeps: [previewSource.dbFileId, previewSource.link, previewSource.externalFileUrl],
manual: false,
onError(err) {
toast({
status: 'warning',
title: getErrText(err)
});
}
}
);
const rawText = data?.previewContent || '';
return (
<MyRightDrawer
onClose={onClose}
iconSrc={previewSource.icon}
title={previewSource.sourceName}
isLoading={isLoading}
px={0}
>
<Box whiteSpace={'pre-wrap'} overflowY={'auto'} px={5} fontSize={'sm'}>
{rawText}
</Box>
</MyRightDrawer>
);
};
export default React.memo(PreviewRawText);

View File

@@ -14,24 +14,17 @@ import {
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import dynamic from 'next/dynamic';
import { useI18n } from '@/web/context/I18n';
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
export const RenderUploadFiles = ({
files,
setFiles,
showPreviewContent
setFiles
}: {
files: ImportSourceItemType[];
setFiles: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
showPreviewContent?: boolean;
}) => {
const { t } = useTranslation();
const { fileT } = useI18n();
const [previewFile, setPreviewFile] = useState<ImportSourceItemType>();
return files.length > 0 ? (
<>
@@ -84,18 +77,6 @@ export const RenderUploadFiles = ({
<Td>
{!item.isUploading && (
<Flex alignItems={'center'} gap={4}>
{showPreviewContent && (
<MyTooltip label={t('common:core.dataset.import.Preview raw text')}>
<IconButton
variant={'whitePrimary'}
size={'sm'}
icon={<MyIcon name={'common/viewLight'} w={'18px'} />}
aria-label={''}
onClick={() => setPreviewFile(item)}
/>
</MyTooltip>
)}
<IconButton
variant={'grayDanger'}
size={'sm'}
@@ -113,9 +94,6 @@ export const RenderUploadFiles = ({
</Tbody>
</Table>
</TableContainer>
{!!previewFile && (
<PreviewRawText previewSource={previewFile} onClose={() => setPreviewFile(undefined)} />
)}
</>
) : null;
};