Add image index and pdf parse (#3956)

* feat: think tag parse

* feat: parse think tag test

* feat: pdf parse ux

* feat: doc2x parse

* perf: rewrite training mode setting

* feat: image parse queue

* perf: image index

* feat: image parse process

* feat: add init sh

* fix: ts
This commit is contained in:
Archer
2025-03-03 23:08:29 +08:00
committed by archer
parent 08b6f594df
commit adf5377ebe
106 changed files with 2337 additions and 1454 deletions

View File

@@ -7,7 +7,10 @@ import { useRouter } from 'next/router';
import MyBox from '@fastgpt/web/components/common/MyBox';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
import { DatasetCollectionTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import {
DatasetCollectionDataProcessModeMap,
DatasetCollectionTypeMap
} from '@fastgpt/global/core/dataset/constants';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
import MyIcon from '@fastgpt/web/components/common/Icon';
@@ -61,13 +64,25 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
label: t('common:core.dataset.collection.metadata.Updatetime'),
value: formatTime2YMDHM(collection.updateTime)
},
{
label: t('dataset:collection_metadata_custom_pdf_parse'),
value: collection.customPdfParse ? 'Yes' : 'No'
},
{
label: t('common:core.dataset.collection.metadata.Raw text length'),
value: collection.rawTextLength ?? '-'
},
{
label: t('dataset:collection.Training type'),
value: t(TrainingTypeMap[collection.trainingType]?.label as any)
label: t('dataset:collection_metadata_image_parse'),
value: collection.imageIndex ? 'Yes' : 'No'
},
{
label: t('dataset:auto_indexes'),
value: collection.autoIndexes ? 'Yes' : 'No'
},
{
label: t('dataset:collection.training_type'),
value: t(DatasetCollectionDataProcessModeMap[collection.trainingType]?.label as any)
},
{
label: t('common:core.dataset.collection.metadata.Chunk Size'),
@@ -99,8 +114,8 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
<Box fontSize={'md'} pb={4}>
{t('common:core.dataset.collection.metadata.metadata')}
</Box>
<Flex mb={4} wordBreak={'break-all'} fontSize={'sm'}>
<Box color={'myGray.500'} flex={'0 0 70px'}>
<Flex mb={3} wordBreak={'break-all'} fontSize={'sm'}>
<Box color={'myGray.500'} flex={'0 0 90px'}>
{t('common:core.dataset.collection.id')}:
</Box>
<Box>{collection?._id}</Box>
@@ -109,8 +124,8 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
(item, i) =>
item.label &&
item.value && (
<Flex key={i} alignItems={'center'} mb={4} wordBreak={'break-all'} fontSize={'sm'}>
<Box color={'myGray.500'} flex={'0 0 70px'}>
<Flex key={i} alignItems={'center'} mb={3} wordBreak={'break-all'} fontSize={'sm'}>
<Box color={'myGray.500'} flex={'0 0 90px'}>
{item.label}
</Box>
<Box>{item.value}</Box>