Add image index and pdf parse (#3956)

* feat: think tag parse

* feat: parse think tag test

* feat: pdf parse ux

* feat: doc2x parse

* perf: rewrite training mode setting

* feat: image parse queue

* perf: image index

* feat: image parse process

* feat: add init sh

* fix: ts
This commit is contained in:
Archer
2025-03-03 23:08:29 +08:00
committed by archer
parent 08b6f594df
commit adf5377ebe
106 changed files with 2337 additions and 1454 deletions

View File

@@ -18,7 +18,7 @@ import { useQuery } from '@tanstack/react-query';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
import MyInput from '@/components/MyInput';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { useRequest, useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { useRouter } from 'next/router';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyMenu from '@fastgpt/web/components/common/MyMenu';
@@ -28,7 +28,8 @@ import {
TrainingModeEnum,
DatasetTypeEnum,
DatasetTypeMap,
DatasetStatusEnum
DatasetStatusEnum,
DatasetCollectionDataProcessModeEnum
} from '@fastgpt/global/core/dataset/constants';
import EditFolderModal, { useEditFolder } from '../../EditFolderModal';
import { TabEnum } from '../../../../pages/dataset/detail/index';
@@ -41,6 +42,7 @@ import { CollectionPageContext } from './Context';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { useSystem } from '@fastgpt/web/hooks/useSystem';
import HeaderTagPopOver from './HeaderTagPopOver';
import MyBox from '@fastgpt/web/components/common/MyBox';
const FileSourceSelector = dynamic(() => import('../Import/components/FileSourceSelector'));
@@ -48,7 +50,7 @@ const Header = ({}: {}) => {
const { t } = useTranslation();
const theme = useTheme();
const { setLoading, feConfigs } = useSystemStore();
const { feConfigs } = useSystemStore();
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const router = useRouter();
@@ -69,50 +71,36 @@ const Header = ({}: {}) => {
tip: t('common:dataset.Manual collection Tip'),
canEmpty: false
});
const {
isOpen: isOpenFileSourceSelector,
onOpen: onOpenFileSourceSelector,
onClose: onCloseFileSourceSelector
} = useDisclosure();
const { mutate: onCreateCollection } = useRequest({
mutationFn: async ({
name,
type,
callback,
...props
}: {
name: string;
type: DatasetCollectionTypeEnum;
callback?: (id: string) => void;
trainingType?: TrainingModeEnum;
rawLink?: string;
chunkSize?: number;
}) => {
setLoading(true);
const { runAsync: onCreateCollection, loading: onCreating } = useRequest2(
async ({ name, type }: { name: string; type: DatasetCollectionTypeEnum }) => {
const id = await postDatasetCollection({
parentId,
datasetId: datasetDetail._id,
name,
type,
...props
type
});
callback?.(id);
return id;
},
onSuccess() {
getData(pageNum);
},
onSettled() {
setLoading(false);
},
{
onSuccess() {
getData(pageNum);
},
successToast: t('common:common.Create Success'),
errorToast: t('common:common.Create Failed')
}
);
successToast: t('common:common.Create Success'),
errorToast: t('common:common.Create Failed')
});
const isWebSite = datasetDetail?.type === DatasetTypeEnum.websiteDataset;
return (
<Box display={['block', 'flex']} alignItems={'center'} gap={2}>
<MyBox isLoading={onCreating} display={['block', 'flex']} alignItems={'center'} gap={2}>
<HStack flex={1}>
<Box flex={1} fontWeight={'500'} color={'myGray.900'} whiteSpace={'nowrap'}>
<ParentPath
@@ -446,7 +434,7 @@ const Header = ({}: {}) => {
)}
<EditCreateVirtualFileModal iconSrc={'modal/manualDataset'} closeBtnText={''} />
{isOpenFileSourceSelector && <FileSourceSelector onClose={onCloseFileSourceSelector} />}
</Box>
</MyBox>
);
};

View File

@@ -29,7 +29,8 @@ import {
DatasetCollectionTypeEnum,
DatasetStatusEnum,
DatasetCollectionSyncResultMap,
DatasetTypeEnum
DatasetTypeEnum,
DatasetCollectionDataProcessModeMap
} from '@fastgpt/global/core/dataset/constants';
import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
import { TabEnum } from '../../../../pages/dataset/detail/index';
@@ -44,10 +45,7 @@ import { CollectionPageContext } from './Context';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
import MyTag from '@fastgpt/web/components/common/Tag/index';
import {
checkCollectionIsFolder,
getTrainingTypeLabel
} from '@fastgpt/global/core/dataset/collection/utils';
import { checkCollectionIsFolder } from '@fastgpt/global/core/dataset/collection/utils';
import { useFolderDrag } from '@/components/common/folder/useFolderDrag';
import TagsPopOver from './TagsPopOver';
import { useSystemStore } from '@/web/common/system/useSystemStore';
@@ -194,7 +192,7 @@ const CollectionCard = () => {
<Thead draggable={false}>
<Tr>
<Th py={4}>{t('common:common.Name')}</Th>
<Th py={4}>{t('dataset:collection.Training type')}</Th>
<Th py={4}>{t('dataset:collection.training_type')}</Th>
<Th py={4}>{t('dataset:collection_data_count')}</Th>
<Th py={4}>{t('dataset:collection.Create update time')}</Th>
<Th py={4}>{t('common:common.Status')}</Th>
@@ -251,7 +249,14 @@ const CollectionCard = () => {
</Td>
<Td py={2}>
{!checkCollectionIsFolder(collection.type) ? (
<>{t((getTrainingTypeLabel(collection.trainingType) || '-') as any)}</>
<>
{collection.trainingType
? t(
(DatasetCollectionDataProcessModeMap[collection.trainingType]
?.label || '-') as any
)
: '-'}
</>
) : (
'-'
)}