perf: backup import (#4866)

* i18n

* remove invalid code

* perf: backup import

* backup tip

* fix: indexsize invalid
This commit is contained in:
Archer
2025-05-22 15:53:51 +08:00
committed by GitHub
parent dd3c251603
commit 88bd3aaa9e
67 changed files with 751 additions and 388 deletions

View File

@@ -0,0 +1,94 @@
import React, { useState } from 'react';
import MyModal from '@fastgpt/web/components/common/MyModal';
import { useTranslation } from 'next-i18next';
import { Box, Button, HStack, ModalBody, ModalFooter, VStack } from '@chakra-ui/react';
import FileSelector, { type SelectFileItemType } from '../components/FileSelector';
import MyIcon from '@fastgpt/web/components/common/Icon';
import MyIconButton from '@fastgpt/web/components/common/Icon/button';
import { postBackupDatasetCollection } from '@/web/core/dataset/api';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { useContextSelector } from 'use-context-selector';
import LightTip from '@fastgpt/web/components/common/LightTip';
/**
 * Modal for importing a backup file into the current dataset.
 *
 * The user picks a single csv file through `FileSelector`; confirming sends it
 * with `postBackupDatasetCollection` for the dataset id read from
 * `DatasetPageContext`.
 *
 * @param onFinish Called after a successful import, before the modal closes.
 * @param onClose  Called to close the modal (Close button, or after success).
 */
const BackupImportModal = ({
  onFinish,
  onClose
}: {
  onFinish: () => void;
  onClose: () => void;
}) => {
  const { t } = useTranslation();
  const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);

  const [selectFiles, setSelectFiles] = useState<SelectFileItemType[]>([]);
  // Progress value reported through `percentListen`; 100 is treated below as
  // "upload finished, server is parsing".
  const [percent, setPercent] = useState(0);

  const { runAsync: onBackupImport, loading: isBackupLoading } = useRequest2(
    async () => {
      // Start every attempt from 0. Without this, retrying after a failed
      // import would keep the previous value (typically 100) and show the
      // "parsing" label while the file is still being uploaded.
      setPercent(0);

      await postBackupDatasetCollection({
        datasetId,
        // The import button is disabled while no file is selected, so index 0 exists here.
        file: selectFiles[0].file,
        percentListen: setPercent
      });
    },
    {
      onSuccess() {
        onFinish();
        onClose();
      },
      successToast: t('dataset:backup_dataset_success')
    }
  );

  // Label of the confirm button: idle -> uploading (with progress) -> parsing.
  const importButtonText = (() => {
    if (!isBackupLoading) return t('common:Import');
    if (percent === 100) return t('dataset:backup_data_parse');
    return t('dataset:backup_data_uploading', { num: percent });
  })();

  return (
    <MyModal iconSrc="backup" iconColor={'primary.600'} isOpen title={t('dataset:backup_dataset')}>
      <ModalBody>
        <LightTip mb={3} icon="common/info" text={t('dataset:backup_dataset_tip')} />

        <FileSelector
          maxCount={1}
          fileType="csv"
          selectFiles={selectFiles}
          setSelectFiles={setSelectFiles}
        />

        {/* File render */}
        {selectFiles.length > 0 && (
          <VStack mt={4} gap={2}>
            {selectFiles.map((item, index) => (
              <HStack key={index} w={'100%'}>
                <MyIcon name={item.icon as any} w={'1rem'} />
                <Box color={'myGray.900'}>{item.name}</Box>
                <Box fontSize={'xs'} color={'myGray.500'} flex={1}>
                  {item.size}
                </Box>
                <MyIconButton
                  icon="delete"
                  hoverColor="red.500"
                  hoverBg="red.50"
                  onClick={() => {
                    // Functional update: always filter the latest list, not the
                    // snapshot captured when this row was rendered.
                    setSelectFiles((state) => state.filter((_, i) => i !== index));
                  }}
                />
              </HStack>
            ))}
          </VStack>
        )}
      </ModalBody>

      <ModalFooter>
        <Button isLoading={isBackupLoading} variant="whiteBase" mr={2} onClick={onClose}>
          {t('common:Close')}
        </Button>
        {/* Call without arguments so the click event is not forwarded into the request runner */}
        <Button
          onClick={() => onBackupImport()}
          isDisabled={selectFiles.length === 0 || isBackupLoading}
        >
          {importButtonText}
        </Button>
      </ModalFooter>
    </MyModal>
  );
};
export default BackupImportModal;

View File

@@ -36,6 +36,7 @@ import MyTag from '@fastgpt/web/components/common/Tag/index';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
const FileSourceSelector = dynamic(() => import('../Import/components/FileSourceSelector'));
const BackupImportModal = dynamic(() => import('./BackupImportModal'));
const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => {
const { t } = useTranslation();
@@ -76,6 +77,12 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => {
onOpen: onOpenFileSourceSelector,
onClose: onCloseFileSourceSelector
} = useDisclosure();
// Backup import modal
const {
isOpen: isOpenBackupImportModal,
onOpen: onOpenBackupImportModal,
onClose: onCloseBackupImportModal
} = useDisclosure();
const { runAsync: onCreateCollection } = useRequest2(
async ({ name, type }: { name: string; type: DatasetCollectionTypeEnum }) => {
@@ -220,11 +227,11 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => {
{
label: (
<Flex>
<MyIcon name={'common/folderFill'} w={'20px'} mr={2} />
{t('common:Folder')}
<MyIcon name={'core/dataset/fileCollection'} mr={2} w={'20px'} />
{t('common:core.dataset.Text collection')}
</Flex>
),
onClick: () => setEditFolderData({})
onClick: onOpenFileSourceSelector
},
{
label: (
@@ -244,27 +251,24 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => {
{
label: (
<Flex>
<MyIcon name={'core/dataset/fileCollection'} mr={2} w={'20px'} />
{t('common:core.dataset.Text collection')}
<MyIcon name={'backup'} mr={2} w={'20px'} />
{t('dataset:backup_dataset')}
</Flex>
),
onClick: onOpenFileSourceSelector
},
onClick: onOpenBackupImportModal
}
]
},
{
children: [
{
label: (
<Flex>
<MyIcon name={'core/dataset/tableCollection'} mr={2} w={'20px'} />
{t('common:core.dataset.Table collection')}
<MyIcon name={'common/folderFill'} w={'20px'} mr={2} />
{t('common:Folder')}
</Flex>
),
onClick: () =>
router.replace({
query: {
...router.query,
currentTab: TabEnum.import,
source: ImportDataSourceEnum.csvTable
}
})
onClick: () => setEditFolderData({})
}
]
}
@@ -471,6 +475,14 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => {
)}
<EditCreateVirtualFileModal iconSrc={'modal/manualDataset'} closeBtnText={''} />
{isOpenFileSourceSelector && <FileSourceSelector onClose={onCloseFileSourceSelector} />}
{isOpenBackupImportModal && (
<BackupImportModal
onFinish={() => {
getData(1);
}}
onClose={onCloseBackupImportModal}
/>
)}
</MyBox>
);
};

View File

@@ -257,18 +257,12 @@ const CollectionCard = () => {
)}
</Td>
<Td py={2}>
{!checkCollectionIsFolder(collection.type) ? (
<>
{collection.trainingType
? t(
(DatasetCollectionDataProcessModeMap[collection.trainingType]
?.label || '-') as any
)
: '-'}
</>
) : (
'-'
)}
{collection.trainingType
? t(
(DatasetCollectionDataProcessModeMap[collection.trainingType]?.label ||
'-') as any
)
: '-'}
</Td>
<Td py={2}>{collection.dataAmount || '-'}</Td>
<Td fontSize={'xs'} py={2} color={'myGray.500'}>

View File

@@ -27,7 +27,10 @@ import Markdown from '@/components/Markdown';
import { useMemoizedFn } from 'ahooks';
import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination';
import { TabEnum } from './NavBar';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import {
DatasetCollectionDataProcessModeEnum,
ImportDataSourceEnum
} from '@fastgpt/global/core/dataset/constants';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import TrainingStates from './CollectionCard/TrainingStates';
import { getTextValidLength } from '@fastgpt/global/common/string/utils';

View File

@@ -118,14 +118,18 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
const imageIndex = watch('imageIndex');
const trainingModeList = useMemo(() => {
const list = Object.entries(DatasetCollectionDataProcessModeMap);
return list
.filter(([key]) => key !== DatasetCollectionDataProcessModeEnum.auto)
.map(([key, value]) => ({
title: t(value.label as any),
value: key as DatasetCollectionDataProcessModeEnum,
tooltip: t(value.tooltip as any)
}));
const list = {
[DatasetCollectionDataProcessModeEnum.chunk]:
DatasetCollectionDataProcessModeMap[DatasetCollectionDataProcessModeEnum.chunk],
[DatasetCollectionDataProcessModeEnum.qa]:
DatasetCollectionDataProcessModeMap[DatasetCollectionDataProcessModeEnum.qa]
};
return Object.entries(list).map(([key, value]) => ({
title: t(value.label as any),
value: key as DatasetCollectionDataProcessModeEnum,
tooltip: t(value.tooltip as any)
}));
}, [t]);
const {
chunkSizeField,

View File

@@ -144,20 +144,6 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
title: t('dataset:import_confirm')
}
],
[ImportDataSourceEnum.csvTable]: [
{
title: t('dataset:import_select_file')
},
{
title: t('dataset:import_param_setting')
},
{
title: t('dataset:import_data_preview')
},
{
title: t('dataset:import_confirm')
}
],
[ImportDataSourceEnum.externalFile]: [
{
title: t('dataset:import_select_file')
@@ -206,7 +192,7 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
chunkSettingMode: ChunkSettingModeEnum.auto,
chunkSplitMode: DataChunkSplitModeEnum.size,
embeddingChunkSize: 2000,
embeddingChunkSize: chunkAutoChunkSize,
indexSize: vectorModel?.defaultToken || 512,
qaChunkSize: getLLMDefaultChunkSize(agentModel),
chunkSplitter: '',

View File

@@ -75,7 +75,6 @@ const PreviewData = () => {
overlapRatio: chunkOverlapRatio,
selector: processParamsForm.getValues('webSelector'),
isQAImport: importSource === ImportDataSourceEnum.csvTable,
externalFileId: previewFile.externalFileId
});
},

View File

@@ -26,7 +26,6 @@ import { useRouter } from 'next/router';
import { TabEnum } from '../../../../../pages/dataset/detail/index';
import {
postCreateDatasetApiDatasetCollection,
postCreateDatasetCsvTableCollection,
postCreateDatasetExternalFileCollection,
postCreateDatasetFileCollection,
postCreateDatasetLinkCollection,
@@ -146,11 +145,6 @@ const Upload = () => {
...commonParams,
text: item.rawText
});
} else if (importSource === ImportDataSourceEnum.csvTable && item.dbFileId) {
await postCreateDatasetCsvTableCollection({
...commonParams,
fileId: item.dbFileId
});
} else if (importSource === ImportDataSourceEnum.externalFile && item.externalFileUrl) {
await postCreateDatasetExternalFileCollection({
...commonParams,

View File

@@ -1,101 +0,0 @@
import React, { useEffect, useMemo, useState } from 'react';
import { type ImportSourceItemType } from '@/web/core/dataset/type.d';
import { Box, Button } from '@chakra-ui/react';
import FileSelector from '../components/FileSelector';
import { useTranslation } from 'next-i18next';
import dynamic from 'next/dynamic';
import { fileDownload } from '@/web/common/file/utils';
import { RenderUploadFiles } from '../components/RenderFiles';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
const Upload = dynamic(() => import('../commonProgress/Upload'));
const fileType = '.csv';
/**
 * Step switcher for the csv table import flow: reads the active step from
 * `DatasetImportContext` and renders the matching stage
 * (0: file selection, 1: data preview, 2: upload).
 */
const FileLocal = () => {
  const currentStep = useContextSelector(DatasetImportContext, (ctx) => ctx.activeStep);

  const isSelectStep = currentStep === 0;
  const isPreviewStep = currentStep === 1;
  const isUploadStep = currentStep === 2;

  return (
    <>
      {isSelectStep ? <SelectFile /> : null}
      {isPreviewStep ? <PreviewData /> : null}
      {isUploadStep ? <Upload /> : null}
    </>
  );
};
export default React.memo(FileLocal);
// Sample csv offered to the user for download as `template.csv` by SelectFile.
// The first row is the header (`index,content`); the remaining rows are
// example/instruction rows. This is runtime text written to the downloaded
// file, so it must stay exactly as is.
const csvTemplate = `index,content
"第一列内容","第二列内容"
"必填列","可选列。CSV 中请注意内容不能包含双引号,双引号是列分割符号"
"只会将第一和第二列内容导入,其余列会被忽略",""
"结合人工智能的演进历程,AIGC的发展大致可以分为三个阶段即:早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期),以及快速发展展阶段(21世纪10年代中期至今)。",""
"AIGC发展分为几个阶段","早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期)、快速发展展阶段(21世纪10年代中期至今)"`;
/**
 * File selection stage of the csv table import flow.
 *
 * Keeps a local list of picked files, mirrors the entries without an
 * `errorMsg` into the import context via `setSources`, offers the csv
 * template for download, and advances to the next step on confirm.
 */
const SelectFile = React.memo(function SelectFile() {
  const { t } = useTranslation();
  const importCtx = useContextSelector(DatasetImportContext, (ctx) => ctx);
  const { goToNext, sources, setSources } = importCtx;

  // Seed the local list from the sources already stored in the import context.
  const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
    sources.map((source) => ({
      isUploading: false,
      ...source
    }))
  );
  // True between FileSelector's onStartSelect and onFinishSelect callbacks.
  const [uploading, setUploading] = useState(false);

  // Files that carry no error message.
  const successFiles = useMemo(
    () => selectFiles.filter((file) => !file.errorMsg),
    [selectFiles]
  );

  // Publish the usable files to the import context whenever they change.
  useEffect(() => {
    setSources(successFiles);
  }, [successFiles]);

  const downloadTemplate = () =>
    fileDownload({
      text: csvTemplate,
      type: 'text/csv;charset=utf-8',
      filename: 'template.csv'
    });

  // Drop failed entries from the local list, then move to the next step.
  const handleNext = () => {
    setSelectFiles((prev) => prev.filter((file) => !file.errorMsg));
    goToNext();
  };

  const nextDisabled = uploading || successFiles.length === 0;
  const countPrefix =
    selectFiles.length > 0
      ? `${t('dataset:total_num_files', { total: selectFiles.length })} | `
      : '';

  return (
    <Box>
      <FileSelector
        fileType={fileType}
        selectFiles={selectFiles}
        setSelectFiles={setSelectFiles}
        onStartSelect={() => setUploading(true)}
        onFinishSelect={() => setUploading(false)}
      />

      <Box
        mt={4}
        color={'primary.600'}
        textDecoration={'underline'}
        cursor={'pointer'}
        onClick={() => downloadTemplate()}
      >
        {t('common:core.dataset.import.Down load csv template')}
      </Box>

      {/* render files */}
      <RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} />

      <Box textAlign={'right'} mt={5}>
        <Button isDisabled={nextDisabled} onClick={handleNext}>
          {countPrefix}
          {t('common:next_step')}
        </Button>
      </Box>
    </Box>
  );
});

View File

@@ -8,7 +8,6 @@ import DatasetImportContextProvider, { DatasetImportContext } from './Context';
const FileLocal = dynamic(() => import('./diffSource/FileLocal'));
const FileLink = dynamic(() => import('./diffSource/FileLink'));
const FileCustomText = dynamic(() => import('./diffSource/FileCustomText'));
const TableLocal = dynamic(() => import('./diffSource/TableLocal'));
const ExternalFileCollection = dynamic(() => import('./diffSource/ExternalFile'));
const APIDatasetCollection = dynamic(() => import('./diffSource/APIDataset'));
const ReTraining = dynamic(() => import('./diffSource/ReTraining'));
@@ -21,7 +20,6 @@ const ImportDataset = () => {
if (importSource === ImportDataSourceEnum.fileLocal) return FileLocal;
if (importSource === ImportDataSourceEnum.fileLink) return FileLink;
if (importSource === ImportDataSourceEnum.fileCustom) return FileCustomText;
if (importSource === ImportDataSourceEnum.csvTable) return TableLocal;
if (importSource === ImportDataSourceEnum.externalFile) return ExternalFileCollection;
if (importSource === ImportDataSourceEnum.apiDataset) return APIDatasetCollection;
}, [importSource]);

View File

@@ -84,14 +84,22 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
label: t('dataset:collection.training_type'),
value: t(DatasetCollectionDataProcessModeMap[collection.trainingType]?.label as any)
},
{
label: t('dataset:chunk_size'),
value: collection.chunkSize || '-'
},
{
label: t('dataset:index_size'),
value: collection.indexSize || '-'
},
...(collection.chunkSize
? [
{
label: t('dataset:chunk_size'),
value: collection.chunkSize
}
]
: []),
...(collection.indexSize
? [
{
label: t('dataset:index_size'),
value: collection.indexSize
}
]
: []),
...(webSelector
? [
{

View File

@@ -0,0 +1,218 @@
import MyBox from '@fastgpt/web/components/common/MyBox';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { Box, type FlexProps } from '@chakra-ui/react';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import React, { type DragEvent, useCallback, useMemo, useState } from 'react';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import { useSystemStore } from '@/web/common/system/useSystemStore';
/**
 * One file picked through FileSelector, together with the display data that
 * `onSelectFile` derives from it.
 */
export type SelectFileItemType = {
  // The browser File object that was selected or dropped
  file: File;
  // Icon name returned by getFileIcon(file.name)
  icon: string;
  // Copy of file.name
  name: string;
  // Human-readable size produced by formatFileSize(file.size)
  size: string;
};
/**
 * Builds a case-insensitive matcher for file names ending in one of the
 * comma-separated extensions in `fileType` (e.g. `"csv"` or `".csv, .txt"`).
 *
 * Each extension is stripped of leading dots and regex-escaped, and the match
 * is anchored on a literal dot. This way `"csv"` no longer accepts a name that
 * merely ends in the letters "csv", and the dot of `".csv"` no longer matches
 * any character.
 */
const buildFileTypeRegExp = (fileType: string): RegExp => {
  const extensions = fileType
    .split(',')
    .map((item) => item.trim().replace(/^\.+/, ''))
    .filter((item) => item.length > 0)
    .map((item) => item.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));

  // Nothing usable configured: accept every name, which is what an empty
  // `fileType` produced before.
  if (extensions.length === 0) return /^/;

  return new RegExp(`\\.(${extensions.join('|')})$`, 'i');
};

/**
 * Reads every entry of a directory reader.
 * `readEntries` delivers results in batches, so it is called repeatedly until
 * it yields an empty batch.
 */
const readAllDirectoryEntries = async (
  reader: FileSystemDirectoryReader
): Promise<FileSystemEntry[]> => {
  const collected: FileSystemEntry[] = [];
  for (;;) {
    const batch = await new Promise<FileSystemEntry[]>((resolve, reject) =>
      reader.readEntries(resolve, reject)
    );
    if (batch.length === 0) return collected;
    collected.push(...batch);
  }
};

/** Promise wrapper around `FileSystemFileEntry.file`. */
const readEntryFile = (entry: FileSystemFileEntry): Promise<File> =>
  new Promise<File>((resolve, reject) => entry.file(resolve, reject));

/**
 * Click-or-drop file picker.
 *
 * Picked files are converted to `SelectFileItemType` and prepended to
 * `selectFiles`; the list is capped at `maxCount`. Once the cap is reached the
 * area stops reacting to clicks and drops.
 *
 * @param fileType       Comma-separated extensions; passed to `useSelectFile`
 *                       and used to filter dropped files by name.
 * @param selectFiles    Currently selected files (controlled by the parent).
 * @param setSelectFiles State setter for `selectFiles`.
 * @param maxCount       Maximum number of files kept (default 1000).
 * Remaining props are spread onto the root `MyBox`.
 */
const FileSelector = ({
  fileType,
  selectFiles,
  setSelectFiles,
  maxCount = 1000,
  ...props
}: {
  fileType: string;
  selectFiles: SelectFileItemType[];
  setSelectFiles: React.Dispatch<React.SetStateAction<SelectFileItemType[]>>;
  maxCount?: number;
} & FlexProps) => {
  const { t } = useTranslation();
  const { toast } = useToast();
  const { feConfigs } = useSystemStore();

  // Only shown in the hint text below; this component does not enforce it.
  const maxSize = (feConfigs?.uploadFileMaxSize || 1024) * 1024 * 1024;

  const { File, onOpen } = useSelectFile({
    fileType,
    multiple: maxCount > 1,
    maxCount
  });
  const [isDragging, setIsDragging] = useState(false);
  const isMaxSelected = useMemo(
    () => selectFiles.length >= maxCount,
    [maxCount, selectFiles.length]
  );

  const filterTypeReg = useMemo(() => buildFileTypeRegExp(fileType), [fileType]);

  const onSelectFile = useCallback(
    async (files: File[]) => {
      const fileList = files.map((file) => ({
        file,
        icon: getFileIcon(file.name),
        name: file.name,
        size: formatFileSize(file.size)
      }));
      // Newest files first, never more than maxCount
      setSelectFiles((state) => {
        return [...fileList, ...state].slice(0, maxCount);
      });
    },
    [maxCount, setSelectFiles]
  );

  const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
    e.preventDefault();
    setIsDragging(true);
  };

  const handleDragLeave = (e: DragEvent<HTMLDivElement>) => {
    e.preventDefault();
    setIsDragging(false);
  };

  const handleDrop = async (e: DragEvent<HTMLDivElement>) => {
    e.preventDefault();
    setIsDragging(false);

    const showDropError = () => {
      toast({
        title: t('file:upload_error_description'),
        status: 'error'
      });
    };

    const items = e.dataTransfer.items;
    // A drop may carry no items at all; optional chaining avoids a TypeError
    // and lets that case fall through to the error toast.
    const firstEntry = items[0]?.webkitGetAsEntry();

    // Single dropped folder: walk it recursively and pick matching files
    if (firstEntry?.isDirectory && items.length === 1) {
      const walkDirectory = async (dir: FileSystemDirectoryEntry): Promise<void> => {
        const entries = await readAllDirectoryEntries(dir.createReader());
        for (const entry of entries) {
          if (entry.isFile) {
            const file = await readEntryFile(entry as FileSystemFileEntry);
            if (filterTypeReg.test(file.name)) {
              onSelectFile([file]);
            }
          } else if (entry.isDirectory) {
            await walkDirectory(entry as FileSystemDirectoryEntry);
          }
        }
      };

      try {
        await walkDirectory(firstEntry as FileSystemDirectoryEntry);
      } catch {
        // Reading the folder failed: report it instead of leaving the drop pending
        showDropError();
      }
      return;
    }

    // Plain files
    if (firstEntry?.isFile) {
      const files = Array.from(e.dataTransfer.files);
      // An entry with an empty MIME type is rejected together with the whole drop
      const hasUntyped = files.some((item) => item.type === '');
      if (hasUntyped) {
        showDropError();
        return;
      }

      onSelectFile(files.filter((item) => filterTypeReg.test(item.name)));
      return;
    }

    // Anything else (no entry, folder mixed with other items, ...)
    showDropError();
  };

  return (
    <MyBox
      display={'flex'}
      flexDirection={'column'}
      alignItems={'center'}
      justifyContent={'center'}
      px={3}
      py={[4, 7]}
      borderWidth={'1.5px'}
      borderStyle={'dashed'}
      borderRadius={'md'}
      userSelect={'none'}
      {...(isMaxSelected
        ? {
            cursor: 'not-allowed'
          }
        : {
            cursor: 'pointer',
            _hover: {
              bg: 'primary.50',
              borderColor: 'primary.600'
            },
            borderColor: isDragging ? 'primary.600' : 'borderColor.high',
            onDragEnter: handleDragEnter,
            onDragOver: (e: DragEvent<HTMLDivElement>) => e.preventDefault(),
            onDragLeave: handleDragLeave,
            onDrop: handleDrop,
            onClick: onOpen
          })}
      {...props}
    >
      <MyIcon name={'common/uploadFileFill'} w={'32px'} />
      {isMaxSelected ? (
        <>
          <Box color={'myGray.500'} fontSize={'xs'}>
            {t('file:reached_max_file_count')}
          </Box>
        </>
      ) : (
        <>
          <Box fontWeight={'bold'}>
            {isDragging
              ? t('file:release_the_mouse_to_upload_the_file')
              : t('file:select_and_drag_file_tip')}
          </Box>
          {/* file type */}
          <Box color={'myGray.500'} fontSize={'xs'}>
            {t('file:support_file_type', { fileType })}
          </Box>
          <Box color={'myGray.500'} fontSize={'xs'}>
            {/* max count */}
            {maxCount && t('file:support_max_count', { maxCount })}
            {/* max size */}
            {maxSize && t('file:support_max_size', { maxSize: formatFileSize(maxSize) })}
          </Box>

          <File onSelect={(files) => onSelectFile(files)} />
        </>
      )}
    </MyBox>
  );
};
export default React.memo(FileSelector);