feat: chunk index independent config (#4271)

* sync collection

* remove lock

* feat: chunk index independent config

* feat: add max chunksize to split chunk function

* remove log

* update doc

* remove

* remove log
This commit is contained in:
Archer
2025-03-21 16:44:25 +08:00
committed by archer
parent 4512b23d4d
commit 159bf17369
47 changed files with 784 additions and 443 deletions

View File

@@ -117,7 +117,7 @@ function EditModal({
ml={4}
autoFocus
bg={'myWhite.600'}
maxLength={20}
maxLength={100}
placeholder={t('user:team.Team Name')}
{...register('name', {
required: t('common:common.Please Input Name')

View File

@@ -326,7 +326,7 @@ function EditLinkModal({
<FormLabel flex={'0 0 90px'}>{t('common:Name')}</FormLabel>
<Input
placeholder={t('publish:link_name')}
maxLength={20}
maxLength={100}
{...register('name', {
required: t('common:common.name_is_empty')
})}

View File

@@ -26,7 +26,7 @@ function BasicInfo({
</FormLabel>
<Input
placeholder={t('publish:publish_name')}
maxLength={20}
maxLength={100}
{...register('name', {
required: t('common:common.name_is_empty')
})}

View File

@@ -96,7 +96,7 @@ const ExtractFieldModal = ({
<Input
bg={'myGray.50'}
placeholder="name/age/sql"
maxLength={20}
maxLength={100}
{...register('key', { required: true })}
/>
</Flex>

View File

@@ -418,7 +418,7 @@ const NodeCard = (props: Props) => {
{RenderToolHandle}
<ConfirmSyncModal />
<EditTitleModal maxLength={50} />
<EditTitleModal maxLength={100} />
</Flex>
);
};

View File

@@ -319,7 +319,7 @@ const TemplateMarketModal = ({
onChange={(e) => setCurrentSearch(e.target.value)}
h={8}
bg={'myGray.50'}
maxLength={20}
maxLength={100}
borderRadius={'sm'}
/>
</Box>

View File

@@ -49,7 +49,7 @@ const EditFolderModal = ({
defaultValue={name}
placeholder={t('common:dataset.Folder Name') || ''}
autoFocus
maxLength={20}
maxLength={100}
/>
</ModalBody>
<ModalFooter>

View File

@@ -10,11 +10,21 @@ import { useMyStep } from '@fastgpt/web/hooks/useStep';
import { Box, Button, Flex, IconButton } from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { TabEnum } from '../NavBar';
import { ChunkSettingModeEnum } from '@/web/core/dataset/constants';
import { ChunkSettingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { UseFormReturn, useForm } from 'react-hook-form';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { DataChunkSplitModeEnum } from '@fastgpt/global/core/dataset/constants';
import {
getMaxChunkSize,
getLLMDefaultChunkSize,
getLLMMaxChunkSize,
chunkAutoChunkSize,
minChunkSize,
getAutoIndexSize,
getMaxIndexSize
} from '@fastgpt/global/core/dataset/training/utils';
type TrainingFiledType = {
chunkOverlapRatio: number;
@@ -22,6 +32,9 @@ type TrainingFiledType = {
minChunkSize: number;
autoChunkSize: number;
chunkSize: number;
maxIndexSize?: number;
indexSize?: number;
autoIndexSize?: number;
charsPointsPrice: number;
priceTip: string;
uploadRate: number;
@@ -47,9 +60,13 @@ export type ImportFormType = {
autoIndexes: boolean;
chunkSettingMode: ChunkSettingModeEnum;
chunkSplitMode: DataChunkSplitModeEnum;
embeddingChunkSize: number;
qaChunkSize: number;
customSplitChar: string;
chunkSplitter: string;
indexSize: number;
qaPrompt: string;
webSelector: string;
};
@@ -199,9 +216,12 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
trainingType: DatasetCollectionDataProcessModeEnum.chunk,
chunkSettingMode: ChunkSettingModeEnum.auto,
embeddingChunkSize: vectorModel?.defaultToken || 512,
qaChunkSize: Math.min(agentModel.maxResponse * 1, agentModel.maxContext * 0.7),
customSplitChar: '',
chunkSplitMode: DataChunkSplitModeEnum.size,
embeddingChunkSize: 2000,
indexSize: vectorModel?.defaultToken || 512,
qaChunkSize: getLLMDefaultChunkSize(agentModel),
chunkSplitter: '',
qaPrompt: Prompt_AgentQA.description,
webSelector: '',
customPdfParse: false
@@ -215,17 +235,18 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
const chunkSettingMode = processParamsForm.watch('chunkSettingMode');
const embeddingChunkSize = processParamsForm.watch('embeddingChunkSize');
const qaChunkSize = processParamsForm.watch('qaChunkSize');
const customSplitChar = processParamsForm.watch('customSplitChar');
const chunkSplitter = processParamsForm.watch('chunkSplitter');
const autoIndexes = processParamsForm.watch('autoIndexes');
const indexSize = processParamsForm.watch('indexSize');
const TrainingModeMap = useMemo<TrainingFiledType>(() => {
if (trainingType === DatasetCollectionDataProcessModeEnum.qa) {
return {
chunkSizeField: 'qaChunkSize',
chunkOverlapRatio: 0,
maxChunkSize: Math.min(agentModel.maxResponse * 4, agentModel.maxContext * 0.7),
minChunkSize: 4000,
autoChunkSize: Math.min(agentModel.maxResponse * 1, agentModel.maxContext * 0.7),
maxChunkSize: getLLMMaxChunkSize(agentModel),
minChunkSize: 1000,
autoChunkSize: getLLMDefaultChunkSize(agentModel),
chunkSize: qaChunkSize,
charsPointsPrice: agentModel.charsPointsPrice || 0,
priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
@@ -237,10 +258,13 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
return {
chunkSizeField: 'embeddingChunkSize',
chunkOverlapRatio: 0.2,
maxChunkSize: 2048,
minChunkSize: 100,
autoChunkSize: vectorModel?.defaultToken ? vectorModel.defaultToken * 2 : 1024,
maxChunkSize: getMaxChunkSize(agentModel),
minChunkSize: minChunkSize,
autoChunkSize: chunkAutoChunkSize,
chunkSize: embeddingChunkSize,
maxIndexSize: getMaxIndexSize(vectorModel),
autoIndexSize: getAutoIndexSize(vectorModel),
indexSize,
charsPointsPrice: agentModel.charsPointsPrice || 0,
priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
price: agentModel.charsPointsPrice
@@ -251,10 +275,13 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
return {
chunkSizeField: 'embeddingChunkSize',
chunkOverlapRatio: 0.2,
maxChunkSize: vectorModel?.maxToken || 512,
minChunkSize: 100,
autoChunkSize: vectorModel?.defaultToken || 512,
maxChunkSize: getMaxChunkSize(agentModel),
minChunkSize: minChunkSize,
autoChunkSize: chunkAutoChunkSize,
chunkSize: embeddingChunkSize,
maxIndexSize: getMaxIndexSize(vectorModel),
autoIndexSize: getAutoIndexSize(vectorModel),
indexSize,
charsPointsPrice: vectorModel.charsPointsPrice || 0,
priceTip: t('dataset:import.Embedding Estimated Price Tips', {
price: vectorModel.charsPointsPrice
@@ -265,30 +292,36 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
}, [
trainingType,
autoIndexes,
agentModel.maxResponse,
agentModel.maxContext,
agentModel.charsPointsPrice,
agentModel,
qaChunkSize,
t,
vectorModel.defaultToken,
vectorModel?.maxToken,
vectorModel.charsPointsPrice,
embeddingChunkSize
embeddingChunkSize,
vectorModel,
indexSize
]);
const chunkSettingModeMap = useMemo(() => {
if (chunkSettingMode === ChunkSettingModeEnum.auto) {
return {
chunkSize: TrainingModeMap.autoChunkSize,
customSplitChar: ''
indexSize: TrainingModeMap.autoIndexSize,
chunkSplitter: ''
};
} else {
return {
chunkSize: TrainingModeMap.chunkSize,
customSplitChar
indexSize: TrainingModeMap.indexSize,
chunkSplitter
};
}
}, [chunkSettingMode, TrainingModeMap.autoChunkSize, TrainingModeMap.chunkSize, customSplitChar]);
}, [
chunkSettingMode,
TrainingModeMap.autoChunkSize,
TrainingModeMap.autoIndexSize,
TrainingModeMap.chunkSize,
TrainingModeMap.indexSize,
chunkSplitter
]);
const contextValue = {
...TrainingModeMap,

View File

@@ -20,10 +20,11 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
import {
DataChunkSplitModeEnum,
DatasetCollectionDataProcessModeEnum,
DatasetCollectionDataProcessModeMap
} from '@fastgpt/global/core/dataset/constants';
import { ChunkSettingModeEnum } from '@/web/core/dataset/constants';
import { ChunkSettingModeEnum } from '@fastgpt/global/core/dataset/constants';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyModal from '@fastgpt/web/components/common/MyModal';
@@ -37,25 +38,39 @@ import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
import { shadowLight } from '@fastgpt/web/styles/theme';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import MySelect from '@fastgpt/web/components/common/MySelect';
import { getIndexSizeSelectList } from '@fastgpt/global/core/dataset/training/utils';
import RadioGroup from '@fastgpt/web/components/common/Radio/RadioGroup';
function DataProcess() {
const { t } = useTranslation();
const { feConfigs } = useSystemStore();
const { goToNext, processParamsForm, chunkSizeField, minChunkSize, maxChunkSize } =
useContextSelector(DatasetImportContext, (v) => v);
const {
goToNext,
processParamsForm,
chunkSizeField,
minChunkSize,
maxChunkSize,
maxIndexSize,
indexSize
} = useContextSelector(DatasetImportContext, (v) => v);
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const { setValue, register, watch, getValues } = processParamsForm;
const trainingType = watch('trainingType');
const chunkSettingMode = watch('chunkSettingMode');
const trainingModeList = useMemo(() => {
const list = Object.entries(DatasetCollectionDataProcessModeMap);
return list
.filter(([key]) => key !== DatasetCollectionDataProcessModeEnum.auto)
.map(([key, value]) => ({
title: t(value.label as any),
value: key as DatasetCollectionDataProcessModeEnum,
tooltip: t(value.tooltip as any)
}));
}, [t]);
const qaPrompt = watch('qaPrompt');
const {
isOpen: isOpenCustomPrompt,
onOpen: onOpenCustomPrompt,
onClose: onCloseCustomPrompt
} = useDisclosure();
const chunkSettingMode = watch('chunkSettingMode');
const chunkSplitMode = watch('chunkSplitMode');
const customSplitList = [
{ label: t('dataset:split_sign_null'), value: '' },
@@ -69,25 +84,25 @@ function DataProcess() {
{ label: t('dataset:split_sign_custom'), value: 'Other' }
];
const [customListSelectValue, setCustomListSelectValue] = useState(getValues('customSplitChar'));
const [customListSelectValue, setCustomListSelectValue] = useState(getValues('chunkSplitter'));
useEffect(() => {
if (customListSelectValue === 'Other') {
setValue('customSplitChar', '');
setValue('chunkSplitter', '');
} else {
setValue('customSplitChar', customListSelectValue);
setValue('chunkSplitter', customListSelectValue);
}
}, [customListSelectValue, setValue]);
const trainingModeList = useMemo(() => {
const list = Object.entries(DatasetCollectionDataProcessModeMap);
return list
.filter(([key]) => key !== DatasetCollectionDataProcessModeEnum.auto)
.map(([key, value]) => ({
title: t(value.label as any),
value: key as DatasetCollectionDataProcessModeEnum,
tooltip: t(value.tooltip as any)
}));
}, [t]);
// Index size
const indexSizeSeletorList = useMemo(() => getIndexSizeSelectList(maxIndexSize), [maxIndexSize]);
// QA
const qaPrompt = watch('qaPrompt');
const {
isOpen: isOpenCustomPrompt,
onOpen: onOpenCustomPrompt,
onClose: onCloseCustomPrompt
} = useDisclosure();
const Title = useCallback(({ title }: { title: string }) => {
return (
@@ -237,67 +252,97 @@ function DataProcess() {
children: chunkSettingMode === ChunkSettingModeEnum.custom && (
<Box mt={5}>
<Box>
<Flex alignItems={'center'}>
<Box>{t('dataset:ideal_chunk_length')}</Box>
<QuestionTip label={t('dataset:ideal_chunk_length_tips')} />
</Flex>
<Box
mt={1}
css={{
'& > span': {
display: 'block'
<RadioGroup<DataChunkSplitModeEnum>
list={[
{
title: t('dataset:split_chunk_size'),
value: DataChunkSplitModeEnum.size
},
{
title: t('dataset:split_chunk_char'),
value: DataChunkSplitModeEnum.char,
tooltip: t('dataset:custom_split_sign_tip')
}
]}
value={chunkSplitMode}
onChange={(e) => {
setValue('chunkSplitMode', e);
}}
>
<MyTooltip
label={t('common:core.dataset.import.Chunk Range', {
min: minChunkSize,
max: maxChunkSize
})}
/>
{chunkSplitMode === DataChunkSplitModeEnum.size && (
<Box
mt={1.5}
css={{
'& > span': {
display: 'block'
}
}}
>
<MyNumberInput
register={register}
name={chunkSizeField}
min={minChunkSize}
max={maxChunkSize}
size={'sm'}
step={100}
/>
</MyTooltip>
</Box>
<MyTooltip
label={t('common:core.dataset.import.Chunk Range', {
min: minChunkSize,
max: maxChunkSize
})}
>
<MyNumberInput
register={register}
name={chunkSizeField}
min={minChunkSize}
max={maxChunkSize}
size={'sm'}
step={100}
/>
</MyTooltip>
</Box>
)}
{chunkSplitMode === DataChunkSplitModeEnum.char && (
<HStack mt={1.5}>
<Box flex={'1 0 0'}>
<MySelect<string>
list={customSplitList}
size={'sm'}
bg={'myGray.50'}
value={customListSelectValue}
h={'32px'}
onChange={(val) => {
setCustomListSelectValue(val);
}}
/>
</Box>
{customListSelectValue === 'Other' && (
<Input
flex={'1 0 0'}
h={'32px'}
size={'sm'}
bg={'myGray.50'}
placeholder="\n;======;==SPLIT=="
{...register('chunkSplitter')}
/>
)}
</HStack>
)}
</Box>
<Box mt={3}>
{trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
<Box>
{t('common:core.dataset.import.Custom split char')}
<QuestionTip label={t('dataset:custom_split_sign_tip')} />
</Box>
<HStack mt={1}>
<Box flex={'1 0 0'}>
<MySelect<string>
list={customSplitList}
size={'sm'}
<Flex alignItems={'center'} mt={3}>
<Box>{t('dataset:index_size')}</Box>
<QuestionTip label={t('dataset:index_size_tips')} />
</Flex>
<Box mt={1}>
<MySelect<number>
bg={'myGray.50'}
value={customListSelectValue}
h={'32px'}
list={indexSizeSeletorList}
value={indexSize}
onChange={(val) => {
setCustomListSelectValue(val);
setValue('indexSize', val);
}}
/>
</Box>
{customListSelectValue === 'Other' && (
<Input
flex={'1 0 0'}
h={'32px'}
size={'sm'}
bg={'myGray.50'}
placeholder="\n;======;==SPLIT=="
{...register('customSplitChar')}
/>
)}
</HStack>
</Box>
</Box>
)}
{showQAPromptInput && (
<Box mt={3}>

View File

@@ -16,6 +16,7 @@ import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContex
import MyBox from '@fastgpt/web/components/common/MyBox';
import Markdown from '@/components/Markdown';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getLLMMaxChunkSize } from '@fastgpt/global/core/dataset/training/utils';
const PreviewData = () => {
const { t } = useTranslation();
@@ -23,6 +24,7 @@ const PreviewData = () => {
const goToNext = useContextSelector(DatasetImportContext, (v) => v.goToNext);
const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const sources = useContextSelector(DatasetImportContext, (v) => v.sources);
const importSource = useContextSelector(DatasetImportContext, (v) => v.importSource);
@@ -36,12 +38,13 @@ const PreviewData = () => {
async () => {
if (!previewFile) return;
if (importSource === ImportDataSourceEnum.fileCustom) {
const customSplitChar = processParamsForm.getValues('customSplitChar');
const chunkSplitter = processParamsForm.getValues('chunkSplitter');
const { chunks } = splitText2Chunks({
text: previewFile.rawText || '',
chunkLen: chunkSize,
chunkSize,
maxSize: getLLMMaxChunkSize(datasetDetail.agentModel),
overlapRatio: chunkOverlapRatio,
customReg: customSplitChar ? [customSplitChar] : []
customReg: chunkSplitter ? [chunkSplitter] : []
});
return chunks.map((chunk) => ({
q: chunk,
@@ -61,9 +64,12 @@ const PreviewData = () => {
customPdfParse: processParamsForm.getValues('customPdfParse'),
trainingType: processParamsForm.getValues('trainingType'),
chunkSettingMode: processParamsForm.getValues('chunkSettingMode'),
chunkSplitMode: processParamsForm.getValues('chunkSplitMode'),
chunkSize,
chunkSplitter: processParamsForm.getValues('chunkSplitter'),
overlapRatio: chunkOverlapRatio,
customSplitChar: processParamsForm.getValues('customSplitChar'),
selector: processParamsForm.getValues('webSelector'),
isQAImport: importSource === ImportDataSourceEnum.csvTable,

View File

@@ -49,7 +49,7 @@ const Upload = () => {
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const retrainNewCollectionId = useRef('');
const { importSource, parentId, sources, setSources, processParamsForm, chunkSize } =
const { importSource, parentId, sources, setSources, processParamsForm, chunkSize, indexSize } =
useContextSelector(DatasetImportContext, (v) => v);
const { handleSubmit } = processParamsForm;
@@ -81,7 +81,7 @@ const Upload = () => {
}, [waitingFilesCount, totalFilesCount, allFinished, t]);
const { runAsync: startUpload, loading: isLoading } = useRequest2(
async ({ trainingType, customSplitChar, qaPrompt, webSelector }: ImportFormType) => {
async ({ trainingType, chunkSplitter, qaPrompt, webSelector }: ImportFormType) => {
if (sources.length === 0) return;
const filterWaitingSources = sources.filter((item) => item.createStatus === 'waiting');
@@ -111,10 +111,16 @@ const Upload = () => {
trainingType,
imageIndex: processParamsForm.getValues('imageIndex'),
autoIndexes: processParamsForm.getValues('autoIndexes'),
chunkSettingMode: processParamsForm.getValues('chunkSettingMode'),
chunkSplitMode: processParamsForm.getValues('chunkSplitMode'),
chunkSize,
chunkSplitter: customSplitChar,
indexSize,
chunkSplitter,
qaPrompt: trainingType === DatasetCollectionDataProcessModeEnum.qa ? qaPrompt : undefined
};
if (importSource === ImportDataSourceEnum.reTraining) {
const res = await postReTrainingDatasetFileCollection({
...commonParams,

View File

@@ -1,102 +0,0 @@
import React from 'react';
import { Box } from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
import { getPreviewChunks } from '@/web/core/dataset/api';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { getPreviewSourceReadType } from '../utils';
const PreviewChunks = ({
previewSource,
onClose
}: {
previewSource: ImportSourceItemType;
onClose: () => void;
}) => {
const { importSource, chunkSize, chunkOverlapRatio, processParamsForm } = useContextSelector(
DatasetImportContext,
(v) => v
);
const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);
const { data = [], loading: isLoading } = useRequest2(
async () => {
if (importSource === ImportDataSourceEnum.fileCustom) {
const customSplitChar = processParamsForm.getValues('customSplitChar');
const { chunks } = splitText2Chunks({
text: previewSource.rawText || '',
chunkLen: chunkSize,
overlapRatio: chunkOverlapRatio,
customReg: customSplitChar ? [customSplitChar] : []
});
return chunks.map((chunk) => ({
q: chunk,
a: ''
}));
}
return getPreviewChunks({
datasetId,
type: getPreviewSourceReadType(previewSource),
sourceId:
previewSource.dbFileId ||
previewSource.link ||
previewSource.externalFileUrl ||
previewSource.apiFileId ||
'',
chunkSize,
overlapRatio: chunkOverlapRatio,
customSplitChar: processParamsForm.getValues('customSplitChar'),
selector: processParamsForm.getValues('webSelector'),
isQAImport: importSource === ImportDataSourceEnum.csvTable,
externalFileId: previewSource.externalFileId
});
},
{
manual: false
}
);
return (
<MyRightDrawer
onClose={onClose}
iconSrc={previewSource.icon}
title={previewSource.sourceName}
isLoading={isLoading}
maxW={['90vw', '40vw']}
px={0}
>
<Box overflowY={'auto'} px={5} fontSize={'sm'}>
{data.map((item, index) => (
<Box
key={index}
whiteSpace={'pre-wrap'}
fontSize={'sm'}
p={4}
bg={index % 2 === 0 ? 'white' : 'myWhite.600'}
mb={3}
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
boxShadow={'2'}
_notLast={{
mb: 2
}}
>
<Box color={'myGray.900'}>{item.q}</Box>
<Box color={'myGray.500'}>{item.a}</Box>
</Box>
))}
</Box>
</MyRightDrawer>
);
};
export default React.memo(PreviewChunks);

View File

@@ -8,10 +8,11 @@ import { useRouter } from 'next/router';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { getDatasetCollectionById } from '@/web/core/dataset/api';
import MyBox from '@fastgpt/web/components/common/MyBox';
import { ChunkSettingModeEnum } from '@/web/core/dataset/constants';
import { ChunkSettingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { Box } from '@chakra-ui/react';
import { DataChunkSplitModeEnum } from '@fastgpt/global/core/dataset/constants';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
const Upload = dynamic(() => import('../commonProgress/Upload'));
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
@@ -23,7 +24,6 @@ const ReTraining = () => {
collectionId: string;
};
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const activeStep = useContextSelector(DatasetImportContext, (v) => v.activeStep);
const setSources = useContextSelector(DatasetImportContext, (v) => v.setSources);
const processParamsForm = useContextSelector(DatasetImportContext, (v) => v.processParamsForm);
@@ -46,18 +46,21 @@ const ReTraining = () => {
uploadedFileRate: 100
}
]);
processParamsForm.reset({
customPdfParse: collection.customPdfParse,
trainingType: collection.trainingType,
imageIndex: collection.imageIndex,
autoIndexes: collection.autoIndexes,
chunkSettingMode: ChunkSettingModeEnum.auto,
chunkSettingMode: collection.chunkSettingMode || ChunkSettingModeEnum.auto,
chunkSplitMode: collection.chunkSplitMode || DataChunkSplitModeEnum.size,
embeddingChunkSize: collection.chunkSize,
qaChunkSize: collection.chunkSize,
customSplitChar: collection.chunkSplitter,
qaPrompt: collection.qaPrompt,
webSelector: collection.metadata?.webPageSelector
indexSize: collection.indexSize || 512,
chunkSplitter: collection.chunkSplitter,
webSelector: collection.metadata?.webPageSelector,
qaPrompt: collection.qaPrompt || Prompt_AgentQA.description
});
}
});