Add image index and pdf parse (#3956)

* feat: think tag parse

* feat: parse think tag test

* feat: pdf parse ux

* feat: doc2x parse

* perf: rewrite training mode setting

* feat: image parse queue

* perf: image index

* feat: image parse process

* feat: add init sh

* fix: ts
This commit is contained in:
Archer
2025-03-03 23:08:29 +08:00
committed by archer
parent 08b6f594df
commit adf5377ebe
106 changed files with 2337 additions and 1454 deletions

View File

@@ -23,7 +23,7 @@ import MyModal from '@fastgpt/web/components/common/MyModal';
import MyTag from '@fastgpt/web/components/common/Tag/index';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { batchRun } from '@fastgpt/global/common/fn/utils';
import { batchRun } from '@fastgpt/global/common/system/utils';
import { useToast } from '@fastgpt/web/hooks/useToast';
type ModelTestItem = {

View File

@@ -26,7 +26,7 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
[usage.list]
);
const { hasModel, hasToken, hasInputToken, hasOutputToken, hasCharsLen, hasDuration } =
const { hasModel, hasToken, hasInputToken, hasOutputToken, hasCharsLen, hasDuration, hasPages } =
useMemo(() => {
let hasModel = false;
let hasToken = false;
@@ -34,7 +34,7 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
let hasOutputToken = false;
let hasCharsLen = false;
let hasDuration = false;
let hasDataLen = false;
let hasPages = false;
usage.list.forEach((item) => {
if (item.model !== undefined) {
@@ -56,6 +56,9 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
if (typeof item.duration === 'number') {
hasDuration = true;
}
if (typeof item.pages === 'number') {
hasPages = true;
}
});
return {
@@ -65,7 +68,7 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
hasOutputToken,
hasCharsLen,
hasDuration,
hasDataLen
hasPages
};
}, [usage.list]);
@@ -113,6 +116,7 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
{hasOutputToken && <Th>{t('account_usage:output_token_length')}</Th>}
{hasCharsLen && <Th>{t('account_usage:text_length')}</Th>}
{hasDuration && <Th>{t('account_usage:duration_seconds')}</Th>}
{hasPages && <Th>{t('account_usage:pages')}</Th>}
<Th>{t('account_usage:total_points_consumed')}</Th>
</Tr>
</Thead>
@@ -126,6 +130,7 @@ const UsageDetail = ({ usage, onClose }: { usage: UsageItemType; onClose: () =>
{hasOutputToken && <Td>{item.outputTokens ?? '-'}</Td>}
{hasCharsLen && <Td>{item.charsLength ?? '-'}</Td>}
{hasDuration && <Td>{item.duration ?? '-'}</Td>}
{hasPages && <Td>{item.pages ?? '-'}</Td>}
<Td>{formatNumber(item.amount)}</Td>
</Tr>
))}

View File

@@ -87,8 +87,8 @@ const UsageTableList = ({
'common:support.wallet.usage.Audio Speech'
),
['support.wallet.usage.Whisper']: t('common:support.wallet.usage.Whisper'),
['support.wallet.moduleName.index']: t('common:support.wallet.moduleName.index'),
['support.wallet.moduleName.qa']: t('common:support.wallet.moduleName.qa'),
['account_usage:embedding_index']: t('account_usage:embedding_index'),
['account_usage:qa']: t('account_usage:qa'),
['core.dataset.training.Auto mode']: t('common:core.dataset.training.Auto mode'),
['common:core.module.template.ai_chat']: t('common:core.module.template.ai_chat')
},
@@ -122,49 +122,51 @@ const UsageTableList = ({
onConfirm={exportUsage}
/>
</Flex>
<MyBox position={'relative'} overflowY={'auto'} mt={3} flex={1} isLoading={isLoading}>
<TableContainer>
<Table>
<Thead>
<Tr>
<Th>{t('common:user.Time')}</Th>
<Th>{t('account_usage:member')}</Th>
<Th>{t('account_usage:user_type')}</Th>
<Th>{t('account_usage:project_name')}</Th>
<Th>{t('account_usage:total_points')}</Th>
<Th></Th>
</Tr>
</Thead>
<Tbody fontSize={'sm'}>
{usages.map((item) => (
<Tr key={item.id}>
<Td>{dayjs(item.time).format('YYYY/MM/DD HH:mm:ss')}</Td>
<Td>
<Flex alignItems={'center'} color={'myGray.500'}>
<Avatar src={item.sourceMember.avatar} w={'20px'} mr={1} rounded={'full'} />
{item.sourceMember.name}
</Flex>
</Td>
<Td>{t(UsageSourceMap[item.source]?.label as any) || '-'}</Td>
<Td>{t(item.appName as any) || '-'}</Td>
<Td>{formatNumber(item.totalPoints) || 0}</Td>
<Td>
<Button
size={'sm'}
variant={'whitePrimary'}
onClick={() => setUsageDetail(item)}
>
{t('account_usage:details')}
</Button>
</Td>
<MyBox mt={3} flex={'1 0 0'} h={0} isLoading={isLoading}>
<Box h={'100%'} overflow={'auto'}>
<TableContainer>
<Table>
<Thead>
<Tr>
<Th>{t('common:user.Time')}</Th>
<Th>{t('account_usage:member')}</Th>
<Th>{t('account_usage:user_type')}</Th>
<Th>{t('account_usage:project_name')}</Th>
<Th>{t('account_usage:total_points')}</Th>
<Th></Th>
</Tr>
))}
</Tbody>
</Table>
{!isLoading && usages.length === 0 && (
<EmptyTip text={t('account_usage:no_usage_records')}></EmptyTip>
)}
</TableContainer>
</Thead>
<Tbody fontSize={'sm'}>
{usages.map((item) => (
<Tr key={item.id}>
<Td>{dayjs(item.time).format('YYYY/MM/DD HH:mm:ss')}</Td>
<Td>
<Flex alignItems={'center'} color={'myGray.500'}>
<Avatar src={item.sourceMember.avatar} w={'20px'} mr={1} rounded={'full'} />
{item.sourceMember.name}
</Flex>
</Td>
<Td>{t(UsageSourceMap[item.source]?.label as any) || '-'}</Td>
<Td>{t(item.appName as any) || '-'}</Td>
<Td>{formatNumber(item.totalPoints) || 0}</Td>
<Td>
<Button
size={'sm'}
variant={'whitePrimary'}
onClick={() => setUsageDetail(item)}
>
{t('account_usage:details')}
</Button>
</Td>
</Tr>
))}
</Tbody>
</Table>
{!isLoading && usages.length === 0 && (
<EmptyTip text={t('account_usage:no_usage_records')}></EmptyTip>
)}
</TableContainer>
</Box>
</MyBox>
<Flex mt={3} justifyContent={'center'}>
<Pagination />

View File

@@ -18,7 +18,7 @@ import { useQuery } from '@tanstack/react-query';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
import MyInput from '@/components/MyInput';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { useRequest, useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { useRouter } from 'next/router';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyMenu from '@fastgpt/web/components/common/MyMenu';
@@ -28,7 +28,8 @@ import {
TrainingModeEnum,
DatasetTypeEnum,
DatasetTypeMap,
DatasetStatusEnum
DatasetStatusEnum,
DatasetCollectionDataProcessModeEnum
} from '@fastgpt/global/core/dataset/constants';
import EditFolderModal, { useEditFolder } from '../../EditFolderModal';
import { TabEnum } from '../../../../pages/dataset/detail/index';
@@ -41,6 +42,7 @@ import { CollectionPageContext } from './Context';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { useSystem } from '@fastgpt/web/hooks/useSystem';
import HeaderTagPopOver from './HeaderTagPopOver';
import MyBox from '@fastgpt/web/components/common/MyBox';
const FileSourceSelector = dynamic(() => import('../Import/components/FileSourceSelector'));
@@ -48,7 +50,7 @@ const Header = ({}: {}) => {
const { t } = useTranslation();
const theme = useTheme();
const { setLoading, feConfigs } = useSystemStore();
const { feConfigs } = useSystemStore();
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const router = useRouter();
@@ -69,50 +71,36 @@ const Header = ({}: {}) => {
tip: t('common:dataset.Manual collection Tip'),
canEmpty: false
});
const {
isOpen: isOpenFileSourceSelector,
onOpen: onOpenFileSourceSelector,
onClose: onCloseFileSourceSelector
} = useDisclosure();
const { mutate: onCreateCollection } = useRequest({
mutationFn: async ({
name,
type,
callback,
...props
}: {
name: string;
type: DatasetCollectionTypeEnum;
callback?: (id: string) => void;
trainingType?: TrainingModeEnum;
rawLink?: string;
chunkSize?: number;
}) => {
setLoading(true);
const { runAsync: onCreateCollection, loading: onCreating } = useRequest2(
async ({ name, type }: { name: string; type: DatasetCollectionTypeEnum }) => {
const id = await postDatasetCollection({
parentId,
datasetId: datasetDetail._id,
name,
type,
...props
type
});
callback?.(id);
return id;
},
onSuccess() {
getData(pageNum);
},
onSettled() {
setLoading(false);
},
{
onSuccess() {
getData(pageNum);
},
successToast: t('common:common.Create Success'),
errorToast: t('common:common.Create Failed')
}
);
successToast: t('common:common.Create Success'),
errorToast: t('common:common.Create Failed')
});
const isWebSite = datasetDetail?.type === DatasetTypeEnum.websiteDataset;
return (
<Box display={['block', 'flex']} alignItems={'center'} gap={2}>
<MyBox isLoading={onCreating} display={['block', 'flex']} alignItems={'center'} gap={2}>
<HStack flex={1}>
<Box flex={1} fontWeight={'500'} color={'myGray.900'} whiteSpace={'nowrap'}>
<ParentPath
@@ -446,7 +434,7 @@ const Header = ({}: {}) => {
)}
<EditCreateVirtualFileModal iconSrc={'modal/manualDataset'} closeBtnText={''} />
{isOpenFileSourceSelector && <FileSourceSelector onClose={onCloseFileSourceSelector} />}
</Box>
</MyBox>
);
};

View File

@@ -29,7 +29,8 @@ import {
DatasetCollectionTypeEnum,
DatasetStatusEnum,
DatasetCollectionSyncResultMap,
DatasetTypeEnum
DatasetTypeEnum,
DatasetCollectionDataProcessModeMap
} from '@fastgpt/global/core/dataset/constants';
import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
import { TabEnum } from '../../../../pages/dataset/detail/index';
@@ -44,10 +45,7 @@ import { CollectionPageContext } from './Context';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
import MyTag from '@fastgpt/web/components/common/Tag/index';
import {
checkCollectionIsFolder,
getTrainingTypeLabel
} from '@fastgpt/global/core/dataset/collection/utils';
import { checkCollectionIsFolder } from '@fastgpt/global/core/dataset/collection/utils';
import { useFolderDrag } from '@/components/common/folder/useFolderDrag';
import TagsPopOver from './TagsPopOver';
import { useSystemStore } from '@/web/common/system/useSystemStore';
@@ -194,7 +192,7 @@ const CollectionCard = () => {
<Thead draggable={false}>
<Tr>
<Th py={4}>{t('common:common.Name')}</Th>
<Th py={4}>{t('dataset:collection.Training type')}</Th>
<Th py={4}>{t('dataset:collection.training_type')}</Th>
<Th py={4}>{t('dataset:collection_data_count')}</Th>
<Th py={4}>{t('dataset:collection.Create update time')}</Th>
<Th py={4}>{t('common:common.Status')}</Th>
@@ -251,7 +249,14 @@ const CollectionCard = () => {
</Td>
<Td py={2}>
{!checkCollectionIsFolder(collection.type) ? (
<>{t((getTrainingTypeLabel(collection.trainingType) || '-') as any)}</>
<>
{collection.trainingType
? t(
(DatasetCollectionDataProcessModeMap[collection.trainingType]
?.label || '-') as any
)
: '-'}
</>
) : (
'-'
)}

View File

@@ -1,13 +1,16 @@
import { useRouter } from 'next/router';
import { SetStateAction, useState } from 'react';
import { SetStateAction, useMemo, useState } from 'react';
import { useTranslation } from 'next-i18next';
import { createContext, useContextSelector } from 'use-context-selector';
import { ImportDataSourceEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import {
DatasetCollectionDataProcessModeEnum,
ImportDataSourceEnum
} from '@fastgpt/global/core/dataset/constants';
import { useMyStep } from '@fastgpt/web/hooks/useStep';
import { Box, Button, Flex, IconButton } from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { TabEnum } from '../NavBar';
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
import { ChunkSettingModeEnum } from '@/web/core/dataset/constants';
import { UseFormReturn, useForm } from 'react-hook-form';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
@@ -19,12 +22,10 @@ type TrainingFiledType = {
minChunkSize: number;
autoChunkSize: number;
chunkSize: number;
showChunkInput: boolean;
showPromptInput: boolean;
charsPointsPrice: number;
priceTip: string;
uploadRate: number;
chunkSizeField?: ChunkSizeFieldType;
chunkSizeField: ChunkSizeFieldType;
};
type DatasetImportContextType = {
importSource: ImportDataSourceEnum;
@@ -39,8 +40,13 @@ type DatasetImportContextType = {
type ChunkSizeFieldType = 'embeddingChunkSize' | 'qaChunkSize';
export type ImportFormType = {
mode: TrainingModeEnum;
way: ImportProcessWayEnum;
customPdfParse: boolean;
trainingType: DatasetCollectionDataProcessModeEnum;
imageIndex: boolean;
autoIndexes: boolean;
chunkSettingMode: ChunkSettingModeEnum;
embeddingChunkSize: number;
qaChunkSize: number;
customSplitChar: string;
@@ -58,8 +64,6 @@ export const DatasetImportContext = createContext<DatasetImportContextType>({
maxChunkSize: 0,
minChunkSize: 0,
showChunkInput: false,
showPromptInput: false,
sources: [],
setSources: function (value: SetStateAction<ImportSourceItemType[]>): void {
throw new Error('Function not implemented.');
@@ -88,72 +92,93 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
const modeSteps: Record<ImportDataSourceEnum, { title: string }[]> = {
[ImportDataSourceEnum.reTraining]: [
{ title: t('dataset:core.dataset.import.Adjust parameters') },
{ title: t('common:core.dataset.import.Upload data') }
{
title: t('dataset:import_data_preview')
},
{ title: t('dataset:import_confirm') }
],
[ImportDataSourceEnum.fileLocal]: [
{
title: t('common:core.dataset.import.Select file')
title: t('dataset:import_select_file')
},
{
title: t('common:core.dataset.import.Data Preprocessing')
title: t('dataset:import_param_setting')
},
{
title: t('common:core.dataset.import.Upload data')
title: t('dataset:import_data_preview')
},
{
title: t('dataset:import_confirm')
}
],
[ImportDataSourceEnum.fileLink]: [
{
title: t('common:core.dataset.import.Select file')
title: t('dataset:import_select_file')
},
{
title: t('common:core.dataset.import.Data Preprocessing')
title: t('dataset:import_param_setting')
},
{
title: t('common:core.dataset.import.Upload data')
title: t('dataset:import_data_preview')
},
{
title: t('dataset:import_confirm')
}
],
[ImportDataSourceEnum.fileCustom]: [
{
title: t('common:core.dataset.import.Select file')
title: t('dataset:import_select_file')
},
{
title: t('common:core.dataset.import.Data Preprocessing')
title: t('dataset:import_param_setting')
},
{
title: t('common:core.dataset.import.Upload data')
title: t('dataset:import_data_preview')
},
{
title: t('dataset:import_confirm')
}
],
[ImportDataSourceEnum.csvTable]: [
{
title: t('common:core.dataset.import.Select file')
title: t('dataset:import_select_file')
},
{
title: t('common:core.dataset.import.Data Preprocessing')
title: t('dataset:import_param_setting')
},
{
title: t('common:core.dataset.import.Upload data')
title: t('dataset:import_data_preview')
},
{
title: t('dataset:import_confirm')
}
],
[ImportDataSourceEnum.externalFile]: [
{
title: t('common:core.dataset.import.Select file')
title: t('dataset:import_select_file')
},
{
title: t('common:core.dataset.import.Data Preprocessing')
title: t('dataset:import_param_setting')
},
{
title: t('common:core.dataset.import.Upload data')
title: t('dataset:import_data_preview')
},
{
title: t('dataset:import_confirm')
}
],
[ImportDataSourceEnum.apiDataset]: [
{
title: t('common:core.dataset.import.Select file')
title: t('dataset:import_select_file')
},
{
title: t('common:core.dataset.import.Data Preprocessing')
title: t('dataset:import_param_setting')
},
{
title: t('common:core.dataset.import.Upload data')
title: t('dataset:import_data_preview')
},
{
title: t('dataset:import_confirm')
}
]
};
@@ -168,96 +193,114 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
const processParamsForm = useForm<ImportFormType>({
defaultValues: {
mode: TrainingModeEnum.chunk,
way: ImportProcessWayEnum.auto,
imageIndex: false,
autoIndexes: false,
trainingType: DatasetCollectionDataProcessModeEnum.chunk,
chunkSettingMode: ChunkSettingModeEnum.auto,
embeddingChunkSize: vectorModel?.defaultToken || 512,
qaChunkSize: Math.min(agentModel.maxResponse * 1, agentModel.maxContext * 0.7),
customSplitChar: '',
qaPrompt: Prompt_AgentQA.description,
webSelector: ''
webSelector: '',
customPdfParse: false
}
});
const [sources, setSources] = useState<ImportSourceItemType[]>([]);
// watch form
const mode = processParamsForm.watch('mode');
const way = processParamsForm.watch('way');
const trainingType = processParamsForm.watch('trainingType');
const chunkSettingMode = processParamsForm.watch('chunkSettingMode');
const embeddingChunkSize = processParamsForm.watch('embeddingChunkSize');
const qaChunkSize = processParamsForm.watch('qaChunkSize');
const customSplitChar = processParamsForm.watch('customSplitChar');
const autoIndexes = processParamsForm.watch('autoIndexes');
const modeStaticParams: Record<TrainingModeEnum, TrainingFiledType> = {
[TrainingModeEnum.auto]: {
chunkOverlapRatio: 0.2,
maxChunkSize: 2048,
minChunkSize: 100,
autoChunkSize: vectorModel?.defaultToken ? vectorModel?.defaultToken * 2 : 1024,
chunkSize: vectorModel?.defaultToken ? vectorModel?.defaultToken * 2 : 1024,
showChunkInput: false,
showPromptInput: false,
charsPointsPrice: agentModel.charsPointsPrice || 0,
priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
price: agentModel.charsPointsPrice
}),
uploadRate: 100
},
[TrainingModeEnum.chunk]: {
chunkSizeField: 'embeddingChunkSize' as ChunkSizeFieldType,
chunkOverlapRatio: 0.2,
maxChunkSize: vectorModel?.maxToken || 512,
minChunkSize: 100,
autoChunkSize: vectorModel?.defaultToken || 512,
chunkSize: embeddingChunkSize,
showChunkInput: true,
showPromptInput: false,
charsPointsPrice: vectorModel.charsPointsPrice || 0,
priceTip: t('dataset:import.Embedding Estimated Price Tips', {
price: vectorModel.charsPointsPrice
}),
uploadRate: 150
},
[TrainingModeEnum.qa]: {
chunkSizeField: 'qaChunkSize' as ChunkSizeFieldType,
chunkOverlapRatio: 0,
maxChunkSize: Math.min(agentModel.maxResponse * 4, agentModel.maxContext * 0.7),
minChunkSize: 4000,
autoChunkSize: Math.min(agentModel.maxResponse * 1, agentModel.maxContext * 0.7),
chunkSize: qaChunkSize,
showChunkInput: true,
showPromptInput: true,
charsPointsPrice: agentModel.charsPointsPrice || 0,
priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
price: agentModel.charsPointsPrice
}),
uploadRate: 30
const TrainingModeMap = useMemo<TrainingFiledType>(() => {
if (trainingType === DatasetCollectionDataProcessModeEnum.qa) {
return {
chunkSizeField: 'qaChunkSize',
chunkOverlapRatio: 0,
maxChunkSize: Math.min(agentModel.maxResponse * 4, agentModel.maxContext * 0.7),
minChunkSize: 4000,
autoChunkSize: Math.min(agentModel.maxResponse * 1, agentModel.maxContext * 0.7),
chunkSize: qaChunkSize,
charsPointsPrice: agentModel.charsPointsPrice || 0,
priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
price: agentModel.charsPointsPrice
}),
uploadRate: 30
};
} else if (autoIndexes) {
return {
chunkSizeField: 'embeddingChunkSize',
chunkOverlapRatio: 0.2,
maxChunkSize: 2048,
minChunkSize: 100,
autoChunkSize: vectorModel?.defaultToken ? vectorModel.defaultToken * 2 : 1024,
chunkSize: embeddingChunkSize,
charsPointsPrice: agentModel.charsPointsPrice || 0,
priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
price: agentModel.charsPointsPrice
}),
uploadRate: 100
};
} else {
return {
chunkSizeField: 'embeddingChunkSize',
chunkOverlapRatio: 0.2,
maxChunkSize: vectorModel?.maxToken || 512,
minChunkSize: 100,
autoChunkSize: vectorModel?.defaultToken || 512,
chunkSize: embeddingChunkSize,
charsPointsPrice: vectorModel.charsPointsPrice || 0,
priceTip: t('dataset:import.Embedding Estimated Price Tips', {
price: vectorModel.charsPointsPrice
}),
uploadRate: 150
};
}
};
const selectModelStaticParam = modeStaticParams[mode];
}, [
trainingType,
autoIndexes,
agentModel.maxResponse,
agentModel.maxContext,
agentModel.charsPointsPrice,
qaChunkSize,
t,
vectorModel.defaultToken,
vectorModel?.maxToken,
vectorModel.charsPointsPrice,
embeddingChunkSize
]);
const wayStaticPrams = {
[ImportProcessWayEnum.auto]: {
chunkSize: selectModelStaticParam.autoChunkSize,
customSplitChar: ''
},
[ImportProcessWayEnum.custom]: {
chunkSize: modeStaticParams[mode].chunkSize,
customSplitChar
const chunkSettingModeMap = useMemo(() => {
if (chunkSettingMode === ChunkSettingModeEnum.auto) {
return {
chunkSize: TrainingModeMap.autoChunkSize,
customSplitChar: ''
};
} else {
return {
chunkSize: TrainingModeMap.chunkSize,
customSplitChar
};
}
};
const chunkSize = wayStaticPrams[way].chunkSize;
}, [chunkSettingMode, TrainingModeMap.autoChunkSize, TrainingModeMap.chunkSize, customSplitChar]);
const contextValue = {
...TrainingModeMap,
...chunkSettingModeMap,
importSource: source,
parentId,
activeStep,
goToNext,
processParamsForm,
...selectModelStaticParam,
sources,
setSources,
chunkSize
setSources
};
return (

View File

@@ -1,4 +1,4 @@
import React, { useCallback, useMemo, useRef } from 'react';
import React, { useCallback, useEffect, useMemo, useRef } from 'react';
import {
Box,
Flex,
@@ -7,27 +7,37 @@ import {
ModalBody,
ModalFooter,
Textarea,
useDisclosure
useDisclosure,
Checkbox,
Accordion,
AccordionItem,
AccordionButton,
AccordionPanel,
AccordionIcon,
HStack
} from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
import { TrainingModeEnum, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
import {
DatasetCollectionDataProcessModeEnum,
DatasetCollectionDataProcessModeMap
} from '@fastgpt/global/core/dataset/constants';
import { ChunkSettingModeEnum } from '@/web/core/dataset/constants';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyModal from '@fastgpt/web/components/common/MyModal';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import Preview from '../components/Preview';
import MyTag from '@fastgpt/web/components/common/Tag/index';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
import { useToast } from '@fastgpt/web/hooks/useToast';
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
import MyNumberInput from '@fastgpt/web/components/common/Input/NumberInput';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
import { shadowLight } from '@fastgpt/web/styles/theme';
import AIModelSelector from '@/components/Select/AIModelSelector';
function DataProcess({ showPreviewChunks = true }: { showPreviewChunks: boolean }) {
function DataProcess() {
const { t } = useTranslation();
const { feConfigs } = useSystemStore();
@@ -36,16 +46,13 @@ function DataProcess({ showPreviewChunks = true }: { showPreviewChunks: boolean
processParamsForm,
chunkSizeField,
minChunkSize,
showChunkInput,
showPromptInput,
maxChunkSize,
priceTip,
chunkSize
} = useContextSelector(DatasetImportContext, (v) => v);
const { getValues, setValue, register, watch } = processParamsForm;
const { toast } = useToast();
const mode = watch('mode');
const way = watch('way');
const trainingType = watch('trainingType');
const chunkSettingMode = watch('chunkSettingMode');
const {
isOpen: isOpenCustomPrompt,
@@ -54,214 +61,315 @@ function DataProcess({ showPreviewChunks = true }: { showPreviewChunks: boolean
} = useDisclosure();
const trainingModeList = useMemo(() => {
const list = Object.entries(TrainingTypeMap);
return list;
const list = Object.entries(DatasetCollectionDataProcessModeMap);
return list
.filter(([key]) => key !== DatasetCollectionDataProcessModeEnum.auto)
.map(([key, value]) => ({
title: t(value.label as any),
value: key as DatasetCollectionDataProcessModeEnum,
tooltip: t(value.tooltip as any)
}));
}, []);
const onSelectTrainWay = useCallback(
(e: TrainingModeEnum) => {
if (!feConfigs?.isPlus && !TrainingTypeMap[e]?.openSource) {
return toast({
status: 'warning',
title: t('common:common.system.Commercial version function')
});
}
setValue('mode', e);
},
[feConfigs?.isPlus, setValue, t, toast]
);
// Accordion section header: a small accent bar, the left-aligned title text,
// and the expand/collapse icon, all rendered inside the accordion's toggle button.
// The empty dependency list keeps the component reference stable across renders.
const Title = useCallback(
  (props: { title: string }) => (
    <AccordionButton bg={'none !important'} p={2}>
      <Box w={'3px'} h={'16px'} bg={'primary.600'} borderRadius={'2px'} mr={2} />
      <Box color={'myGray.900'} flex={'1 0 0'} textAlign={'left'}>
        {props.title}
      </Box>
      <AccordionIcon />
    </AccordionButton>
  ),
  []
);
// Adapt auto training: whenever the form's trainingType is `auto`, rewrite it as
// `chunk` with the autoIndexes flag switched on. Any other trainingType is left untouched.
useEffect(() => {
  const isAutoMode = trainingType === DatasetCollectionDataProcessModeEnum.auto;
  if (!isAutoMode) return;

  setValue('autoIndexes', true);
  setValue('trainingType', DatasetCollectionDataProcessModeEnum.chunk);
}, [trainingType, setValue]);
const showFileParseSetting = feConfigs?.showCustomPdfParse;
const showQAPromptInput = trainingType === DatasetCollectionDataProcessModeEnum.qa;
return (
<Box h={'100%'} display={['block', 'flex']} fontSize={'sm'}>
<Box
flex={'1 0 0'}
minW={['auto', '500px']}
maxW={'600px'}
h={['auto', '100%']}
overflow={'auto'}
pr={[0, 3]}
>
<Flex alignItems={'center'}>
<MyIcon name={'common/settingLight'} w={'20px'} />
<Box fontSize={'md'}>{t('dataset:data_process_setting')}</Box>
</Flex>
<>
<Box flex={'1 0 0'} maxW={['90vw', '640px']} m={'auto'} overflow={'auto'}>
<Accordion allowMultiple reduceMotion defaultIndex={[0, 1, 2]}>
{showFileParseSetting && (
<AccordionItem border={'none'} borderBottom={'base'} pb={4}>
<Title title={t('dataset:import_file_parse_setting')} />
<Box display={['block', 'flex']} mt={4} alignItems={'center'}>
<FormLabel flex={'0 0 100px'}>{t('dataset:training_mode')}</FormLabel>
<LeftRadio
list={trainingModeList.map(([key, value]) => ({
title: t(value.label as any),
value: key,
tooltip: t(value.tooltip as any)
}))}
px={3}
py={2}
value={mode}
onChange={onSelectTrainWay}
defaultBg="white"
activeBg="white"
display={'flex'}
flexWrap={'wrap'}
/>
</Box>
<Box display={['block', 'flex']} mt={5}>
<FormLabel flex={'0 0 100px'}>{t('dataset:data_process_params')}</FormLabel>
<LeftRadio
list={[
{
title: t('common:core.dataset.import.Auto process'),
desc: t('common:core.dataset.import.Auto process desc'),
value: ImportProcessWayEnum.auto
},
{
title: t('dataset:custom_data_process_params'),
desc: t('dataset:custom_data_process_params_desc'),
value: ImportProcessWayEnum.custom,
children: way === ImportProcessWayEnum.custom && (
<Box mt={5}>
{showChunkInput && chunkSizeField && (
<Box>
<Flex alignItems={'center'}>
<Box>{t('dataset:ideal_chunk_length')}</Box>
<QuestionTip label={t('dataset:ideal_chunk_length_tips')} />
</Flex>
<Box
mt={1}
css={{
'& > span': {
display: 'block'
}
}}
>
<MyTooltip
label={t('common:core.dataset.import.Chunk Range', {
min: minChunkSize,
max: maxChunkSize
})}
>
<MyNumberInput
name={chunkSizeField}
min={minChunkSize}
max={maxChunkSize}
size={'sm'}
step={100}
value={chunkSize}
onChange={(e) => {
if (e === undefined) return;
setValue(chunkSizeField, +e);
}}
/>
</MyTooltip>
</Box>
</Box>
)}
<Box mt={3}>
<Box>
{t('common:core.dataset.import.Custom split char')}
<QuestionTip
label={t('common:core.dataset.import.Custom split char Tips')}
/>
</Box>
<Box mt={1}>
<Input
size={'sm'}
bg={'myGray.50'}
defaultValue={''}
placeholder="\n;======;==SPLIT=="
{...register('customSplitChar')}
/>
</Box>
</Box>
{showPromptInput && (
<Box mt={3}>
<Box>{t('common:core.dataset.collection.QA Prompt')}</Box>
<Box
position={'relative'}
py={2}
px={3}
bg={'myGray.50'}
fontSize={'xs'}
whiteSpace={'pre-wrap'}
border={'1px'}
borderColor={'borderColor.base'}
<AccordionPanel p={2}>
<Flex
flexDirection={'column'}
gap={3}
border={'1px solid'}
borderColor={'primary.600'}
borderRadius={'md'}
boxShadow={shadowLight}
p={4}
>
{feConfigs.showCustomPdfParse && (
<HStack spacing={1}>
<Checkbox {...register('customPdfParse')}>
<FormLabel>{t('dataset:pdf_enhance_parse')}</FormLabel>
</Checkbox>
<QuestionTip label={t('dataset:pdf_enhance_parse_tips')} />
{feConfigs?.show_pay && (
<MyTag
type={'borderSolid'}
borderColor={'myGray.200'}
bg={'myGray.100'}
color={'primary.600'}
py={1.5}
borderRadius={'md'}
maxH={'140px'}
overflow={'auto'}
_hover={{
'& .mask': {
display: 'block'
}
}}
px={3}
whiteSpace={'wrap'}
ml={1}
>
{getValues('qaPrompt')}
{t('dataset:pdf_enhance_parse_price', {
price: feConfigs.customPdfParsePrice || 0
})}
</MyTag>
)}
</HStack>
)}
</Flex>
</AccordionPanel>
</AccordionItem>
)}
<Box
display={'none'}
className="mask"
position={'absolute'}
top={0}
right={0}
bottom={0}
left={0}
background={
'linear-gradient(182deg, rgba(255, 255, 255, 0.00) 1.76%, #FFF 84.07%)'
}
>
<Button
size="xs"
variant={'whiteBase'}
leftIcon={<MyIcon name={'edit'} w={'13px'} />}
color={'black'}
position={'absolute'}
right={2}
bottom={2}
onClick={onOpenCustomPrompt}
>
{t('common:core.dataset.import.Custom prompt')}
</Button>
</Box>
</Box>
</Box>
)}
<AccordionItem mt={4} border={'none'}>
<Title title={t('dataset:import_data_process_setting')} />
<AccordionPanel p={2}>
<Box mt={2}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:training_mode')}
</Box>
<LeftRadio<DatasetCollectionDataProcessModeEnum>
list={trainingModeList}
px={3}
py={2.5}
value={trainingType}
onChange={(e) => {
setValue('trainingType', e);
}}
defaultBg="white"
activeBg="white"
gridTemplateColumns={'repeat(2, 1fr)'}
/>
</Box>
{trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
<Box mt={6}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:enhanced_indexes')}
</Box>
)
}
]}
px={3}
py={3}
defaultBg="white"
activeBg="white"
value={way}
w={'100%'}
onChange={(e) => {
setValue('way', e);
}}
></LeftRadio>
</Box>
{feConfigs?.isPlus && (
<HStack gap={[3, 7]}>
<HStack flex={'1'} spacing={1}>
<Checkbox {...register('autoIndexes')}>
<FormLabel>{t('dataset:auto_indexes')}</FormLabel>
</Checkbox>
<QuestionTip label={t('dataset:auto_indexes_tips')} />
</HStack>
<HStack flex={'1'} spacing={1}>
<Checkbox {...register('imageIndex')}>
<FormLabel>{t('dataset:image_auto_parse')}</FormLabel>
</Checkbox>
<QuestionTip label={t('dataset:image_auto_parse_tips')} />
</HStack>
</HStack>
)}
</Box>
)}
<Box mt={6}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:params_setting')}
</Box>
<LeftRadio<ChunkSettingModeEnum>
list={[
{
title: t('dataset:default_params'),
desc: t('dataset:default_params_desc'),
value: ChunkSettingModeEnum.auto
},
{
title: t('dataset:custom_data_process_params'),
desc: t('dataset:custom_data_process_params_desc'),
value: ChunkSettingModeEnum.custom,
children: chunkSettingMode === ChunkSettingModeEnum.custom && (
<Box mt={5}>
<Box>
<Flex alignItems={'center'}>
<Box>{t('dataset:ideal_chunk_length')}</Box>
<QuestionTip label={t('dataset:ideal_chunk_length_tips')} />
</Flex>
<Box
mt={1}
css={{
'& > span': {
display: 'block'
}
}}
>
<MyTooltip
label={t('common:core.dataset.import.Chunk Range', {
min: minChunkSize,
max: maxChunkSize
})}
>
<MyNumberInput
register={register}
name={chunkSizeField}
min={minChunkSize}
max={maxChunkSize}
size={'sm'}
step={100}
/>
</MyTooltip>
</Box>
</Box>
{feConfigs?.show_pay && (
<Box mt={5} pl={[0, '100px']} gap={3}>
<MyTag colorSchema={'gray'} py={1.5} borderRadius={'md'} px={3} whiteSpace={'wrap'}>
{priceTip}
</MyTag>
</Box>
)}
<Box mt={3}>
<Box>
{t('common:core.dataset.import.Custom split char')}
<QuestionTip
label={t('common:core.dataset.import.Custom split char Tips')}
/>
</Box>
<Box mt={1}>
<Input
size={'sm'}
bg={'myGray.50'}
defaultValue={''}
placeholder="\n;======;==SPLIT=="
{...register('customSplitChar')}
/>
</Box>
</Box>
<Flex mt={5} gap={3} justifyContent={'flex-end'}>
<Button
onClick={() => {
goToNext();
}}
>
{t('common:common.Next Step')}
</Button>
</Flex>
</Box>
<Box flex={'1 0 0'} w={['auto', '0']} h={['auto', '100%']} pl={[0, 3]}>
<Preview showPreviewChunks={showPreviewChunks} />
{showQAPromptInput && (
<Box mt={3}>
<Box>{t('common:core.dataset.collection.QA Prompt')}</Box>
<Box
position={'relative'}
py={2}
px={3}
bg={'myGray.50'}
fontSize={'xs'}
whiteSpace={'pre-wrap'}
border={'1px'}
borderColor={'borderColor.base'}
borderRadius={'md'}
maxH={'140px'}
overflow={'auto'}
_hover={{
'& .mask': {
display: 'block'
}
}}
>
{getValues('qaPrompt')}
<Box
display={'none'}
className="mask"
position={'absolute'}
top={0}
right={0}
bottom={0}
left={0}
background={
'linear-gradient(182deg, rgba(255, 255, 255, 0.00) 1.76%, #FFF 84.07%)'
}
>
<Button
size="xs"
variant={'whiteBase'}
leftIcon={<MyIcon name={'edit'} w={'13px'} />}
color={'black'}
position={'absolute'}
right={2}
bottom={2}
onClick={onOpenCustomPrompt}
>
{t('common:core.dataset.import.Custom prompt')}
</Button>
</Box>
</Box>
</Box>
)}
</Box>
)
}
]}
gridGap={3}
px={3}
py={3}
defaultBg="white"
activeBg="white"
value={chunkSettingMode}
w={'100%'}
onChange={(e) => {
setValue('chunkSettingMode', e);
}}
/>
</Box>
</AccordionPanel>
</AccordionItem>
{/* <AccordionItem mt={4} border={'none'}>
<Title title={t('dataset:import_model_config')} />
<AccordionPanel p={2} fontSize={'sm'}>
<Box>
<Box>{t('common:core.ai.model.Dataset Agent Model')}</Box>
<Box mt={1}>
<AIModelSelector
w={'100%'}
value={llmModel}
list={datasetModelList.map((item) => ({
label: item.name,
value: item.model
}))}
onchange={(e) => {
setValue('llmModel', e);
}}
/>
</Box>
</Box>
<Box pt={5}>
<Box>{t('dataset:vllm_model')}</Box>
<Box mt={1}>
<AIModelSelector
w={'100%'}
value={vlmModel}
list={vllmModelList.map((item) => ({
label: item.name,
value: item.model
}))}
onchange={(e) => {
setValue('vlmModel', e);
}}
/>
</Box>
</Box>
</AccordionPanel>
</AccordionItem> */}
<Flex mt={5} gap={3} justifyContent={'flex-end'}>
<Button
onClick={() => {
goToNext();
}}
>
{t('common:common.Next Step')}
</Button>
</Flex>
</Accordion>
</Box>
{isOpenCustomPrompt && (
@@ -273,7 +381,7 @@ function DataProcess({ showPreviewChunks = true }: { showPreviewChunks: boolean
onClose={onCloseCustomPrompt}
/>
)}
</Box>
</>
);
}

View File

@@ -1,19 +1,160 @@
import React from 'react';
import Preview from '../components/Preview';
import { Box, Button, Flex } from '@chakra-ui/react';
import React, { useState } from 'react';
import { Box, Button, Flex, HStack } from '@chakra-ui/react';
import { useTranslation } from 'next-i18next';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
import MyIcon from '@fastgpt/web/components/common/Icon';
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
import EmptyTip from '@fastgpt/web/components/common/EmptyTip';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { getPreviewChunks } from '@/web/core/dataset/api';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { getPreviewSourceReadType } from '../utils';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import MyBox from '@fastgpt/web/components/common/MyBox';
import Markdown from '@/components/Markdown';
import { useToast } from '@fastgpt/web/hooks/useToast';
const PreviewData = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
const PreviewData = () => {
const { t } = useTranslation();
const { toast } = useToast();
const goToNext = useContextSelector(DatasetImportContext, (v) => v.goToNext);
const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);
const sources = useContextSelector(DatasetImportContext, (v) => v.sources);
const importSource = useContextSelector(DatasetImportContext, (v) => v.importSource);
const chunkSize = useContextSelector(DatasetImportContext, (v) => v.chunkSize);
const chunkOverlapRatio = useContextSelector(DatasetImportContext, (v) => v.chunkOverlapRatio);
const processParamsForm = useContextSelector(DatasetImportContext, (v) => v.processParamsForm);
const [previewFile, setPreviewFile] = useState<ImportSourceItemType>();
// Fetch the preview chunks for the file currently selected in the list.
// `data` defaults to [] so the render code can always map over it.
const { data = [], loading: isLoading } = useRequest2(
async () => {
// No file selected: resolve to undefined, which leaves `data` at its [] default.
if (!previewFile) return;
// Custom-text import: split the in-memory raw text locally rather than calling the preview API.
if (importSource === ImportDataSourceEnum.fileCustom) {
const customSplitChar = processParamsForm.getValues('customSplitChar');
const { chunks } = splitText2Chunks({
text: previewFile.rawText || '',
chunkLen: chunkSize,
overlapRatio: chunkOverlapRatio,
customReg: customSplitChar ? [customSplitChar] : []
});
// Return { q, a } items, the shape the chunk list renders; custom text has no answer part.
return chunks.map((chunk) => ({
q: chunk,
a: ''
}));
}
// Every other import source: request the preview chunks through the API call.
return getPreviewChunks({
datasetId,
type: getPreviewSourceReadType(previewFile),
// Use the first identifier that is set on the source; falls back to an empty string.
sourceId:
previewFile.dbFileId ||
previewFile.link ||
previewFile.externalFileUrl ||
previewFile.apiFileId ||
'',
customPdfParse: processParamsForm.getValues('customPdfParse'),
chunkSize,
overlapRatio: chunkOverlapRatio,
customSplitChar: processParamsForm.getValues('customSplitChar'),
selector: processParamsForm.getValues('webSelector'),
isQAImport: importSource === ImportDataSourceEnum.csvTable,
externalFileId: previewFile.externalFileId
});
},
{
// NOTE(review): presumably re-runs the request whenever `previewFile` changes and
// runs automatically on mount because `manual` is false; confirm against useRequest2.
refreshDeps: [previewFile],
manual: false,
onSuccess(result) {
// Skip the toast for the "nothing selected" run, which intentionally returns no data.
if (!previewFile) return;
// A selected file that produced no chunks is reported to the user as an error.
if (!result || result.length === 0) {
toast({
title: t('dataset:preview_chunk_empty'),
status: 'error'
});
}
}
}
);
return (
<Flex flexDirection={'column'} h={'100%'}>
<Box flex={'1 0 0 '}>
<Preview showPreviewChunks={showPreviewChunks} />
</Box>
<Flex flex={'1 0 0'} border={'base'} borderRadius={'md'}>
<Flex flexDirection={'column'} flex={'1 0 0'} borderRight={'base'}>
<FormLabel fontSize={'md'} py={4} px={5} borderBottom={'base'}>
{t('dataset:file_list')}
</FormLabel>
<Box flex={'1 0 0'} overflowY={'auto'} px={5} py={3}>
{sources.map((source) => (
<HStack
key={source.id}
bg={'myGray.50'}
p={4}
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'transparent'}
cursor={'pointer'}
_hover={{
borderColor: 'primary.300'
}}
{...(previewFile?.id === source.id && {
borderColor: 'primary.500 !important',
bg: 'primary.50 !important'
})}
_notLast={{ mb: 3 }}
onClick={() => setPreviewFile(source)}
>
<MyIcon name={source.icon as any} w={'1.25rem'} />
<Box ml={1} flex={'1 0 0'} wordBreak={'break-all'} fontSize={'sm'}>
{source.sourceName}
</Box>
</HStack>
))}
</Box>
</Flex>
<Flex flexDirection={'column'} flex={'1 0 0'}>
<Flex py={4} px={5} borderBottom={'base'} justifyContent={'space-between'}>
<FormLabel fontSize={'md'}>{t('dataset:preview_chunk')}</FormLabel>
<Box fontSize={'xs'} color={'myGray.500'}>
{t('dataset:preview_chunk_intro')}
</Box>
</Flex>
<MyBox isLoading={isLoading} flex={'1 0 0'} overflowY={'auto'} px={5} py={3}>
{previewFile ? (
<>
{data.map((item, index) => (
<Box
key={index}
fontSize={'sm'}
color={'myGray.600'}
_notLast={{
mb: 3,
pb: 3,
borderBottom: 'base'
}}
_hover={{
bg: 'myGray.100'
}}
>
<Markdown source={item.q} />
<Markdown source={item.a} />
</Box>
))}
</>
) : (
<EmptyTip text={t('dataset:preview_chunk_not_selected')} />
)}
</MyBox>
</Flex>
</Flex>
<Flex mt={2} justifyContent={'flex-end'}>
<Button onClick={goToNext}>{t('common:common.Next Step')}</Button>
</Flex>

View File

@@ -14,7 +14,10 @@ import {
IconButton,
Tooltip
} from '@chakra-ui/react';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import {
DatasetCollectionDataProcessModeEnum,
ImportDataSourceEnum
} from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
@@ -34,6 +37,7 @@ import MyTag from '@fastgpt/web/components/common/Tag/index';
import { useContextSelector } from 'use-context-selector';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { DatasetImportContext, type ImportFormType } from '../Context';
import { ApiCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
const Upload = () => {
const { t } = useTranslation();
@@ -77,7 +81,7 @@ const Upload = () => {
}, [waitingFilesCount, totalFilesCount, allFinished, t]);
const { runAsync: startUpload, loading: isLoading } = useRequest2(
async ({ mode, customSplitChar, qaPrompt, webSelector }: ImportFormType) => {
async ({ trainingType, customSplitChar, qaPrompt, webSelector }: ImportFormType) => {
if (sources.length === 0) return;
const filterWaitingSources = sources.filter((item) => item.createStatus === 'waiting');
@@ -95,15 +99,21 @@ const Upload = () => {
);
// create collection
const commonParams = {
const commonParams: ApiCreateDatasetCollectionParams & {
name: string;
} = {
parentId,
trainingType: mode,
datasetId: datasetDetail._id,
name: item.sourceName,
customPdfParse: processParamsForm.getValues('customPdfParse'),
trainingType,
imageIndex: processParamsForm.getValues('imageIndex'),
autoIndexes: processParamsForm.getValues('autoIndexes'),
chunkSize,
chunkSplitter: customSplitChar,
qaPrompt,
name: item.sourceName
qaPrompt: trainingType === DatasetCollectionDataProcessModeEnum.qa ? qaPrompt : undefined
};
if (importSource === ImportDataSourceEnum.reTraining) {
const res = await postReTrainingDatasetFileCollection({
@@ -272,7 +282,7 @@ const Upload = () => {
<Flex justifyContent={'flex-end'} mt={4}>
<Button isLoading={isLoading} onClick={handleSubmit((data) => startUpload(data))}>
{totalFilesCount > 0 &&
`${t('common:core.dataset.import.Total files', {
`${t('dataset:total_num_files', {
total: totalFilesCount
})} | `}
{buttonText}

View File

@@ -1,102 +0,0 @@
import React, { useState } from 'react';
import { Box, Flex, Grid, IconButton } from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import MyMenu from '@fastgpt/web/components/common/MyMenu';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import dynamic from 'next/dynamic';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
const PreviewChunks = dynamic(() => import('./PreviewChunks'));
/**
 * Lists every import source as a card. When `showPreviewChunks` is true, each
 * card also gets a menu that opens either the raw-text preview or the chunk
 * preview for that source.
 *
 * @param showPreviewChunks - Whether to render the per-source preview menu.
 */
const Preview = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
  const { t } = useTranslation();
  // Subscribe to `sources` only: selecting the whole context value would
  // re-render this list on every unrelated context change.
  const sources = useContextSelector(DatasetImportContext, (v) => v.sources);

  // Source currently shown in each preview; `undefined` means that preview is closed.
  const [previewRawTextSource, setPreviewRawTextSource] = useState<ImportSourceItemType>();
  const [previewChunkSource, setPreviewChunkSource] = useState<ImportSourceItemType>();

  return (
    <Box h={'100%'} w={'100%'} display={['block', 'flex']} flexDirection={'column'}>
      <Flex alignItems={'center'}>
        <MyIcon name={'core/dataset/fileCollection'} w={'20px'} />
        <Box fontSize={'md'}>{t('common:core.dataset.import.Sources list')}</Box>
      </Flex>
      <Box mt={3} flex={'1 0 0'} h={['auto', 0]} width={'100%'} overflowY={'auto'}>
        <Grid w={'100%'} gap={3} gridTemplateColumns={['1fr', '1fr', '1fr', '1fr', '1fr 1fr']}>
          {sources.map((source) => (
            <Flex
              key={source.id}
              bg={'white'}
              p={4}
              borderRadius={'md'}
              borderWidth={'1px'}
              borderColor={'borderColor.low'}
              boxShadow={'2'}
              alignItems={'center'}
            >
              <MyIcon name={source.icon as any} w={['1rem', '1.25rem']} />
              <Box mx={1} flex={'1 0 0'} wordBreak={'break-all'} fontSize={'sm'}>
                {source.sourceName}
              </Box>
              {showPreviewChunks && (
                <Box fontSize={'xs'} color={'myGray.600'}>
                  <MyMenu
                    Button={
                      <IconButton
                        icon={<MyIcon name={'common/viewLight'} w={'14px'} p={2} />}
                        aria-label={''}
                        size={'sm'}
                        variant={'whitePrimary'}
                      />
                    }
                    menuList={[
                      {
                        children: [
                          {
                            label: (
                              <Flex alignItems={'center'}>
                                <MyIcon name={'core/dataset/fileCollection'} w={'14px'} mr={2} />
                                {t('common:core.dataset.import.Preview raw text')}
                              </Flex>
                            ),
                            onClick: () => setPreviewRawTextSource(source)
                          },
                          {
                            label: (
                              <Flex alignItems={'center'}>
                                <MyIcon name={'core/dataset/splitLight'} w={'14px'} mr={2} />
                                {t('common:core.dataset.import.Preview chunks')}
                              </Flex>
                            ),
                            onClick: () => setPreviewChunkSource(source)
                          }
                        ]
                      }
                    ]}
                  />
                </Box>
              )}
            </Flex>
          ))}
        </Grid>
      </Box>
      {/* Each preview is mounted only while a source is selected for it. */}
      {!!previewRawTextSource && (
        <PreviewRawText
          previewSource={previewRawTextSource}
          onClose={() => setPreviewRawTextSource(undefined)}
        />
      )}
      {!!previewChunkSource && (
        <PreviewChunks
          previewSource={previewChunkSource}
          onClose={() => setPreviewChunkSource(undefined)}
        />
      )}
    </Box>
  );
};
export default React.memo(Preview);

View File

@@ -1,78 +0,0 @@
import React from 'react';
import { Box } from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { getPreviewFileContent } from '@/web/common/file/api';
import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { getPreviewSourceReadType } from '../utils';
/**
 * Right-hand drawer that shows the raw text of a single import source.
 *
 * For custom-text imports that already carry `rawText`, the first 3000
 * characters are shown directly; every other case loads the content through
 * `getPreviewFileContent`.
 *
 * @param previewSource - The import source whose content is previewed.
 * @param onClose - Called when the drawer is closed.
 */
const PreviewRawText = ({
  previewSource,
  onClose
}: {
  previewSource: ImportSourceItemType;
  onClose: () => void;
}) => {
  const { toast } = useToast();
  // Select only the fields that are read, so unrelated context changes do not re-render the drawer.
  const importSource = useContextSelector(DatasetImportContext, (v) => v.importSource);
  const processParamsForm = useContextSelector(DatasetImportContext, (v) => v.processParamsForm);
  const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);

  const { data, loading: isLoading } = useRequest2(
    async () => {
      // Custom text is already in memory: preview it locally, capped at 3000 characters.
      if (importSource === ImportDataSourceEnum.fileCustom && previewSource.rawText) {
        return {
          previewContent: previewSource.rawText.slice(0, 3000)
        };
      }

      return getPreviewFileContent({
        datasetId,
        type: getPreviewSourceReadType(previewSource),
        // Use the first identifier that is set on the source; falls back to an empty string.
        sourceId:
          previewSource.dbFileId ||
          previewSource.link ||
          previewSource.externalFileUrl ||
          previewSource.apiFileId ||
          '',
        isQAImport: importSource === ImportDataSourceEnum.csvTable,
        selector: processParamsForm.getValues('webSelector'),
        externalFileId: previewSource.externalFileId
      });
    },
    {
      // List every source identifier the request sends, so a switch between two
      // API-file or external-file sources is also treated as a change.
      refreshDeps: [
        previewSource.dbFileId,
        previewSource.link,
        previewSource.externalFileUrl,
        previewSource.apiFileId,
        previewSource.externalFileId
      ],
      manual: false,
      onError(err) {
        toast({
          status: 'warning',
          title: getErrText(err)
        });
      }
    }
  );

  // Render an empty string while loading or when the request produced no content.
  const rawText = data?.previewContent || '';

  return (
    <MyRightDrawer
      onClose={onClose}
      iconSrc={previewSource.icon}
      title={previewSource.sourceName}
      isLoading={isLoading}
      px={0}
    >
      <Box whiteSpace={'pre-wrap'} overflowY={'auto'} px={5} fontSize={'sm'}>
        {rawText}
      </Box>
    </MyRightDrawer>
  );
};
export default React.memo(PreviewRawText);

View File

@@ -14,24 +14,17 @@ import {
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import dynamic from 'next/dynamic';
import { useI18n } from '@/web/context/I18n';
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
export const RenderUploadFiles = ({
files,
setFiles,
showPreviewContent
setFiles
}: {
files: ImportSourceItemType[];
setFiles: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
showPreviewContent?: boolean;
}) => {
const { t } = useTranslation();
const { fileT } = useI18n();
const [previewFile, setPreviewFile] = useState<ImportSourceItemType>();
return files.length > 0 ? (
<>
@@ -84,18 +77,6 @@ export const RenderUploadFiles = ({
<Td>
{!item.isUploading && (
<Flex alignItems={'center'} gap={4}>
{showPreviewContent && (
<MyTooltip label={t('common:core.dataset.import.Preview raw text')}>
<IconButton
variant={'whitePrimary'}
size={'sm'}
icon={<MyIcon name={'common/viewLight'} w={'18px'} />}
aria-label={''}
onClick={() => setPreviewFile(item)}
/>
</MyTooltip>
)}
<IconButton
variant={'grayDanger'}
size={'sm'}
@@ -113,9 +94,6 @@ export const RenderUploadFiles = ({
</Tbody>
</Table>
</TableContainer>
{!!previewFile && (
<PreviewRawText previewSource={previewFile} onClose={() => setPreviewFile(undefined)} />
)}
</>
) : null;
};

View File

@@ -28,7 +28,7 @@ const APIDatasetCollection = () => {
return (
<>
{activeStep === 0 && <CustomAPIFileInput />}
{activeStep === 1 && <DataProcess showPreviewChunks={true} />}
{activeStep === 1 && <DataProcess />}
{activeStep === 2 && <Upload />}
</>
);
@@ -272,7 +272,7 @@ const CustomAPIFileInput = () => {
onClick={onclickNext}
>
{selectFiles.length > 0
? `${t('common:core.dataset.import.Total files', { total: selectFiles.length })} | `
? `${t('dataset:total_num_files', { total: selectFiles.length })} | `
: ''}
{t('common:common.Next Step')}
</Button>

View File

@@ -34,7 +34,7 @@ const ExternalFileCollection = () => {
return (
<>
{activeStep === 0 && <CustomLinkInput />}
{activeStep === 1 && <DataProcess showPreviewChunks={true} />}
{activeStep === 1 && <DataProcess />}
{activeStep === 2 && <Upload />}
</>
);

View File

@@ -19,7 +19,7 @@ const CustomTet = () => {
return (
<>
{activeStep === 0 && <CustomTextInput />}
{activeStep === 1 && <DataProcess showPreviewChunks />}
{activeStep === 1 && <DataProcess />}
{activeStep === 2 && <Upload />}
</>
);

View File

@@ -23,7 +23,7 @@ const LinkCollection = () => {
return (
<>
{activeStep === 0 && <CustomLinkImport />}
{activeStep === 1 && <DataProcess showPreviewChunks />}
{activeStep === 1 && <DataProcess />}
{activeStep === 2 && <Upload />}
</>
);

View File

@@ -10,9 +10,8 @@ import { RenderUploadFiles } from '../components/RenderFiles';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'), {
loading: () => <Loading fixed={false} />
});
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'));
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
const Upload = dynamic(() => import('../commonProgress/Upload'));
const fileType = '.txt, .docx, .csv, .xlsx, .pdf, .md, .html, .pptx';
@@ -23,8 +22,9 @@ const FileLocal = () => {
return (
<>
{activeStep === 0 && <SelectFile />}
{activeStep === 1 && <DataProcess showPreviewChunks />}
{activeStep === 2 && <Upload />}
{activeStep === 1 && <DataProcess />}
{activeStep === 2 && <PreviewData />}
{activeStep === 3 && <Upload />}
</>
);
};
@@ -64,12 +64,12 @@ const SelectFile = React.memo(function SelectFile() {
/>
{/* render files */}
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} showPreviewContent />
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} />
<Box textAlign={'right'} mt={5}>
<Button isDisabled={successFiles.length === 0 || uploading} onClick={onclickNext}>
{selectFiles.length > 0
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
? `${t('dataset:total_num_files', { total: selectFiles.length })} | `
: ''}
{t('common:common.Next Step')}
</Button>

View File

@@ -8,10 +8,13 @@ import { useRouter } from 'next/router';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { getDatasetCollectionById } from '@/web/core/dataset/api';
import MyBox from '@fastgpt/web/components/common/MyBox';
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
import { ChunkSettingModeEnum } from '@/web/core/dataset/constants';
import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { Box } from '@chakra-ui/react';
const Upload = dynamic(() => import('../commonProgress/Upload'));
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
const ReTraining = () => {
const router = useRouter();
@@ -20,6 +23,7 @@ const ReTraining = () => {
collectionId: string;
};
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const activeStep = useContextSelector(DatasetImportContext, (v) => v.activeStep);
const setSources = useContextSelector(DatasetImportContext, (v) => v.setSources);
const processParamsForm = useContextSelector(DatasetImportContext, (v) => v.processParamsForm);
@@ -43,8 +47,12 @@ const ReTraining = () => {
}
]);
processParamsForm.reset({
mode: collection.trainingType,
way: ImportProcessWayEnum.auto,
customPdfParse: collection.customPdfParse,
trainingType: collection.trainingType,
imageIndex: collection.imageIndex,
autoIndexes: collection.autoIndexes,
chunkSettingMode: ChunkSettingModeEnum.auto,
embeddingChunkSize: collection.chunkSize,
qaChunkSize: collection.chunkSize,
customSplitChar: collection.chunkSplitter,
@@ -55,9 +63,12 @@ const ReTraining = () => {
});
return (
<MyBox isLoading={loading} h={'100%'} overflow={'auto'}>
{activeStep === 0 && <DataProcess showPreviewChunks={true} />}
{activeStep === 1 && <Upload />}
<MyBox isLoading={loading} h={'100%'}>
<Box h={'100%'} overflow={'auto'}>
{activeStep === 0 && <DataProcess />}
{activeStep === 1 && <PreviewData />}
{activeStep === 2 && <Upload />}
</Box>
</MyBox>
);
};

View File

@@ -21,7 +21,7 @@ const FileLocal = () => {
return (
<>
{activeStep === 0 && <SelectFile />}
{activeStep === 1 && <PreviewData showPreviewChunks />}
{activeStep === 1 && <PreviewData />}
{activeStep === 2 && <Upload />}
</>
);
@@ -91,7 +91,7 @@ const SelectFile = React.memo(function SelectFile() {
}}
>
{selectFiles.length > 0
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
? `${t('dataset:total_num_files', { total: selectFiles.length })} | `
: ''}
{t('common:common.Next Step')}
</Button>

View File

@@ -1,4 +1,4 @@
import React, { useEffect, useState } from 'react';
import React, { useEffect, useMemo, useState } from 'react';
import { Box, Flex, Switch, Input } from '@chakra-ui/react';
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
import { useForm } from 'react-hook-form';
@@ -37,6 +37,8 @@ const Info = ({ datasetId }: { datasetId: string }) => {
const { t } = useTranslation();
const { datasetDetail, loadDatasetDetail, updateDataset, rebuildingCount, trainingCount } =
useContextSelector(DatasetPageContext, (v) => v);
const { feConfigs, datasetModelList, embeddingModelList, getVllmModelList } = useSystemStore();
const [editedDataset, setEditedDataset] = useState<EditResourceInfoFormType>();
const [editedAPIDataset, setEditedAPIDataset] = useState<EditAPIDatasetInfoFormType>();
const refetchDatasetTraining = useContextSelector(
@@ -50,7 +52,9 @@ const Info = ({ datasetId }: { datasetId: string }) => {
const vectorModel = watch('vectorModel');
const agentModel = watch('agentModel');
const { feConfigs, datasetModelList, embeddingModelList } = useSystemStore();
const vllmModelList = useMemo(() => getVllmModelList(), [getVllmModelList]);
const vlmModel = watch('vlmModel');
const { ConfirmModal: ConfirmDelModal } = useConfirm({
content: t('common:core.dataset.Delete Confirm'),
type: 'delete'
@@ -69,7 +73,8 @@ const Info = ({ datasetId }: { datasetId: string }) => {
(data: DatasetItemType) => {
return updateDataset({
id: datasetId,
agentModel: data.agentModel,
agentModel: data.agentModel?.model,
vlmModel: data.vlmModel?.model,
externalReadUrl: data.externalReadUrl
});
},
@@ -225,6 +230,31 @@ const Info = ({ datasetId }: { datasetId: string }) => {
</Box>
</Box>
{feConfigs?.isPlus && (
<Box pt={5}>
<FormLabel fontSize={'mini'} fontWeight={'500'}>
{t('dataset:vllm_model')}
</FormLabel>
<Box pt={2}>
<AIModelSelector
w={'100%'}
value={vlmModel?.model}
list={vllmModelList.map((item) => ({
label: item.name,
value: item.model
}))}
fontSize={'mini'}
onchange={(e) => {
const vlmModel = vllmModelList.find((item) => item.model === e);
if (!vlmModel) return;
setValue('vlmModel', vlmModel);
return handleSubmit((data) => onSave({ ...data, vlmModel }))();
}}
/>
</Box>
</Box>
)}
{feConfigs?.isPlus && (
<Flex alignItems={'center'} pt={5}>
<FormLabel fontSize={'mini'} fontWeight={'500'}>

View File

@@ -1,9 +1,7 @@
import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import { Box, Flex, Button, Textarea, useTheme, Grid, HStack } from '@chakra-ui/react';
import { Box, Flex, Button, Textarea, useTheme } from '@chakra-ui/react';
import {
Control,
FieldArrayWithId,
UseFieldArrayAppend,
UseFieldArrayRemove,
UseFormRegister,
useFieldArray,
@@ -12,7 +10,6 @@ import {
import {
postInsertData2Dataset,
putDatasetDataById,
delOneDatasetDataById,
getDatasetCollectionById,
getDatasetDataItemById
} from '@/web/core/dataset/api';
@@ -24,7 +21,7 @@ import { useQuery } from '@tanstack/react-query';
import { useTranslation } from 'next-i18next';
import { useRequest, useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
import { getDefaultIndex, getSourceNameIcon } from '@fastgpt/global/core/dataset/utils';
import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils';
import { DatasetDataIndexItemType } from '@fastgpt/global/core/dataset/type';
import DeleteIcon from '@fastgpt/web/components/common/Icon/delete';
import { defaultCollectionDetail } from '@/web/core/dataset/constants';
@@ -36,6 +33,7 @@ import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
import { useSystem } from '@fastgpt/web/hooks/useSystem';
import LightRowTabs from '@fastgpt/web/components/common/Tabs/LightRowTabs';
import styles from './styles.module.scss';
import { getDatasetIndexMapData } from '@fastgpt/global/core/dataset/data/constants';
export type InputDataType = {
q: string;
@@ -218,10 +216,7 @@ const InputDataModal = ({
await putDatasetDataById({
dataId,
...e,
indexes:
e.indexes?.map((index) =>
index.defaultIndex ? getDefaultIndex({ q: e.q, a: e.a, dataId: index.dataId }) : index
) || []
indexes: e.indexes
});
return {
@@ -296,7 +291,7 @@ const InputDataModal = ({
p={0}
onClick={() =>
appendIndexes({
defaultIndex: false,
type: 'custom',
text: '',
dataId: `${Date.now()}`
})
@@ -315,7 +310,6 @@ const InputDataModal = ({
<DataIndex
register={register}
maxToken={maxToken}
appendIndexes={appendIndexes}
removeIndexes={removeIndexes}
indexes={indexes}
/>
@@ -424,13 +418,11 @@ const DataIndex = ({
maxToken,
register,
indexes,
appendIndexes,
removeIndexes
}: {
maxToken: number;
register: UseFormRegister<InputDataType>;
indexes: FieldArrayWithId<InputDataType, 'indexes', 'id'>[];
appendIndexes: UseFieldArrayAppend<InputDataType, 'indexes'>;
removeIndexes: UseFieldArrayRemove;
}) => {
const { t } = useTranslation();
@@ -438,52 +430,41 @@ const DataIndex = ({
return (
<>
<Flex mt={3} gap={3} flexDir={'column'}>
<Box
p={4}
borderRadius={'md'}
border={'1.5px solid var(--light-fastgpt-primary-opacity-01, rgba(51, 112, 255, 0.10))'}
bg={'primary.50'}
>
<Flex mb={2}>
<Box flex={1} fontWeight={'medium'} fontSize={'sm'} color={'primary.700'}>
{t('common:dataset.data.Default Index')}
</Box>
</Flex>
<Box fontSize={'sm'} fontWeight={'medium'} color={'myGray.600'}>
{t('common:core.dataset.data.Default Index Tip')}
</Box>
</Box>
{indexes?.map((index, i) => {
const data = getDatasetIndexMapData(index.type);
return (
!index.defaultIndex && (
<Box
key={index.dataId || i}
p={4}
borderRadius={'md'}
border={'1.5px solid var(--Gray-Modern-200, #E8EBF0)'}
bg={'myGray.25'}
_hover={{
'& .delete': {
display: 'block'
}
}}
>
<Flex mb={2}>
<Box flex={1} fontWeight={'medium'} fontSize={'sm'} color={'myGray.900'}>
{t('dataset.data.Custom Index Number', { number: i })}
</Box>
<Box
key={index.dataId || i}
p={4}
borderRadius={'md'}
border={'1.5px solid var(--Gray-Modern-200, #E8EBF0)'}
bg={'myGray.25'}
_hover={{
'& .delete': {
display: 'block'
}
}}
>
<Flex mb={2}>
<Box flex={1} fontWeight={'medium'} fontSize={'sm'} color={'myGray.900'}>
{t(data.label)}
</Box>
{index.type !== 'default' && (
<DeleteIcon
onClick={() => {
if (indexes.length <= 1) {
appendIndexes(getDefaultIndex({ dataId: `${Date.now()}` }));
}
removeIndexes(i);
}}
/>
</Flex>
<DataIndexTextArea index={i} maxToken={maxToken} register={register} />
</Box>
)
)}
</Flex>
<DataIndexTextArea
disabled={index.type === 'default'}
index={i}
value={index.text}
maxToken={maxToken}
register={register}
/>
</Box>
);
})}
</Flex>
@@ -491,14 +472,19 @@ const DataIndex = ({
);
};
const textareaMinH = '40px';
const DataIndexTextArea = ({
value,
index,
maxToken,
register
register,
disabled
}: {
value: string;
index: number;
maxToken: number;
register: UseFormRegister<InputDataType>;
disabled?: boolean;
}) => {
const { t } = useTranslation();
const TextareaDom = useRef<HTMLTextAreaElement | null>(null);
@@ -509,7 +495,7 @@ const DataIndexTextArea = ({
onChange: onTextChange,
onBlur
} = register(`indexes.${index}.text`, { required: true });
const textareaMinH = '40px';
useEffect(() => {
if (TextareaDom.current) {
TextareaDom.current.style.height = textareaMinH;
@@ -522,7 +508,12 @@ const DataIndexTextArea = ({
e.target.style.height = `${e.target.scrollHeight + 5}px`;
}
}, []);
return (
return disabled ? (
<Box fontSize={'sm'} color={'myGray.500'} whiteSpace={'pre-wrap'}>
{value}
</Box>
) : (
<Textarea
maxLength={maxToken}
borderColor={'transparent'}

View File

@@ -7,7 +7,10 @@ import { useRouter } from 'next/router';
import MyBox from '@fastgpt/web/components/common/MyBox';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
import { DatasetCollectionTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import {
DatasetCollectionDataProcessModeMap,
DatasetCollectionTypeMap
} from '@fastgpt/global/core/dataset/constants';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
import MyIcon from '@fastgpt/web/components/common/Icon';
@@ -61,13 +64,25 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
label: t('common:core.dataset.collection.metadata.Updatetime'),
value: formatTime2YMDHM(collection.updateTime)
},
{
label: t('dataset:collection_metadata_custom_pdf_parse'),
value: collection.customPdfParse ? 'Yes' : 'No'
},
{
label: t('common:core.dataset.collection.metadata.Raw text length'),
value: collection.rawTextLength ?? '-'
},
{
label: t('dataset:collection.Training type'),
value: t(TrainingTypeMap[collection.trainingType]?.label as any)
label: t('dataset:collection_metadata_image_parse'),
value: collection.imageIndex ? 'Yes' : 'No'
},
{
label: t('dataset:auto_indexes'),
value: collection.autoIndexes ? 'Yes' : 'No'
},
{
label: t('dataset:collection.training_type'),
value: t(DatasetCollectionDataProcessModeMap[collection.trainingType]?.label as any)
},
{
label: t('common:core.dataset.collection.metadata.Chunk Size'),
@@ -99,8 +114,8 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
<Box fontSize={'md'} pb={4}>
{t('common:core.dataset.collection.metadata.metadata')}
</Box>
<Flex mb={4} wordBreak={'break-all'} fontSize={'sm'}>
<Box color={'myGray.500'} flex={'0 0 70px'}>
<Flex mb={3} wordBreak={'break-all'} fontSize={'sm'}>
<Box color={'myGray.500'} flex={'0 0 90px'}>
{t('common:core.dataset.collection.id')}:
</Box>
<Box>{collection?._id}</Box>
@@ -109,8 +124,8 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
(item, i) =>
item.label &&
item.value && (
<Flex key={i} alignItems={'center'} mb={4} wordBreak={'break-all'} fontSize={'sm'}>
<Box color={'myGray.500'} flex={'0 0 70px'}>
<Flex key={i} alignItems={'center'} mb={3} wordBreak={'break-all'} fontSize={'sm'}>
<Box color={'myGray.500'} flex={'0 0 90px'}>
{item.label}
</Box>
<Box>{item.value}</Box>

View File

@@ -2,7 +2,6 @@ import React, { useMemo } from 'react';
import { Box, Flex, Button, ModalFooter, ModalBody, Input, HStack } from '@chakra-ui/react';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useForm } from 'react-hook-form';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useRouter } from 'next/router';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
@@ -41,7 +40,8 @@ const CreateModal = ({
}) => {
const { t } = useTranslation();
const router = useRouter();
const { defaultModels, embeddingModelList, datasetModelList } = useSystemStore();
const { feConfigs, defaultModels, embeddingModelList, datasetModelList, getVllmModelList } =
useSystemStore();
const { isPc } = useSystem();
const datasetTypeMap = useMemo(() => {
@@ -71,6 +71,8 @@ const CreateModal = ({
const filterNotHiddenVectorModelList = embeddingModelList.filter((item) => !item.hidden);
const vllmModelList = useMemo(() => getVllmModelList(), [getVllmModelList]);
const form = useForm<CreateDatasetParams>({
defaultValues: {
parentId,
@@ -81,13 +83,15 @@ const CreateModal = ({
vectorModel:
defaultModels.embedding?.model || getWebDefaultEmbeddingModel(embeddingModelList)?.model,
agentModel:
defaultModels.datasetTextLLM?.model || getWebDefaultLLMModel(datasetModelList)?.model
defaultModels.datasetTextLLM?.model || getWebDefaultLLMModel(datasetModelList)?.model,
vlmModel: defaultModels.datasetImageLLM?.model
}
});
const { register, setValue, handleSubmit, watch } = form;
const avatar = watch('avatar');
const vectorModel = watch('vectorModel');
const agentModel = watch('agentModel');
const vlmModel = watch('vlmModel');
const {
File,
@@ -174,6 +178,7 @@ const CreateModal = ({
/>
</Flex>
</Box>
<Flex
mt={6}
alignItems={['flex-start', 'center']}
@@ -206,6 +211,7 @@ const CreateModal = ({
/>
</Box>
</Flex>
<Flex
mt={6}
alignItems={['flex-start', 'center']}
@@ -232,11 +238,45 @@ const CreateModal = ({
value: item.model
}))}
onchange={(e) => {
setValue('agentModel' as const, e);
setValue('agentModel', e);
}}
/>
</Box>
</Flex>
{feConfigs?.isPlus && (
<Flex
mt={6}
alignItems={['flex-start', 'center']}
justify={'space-between'}
flexDir={['column', 'row']}
>
<HStack
spacing={1}
flex={['', '0 0 110px']}
fontSize={'sm'}
color={'myGray.900'}
fontWeight={500}
pb={['12px', '0']}
>
<Box>{t('dataset:vllm_model')}</Box>
</HStack>
<Box w={['100%', '300px']}>
<AIModelSelector
w={['100%', '300px']}
value={vlmModel}
list={vllmModelList.map((item) => ({
label: item.name,
value: item.model
}))}
onchange={(e) => {
setValue('vlmModel', e);
}}
/>
</Box>
</Flex>
)}
{/* @ts-ignore */}
<ApiDatasetForm type={type} form={form} />
</ModalBody>