V4.9.4 feature (#4470)
* Training status (#4424) * dataset data training state (#4311) * dataset data training state * fix * fix ts * fix * fix api format * fix * fix * perf: count training * format * fix: dataset training state (#4417) * fix * add test * fix * fix * fix test * fix test * perf: training count * count * loading status --------- Co-authored-by: heheer <heheer@sealos.io> * doc * website sync feature (#4429) * perf: introduce BullMQ for website sync (#4403) * perf: introduce BullMQ for website sync * feat: new redis module * fix: remove graceful shutdown * perf: improve UI in dataset detail - Updated the "change" icon SVG file. - Modified i18n strings. - Added new i18n string "immediate_sync". - Improved UI in dataset detail page, including button icons and background colors. * refactor: Add chunkSettings to DatasetSchema * perf: website sync ux * env template * fix: clean up website dataset when updating chunk settings (#4420) * perf: check setting updated * perf: worker currency * feat: init script for website sync refactor (#4425) * website feature doc --------- Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com> * pro migration (#4388) (#4433) * pro migration * reuse customPdfParseType Co-authored-by: gggaaallleee <91131304+gggaaallleee@users.noreply.github.com> * perf: remove loading ui * feat: config chat file expired time * Redis cache (#4436) * perf: add Redis cache for vector counting (#4432) * feat: cache * perf: get cache key --------- Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com> * perf: mobile voice input (#4437) * update:Mobile voice interaction (#4362) * Add files via upload * Add files via upload * Update ollama.md * Update ollama.md * Add files via upload * Update useSpeech.ts * Update ChatInput.tsx * Update useSpeech.ts * Update ChatInput.tsx * Update useSpeech.ts * Update constants.ts * Add files via upload * Update ChatInput.tsx * Update useSpeech.ts * Update useSpeech.ts * Update useSpeech.ts * Update ChatInput.tsx * Add files via upload * Update common.json * Update VoiceInput.tsx * Update ChatInput.tsx * Update VoiceInput.tsx * Update useSpeech.ts * Update useSpeech.ts * Update common.json * Update common.json * Update common.json * Update VoiceInput.tsx * Update VoiceInput.tsx * Update ChatInput.tsx * Update VoiceInput.tsx * Update ChatInput.tsx * Update VoiceInput.tsx * Update ChatInput.tsx * Update useSpeech.ts * Update common.json * Update chat.json * Update common.json * Update chat.json * Update common.json * Update chat.json * Update VoiceInput.tsx * Update ChatInput.tsx * Update useSpeech.ts * Update VoiceInput.tsx * speech ui * 优化语音输入组件,调整输入框显示逻辑,修复语音输入遮罩层样式,更新画布背景透明度,增强用户交互体验。 (#4435) * perf: mobil voice input --------- Co-authored-by: dreamer6680 <1468683855@qq.com> * Test completion v2 (#4438) * add v2 completions (#4364) * add v2 completions * completion config * config version * fix * frontend * doc * fix * fix: completions v2 api --------- Co-authored-by: heheer <heheer@sealos.io> * package * Test mongo log (#4443) * feat: mongodb-log (#4426) * perf: mongo log * feat: completions stop reasoner * mongo db log --------- Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com> * update doc * Update doc * fix external var ui (#4444) * action * fix: ts (#4458) * preview doc action add docs preview permission update preview action udpate action * update doc (#4460) * update preview action * update doc * remove * update * schema * update mq export;perf: redis cache (#4465) * perf: redis cache * update mq export * perf: website sync error tip * add error worker * website sync ui (#4466) * Updated the dynamic display of the voice input pop-up (#4469) * Update VoiceInput.tsx * Update VoiceInput.tsx * Update VoiceInput.tsx * fix: voice input --------- Co-authored-by: heheer <heheer@sealos.io> Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com> Co-authored-by: gggaaallleee <91131304+gggaaallleee@users.noreply.github.com> Co-authored-by: dreamer6680 <1468683855@qq.com> Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
This commit is contained in:
@@ -25,6 +25,14 @@ import {
|
||||
getAutoIndexSize,
|
||||
getMaxIndexSize
|
||||
} from '@fastgpt/global/core/dataset/training/utils';
|
||||
import { CollectionChunkFormType } from '../Form/CollectionChunkForm';
|
||||
|
||||
type ChunkSizeFieldType = 'embeddingChunkSize' | 'qaChunkSize';
|
||||
export type ImportFormType = {
|
||||
customPdfParse: boolean;
|
||||
|
||||
webSelector: string;
|
||||
} & CollectionChunkFormType;
|
||||
|
||||
type TrainingFiledType = {
|
||||
chunkOverlapRatio: number;
|
||||
@@ -51,26 +59,6 @@ type DatasetImportContextType = {
|
||||
setSources: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
|
||||
} & TrainingFiledType;
|
||||
|
||||
type ChunkSizeFieldType = 'embeddingChunkSize' | 'qaChunkSize';
|
||||
export type ImportFormType = {
|
||||
customPdfParse: boolean;
|
||||
|
||||
trainingType: DatasetCollectionDataProcessModeEnum;
|
||||
imageIndex: boolean;
|
||||
autoIndexes: boolean;
|
||||
|
||||
chunkSettingMode: ChunkSettingModeEnum;
|
||||
|
||||
chunkSplitMode: DataChunkSplitModeEnum;
|
||||
embeddingChunkSize: number;
|
||||
qaChunkSize: number;
|
||||
chunkSplitter: string;
|
||||
indexSize: number;
|
||||
|
||||
qaPrompt: string;
|
||||
webSelector: string;
|
||||
};
|
||||
|
||||
export const DatasetImportContext = createContext<DatasetImportContextType>({
|
||||
importSource: ImportDataSourceEnum.fileLocal,
|
||||
goToNext: function (): void {
|
||||
@@ -314,14 +302,7 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
|
||||
chunkSplitter
|
||||
};
|
||||
}
|
||||
}, [
|
||||
chunkSettingMode,
|
||||
TrainingModeMap.autoChunkSize,
|
||||
TrainingModeMap.autoIndexSize,
|
||||
TrainingModeMap.chunkSize,
|
||||
TrainingModeMap.indexSize,
|
||||
chunkSplitter
|
||||
]);
|
||||
}, [chunkSettingMode, TrainingModeMap, chunkSplitter]);
|
||||
|
||||
const contextValue = {
|
||||
...TrainingModeMap,
|
||||
|
||||
@@ -1,13 +1,8 @@
|
||||
import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
||||
import React, { useCallback } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Flex,
|
||||
Input,
|
||||
Button,
|
||||
ModalBody,
|
||||
ModalFooter,
|
||||
Textarea,
|
||||
useDisclosure,
|
||||
Checkbox,
|
||||
Accordion,
|
||||
AccordionItem,
|
||||
@@ -16,93 +11,26 @@ import {
|
||||
AccordionIcon,
|
||||
HStack
|
||||
} from '@chakra-ui/react';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
|
||||
import {
|
||||
DataChunkSplitModeEnum,
|
||||
DatasetCollectionDataProcessModeEnum,
|
||||
DatasetCollectionDataProcessModeMap
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { ChunkSettingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import MyModal from '@fastgpt/web/components/common/MyModal';
|
||||
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
|
||||
import MyTag from '@fastgpt/web/components/common/Tag/index';
|
||||
import { useContextSelector } from 'use-context-selector';
|
||||
import { DatasetImportContext } from '../Context';
|
||||
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
|
||||
import MyNumberInput from '@fastgpt/web/components/common/Input/NumberInput';
|
||||
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
|
||||
import { shadowLight } from '@fastgpt/web/styles/theme';
|
||||
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
|
||||
import MySelect from '@fastgpt/web/components/common/MySelect';
|
||||
import { getIndexSizeSelectList } from '@fastgpt/global/core/dataset/training/utils';
|
||||
import RadioGroup from '@fastgpt/web/components/common/Radio/RadioGroup';
|
||||
import CollectionChunkForm from '../../Form/CollectionChunkForm';
|
||||
import { DatasetCollectionDataProcessModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
|
||||
function DataProcess() {
|
||||
const { t } = useTranslation();
|
||||
const { feConfigs } = useSystemStore();
|
||||
|
||||
const {
|
||||
goToNext,
|
||||
processParamsForm,
|
||||
chunkSizeField,
|
||||
minChunkSize,
|
||||
maxChunkSize,
|
||||
maxIndexSize,
|
||||
indexSize
|
||||
} = useContextSelector(DatasetImportContext, (v) => v);
|
||||
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
|
||||
const { setValue, register, watch, getValues } = processParamsForm;
|
||||
|
||||
const trainingType = watch('trainingType');
|
||||
const trainingModeList = useMemo(() => {
|
||||
const list = Object.entries(DatasetCollectionDataProcessModeMap);
|
||||
return list
|
||||
.filter(([key]) => key !== DatasetCollectionDataProcessModeEnum.auto)
|
||||
.map(([key, value]) => ({
|
||||
title: t(value.label as any),
|
||||
value: key as DatasetCollectionDataProcessModeEnum,
|
||||
tooltip: t(value.tooltip as any)
|
||||
}));
|
||||
}, [t]);
|
||||
|
||||
const chunkSettingMode = watch('chunkSettingMode');
|
||||
const chunkSplitMode = watch('chunkSplitMode');
|
||||
|
||||
const customSplitList = [
|
||||
{ label: t('dataset:split_sign_null'), value: '' },
|
||||
{ label: t('dataset:split_sign_break'), value: '\\n' },
|
||||
{ label: t('dataset:split_sign_break2'), value: '\\n\\n' },
|
||||
{ label: t('dataset:split_sign_period'), value: '.|。' },
|
||||
{ label: t('dataset:split_sign_exclamatiob'), value: '!|!' },
|
||||
{ label: t('dataset:split_sign_question'), value: '?|?' },
|
||||
{ label: t('dataset:split_sign_semicolon'), value: ';|;' },
|
||||
{ label: '=====', value: '=====' },
|
||||
{ label: t('dataset:split_sign_custom'), value: 'Other' }
|
||||
];
|
||||
|
||||
const [customListSelectValue, setCustomListSelectValue] = useState(getValues('chunkSplitter'));
|
||||
useEffect(() => {
|
||||
if (customListSelectValue === 'Other') {
|
||||
setValue('chunkSplitter', '');
|
||||
} else {
|
||||
setValue('chunkSplitter', customListSelectValue);
|
||||
}
|
||||
}, [customListSelectValue, setValue]);
|
||||
|
||||
// Index size
|
||||
const indexSizeSeletorList = useMemo(() => getIndexSizeSelectList(maxIndexSize), [maxIndexSize]);
|
||||
|
||||
// QA
|
||||
const qaPrompt = watch('qaPrompt');
|
||||
const {
|
||||
isOpen: isOpenCustomPrompt,
|
||||
onOpen: onOpenCustomPrompt,
|
||||
onClose: onCloseCustomPrompt
|
||||
} = useDisclosure();
|
||||
const { goToNext, processParamsForm, chunkSize } = useContextSelector(
|
||||
DatasetImportContext,
|
||||
(v) => v
|
||||
);
|
||||
const { register } = processParamsForm;
|
||||
|
||||
const Title = useCallback(({ title }: { title: string }) => {
|
||||
return (
|
||||
@@ -116,16 +44,7 @@ function DataProcess() {
|
||||
);
|
||||
}, []);
|
||||
|
||||
// Adapt auto training
|
||||
useEffect(() => {
|
||||
if (trainingType === DatasetCollectionDataProcessModeEnum.auto) {
|
||||
setValue('autoIndexes', true);
|
||||
setValue('trainingType', DatasetCollectionDataProcessModeEnum.chunk);
|
||||
}
|
||||
}, [trainingType, setValue]);
|
||||
|
||||
const showFileParseSetting = feConfigs?.showCustomPdfParse;
|
||||
const showQAPromptInput = trainingType === DatasetCollectionDataProcessModeEnum.qa;
|
||||
|
||||
return (
|
||||
<>
|
||||
@@ -179,238 +98,8 @@ function DataProcess() {
|
||||
<Title title={t('dataset:import_data_process_setting')} />
|
||||
|
||||
<AccordionPanel p={2}>
|
||||
<Box mt={2}>
|
||||
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
|
||||
{t('dataset:training_mode')}
|
||||
</Box>
|
||||
<LeftRadio<DatasetCollectionDataProcessModeEnum>
|
||||
list={trainingModeList}
|
||||
px={3}
|
||||
py={2.5}
|
||||
value={trainingType}
|
||||
onChange={(e) => {
|
||||
setValue('trainingType', e);
|
||||
}}
|
||||
defaultBg="white"
|
||||
activeBg="white"
|
||||
gridTemplateColumns={'repeat(2, 1fr)'}
|
||||
/>
|
||||
</Box>
|
||||
{trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
|
||||
<Box mt={6}>
|
||||
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
|
||||
{t('dataset:enhanced_indexes')}
|
||||
</Box>
|
||||
<HStack gap={[3, 7]}>
|
||||
<HStack flex={'1'} spacing={1}>
|
||||
<MyTooltip
|
||||
label={!feConfigs?.isPlus ? t('common:commercial_function_tip') : ''}
|
||||
>
|
||||
<Checkbox isDisabled={!feConfigs?.isPlus} {...register('autoIndexes')}>
|
||||
<FormLabel>{t('dataset:auto_indexes')}</FormLabel>
|
||||
</Checkbox>
|
||||
</MyTooltip>
|
||||
<QuestionTip label={t('dataset:auto_indexes_tips')} />
|
||||
</HStack>
|
||||
<HStack flex={'1'} spacing={1}>
|
||||
<MyTooltip
|
||||
label={
|
||||
!feConfigs?.isPlus
|
||||
? t('common:commercial_function_tip')
|
||||
: !datasetDetail?.vlmModel
|
||||
? t('common:error_vlm_not_config')
|
||||
: ''
|
||||
}
|
||||
>
|
||||
<Checkbox
|
||||
isDisabled={!feConfigs?.isPlus || !datasetDetail?.vlmModel}
|
||||
{...register('imageIndex')}
|
||||
>
|
||||
<FormLabel>{t('dataset:image_auto_parse')}</FormLabel>
|
||||
</Checkbox>
|
||||
</MyTooltip>
|
||||
<QuestionTip label={t('dataset:image_auto_parse_tips')} />
|
||||
</HStack>
|
||||
</HStack>
|
||||
</Box>
|
||||
)}
|
||||
<Box mt={6}>
|
||||
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
|
||||
{t('dataset:params_setting')}
|
||||
</Box>
|
||||
<LeftRadio<ChunkSettingModeEnum>
|
||||
list={[
|
||||
{
|
||||
title: t('dataset:default_params'),
|
||||
desc: t('dataset:default_params_desc'),
|
||||
value: ChunkSettingModeEnum.auto
|
||||
},
|
||||
{
|
||||
title: t('dataset:custom_data_process_params'),
|
||||
desc: t('dataset:custom_data_process_params_desc'),
|
||||
value: ChunkSettingModeEnum.custom,
|
||||
children: chunkSettingMode === ChunkSettingModeEnum.custom && (
|
||||
<Box mt={5}>
|
||||
<Box>
|
||||
<RadioGroup<DataChunkSplitModeEnum>
|
||||
list={[
|
||||
{
|
||||
title: t('dataset:split_chunk_size'),
|
||||
value: DataChunkSplitModeEnum.size
|
||||
},
|
||||
{
|
||||
title: t('dataset:split_chunk_char'),
|
||||
value: DataChunkSplitModeEnum.char,
|
||||
tooltip: t('dataset:custom_split_sign_tip')
|
||||
}
|
||||
]}
|
||||
value={chunkSplitMode}
|
||||
onChange={(e) => {
|
||||
setValue('chunkSplitMode', e);
|
||||
}}
|
||||
/>
|
||||
|
||||
{chunkSplitMode === DataChunkSplitModeEnum.size && (
|
||||
<Box
|
||||
mt={1.5}
|
||||
css={{
|
||||
'& > span': {
|
||||
display: 'block'
|
||||
}
|
||||
}}
|
||||
>
|
||||
<MyTooltip
|
||||
label={t('common:core.dataset.import.Chunk Range', {
|
||||
min: minChunkSize,
|
||||
max: maxChunkSize
|
||||
})}
|
||||
>
|
||||
<MyNumberInput
|
||||
register={register}
|
||||
name={chunkSizeField}
|
||||
min={minChunkSize}
|
||||
max={maxChunkSize}
|
||||
size={'sm'}
|
||||
step={100}
|
||||
/>
|
||||
</MyTooltip>
|
||||
</Box>
|
||||
)}
|
||||
|
||||
{chunkSplitMode === DataChunkSplitModeEnum.char && (
|
||||
<HStack mt={1.5}>
|
||||
<Box flex={'1 0 0'}>
|
||||
<MySelect<string>
|
||||
list={customSplitList}
|
||||
size={'sm'}
|
||||
bg={'myGray.50'}
|
||||
value={customListSelectValue}
|
||||
h={'32px'}
|
||||
onChange={(val) => {
|
||||
setCustomListSelectValue(val);
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
{customListSelectValue === 'Other' && (
|
||||
<Input
|
||||
flex={'1 0 0'}
|
||||
h={'32px'}
|
||||
size={'sm'}
|
||||
bg={'myGray.50'}
|
||||
placeholder="\n;======;==SPLIT=="
|
||||
{...register('chunkSplitter')}
|
||||
/>
|
||||
)}
|
||||
</HStack>
|
||||
)}
|
||||
</Box>
|
||||
|
||||
{trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
|
||||
<Box>
|
||||
<Flex alignItems={'center'} mt={3}>
|
||||
<Box>{t('dataset:index_size')}</Box>
|
||||
<QuestionTip label={t('dataset:index_size_tips')} />
|
||||
</Flex>
|
||||
<Box mt={1}>
|
||||
<MySelect<number>
|
||||
bg={'myGray.50'}
|
||||
list={indexSizeSeletorList}
|
||||
value={indexSize}
|
||||
onChange={(val) => {
|
||||
setValue('indexSize', val);
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
|
||||
{showQAPromptInput && (
|
||||
<Box mt={3}>
|
||||
<Box>{t('common:core.dataset.collection.QA Prompt')}</Box>
|
||||
<Box
|
||||
position={'relative'}
|
||||
py={2}
|
||||
px={3}
|
||||
bg={'myGray.50'}
|
||||
fontSize={'xs'}
|
||||
whiteSpace={'pre-wrap'}
|
||||
border={'1px'}
|
||||
borderColor={'borderColor.base'}
|
||||
borderRadius={'md'}
|
||||
maxH={'140px'}
|
||||
overflow={'auto'}
|
||||
_hover={{
|
||||
'& .mask': {
|
||||
display: 'block'
|
||||
}
|
||||
}}
|
||||
>
|
||||
{qaPrompt}
|
||||
|
||||
<Box
|
||||
display={'none'}
|
||||
className="mask"
|
||||
position={'absolute'}
|
||||
top={0}
|
||||
right={0}
|
||||
bottom={0}
|
||||
left={0}
|
||||
background={
|
||||
'linear-gradient(182deg, rgba(255, 255, 255, 0.00) 1.76%, #FFF 84.07%)'
|
||||
}
|
||||
>
|
||||
<Button
|
||||
size="xs"
|
||||
variant={'whiteBase'}
|
||||
leftIcon={<MyIcon name={'edit'} w={'13px'} />}
|
||||
color={'black'}
|
||||
position={'absolute'}
|
||||
right={2}
|
||||
bottom={2}
|
||||
onClick={onOpenCustomPrompt}
|
||||
>
|
||||
{t('common:core.dataset.import.Custom prompt')}
|
||||
</Button>
|
||||
</Box>
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
</Box>
|
||||
)
|
||||
}
|
||||
]}
|
||||
gridGap={3}
|
||||
px={3}
|
||||
py={3}
|
||||
defaultBg="white"
|
||||
activeBg="white"
|
||||
value={chunkSettingMode}
|
||||
w={'100%'}
|
||||
onChange={(e) => {
|
||||
setValue('chunkSettingMode', e);
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
{/* @ts-ignore */}
|
||||
<CollectionChunkForm form={processParamsForm} />
|
||||
</AccordionPanel>
|
||||
</AccordionItem>
|
||||
|
||||
@@ -425,57 +114,8 @@ function DataProcess() {
|
||||
</Flex>
|
||||
</Accordion>
|
||||
</Box>
|
||||
|
||||
{isOpenCustomPrompt && (
|
||||
<PromptTextarea
|
||||
defaultValue={qaPrompt}
|
||||
onChange={(e) => {
|
||||
setValue('qaPrompt', e);
|
||||
}}
|
||||
onClose={onCloseCustomPrompt}
|
||||
/>
|
||||
)}
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
export default React.memo(DataProcess);
|
||||
|
||||
const PromptTextarea = ({
|
||||
defaultValue,
|
||||
onChange,
|
||||
onClose
|
||||
}: {
|
||||
defaultValue: string;
|
||||
onChange: (e: string) => void;
|
||||
onClose: () => void;
|
||||
}) => {
|
||||
const ref = useRef<HTMLTextAreaElement>(null);
|
||||
const { t } = useTranslation();
|
||||
|
||||
return (
|
||||
<MyModal
|
||||
isOpen
|
||||
title={t('common:core.dataset.import.Custom prompt')}
|
||||
iconSrc="modal/edit"
|
||||
w={'600px'}
|
||||
onClose={onClose}
|
||||
>
|
||||
<ModalBody whiteSpace={'pre-wrap'} fontSize={'sm'} px={[3, 6]} pt={[3, 6]}>
|
||||
<Textarea ref={ref} rows={8} fontSize={'sm'} defaultValue={defaultValue} />
|
||||
<Box>{Prompt_AgentQA.fixedText}</Box>
|
||||
</ModalBody>
|
||||
<ModalFooter>
|
||||
<Button
|
||||
onClick={() => {
|
||||
const val = ref.current?.value || Prompt_AgentQA.description;
|
||||
onChange(val);
|
||||
onClose();
|
||||
}}
|
||||
>
|
||||
{t('common:common.Confirm')}
|
||||
</Button>
|
||||
</ModalFooter>
|
||||
</MyModal>
|
||||
);
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user