V4.9.4 feature (#4470)

* Training status (#4424)

* dataset data training state (#4311)

* dataset data training state

* fix

* fix ts

* fix

* fix api format

* fix

* fix

* perf: count training

* format

* fix: dataset training state (#4417)

* fix

* add test

* fix

* fix

* fix test

* fix test

* perf: training count

* count

* loading status

---------

Co-authored-by: heheer <heheer@sealos.io>

* doc

* website sync feature (#4429)

* perf: introduce BullMQ for website sync (#4403)

* perf: introduce BullMQ for website sync

* feat: new redis module

* fix: remove graceful shutdown

* perf: improve UI in dataset detail

- Updated the "change" icon SVG file.
- Modified i18n strings.
- Added new i18n string "immediate_sync".
- Improved UI in dataset detail page, including button icons and
background colors.

* refactor: Add chunkSettings to DatasetSchema

* perf: website sync ux

* env template

* fix: clean up website dataset when updating chunk settings (#4420)

* perf: check setting updated

* perf: worker concurrency

* feat: init script for website sync refactor (#4425)

* website feature doc

---------

Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>

* pro migration (#4388) (#4433)

* pro migration

* reuse customPdfParseType

Co-authored-by: gggaaallleee <91131304+gggaaallleee@users.noreply.github.com>

* perf: remove loading ui

* feat: config chat file expired time

* Redis cache (#4436)

* perf: add Redis cache for vector counting (#4432)

* feat: cache

* perf: get cache key

---------

Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>

* perf: mobile voice input (#4437)

* update:Mobile voice interaction (#4362)

* Add files via upload

* Add files via upload

* Update ollama.md

* Update ollama.md

* Add files via upload

* Update useSpeech.ts

* Update ChatInput.tsx

* Update useSpeech.ts

* Update ChatInput.tsx

* Update useSpeech.ts

* Update constants.ts

* Add files via upload

* Update ChatInput.tsx

* Update useSpeech.ts

* Update useSpeech.ts

* Update useSpeech.ts

* Update ChatInput.tsx

* Add files via upload

* Update common.json

* Update VoiceInput.tsx

* Update ChatInput.tsx

* Update VoiceInput.tsx

* Update useSpeech.ts

* Update useSpeech.ts

* Update common.json

* Update common.json

* Update common.json

* Update VoiceInput.tsx

* Update VoiceInput.tsx

* Update ChatInput.tsx

* Update VoiceInput.tsx

* Update ChatInput.tsx

* Update VoiceInput.tsx

* Update ChatInput.tsx

* Update useSpeech.ts

* Update common.json

* Update chat.json

* Update common.json

* Update chat.json

* Update common.json

* Update chat.json

* Update VoiceInput.tsx

* Update ChatInput.tsx

* Update useSpeech.ts

* Update VoiceInput.tsx

* speech ui

* 优化语音输入组件,调整输入框显示逻辑,修复语音输入遮罩层样式,更新画布背景透明度,增强用户交互体验。 (#4435)

* perf: mobile voice input

---------

Co-authored-by: dreamer6680 <1468683855@qq.com>

* Test completion v2 (#4438)

* add v2 completions (#4364)

* add v2 completions

* completion config

* config version

* fix

* frontend

* doc

* fix

* fix: completions v2 api

---------

Co-authored-by: heheer <heheer@sealos.io>

* package

* Test mongo log (#4443)

* feat: mongodb-log (#4426)

* perf: mongo log

* feat: completions stop reasoner

* mongo db log

---------

Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>

* update doc

* Update doc

* fix external var ui (#4444)

* action

* fix: ts (#4458)

* preview doc action

add docs preview permission

update preview action

update action

* update doc (#4460)

* update preview action

* update doc

* remove

* update

* schema

* update mq export; perf: redis cache (#4465)

* perf: redis cache

* update mq export

* perf: website sync error tip

* add error worker

* website sync ui (#4466)

* Updated the dynamic display of the voice input pop-up (#4469)

* Update VoiceInput.tsx

* Update VoiceInput.tsx

* Update VoiceInput.tsx

* fix: voice input

---------

Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>
Co-authored-by: gggaaallleee <91131304+gggaaallleee@users.noreply.github.com>
Co-authored-by: dreamer6680 <1468683855@qq.com>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
This commit is contained in:
Archer
2025-04-08 12:05:04 +08:00
committed by GitHub
parent 5839325f77
commit f642c9603b
151 changed files with 5434 additions and 1354 deletions

View File

@@ -25,6 +25,14 @@ import {
getAutoIndexSize,
getMaxIndexSize
} from '@fastgpt/global/core/dataset/training/utils';
import { CollectionChunkFormType } from '../Form/CollectionChunkForm';
type ChunkSizeFieldType = 'embeddingChunkSize' | 'qaChunkSize';
export type ImportFormType = {
customPdfParse: boolean;
webSelector: string;
} & CollectionChunkFormType;
type TrainingFiledType = {
chunkOverlapRatio: number;
@@ -51,26 +59,6 @@ type DatasetImportContextType = {
setSources: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
} & TrainingFiledType;
type ChunkSizeFieldType = 'embeddingChunkSize' | 'qaChunkSize';
export type ImportFormType = {
customPdfParse: boolean;
trainingType: DatasetCollectionDataProcessModeEnum;
imageIndex: boolean;
autoIndexes: boolean;
chunkSettingMode: ChunkSettingModeEnum;
chunkSplitMode: DataChunkSplitModeEnum;
embeddingChunkSize: number;
qaChunkSize: number;
chunkSplitter: string;
indexSize: number;
qaPrompt: string;
webSelector: string;
};
export const DatasetImportContext = createContext<DatasetImportContextType>({
importSource: ImportDataSourceEnum.fileLocal,
goToNext: function (): void {
@@ -314,14 +302,7 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
chunkSplitter
};
}
}, [
chunkSettingMode,
TrainingModeMap.autoChunkSize,
TrainingModeMap.autoIndexSize,
TrainingModeMap.chunkSize,
TrainingModeMap.indexSize,
chunkSplitter
]);
}, [chunkSettingMode, TrainingModeMap, chunkSplitter]);
const contextValue = {
...TrainingModeMap,

View File

@@ -1,13 +1,8 @@
import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import React, { useCallback } from 'react';
import {
Box,
Flex,
Input,
Button,
ModalBody,
ModalFooter,
Textarea,
useDisclosure,
Checkbox,
Accordion,
AccordionItem,
@@ -16,93 +11,26 @@ import {
AccordionIcon,
HStack
} from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
import {
DataChunkSplitModeEnum,
DatasetCollectionDataProcessModeEnum,
DatasetCollectionDataProcessModeMap
} from '@fastgpt/global/core/dataset/constants';
import { ChunkSettingModeEnum } from '@fastgpt/global/core/dataset/constants';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyModal from '@fastgpt/web/components/common/MyModal';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import MyTag from '@fastgpt/web/components/common/Tag/index';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
import MyNumberInput from '@fastgpt/web/components/common/Input/NumberInput';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
import { shadowLight } from '@fastgpt/web/styles/theme';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import MySelect from '@fastgpt/web/components/common/MySelect';
import { getIndexSizeSelectList } from '@fastgpt/global/core/dataset/training/utils';
import RadioGroup from '@fastgpt/web/components/common/Radio/RadioGroup';
import CollectionChunkForm from '../../Form/CollectionChunkForm';
import { DatasetCollectionDataProcessModeEnum } from '@fastgpt/global/core/dataset/constants';
function DataProcess() {
const { t } = useTranslation();
const { feConfigs } = useSystemStore();
const {
goToNext,
processParamsForm,
chunkSizeField,
minChunkSize,
maxChunkSize,
maxIndexSize,
indexSize
} = useContextSelector(DatasetImportContext, (v) => v);
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const { setValue, register, watch, getValues } = processParamsForm;
const trainingType = watch('trainingType');
const trainingModeList = useMemo(() => {
const list = Object.entries(DatasetCollectionDataProcessModeMap);
return list
.filter(([key]) => key !== DatasetCollectionDataProcessModeEnum.auto)
.map(([key, value]) => ({
title: t(value.label as any),
value: key as DatasetCollectionDataProcessModeEnum,
tooltip: t(value.tooltip as any)
}));
}, [t]);
const chunkSettingMode = watch('chunkSettingMode');
const chunkSplitMode = watch('chunkSplitMode');
const customSplitList = [
{ label: t('dataset:split_sign_null'), value: '' },
{ label: t('dataset:split_sign_break'), value: '\\n' },
{ label: t('dataset:split_sign_break2'), value: '\\n\\n' },
{ label: t('dataset:split_sign_period'), value: '.|。' },
{ label: t('dataset:split_sign_exclamatiob'), value: '!|' },
{ label: t('dataset:split_sign_question'), value: '?|' },
{ label: t('dataset:split_sign_semicolon'), value: ';|' },
{ label: '=====', value: '=====' },
{ label: t('dataset:split_sign_custom'), value: 'Other' }
];
const [customListSelectValue, setCustomListSelectValue] = useState(getValues('chunkSplitter'));
useEffect(() => {
if (customListSelectValue === 'Other') {
setValue('chunkSplitter', '');
} else {
setValue('chunkSplitter', customListSelectValue);
}
}, [customListSelectValue, setValue]);
// Index size
const indexSizeSeletorList = useMemo(() => getIndexSizeSelectList(maxIndexSize), [maxIndexSize]);
// QA
const qaPrompt = watch('qaPrompt');
const {
isOpen: isOpenCustomPrompt,
onOpen: onOpenCustomPrompt,
onClose: onCloseCustomPrompt
} = useDisclosure();
const { goToNext, processParamsForm, chunkSize } = useContextSelector(
DatasetImportContext,
(v) => v
);
const { register } = processParamsForm;
const Title = useCallback(({ title }: { title: string }) => {
return (
@@ -116,16 +44,7 @@ function DataProcess() {
);
}, []);
// Adapt auto training
useEffect(() => {
if (trainingType === DatasetCollectionDataProcessModeEnum.auto) {
setValue('autoIndexes', true);
setValue('trainingType', DatasetCollectionDataProcessModeEnum.chunk);
}
}, [trainingType, setValue]);
const showFileParseSetting = feConfigs?.showCustomPdfParse;
const showQAPromptInput = trainingType === DatasetCollectionDataProcessModeEnum.qa;
return (
<>
@@ -179,238 +98,8 @@ function DataProcess() {
<Title title={t('dataset:import_data_process_setting')} />
<AccordionPanel p={2}>
<Box mt={2}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:training_mode')}
</Box>
<LeftRadio<DatasetCollectionDataProcessModeEnum>
list={trainingModeList}
px={3}
py={2.5}
value={trainingType}
onChange={(e) => {
setValue('trainingType', e);
}}
defaultBg="white"
activeBg="white"
gridTemplateColumns={'repeat(2, 1fr)'}
/>
</Box>
{trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
<Box mt={6}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:enhanced_indexes')}
</Box>
<HStack gap={[3, 7]}>
<HStack flex={'1'} spacing={1}>
<MyTooltip
label={!feConfigs?.isPlus ? t('common:commercial_function_tip') : ''}
>
<Checkbox isDisabled={!feConfigs?.isPlus} {...register('autoIndexes')}>
<FormLabel>{t('dataset:auto_indexes')}</FormLabel>
</Checkbox>
</MyTooltip>
<QuestionTip label={t('dataset:auto_indexes_tips')} />
</HStack>
<HStack flex={'1'} spacing={1}>
<MyTooltip
label={
!feConfigs?.isPlus
? t('common:commercial_function_tip')
: !datasetDetail?.vlmModel
? t('common:error_vlm_not_config')
: ''
}
>
<Checkbox
isDisabled={!feConfigs?.isPlus || !datasetDetail?.vlmModel}
{...register('imageIndex')}
>
<FormLabel>{t('dataset:image_auto_parse')}</FormLabel>
</Checkbox>
</MyTooltip>
<QuestionTip label={t('dataset:image_auto_parse_tips')} />
</HStack>
</HStack>
</Box>
)}
<Box mt={6}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:params_setting')}
</Box>
<LeftRadio<ChunkSettingModeEnum>
list={[
{
title: t('dataset:default_params'),
desc: t('dataset:default_params_desc'),
value: ChunkSettingModeEnum.auto
},
{
title: t('dataset:custom_data_process_params'),
desc: t('dataset:custom_data_process_params_desc'),
value: ChunkSettingModeEnum.custom,
children: chunkSettingMode === ChunkSettingModeEnum.custom && (
<Box mt={5}>
<Box>
<RadioGroup<DataChunkSplitModeEnum>
list={[
{
title: t('dataset:split_chunk_size'),
value: DataChunkSplitModeEnum.size
},
{
title: t('dataset:split_chunk_char'),
value: DataChunkSplitModeEnum.char,
tooltip: t('dataset:custom_split_sign_tip')
}
]}
value={chunkSplitMode}
onChange={(e) => {
setValue('chunkSplitMode', e);
}}
/>
{chunkSplitMode === DataChunkSplitModeEnum.size && (
<Box
mt={1.5}
css={{
'& > span': {
display: 'block'
}
}}
>
<MyTooltip
label={t('common:core.dataset.import.Chunk Range', {
min: minChunkSize,
max: maxChunkSize
})}
>
<MyNumberInput
register={register}
name={chunkSizeField}
min={minChunkSize}
max={maxChunkSize}
size={'sm'}
step={100}
/>
</MyTooltip>
</Box>
)}
{chunkSplitMode === DataChunkSplitModeEnum.char && (
<HStack mt={1.5}>
<Box flex={'1 0 0'}>
<MySelect<string>
list={customSplitList}
size={'sm'}
bg={'myGray.50'}
value={customListSelectValue}
h={'32px'}
onChange={(val) => {
setCustomListSelectValue(val);
}}
/>
</Box>
{customListSelectValue === 'Other' && (
<Input
flex={'1 0 0'}
h={'32px'}
size={'sm'}
bg={'myGray.50'}
placeholder="\n;======;==SPLIT=="
{...register('chunkSplitter')}
/>
)}
</HStack>
)}
</Box>
{trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
<Box>
<Flex alignItems={'center'} mt={3}>
<Box>{t('dataset:index_size')}</Box>
<QuestionTip label={t('dataset:index_size_tips')} />
</Flex>
<Box mt={1}>
<MySelect<number>
bg={'myGray.50'}
list={indexSizeSeletorList}
value={indexSize}
onChange={(val) => {
setValue('indexSize', val);
}}
/>
</Box>
</Box>
)}
{showQAPromptInput && (
<Box mt={3}>
<Box>{t('common:core.dataset.collection.QA Prompt')}</Box>
<Box
position={'relative'}
py={2}
px={3}
bg={'myGray.50'}
fontSize={'xs'}
whiteSpace={'pre-wrap'}
border={'1px'}
borderColor={'borderColor.base'}
borderRadius={'md'}
maxH={'140px'}
overflow={'auto'}
_hover={{
'& .mask': {
display: 'block'
}
}}
>
{qaPrompt}
<Box
display={'none'}
className="mask"
position={'absolute'}
top={0}
right={0}
bottom={0}
left={0}
background={
'linear-gradient(182deg, rgba(255, 255, 255, 0.00) 1.76%, #FFF 84.07%)'
}
>
<Button
size="xs"
variant={'whiteBase'}
leftIcon={<MyIcon name={'edit'} w={'13px'} />}
color={'black'}
position={'absolute'}
right={2}
bottom={2}
onClick={onOpenCustomPrompt}
>
{t('common:core.dataset.import.Custom prompt')}
</Button>
</Box>
</Box>
</Box>
)}
</Box>
)
}
]}
gridGap={3}
px={3}
py={3}
defaultBg="white"
activeBg="white"
value={chunkSettingMode}
w={'100%'}
onChange={(e) => {
setValue('chunkSettingMode', e);
}}
/>
</Box>
{/* @ts-ignore */}
<CollectionChunkForm form={processParamsForm} />
</AccordionPanel>
</AccordionItem>
@@ -425,57 +114,8 @@ function DataProcess() {
</Flex>
</Accordion>
</Box>
{isOpenCustomPrompt && (
<PromptTextarea
defaultValue={qaPrompt}
onChange={(e) => {
setValue('qaPrompt', e);
}}
onClose={onCloseCustomPrompt}
/>
)}
</>
);
}
export default React.memo(DataProcess);
/**
 * Modal dialog for editing the custom QA prompt.
 *
 * Renders an editable textarea pre-filled with `defaultValue`, followed by the
 * read-only `Prompt_AgentQA.fixedText` shown beneath it.
 *
 * @param defaultValue - Initial text placed in the textarea.
 * @param onChange - Called with the resulting prompt text when Confirm is clicked.
 * @param onClose - Called when the modal is dismissed, and also right after
 *   `onChange` when Confirm is clicked.
 */
const PromptTextarea = ({
defaultValue,
onChange,
onClose
}: {
defaultValue: string;
onChange: (e: string) => void;
onClose: () => void;
}) => {
// The textarea is uncontrolled: its value is only read through this ref at confirm time.
const ref = useRef<HTMLTextAreaElement>(null);
const { t } = useTranslation();
return (
<MyModal
isOpen
title={t('common:core.dataset.import.Custom prompt')}
iconSrc="modal/edit"
w={'600px'}
onClose={onClose}
>
<ModalBody whiteSpace={'pre-wrap'} fontSize={'sm'} px={[3, 6]} pt={[3, 6]}>
<Textarea ref={ref} rows={8} fontSize={'sm'} defaultValue={defaultValue} />
<Box>{Prompt_AgentQA.fixedText}</Box>
</ModalBody>
<ModalFooter>
<Button
onClick={() => {
// An empty (or unavailable) textarea value falls back to Prompt_AgentQA.description.
const val = ref.current?.value || Prompt_AgentQA.description;
onChange(val);
onClose();
}}
>
{t('common:common.Confirm')}
</Button>
</ModalFooter>
</MyModal>
);
};