feat: 拆分文本增加滑块,增加直接分段导入方式
This commit is contained in:
@@ -122,9 +122,9 @@ const InputDataModal = ({
|
||||
<Box h={'30px'}>问题</Box>
|
||||
<Textarea
|
||||
placeholder={
|
||||
'相关问题,可以输入多个问法, 最多500字。例如:\n1. laf 是什么?\n2. laf 可以做什么?\n3. laf怎么用'
|
||||
'相关问题,可以输入多个问法, 最多 1000 字。例如:\n1. laf 是什么?\n2. laf 可以做什么?\n3. laf怎么用'
|
||||
}
|
||||
maxLength={500}
|
||||
maxLength={1000}
|
||||
resize={'none'}
|
||||
h={'calc(100% - 30px)'}
|
||||
{...register(`q`, {
|
||||
@@ -136,9 +136,9 @@ const InputDataModal = ({
|
||||
<Box h={'30px'}>知识点</Box>
|
||||
<Textarea
|
||||
placeholder={
|
||||
'知识点,最多1000字。请保持主语的完整性,缺少主语会导致效果不佳。例如:\n1. laf是一个云函数开发平台。\n2. laf 什么都能做。\n3. 下面是使用 laf 的例子: ……'
|
||||
'知识点,最多 2000 字。例如:\n1. laf是一个云函数开发平台。\n2. laf 什么都能做。\n3. 下面是使用 laf 的例子: ……'
|
||||
}
|
||||
maxLength={1000}
|
||||
maxLength={2000}
|
||||
resize={'none'}
|
||||
h={'calc(100% - 30px)'}
|
||||
{...register(`a`, {
|
||||
|
||||
@@ -18,6 +18,7 @@ import {
|
||||
MenuItem,
|
||||
Input
|
||||
} from '@chakra-ui/react';
|
||||
import type { BoxProps } from '@chakra-ui/react';
|
||||
import type { ModelSchema } from '@/types/mongoSchema';
|
||||
import type { ModelDataItemType } from '@/types/model';
|
||||
import { ModelDataStatusMap } from '@/constants/model';
|
||||
@@ -114,6 +115,14 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
}
|
||||
});
|
||||
|
||||
// Shared Chakra style props for the table-cell content boxes; spread onto the
// <Box> wrapping both the question (item.q) and answer (item.a) cells so the
// two columns share the same font size, width/height caps and scrolling.
const tdStyles: BoxProps = {
|
||||
fontSize: 'xs',
|
||||
maxW: '500px',
|
||||
whiteSpace: 'pre-wrap',
|
||||
maxH: '250px',
|
||||
overflowY: 'auto'
|
||||
};
|
||||
|
||||
return (
|
||||
<>
|
||||
<Flex>
|
||||
@@ -156,8 +165,8 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
>
|
||||
手动输入
|
||||
</MenuItem>
|
||||
<MenuItem onClick={onOpenSelectFileModal}>文本/文件 QA 拆分</MenuItem>
|
||||
<MenuItem onClick={onOpenSelectUrlModal}>网站内容 QA 拆分</MenuItem>
|
||||
<MenuItem onClick={onOpenSelectFileModal}>文本/文件拆分</MenuItem>
|
||||
{/* <MenuItem onClick={onOpenSelectUrlModal}>网站内容拆分</MenuItem> */}
|
||||
<MenuItem onClick={onOpenSelectCsvModal}>csv 问答对导入</MenuItem>
|
||||
</MenuList>
|
||||
</Menu>
|
||||
@@ -191,33 +200,23 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
|
||||
<Box mt={4}>
|
||||
<TableContainer minH={'500px'}>
|
||||
<Table variant={'simple'}>
|
||||
<Table variant={'simple'} w={'100%'}>
|
||||
<Thead>
|
||||
<Tr>
|
||||
<Th>Question</Th>
|
||||
<Th>Text</Th>
|
||||
<Th>Status</Th>
|
||||
<Th>匹配内容(问题)</Th>
|
||||
<Th>对应答案</Th>
|
||||
<Th>状态</Th>
|
||||
<Th>操作</Th>
|
||||
</Tr>
|
||||
</Thead>
|
||||
<Tbody>
|
||||
{modelDataList.map((item) => (
|
||||
<Tr key={item.id}>
|
||||
<Td minW={'200px'}>
|
||||
<Box fontSize={'xs'} whiteSpace={'pre-wrap'}>
|
||||
{item.q}
|
||||
</Box>
|
||||
<Td>
|
||||
<Box {...tdStyles}>{item.q}</Box>
|
||||
</Td>
|
||||
<Td minW={'200px'}>
|
||||
<Box
|
||||
w={'100%'}
|
||||
fontSize={'xs'}
|
||||
whiteSpace={'pre-wrap'}
|
||||
maxH={'250px'}
|
||||
overflowY={'auto'}
|
||||
>
|
||||
{item.a}
|
||||
</Box>
|
||||
<Td>
|
||||
<Box {...tdStyles}>{item.a || '-'}</Box>
|
||||
</Td>
|
||||
<Td>{ModelDataStatusMap[item.status]}</Td>
|
||||
<Td>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { useState, useCallback } from 'react';
|
||||
import React, { useState, useCallback, useMemo } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Flex,
|
||||
@@ -20,9 +20,26 @@ import { readTxtContent, readPdfContent, readDocContent } from '@/utils/file';
|
||||
import { useMutation } from '@tanstack/react-query';
|
||||
import { postModelDataSplitData } from '@/api/model';
|
||||
import { formatPrice } from '@/utils/user';
|
||||
import Radio from '@/components/Radio';
|
||||
import { splitText } from '@/utils/file';
|
||||
|
||||
const fileExtension = '.txt,.doc,.docx,.pdf,.md';
|
||||
|
||||
// Per-mode settings for the file import modal, keyed by the split mode
// ('qa' | 'subsection') selected in the modal.
// - The whole entry is spread into splitText() together with the text, so
//   maxLen / slideLen are consumed there (presumably the maximum chunk length
//   and the sliding overlap between chunks — confirm against splitText).
// - price is multiplied by the token count of the preview text and passed to
//   formatPrice() for the cost estimate.
// - isPrompt controls whether the prompt ("下面是 ...") input row is rendered.
const modeMap = {
|
||||
qa: {
|
||||
maxLen: 2800,
|
||||
slideLen: 800,
|
||||
price: 3,
|
||||
isPrompt: true
|
||||
},
|
||||
subsection: {
|
||||
maxLen: 1000,
|
||||
slideLen: 300,
|
||||
price: 0.4,
|
||||
isPrompt: false
|
||||
}
|
||||
};
|
||||
|
||||
const SelectFileModal = ({
|
||||
onClose,
|
||||
onSuccess,
|
||||
@@ -36,38 +53,45 @@ const SelectFileModal = ({
|
||||
const { toast } = useToast();
|
||||
const [prompt, setPrompt] = useState('');
|
||||
const { File, onOpen } = useSelectFile({ fileType: fileExtension, multiple: true });
|
||||
const [fileText, setFileText] = useState('');
|
||||
const [mode, setMode] = useState<'qa' | 'subsection'>('qa');
|
||||
const [fileTextArr, setFileTextArr] = useState<string[]>(['']);
|
||||
// Confirmation dialog shown before the import is submitted (the submit button
// wraps the mutation in openConfirm). The message warns that the split task
// cannot be cancelled and that it will be terminated if the balance runs out.
// Fixed typo in the user-facing text: "讲被终止" -> "将被终止".
const { openConfirm, ConfirmChild } = useConfirm({
  content: '确认导入该文件,需要一定时间进行拆解,该任务无法终止!如果余额不足,任务将被终止。'
});
|
||||
|
||||
// Concatenated text of every chunk produced for the current mode. It is only
// used for the token/price estimate and for the "nothing to import" checks,
// never sent to the server (the submit path rebuilds the chunk list itself).
//
// splitText() appears to return an array of chunks per input text (the submit
// path flattens the same mapped result). Joining the un-flattened array of
// arrays would stringify each inner array with "," separators and inflate the
// token count, so the chunks are flattened before being joined.
const fileText = useMemo(
  () =>
    fileTextArr
      .flatMap((item) =>
        splitText({
          text: item,
          ...modeMap[mode]
        })
      )
      .join(''),
  [fileTextArr, mode]
);
|
||||
|
||||
const onSelectFile = useCallback(
|
||||
async (e: File[]) => {
|
||||
setSelecting(true);
|
||||
try {
|
||||
const fileTexts = (
|
||||
await Promise.all(
|
||||
e.map((file) => {
|
||||
// @ts-ignore
|
||||
const extension = file?.name?.split('.').pop().toLowerCase();
|
||||
switch (extension) {
|
||||
case 'txt':
|
||||
case 'md':
|
||||
return readTxtContent(file);
|
||||
case 'pdf':
|
||||
return readPdfContent(file);
|
||||
case 'doc':
|
||||
case 'docx':
|
||||
return readDocContent(file);
|
||||
default:
|
||||
return '';
|
||||
}
|
||||
})
|
||||
)
|
||||
)
|
||||
.join(' ')
|
||||
.replace(/(\\n|\n)+/g, '\n');
|
||||
setFileText(fileTexts);
|
||||
const fileTexts = await Promise.all(
|
||||
e.map((file) => {
|
||||
// @ts-ignore
|
||||
const extension = file?.name?.split('.').pop().toLowerCase();
|
||||
switch (extension) {
|
||||
case 'txt':
|
||||
case 'md':
|
||||
return readTxtContent(file);
|
||||
case 'pdf':
|
||||
return readPdfContent(file);
|
||||
case 'doc':
|
||||
case 'docx':
|
||||
return readDocContent(file);
|
||||
default:
|
||||
return '';
|
||||
}
|
||||
})
|
||||
);
|
||||
setFileTextArr(fileTexts);
|
||||
} catch (error: any) {
|
||||
console.log(error);
|
||||
toast({
|
||||
@@ -77,16 +101,25 @@ const SelectFileModal = ({
|
||||
}
|
||||
setSelecting(false);
|
||||
},
|
||||
[setSelecting, toast]
|
||||
[toast]
|
||||
);
|
||||
|
||||
const { mutate, isLoading } = useMutation({
|
||||
mutationFn: async () => {
|
||||
if (!fileText) return;
|
||||
const chunks = fileTextArr
|
||||
.map((item) =>
|
||||
splitText({
|
||||
text: item,
|
||||
...modeMap[mode]
|
||||
})
|
||||
)
|
||||
.flat();
|
||||
await postModelDataSplitData({
|
||||
modelId,
|
||||
text: fileText.replace(/\\n/g, '\n').replace(/\n+/g, '\n'),
|
||||
prompt: `下面是"${prompt || '一段长文本'}"`
|
||||
chunks,
|
||||
prompt: `下面是"${prompt || '一段长文本'}"`,
|
||||
mode
|
||||
});
|
||||
toast({
|
||||
title: '导入数据成功,需要一段拆解和训练',
|
||||
@@ -106,58 +139,82 @@ const SelectFileModal = ({
|
||||
return (
|
||||
<Modal isOpen={true} onClose={onClose} isCentered>
|
||||
<ModalOverlay />
|
||||
<ModalContent maxW={'min(900px, 90vw)'} m={0} position={'relative'} h={'90vh'}>
|
||||
<ModalContent maxW={'min(1000px, 90vw)'} m={0} position={'relative'} h={'90vh'}>
|
||||
<ModalHeader>文件导入</ModalHeader>
|
||||
<ModalCloseButton />
|
||||
|
||||
<ModalBody
|
||||
display={'flex'}
|
||||
flexDirection={'column'}
|
||||
p={4}
|
||||
p={0}
|
||||
h={'100%'}
|
||||
alignItems={'center'}
|
||||
justifyContent={'center'}
|
||||
fontSize={'sm'}
|
||||
>
|
||||
<Button isLoading={selecting} onClick={onOpen}>
|
||||
选择文件
|
||||
</Button>
|
||||
<Box mt={2} maxW={['100%', '70%']}>
|
||||
<Box mt={2} px={4} maxW={['100%']} textAlign={'justify'} color={'blackAlpha.600'}>
|
||||
支持 {fileExtension} 文件。模型会自动对文本进行 QA 拆分,需要较长训练时间,拆分需要消耗
|
||||
tokens,账号余额不足时,未拆分的数据会被删除。
|
||||
tokens,账号余额不足时,未拆分的数据会被删除。当前一共 {encode(fileText).length}{' '}
|
||||
个tokens,大约 {formatPrice(encode(fileText).length * modeMap[mode].price)}元
|
||||
</Box>
|
||||
<Box mt={2}>
|
||||
一共 {encode(fileText).length} 个tokens,大约 {formatPrice(encode(fileText).length * 3)}
|
||||
元
|
||||
</Box>
|
||||
<Flex w={'100%'} alignItems={'center'} my={4}>
|
||||
<Box flex={'0 0 auto'} mr={2}>
|
||||
下面是
|
||||
</Box>
|
||||
<Input
|
||||
placeholder="提示词,例如: Laf的介绍/关于gpt4的论文/一段长文本"
|
||||
value={prompt}
|
||||
onChange={(e) => setPrompt(e.target.value)}
|
||||
size={'sm'}
|
||||
{/* 拆分模式 */}
|
||||
<Flex w={'100%'} px={5} alignItems={'center'} mt={4}>
|
||||
<Box flex={'0 0 70px'}>分段模式:</Box>
|
||||
<Radio
|
||||
ml={3}
|
||||
list={[
|
||||
{ label: 'QA拆分', value: 'qa' },
|
||||
{ label: '直接分段', value: 'subsection' }
|
||||
]}
|
||||
value={mode}
|
||||
onChange={(e) => setMode(e as 'subsection' | 'qa')}
|
||||
/>
|
||||
</Flex>
|
||||
<Textarea
|
||||
flex={'1 0 0'}
|
||||
h={0}
|
||||
w={'100%'}
|
||||
placeholder="文件内容"
|
||||
maxLength={-1}
|
||||
resize={'none'}
|
||||
fontSize={'xs'}
|
||||
whiteSpace={'pre-wrap'}
|
||||
value={fileText}
|
||||
onChange={(e) => setFileText(e.target.value)}
|
||||
/>
|
||||
{/* 内容介绍 */}
|
||||
{modeMap[mode].isPrompt && (
|
||||
<Flex w={'100%'} px={5} alignItems={'center'} mt={4}>
|
||||
<Box flex={'0 0 70px'} mr={2}>
|
||||
下面是
|
||||
</Box>
|
||||
<Input
|
||||
placeholder="提示词,例如: Laf的介绍/关于gpt4的论文/一段长文本"
|
||||
value={prompt}
|
||||
onChange={(e) => setPrompt(e.target.value)}
|
||||
size={'sm'}
|
||||
/>
|
||||
</Flex>
|
||||
)}
|
||||
{/* 文本内容 */}
|
||||
<Box flex={'1 0 0'} px={5} h={0} w={'100%'} overflowY={'auto'} mt={4}>
|
||||
{fileTextArr.map((item, i) => (
|
||||
<Box key={i} mb={5}>
|
||||
<Box mb={1}>文本{i + 1}</Box>
|
||||
<Textarea
|
||||
placeholder="文件内容"
|
||||
maxLength={-1}
|
||||
rows={10}
|
||||
fontSize={'xs'}
|
||||
whiteSpace={'pre-wrap'}
|
||||
value={item}
|
||||
onChange={(e) => {
|
||||
setFileTextArr([
|
||||
...fileTextArr.slice(0, i),
|
||||
e.target.value,
|
||||
...fileTextArr.slice(i + 1)
|
||||
]);
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
))}
|
||||
</Box>
|
||||
</ModalBody>
|
||||
|
||||
<Flex px={6} pt={2} pb={4}>
|
||||
<Button isLoading={selecting} onClick={onOpen}>
|
||||
选择文件
|
||||
</Button>
|
||||
<Box flex={1}></Box>
|
||||
<Button variant={'outline'} mr={3} onClick={onClose}>
|
||||
<Button variant={'outline'} colorScheme={'gray'} mr={3} onClick={onClose}>
|
||||
取消
|
||||
</Button>
|
||||
<Button isLoading={isLoading} isDisabled={fileText === ''} onClick={openConfirm(mutate)}>
|
||||
|
||||
@@ -44,8 +44,9 @@ const SelectUrlModal = ({
|
||||
if (!webText) return;
|
||||
await postModelDataSplitData({
|
||||
modelId,
|
||||
text: webText,
|
||||
prompt: `下面是"${prompt || '一段长文本'}"`
|
||||
chunks: [],
|
||||
prompt: `下面是"${prompt || '一段长文本'}"`,
|
||||
mode: 'qa'
|
||||
});
|
||||
toast({
|
||||
title: '导入数据成功,需要一段拆解和训练',
|
||||
@@ -89,7 +90,7 @@ const SelectUrlModal = ({
|
||||
<Modal isOpen={true} onClose={onClose} isCentered>
|
||||
<ModalOverlay />
|
||||
<ModalContent maxW={'min(900px, 90vw)'} m={0} position={'relative'} h={'90vh'}>
|
||||
<ModalHeader>网站地址导入</ModalHeader>
|
||||
<ModalHeader>静态网站内容导入</ModalHeader>
|
||||
<ModalCloseButton />
|
||||
|
||||
<ModalBody
|
||||
@@ -102,7 +103,7 @@ const SelectUrlModal = ({
|
||||
fontSize={'sm'}
|
||||
>
|
||||
<Box mt={2} maxW={['100%', '70%']}>
|
||||
根据网站地址,获取网站文本内容(请注意获取后的内容,不是每个网站内容都能获取到的)。模型会对文本进行
|
||||
根据网站地址,获取网站文本内容(请注意仅能获取静态网站文本,注意看下获取后的内容是否正确)。模型会对文本进行
|
||||
QA 拆分,需要较长训练时间,拆分需要消耗 tokens,账号余额不足时,未拆分的数据会被删除。
|
||||
</Box>
|
||||
<Box mt={2}>
|
||||
|
||||
Reference in New Issue
Block a user