perf: token split text
This commit is contained in:
@@ -118,11 +118,11 @@ const InputDataModal = ({
|
||||
px={6}
|
||||
pb={2}
|
||||
>
|
||||
<Box flex={2} mr={[0, 4]} mb={[4, 0]} h={['230px', '100%']}>
|
||||
<Box flex={1} mr={[0, 4]} mb={[4, 0]} h={['230px', '100%']}>
|
||||
<Box h={'30px'}>{'匹配的知识点'}</Box>
|
||||
<Textarea
|
||||
placeholder={'匹配的知识点。这部分内容会被搜索,请把控内容的质量。最多 1000 字。'}
|
||||
maxLength={2000}
|
||||
placeholder={'匹配的知识点。这部分内容会被搜索,请把控内容的质量。最多 1500 字。'}
|
||||
maxLength={1500}
|
||||
resize={'none'}
|
||||
h={'calc(100% - 30px)'}
|
||||
{...register(`q`, {
|
||||
@@ -130,13 +130,13 @@ const InputDataModal = ({
|
||||
})}
|
||||
/>
|
||||
</Box>
|
||||
<Box flex={3} h={['330px', '100%']}>
|
||||
<Box flex={1} h={['330px', '100%']}>
|
||||
<Box h={'30px'}>补充知识</Box>
|
||||
<Textarea
|
||||
placeholder={
|
||||
'补充知识。这部分内容不会被搜索,但会作为"匹配的知识点"的内容补充,你可以讲一些细节的内容填写在这里。最多 2000 字。'
|
||||
'补充知识。这部分内容不会被搜索,但会作为"匹配的知识点"的内容补充,你可以讲一些细节的内容填写在这里。最多 1500 字。'
|
||||
}
|
||||
maxLength={2000}
|
||||
maxLength={1500}
|
||||
resize={'none'}
|
||||
h={'calc(100% - 30px)'}
|
||||
{...register('a')}
|
||||
|
||||
@@ -16,8 +16,10 @@ import {
|
||||
MenuButton,
|
||||
MenuList,
|
||||
MenuItem,
|
||||
Input
|
||||
Input,
|
||||
Tooltip
|
||||
} from '@chakra-ui/react';
|
||||
import { QuestionOutlineIcon } from '@chakra-ui/icons';
|
||||
import type { BoxProps } from '@chakra-ui/react';
|
||||
import type { ModelDataItemType } from '@/types/model';
|
||||
import { ModelDataStatusMap } from '@/constants/model';
|
||||
@@ -208,7 +210,16 @@ const ModelDataCard = ({ modelId, isOwner }: { modelId: string; isOwner: boolean
|
||||
<Table variant={'simple'} w={'100%'}>
|
||||
<Thead>
|
||||
<Tr>
|
||||
<Th>{'匹配的知识点'}</Th>
|
||||
<Th>
|
||||
匹配的知识点
|
||||
<Tooltip
|
||||
label={
|
||||
'对话时,会将用户的问题和知识库的 "匹配知识点" 进行比较,找到最相似的前 n 条记录,将这些记录的 "匹配知识点"+"补充知识点" 作为 chatgpt 的系统提示词。'
|
||||
}
|
||||
>
|
||||
<QuestionOutlineIcon ml={1} />
|
||||
</Tooltip>
|
||||
</Th>
|
||||
<Th>补充知识</Th>
|
||||
<Th>状态</Th>
|
||||
{isOwner && <Th>操作</Th>}
|
||||
|
||||
@@ -20,8 +20,7 @@ import { useMutation } from '@tanstack/react-query';
|
||||
import { postModelDataSplitData } from '@/api/model';
|
||||
import { formatPrice } from '@/utils/user';
|
||||
import Radio from '@/components/Radio';
|
||||
import { splitText } from '@/utils/file';
|
||||
import { countChatTokens } from '@/utils/tools';
|
||||
import { splitText_token } from '@/utils/file';
|
||||
|
||||
const fileExtension = '.txt,.doc,.docx,.pdf,.md';
|
||||
|
||||
@@ -49,7 +48,7 @@ const SelectFileModal = ({
|
||||
onSuccess: () => void;
|
||||
modelId: string;
|
||||
}) => {
|
||||
const [selecting, setSelecting] = useState(false);
|
||||
const [btnLoading, setBtnLoading] = useState(false);
|
||||
const { toast } = useToast();
|
||||
const [prompt, setPrompt] = useState('');
|
||||
const { File, onOpen } = useSelectFile({ fileType: fileExtension, multiple: true });
|
||||
@@ -62,17 +61,21 @@ const SelectFileModal = ({
|
||||
const { openConfirm, ConfirmChild } = useConfirm({
|
||||
content: `确认导入该文件,需要一定时间进行拆解,该任务无法终止!如果余额不足,未完成的任务会被直接清除。一共 ${
|
||||
splitRes.chunks.length
|
||||
} 组,大约 ${splitRes.tokens || '数量太多,未计算'} 个tokens, 约 ${formatPrice(
|
||||
splitRes.tokens * modeMap[mode].price
|
||||
)} 元`
|
||||
} 组。${
|
||||
splitRes.tokens
|
||||
? `大约 ${splitRes.tokens} 个tokens, 约 ${formatPrice(
|
||||
splitRes.tokens * modeMap[mode].price
|
||||
)} 元`
|
||||
: ''
|
||||
}`
|
||||
});
|
||||
|
||||
const onSelectFile = useCallback(
|
||||
async (e: File[]) => {
|
||||
setSelecting(true);
|
||||
async (files: File[]) => {
|
||||
setBtnLoading(true);
|
||||
try {
|
||||
let promise = Promise.resolve();
|
||||
e.map((file) => {
|
||||
files.forEach((file) => {
|
||||
promise = promise.then(async () => {
|
||||
const extension = file?.name?.split('.')?.pop()?.toLowerCase();
|
||||
let text = '';
|
||||
@@ -101,7 +104,7 @@ const SelectFileModal = ({
|
||||
status: 'error'
|
||||
});
|
||||
}
|
||||
setSelecting(false);
|
||||
setBtnLoading(false);
|
||||
},
|
||||
[toast]
|
||||
);
|
||||
@@ -131,31 +134,27 @@ const SelectFileModal = ({
|
||||
}
|
||||
});
|
||||
|
||||
const onclickImport = useCallback(() => {
|
||||
const chunks = fileTextArr
|
||||
const onclickImport = useCallback(async () => {
|
||||
setBtnLoading(true);
|
||||
let promise = Promise.resolve();
|
||||
|
||||
const splitRes = fileTextArr
|
||||
.filter((item) => item)
|
||||
.map((item) =>
|
||||
splitText({
|
||||
splitText_token({
|
||||
text: item,
|
||||
...modeMap[mode]
|
||||
})
|
||||
)
|
||||
.flat();
|
||||
|
||||
let tokens: number[] = [];
|
||||
|
||||
// just count 100 sets of tokens
|
||||
if (chunks.length < 100) {
|
||||
tokens = chunks.map((item) =>
|
||||
countChatTokens({ messages: [{ role: 'system', content: item }] })
|
||||
);
|
||||
}
|
||||
|
||||
setSplitRes({
|
||||
tokens: tokens.reduce((sum, item) => sum + item, 0),
|
||||
chunks
|
||||
tokens: splitRes.reduce((sum, item) => sum + item.tokens, 0),
|
||||
chunks: splitRes.map((item) => item.chunks).flat()
|
||||
});
|
||||
|
||||
setBtnLoading(false);
|
||||
|
||||
await promise;
|
||||
openConfirm(mutate)();
|
||||
}, [fileTextArr, mode, mutate, openConfirm]);
|
||||
|
||||
@@ -239,7 +238,7 @@ const SelectFileModal = ({
|
||||
</ModalBody>
|
||||
|
||||
<Flex px={6} pt={2} pb={4}>
|
||||
<Button isLoading={selecting} onClick={onOpen}>
|
||||
<Button isLoading={btnLoading} onClick={onOpen}>
|
||||
选择文件
|
||||
</Button>
|
||||
<Box flex={1}></Box>
|
||||
@@ -247,8 +246,8 @@ const SelectFileModal = ({
|
||||
取消
|
||||
</Button>
|
||||
<Button
|
||||
isLoading={isLoading}
|
||||
isDisabled={selecting || fileTextArr[0] === ''}
|
||||
isLoading={isLoading || btnLoading}
|
||||
isDisabled={isLoading || btnLoading || fileTextArr[0] === ''}
|
||||
onClick={onclickImport}
|
||||
>
|
||||
确认导入
|
||||
|
||||
Reference in New Issue
Block a user