feat: 根据url获取网站文本

This commit is contained in:
archer
2023-04-05 16:10:47 +08:00
parent 5feb2e19bf
commit dc329041f3
8 changed files with 278 additions and 25 deletions

View File

@@ -0,0 +1,168 @@
import React, { useState } from 'react';
import {
Box,
Flex,
Button,
Modal,
ModalOverlay,
ModalContent,
ModalHeader,
ModalCloseButton,
ModalBody,
Input,
Textarea
} from '@chakra-ui/react';
import { useToast } from '@/hooks/useToast';
import { customAlphabet } from 'nanoid';
import { encode } from 'gpt-token-utils';
import { useConfirm } from '@/hooks/useConfirm';
import { useMutation } from '@tanstack/react-query';
import { postModelDataSplitData, getWebContent } from '@/api/model';
import { formatPrice } from '@/utils/user';
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
const SelectUrlModal = ({
onClose,
onSuccess,
modelId
}: {
onClose: () => void;
onSuccess: () => void;
modelId: string;
}) => {
const { toast } = useToast();
const [webUrl, setWebUrl] = useState('');
const [webText, setWebText] = useState('');
const [prompt, setPrompt] = useState(''); // 提示词
const { openConfirm, ConfirmChild } = useConfirm({
content: '确认导入该文件,需要一定时间进行拆解,该任务无法终止!如果余额不足,任务讲被终止。'
});
const { mutate: onclickImport, isLoading: isImporting } = useMutation({
mutationFn: async () => {
if (!webText) return;
await postModelDataSplitData({
modelId,
text: webText,
prompt: `下面是${prompt || '一段长文本'}`
});
toast({
title: '导入数据成功,需要一段拆解和训练',
status: 'success'
});
onClose();
onSuccess();
},
onError(error) {
console.log(error);
toast({
title: '导入数据失败',
status: 'error'
});
}
});
const { mutate: onclickFetchingUrl, isLoading: isFetching } = useMutation({
mutationFn: async () => {
if (!webUrl) return;
const res = await getWebContent(webUrl);
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(res, 'text/html');
const data = htmlDoc?.body?.innerText || '';
if (!data) {
throw new Error('获取不到数据');
}
setWebText(data.replace(/\s+/g, ' '));
},
onError(error) {
console.log(error);
toast({
status: 'error',
title: '获取网站内容失败'
});
}
});
return (
<Modal isOpen={true} onClose={onClose} isCentered>
<ModalOverlay />
<ModalContent maxW={'min(900px, 90vw)'} m={0} position={'relative'} h={'90vh'}>
<ModalHeader></ModalHeader>
<ModalCloseButton />
<ModalBody
display={'flex'}
flexDirection={'column'}
p={4}
h={'100%'}
alignItems={'center'}
justifyContent={'center'}
fontSize={'sm'}
>
<Box mt={2} maxW={['100%', '70%']}>
QA tokens
</Box>
<Box mt={2}>
{encode(webText).length} tokens {formatPrice(encode(webText).length * 4)}
</Box>
<Flex w={'100%'} alignItems={'center'} my={4}>
<Box flex={'0 0 70px'}></Box>
<Input
mx={2}
placeholder="需要获取内容的地址。例如https://fastgpt.ahapocket.cn"
value={webUrl}
onChange={(e) => setWebUrl(e.target.value)}
size={'sm'}
/>
<Button isLoading={isFetching} onClick={() => onclickFetchingUrl()}>
</Button>
</Flex>
<Flex w={'100%'} alignItems={'center'} my={4}>
<Box flex={'0 0 70px'} mr={2}>
</Box>
<Input
placeholder="内容提示词。例如: Laf的介绍/关于gpt4的论文/一段长文本"
value={prompt}
onChange={(e) => setPrompt(e.target.value)}
size={'sm'}
/>
</Flex>
<Textarea
flex={'1 0 0'}
h={0}
w={'100%'}
placeholder="网站的内容"
maxLength={-1}
resize={'none'}
fontSize={'xs'}
whiteSpace={'pre-wrap'}
value={webText}
onChange={(e) => setWebText(e.target.value)}
/>
</ModalBody>
<Flex px={6} pt={2} pb={4}>
<Box flex={1}></Box>
<Button variant={'outline'} mr={3} onClick={onClose}>
</Button>
<Button
isLoading={isImporting}
isDisabled={webText === ''}
onClick={openConfirm(onclickImport)}
>
</Button>
</Flex>
</ModalContent>
<ConfirmChild />
</Modal>
);
};
export default SelectUrlModal;