feat: 根据url获取网站文本

This commit is contained in:
archer
2023-04-05 16:10:47 +08:00
parent 5feb2e19bf
commit dc329041f3
8 changed files with 278 additions and 25 deletions

View File

@@ -24,7 +24,7 @@ import { usePagination } from '@/hooks/usePagination';
import {
getModelDataList,
delOneModelData,
getModelSplitDataList,
getModelSplitDataListLen,
getExportDataList
} from '@/api/model';
import { DeleteIcon, RepeatIcon, EditIcon } from '@chakra-ui/icons';
@@ -36,6 +36,7 @@ import type { FormData as InputDataType } from './InputDataModal';
const InputModel = dynamic(() => import('./InputDataModal'));
const SelectFileModel = dynamic(() => import('./SelectFileModal'));
const SelectUrlModel = dynamic(() => import('./SelectUrlModal'));
const SelectJsonModel = dynamic(() => import('./SelectJsonModal'));
const ModelDataCard = ({ model }: { model: ModelSchema }) => {
@@ -63,14 +64,19 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
onOpen: onOpenSelectFileModal,
onClose: onCloseSelectFileModal
} = useDisclosure();
const {
isOpen: isOpenSelectUrlModal,
onOpen: onOpenSelectUrlModal,
onClose: onCloseSelectUrlModal
} = useDisclosure();
const {
isOpen: isOpenSelectJsonModal,
onOpen: onOpenSelectJsonModal,
onClose: onCloseSelectJsonModal
} = useDisclosure();
const { data: splitDataList, refetch } = useQuery(['getModelSplitDataList'], () =>
getModelSplitDataList(model._id)
const { data: splitDataLen, refetch } = useQuery(['getModelSplitDataList'], () =>
getModelSplitDataListLen(model._id)
);
const refetchData = useCallback(
@@ -143,14 +149,13 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
</MenuItem>
<MenuItem onClick={onOpenSelectFileModal}></MenuItem>
<MenuItem onClick={onOpenSelectUrlModal}></MenuItem>
<MenuItem onClick={onOpenSelectJsonModal}>JSON导入</MenuItem>
</MenuList>
</Menu>
</Flex>
{splitDataList && splitDataList.length > 0 && (
<Box fontSize={'xs'}>
{splitDataList.map((item) => item.textList).flat().length}...
</Box>
{!!(splitDataLen && splitDataLen > 0) && (
<Box fontSize={'xs'}>{splitDataLen}...</Box>
)}
<Box mt={4}>
<TableContainer minH={'500px'}>
@@ -236,6 +241,13 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
onSuccess={refetchData}
/>
)}
{isOpenSelectUrlModal && (
<SelectUrlModel
modelId={model._id}
onClose={onCloseSelectUrlModal}
onSuccess={refetchData}
/>
)}
{isOpenSelectJsonModal && (
<SelectJsonModel
modelId={model._id}

View File

@@ -19,7 +19,8 @@ import { encode } from 'gpt-token-utils';
import { useConfirm } from '@/hooks/useConfirm';
import { readTxtContent, readPdfContent, readDocContent } from '@/utils/tools';
import { useMutation } from '@tanstack/react-query';
import { postModelDataFileText } from '@/api/model';
import { postModelDataSplitData } from '@/api/model';
import { formatPrice } from '@/utils/user';
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
@@ -85,7 +86,7 @@ const SelectFileModal = ({
const { mutate, isLoading } = useMutation({
mutationFn: async () => {
if (!fileText) return;
await postModelDataFileText({
await postModelDataSplitData({
modelId,
text: fileText,
prompt: `下面是${prompt || '一段长文本'}`
@@ -126,10 +127,11 @@ const SelectFileModal = ({
</Button>
<Box mt={2} maxW={['100%', '70%']}>
{fileExtension} QA
tokens0.04/1k tokens
tokens
</Box>
<Box mt={2}>
{fileText.length} {encode(fileText).length} tokens
{encode(fileText).length} tokens {formatPrice(encode(fileText).length * 4)}
</Box>
<Flex w={'100%'} alignItems={'center'} my={4}>
<Box flex={'0 0 auto'} mr={2}>

View File

@@ -0,0 +1,168 @@
import React, { useState } from 'react';
import {
Box,
Flex,
Button,
Modal,
ModalOverlay,
ModalContent,
ModalHeader,
ModalCloseButton,
ModalBody,
Input,
Textarea
} from '@chakra-ui/react';
import { useToast } from '@/hooks/useToast';
import { customAlphabet } from 'nanoid';
import { encode } from 'gpt-token-utils';
import { useConfirm } from '@/hooks/useConfirm';
import { useMutation } from '@tanstack/react-query';
import { postModelDataSplitData, getWebContent } from '@/api/model';
import { formatPrice } from '@/utils/user';
// 12-character id generator over lowercase letters and digits.
// NOTE(review): `nanoid` is not referenced anywhere else in this file — it looks
// like a leftover and is a candidate for removal together with the
// `customAlphabet` import; confirm before deleting.
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
/**
 * Modal that imports model data from the text of a web page.
 *
 * Flow: the user enters a URL and fetches it via `getWebContent`; the returned
 * HTML is reduced to plain text and placed in an editable textarea; after a
 * confirmation dialog the text is submitted with `postModelDataSplitData`.
 *
 * @param onClose   Called to close the modal (close button, cancel button, and
 *                  after a successful import).
 * @param onSuccess Called after a successful import, right after `onClose`.
 * @param modelId   Id of the model the text is imported into.
 */
const SelectUrlModal = ({
  onClose,
  onSuccess,
  modelId
}: {
  onClose: () => void;
  onSuccess: () => void;
  modelId: string;
}) => {
  const { toast } = useToast();
  const [webUrl, setWebUrl] = useState('');
  const [webText, setWebText] = useState('');
  const [prompt, setPrompt] = useState(''); // hint describing the content
  const { openConfirm, ConfirmChild } = useConfirm({
    content: '确认导入该文件,需要一定时间进行拆解,该任务无法终止!如果余额不足,任务将被终止。'
  });

  // Tokenising a whole page is expensive; do it once per text change rather
  // than twice on every render (typing in the URL/prompt inputs re-renders too).
  const tokenCount = React.useMemo(() => encode(webText).length, [webText]);

  // Submit the (possibly hand-edited) page text for splitting.
  const { mutate: onclickImport, isLoading: isImporting } = useMutation({
    mutationFn: async () => {
      if (!webText) return;
      await postModelDataSplitData({
        modelId,
        text: webText,
        prompt: `下面是${prompt || '一段长文本'}`
      });
      toast({
        title: '导入数据成功,需要一段时间拆解和训练',
        status: 'success'
      });
      onClose();
      onSuccess();
    },
    onError(error) {
      console.log(error);
      toast({
        title: '导入数据失败',
        status: 'error'
      });
    }
  });

  // Fetch the page and reduce its HTML to a single line of plain text.
  const { mutate: onclickFetchingUrl, isLoading: isFetching } = useMutation({
    mutationFn: async () => {
      const url = webUrl.trim();
      if (!url) return;
      const res = await getWebContent(url);
      const htmlDoc = new DOMParser().parseFromString(res, 'text/html');
      // A DOMParser document is never rendered, so `innerText` behaves like
      // `textContent` and would include inline script/style source. Strip
      // those elements before reading the text.
      htmlDoc.querySelectorAll('script, style, noscript').forEach((el) => el.remove());
      // Normalise whitespace BEFORE the emptiness check so a whitespace-only
      // page is reported as "no data" instead of storing a lone space.
      const data = (htmlDoc.body?.textContent ?? '').replace(/\s+/g, ' ').trim();
      if (!data) {
        throw new Error('获取不到数据');
      }
      setWebText(data);
    },
    onError(error) {
      console.log(error);
      toast({
        status: 'error',
        title: '获取网站内容失败'
      });
    }
  });

  return (
    <Modal isOpen={true} onClose={onClose} isCentered>
      <ModalOverlay />
      <ModalContent maxW={'min(900px, 90vw)'} m={0} position={'relative'} h={'90vh'}>
        <ModalHeader></ModalHeader>
        <ModalCloseButton />
        <ModalBody
          display={'flex'}
          flexDirection={'column'}
          p={4}
          h={'100%'}
          alignItems={'center'}
          justifyContent={'center'}
          fontSize={'sm'}
        >
          <Box mt={2} maxW={['100%', '70%']}>
            QA tokens
          </Box>
          <Box mt={2}>
            {/* NOTE(review): the `* 4` factor is presumably the per-token price unit expected by formatPrice — confirm */}
            {tokenCount} tokens {formatPrice(tokenCount * 4)}
          </Box>
          <Flex w={'100%'} alignItems={'center'} my={4}>
            <Box flex={'0 0 70px'}></Box>
            <Input
              mx={2}
              placeholder="需要获取内容的地址。例如https://fastgpt.ahapocket.cn"
              value={webUrl}
              onChange={(e) => setWebUrl(e.target.value)}
              size={'sm'}
            />
            <Button isLoading={isFetching} onClick={() => onclickFetchingUrl()}>
            </Button>
          </Flex>
          <Flex w={'100%'} alignItems={'center'} my={4}>
            <Box flex={'0 0 70px'} mr={2}>
            </Box>
            <Input
              placeholder="内容提示词。例如: Laf的介绍/关于gpt4的论文/一段长文本"
              value={prompt}
              onChange={(e) => setPrompt(e.target.value)}
              size={'sm'}
            />
          </Flex>
          <Textarea
            flex={'1 0 0'}
            h={0}
            w={'100%'}
            placeholder="网站的内容"
            maxLength={-1}
            resize={'none'}
            fontSize={'xs'}
            whiteSpace={'pre-wrap'}
            value={webText}
            onChange={(e) => setWebText(e.target.value)}
          />
        </ModalBody>
        <Flex px={6} pt={2} pb={4}>
          <Box flex={1}></Box>
          <Button variant={'outline'} mr={3} onClick={onClose}>
          </Button>
          <Button
            isLoading={isImporting}
            isDisabled={webText === ''}
            onClick={openConfirm(onclickImport)}
          >
          </Button>
        </Flex>
      </ModalContent>
      <ConfirmChild />
    </Modal>
  );
};
export default SelectUrlModal;