perf: csv文件选择
This commit is contained in:
@@ -28,8 +28,8 @@ import {
|
||||
getExportDataList
|
||||
} from '@/api/model';
|
||||
import { DeleteIcon, RepeatIcon, EditIcon } from '@chakra-ui/icons';
|
||||
import { useToast } from '@/hooks/useToast';
|
||||
import { useLoading } from '@/hooks/useLoading';
|
||||
import { fileDownload } from '@/utils/file';
|
||||
import dynamic from 'next/dynamic';
|
||||
import { useMutation, useQuery } from '@tanstack/react-query';
|
||||
import type { FormData as InputDataType } from './InputDataModal';
|
||||
@@ -37,10 +37,10 @@ import type { FormData as InputDataType } from './InputDataModal';
|
||||
const InputModel = dynamic(() => import('./InputDataModal'));
|
||||
const SelectFileModel = dynamic(() => import('./SelectFileModal'));
|
||||
const SelectUrlModel = dynamic(() => import('./SelectUrlModal'));
|
||||
const SelectJsonModel = dynamic(() => import('./SelectJsonModal'));
|
||||
const SelectCsvModal = dynamic(() => import('./SelectCsvModal'));
|
||||
|
||||
const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
const { Loading } = useLoading();
|
||||
const { Loading, setIsLoading } = useLoading();
|
||||
|
||||
const {
|
||||
data: modelDataList,
|
||||
@@ -70,9 +70,9 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
onClose: onCloseSelectUrlModal
|
||||
} = useDisclosure();
|
||||
const {
|
||||
isOpen: isOpenSelectJsonModal,
|
||||
onOpen: onOpenSelectJsonModal,
|
||||
onClose: onCloseSelectJsonModal
|
||||
isOpen: isOpenSelectCsvModal,
|
||||
onOpen: onOpenSelectCsvModal,
|
||||
onClose: onCloseSelectCsvModal
|
||||
} = useDisclosure();
|
||||
|
||||
const { data: splitDataLen, refetch } = useQuery(['getModelSplitDataList'], () =>
|
||||
@@ -91,18 +91,18 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
const { mutate: onclickExport, isLoading: isLoadingExport } = useMutation({
|
||||
mutationFn: () => getExportDataList(model._id),
|
||||
onSuccess(res) {
|
||||
// 导出为文件
|
||||
const blob = new Blob([res], { type: 'application/json;charset=utf-8' });
|
||||
|
||||
// 创建下载链接
|
||||
const downloadLink = document.createElement('a');
|
||||
downloadLink.href = window.URL.createObjectURL(blob);
|
||||
downloadLink.download = `data.json`;
|
||||
|
||||
// 添加链接到页面并触发下载
|
||||
document.body.appendChild(downloadLink);
|
||||
downloadLink.click();
|
||||
document.body.removeChild(downloadLink);
|
||||
try {
|
||||
console.log(res);
|
||||
setIsLoading(true);
|
||||
fileDownload({
|
||||
text: res,
|
||||
type: 'text/csv',
|
||||
filename: 'data.csv'
|
||||
});
|
||||
} catch (error) {
|
||||
error;
|
||||
}
|
||||
setIsLoading(false);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -110,7 +110,7 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
<>
|
||||
<Flex>
|
||||
<Box fontWeight={'bold'} fontSize={'lg'} flex={1} mr={2}>
|
||||
模型数据: {total}组{' '}
|
||||
模型数据: {total}组
|
||||
<Box as={'span'} fontSize={'sm'}>
|
||||
(测试版本)
|
||||
</Box>
|
||||
@@ -128,7 +128,7 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
mr={2}
|
||||
size={'sm'}
|
||||
isLoading={isLoadingExport}
|
||||
title={'v2.3之前版本的数据无法导出'}
|
||||
title={'换行数据导出时,会进行格式转换'}
|
||||
onClick={() => onclickExport()}
|
||||
>
|
||||
导出
|
||||
@@ -148,9 +148,9 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
>
|
||||
手动输入
|
||||
</MenuItem>
|
||||
<MenuItem onClick={onOpenSelectFileModal}>文件QA拆分</MenuItem>
|
||||
<MenuItem onClick={onOpenSelectUrlModal}>网站内容QA拆分</MenuItem>
|
||||
<MenuItem onClick={onOpenSelectJsonModal}>JSON导入</MenuItem>
|
||||
<MenuItem onClick={onOpenSelectFileModal}>文本内容 QA 拆分</MenuItem>
|
||||
<MenuItem onClick={onOpenSelectUrlModal}>网站内容 QA 拆分</MenuItem>
|
||||
<MenuItem onClick={onOpenSelectCsvModal}>csv 问答对导入</MenuItem>
|
||||
</MenuList>
|
||||
</Menu>
|
||||
</Flex>
|
||||
@@ -248,10 +248,10 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
onSuccess={refetchData}
|
||||
/>
|
||||
)}
|
||||
{isOpenSelectJsonModal && (
|
||||
<SelectJsonModel
|
||||
{isOpenSelectCsvModal && (
|
||||
<SelectCsvModal
|
||||
modelId={model._id}
|
||||
onClose={onCloseSelectJsonModal}
|
||||
onClose={onCloseSelectCsvModal}
|
||||
onSuccess={refetchData}
|
||||
/>
|
||||
)}
|
||||
|
||||
@@ -13,10 +13,14 @@ import {
|
||||
import { useToast } from '@/hooks/useToast';
|
||||
import { useSelectFile } from '@/hooks/useSelectFile';
|
||||
import { useConfirm } from '@/hooks/useConfirm';
|
||||
import { readTxtContent } from '@/utils/tools';
|
||||
import { readCsvContent } from '@/utils/file';
|
||||
import { useMutation } from '@tanstack/react-query';
|
||||
import { postModelDataJsonData } from '@/api/model';
|
||||
import Markdown from '@/components/Markdown';
|
||||
import { useMarkdown } from '@/hooks/useMarkdown';
|
||||
import { fileDownload } from '@/utils/file';
|
||||
|
||||
const csvTemplate = `question,answer\n"什么是 laf","laf 是一个云函数开发平台……"\n"什么是 sealos","Sealos 是以 kubernetes 为内核的云操作系统发行版,可以……"`;
|
||||
|
||||
const SelectJsonModal = ({
|
||||
onClose,
|
||||
@@ -29,7 +33,7 @@ const SelectJsonModal = ({
|
||||
}) => {
|
||||
const [selecting, setSelecting] = useState(false);
|
||||
const { toast } = useToast();
|
||||
const { File, onOpen } = useSelectFile({ fileType: '.json', multiple: true });
|
||||
const { File, onOpen } = useSelectFile({ fileType: '.csv', multiple: true });
|
||||
const [fileData, setFileData] = useState<
|
||||
{ prompt: string; completion: string; vector?: number[] }[]
|
||||
>([]);
|
||||
@@ -41,21 +45,12 @@ const SelectJsonModal = ({
|
||||
async (e: File[]) => {
|
||||
setSelecting(true);
|
||||
try {
|
||||
const jsonData = (
|
||||
await Promise.all(e.map((item) => readTxtContent(item).then((text) => JSON.parse(text))))
|
||||
).flat();
|
||||
// check 文件类型
|
||||
for (let i = 0; i < jsonData.length; i++) {
|
||||
if (!jsonData[i]?.prompt || !jsonData[i]?.completion) {
|
||||
throw new Error('缺少 prompt 或 completion');
|
||||
}
|
||||
}
|
||||
|
||||
setFileData(jsonData);
|
||||
const data = await Promise.all(e.map((item) => readCsvContent(item)));
|
||||
console.log(data);
|
||||
} catch (error: any) {
|
||||
console.log(error);
|
||||
toast({
|
||||
title: error?.message || 'JSON文件格式有误',
|
||||
title: error?.message || 'csv 文件格式有误',
|
||||
status: 'error'
|
||||
});
|
||||
}
|
||||
@@ -84,34 +79,36 @@ const SelectJsonModal = ({
|
||||
}
|
||||
});
|
||||
|
||||
const { data: intro } = useMarkdown({ url: '/csvSelect.md' });
|
||||
|
||||
return (
|
||||
<Modal isOpen={true} onClose={onClose} isCentered>
|
||||
<ModalOverlay />
|
||||
<ModalContent maxW={'90vw'} position={'relative'} m={0} h={'90vh'}>
|
||||
<ModalHeader>JSON数据集</ModalHeader>
|
||||
<ModalHeader>csv 问答对导入</ModalHeader>
|
||||
<ModalCloseButton />
|
||||
|
||||
<ModalBody h={'100%'} display={['block', 'flex']} fontSize={'sm'} overflowY={'auto'}>
|
||||
<Box flex={'2 0 0'} w={['100%', 0]} mr={[0, 4]} mb={[4, 0]}>
|
||||
<Markdown
|
||||
source={`接受一个对象数组,每个对象必须包含 prompt 和 completion 格式,可以包含vector。prompt 代表问题,completion 代表回答的内容,可以多个问题对应一个回答,vector 为 prompt 的向量,如果没有讲有系统生成。例如:
|
||||
~~~json
|
||||
[
|
||||
{
|
||||
"prompt":"sealos是什么?\\n介绍下sealos\\nsealos有什么用",
|
||||
"completion":"sealos是xxxxxx"
|
||||
},
|
||||
{
|
||||
"prompt":"laf是什么?",
|
||||
"completion":"laf是xxxxxx",
|
||||
"vector":[-0.42,-0.4314314,0.43143]
|
||||
}
|
||||
]
|
||||
~~~`}
|
||||
/>
|
||||
<Box flex={'1 0 0'} w={['100%', 0]} mr={[0, 4]} mb={[4, 0]}>
|
||||
<Markdown source={intro} />
|
||||
<Box
|
||||
my={3}
|
||||
cursor={'pointer'}
|
||||
textDecoration={'underline'}
|
||||
color={'blue.600'}
|
||||
onClick={() =>
|
||||
fileDownload({
|
||||
text: csvTemplate,
|
||||
type: 'text/csv',
|
||||
filename: 'template.csv'
|
||||
})
|
||||
}
|
||||
>
|
||||
点击下载csv模板
|
||||
</Box>
|
||||
<Flex alignItems={'center'}>
|
||||
<Button isLoading={selecting} onClick={onOpen}>
|
||||
选择 JSON 数据集
|
||||
选择 csv 问答对
|
||||
</Button>
|
||||
|
||||
<Box ml={4}>一共 {fileData.length} 组数据</Box>
|
||||
@@ -16,7 +16,7 @@ import { useToast } from '@/hooks/useToast';
|
||||
import { useSelectFile } from '@/hooks/useSelectFile';
|
||||
import { encode } from 'gpt-token-utils';
|
||||
import { useConfirm } from '@/hooks/useConfirm';
|
||||
import { readTxtContent, readPdfContent, readDocContent } from '@/utils/tools';
|
||||
import { readTxtContent, readPdfContent, readDocContent } from '@/utils/file';
|
||||
import { useMutation } from '@tanstack/react-query';
|
||||
import { postModelDataSplitData } from '@/api/model';
|
||||
import { formatPrice } from '@/utils/user';
|
||||
|
||||
Reference in New Issue
Block a user