feat: self vector search

This commit is contained in:
archer
2023-08-26 18:24:16 +08:00
parent 13439c5183
commit be33794a5f
22 changed files with 151 additions and 71 deletions

View File

@@ -19,7 +19,6 @@ import { postKbDataFromList } from '@/api/plugins/kb';
import { splitText2Chunks } from '@/utils/file';
import { getErrText } from '@/utils/tools';
import { formatPrice } from '@/utils/user';
import { vectorModelList } from '@/store/static';
import MyIcon from '@/components/Icon';
import CloseIcon from '@/components/Icon/close';
import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete';
@@ -27,17 +26,20 @@ import MyTooltip from '@/components/MyTooltip';
import { QuestionOutlineIcon } from '@chakra-ui/icons';
import { TrainingModeEnum } from '@/constants/plugin';
import FileSelect, { type FileItemType } from './FileSelect';
import { useUserStore } from '@/store/user';
const fileExtension = '.txt, .doc, .docx, .pdf, .md';
const ChunkImport = ({ kbId }: { kbId: string }) => {
const model = vectorModelList[0]?.model || 'text-embedding-ada-002';
const unitPrice = vectorModelList[0]?.price || 0.2;
const { kbDetail } = useUserStore();
const vectorModel = kbDetail.vectorModel;
const unitPrice = vectorModel?.price || 0.2;
const theme = useTheme();
const router = useRouter();
const { toast } = useToast();
const [chunkLen, setChunkLen] = useState(500);
const [chunkLen, setChunkLen] = useState(vectorModel?.defaultToken || 300);
const [showRePreview, setShowRePreview] = useState(false);
const [files, setFiles] = useState<FileItemType[]>([]);
const [previewFile, setPreviewFile] = useState<FileItemType>();
@@ -205,24 +207,34 @@ const ChunkImport = ({ kbId }: { kbId: string }) => {
<QuestionOutlineIcon ml={1} />
</MyTooltip>
</Box>
<NumberInput
ml={4}
<Box
flex={1}
defaultValue={chunkLen}
min={300}
max={2000}
step={10}
onChange={(e) => {
setChunkLen(+e);
setShowRePreview(true);
css={{
'& > span': {
display: 'block'
}
}}
>
<NumberInputField />
<NumberInputStepper>
<NumberIncrementStepper />
<NumberDecrementStepper />
</NumberInputStepper>
</NumberInput>
<MyTooltip label={`范围: 100~${kbDetail.vectorModel.maxToken}`}>
<NumberInput
ml={4}
defaultValue={chunkLen}
min={100}
max={kbDetail.vectorModel.maxToken}
step={10}
onChange={(e) => {
setChunkLen(+e);
setShowRePreview(true);
}}
>
<NumberInputField />
<NumberInputStepper>
<NumberIncrementStepper />
<NumberDecrementStepper />
</NumberInputStepper>
</NumberInput>
</MyTooltip>
</Box>
</Flex>
{/* price */}
<Flex py={5} alignItems={'center'}>

View File

@@ -11,11 +11,13 @@ import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete';
import { TrainingModeEnum } from '@/constants/plugin';
import FileSelect, { type FileItemType } from './FileSelect';
import { useRouter } from 'next/router';
import { useUserStore } from '@/store/user';
const fileExtension = '.csv';
const CsvImport = ({ kbId }: { kbId: string }) => {
const model = vectorModelList[0]?.model;
const { kbDetail } = useUserStore();
const theme = useTheme();
const router = useRouter();
const { toast } = useToast();
@@ -37,13 +39,22 @@ const CsvImport = ({ kbId }: { kbId: string }) => {
mutationFn: async () => {
const chunks = files.map((file) => file.chunks).flat();
const filterChunks = chunks.filter((item) => item.q.length < kbDetail.vectorModel.maxToken);
if (filterChunks.length !== chunks.length) {
toast({
title: `${chunks.length - filterChunks.length}条数据超出长度,已被过滤`,
status: 'info'
});
}
// subsection import
let success = 0;
const step = 500;
for (let i = 0; i < chunks.length; i += step) {
for (let i = 0; i < filterChunks.length; i += step) {
const { insertLen } = await postKbDataFromList({
kbId,
data: chunks.slice(i, i + step),
data: filterChunks.slice(i, i + step),
mode: TrainingModeEnum.index
});

View File

@@ -1,4 +1,4 @@
import React from 'react';
import React, { useState } from 'react';
import { Box, Textarea, Button } from '@chakra-ui/react';
import { useForm } from 'react-hook-form';
import { useToast } from '@/hooks/useToast';
@@ -6,14 +6,18 @@ import { useRequest } from '@/hooks/useRequest';
import { getErrText } from '@/utils/tools';
import { postKbDataFromList } from '@/api/plugins/kb';
import { TrainingModeEnum } from '@/constants/plugin';
import { useUserStore } from '@/store/user';
type ManualFormType = { q: string; a: string };
const ManualImport = ({ kbId }: { kbId: string }) => {
const { kbDetail } = useUserStore();
const { register, handleSubmit, reset } = useForm({
defaultValues: { q: '', a: '' }
});
const { toast } = useToast();
const [qLen, setQLen] = useState(0);
const { mutate: onImportData, isLoading } = useRequest({
mutationFn: async (e: ManualFormType) => {
@@ -64,16 +68,22 @@ const ManualImport = ({ kbId }: { kbId: string }) => {
return (
<Box p={[4, 8]} h={'100%'} overflow={'overlay'}>
<Box display={'flex'} flexDirection={['column', 'row']}>
<Box flex={1} mr={[0, 4]} mb={[4, 0]} h={['50%', '100%']}>
<Box flex={1} mr={[0, 4]} mb={[4, 0]} h={['50%', '100%']} position={'relative'}>
<Box h={'30px'}>{'匹配的知识点'}</Box>
<Textarea
placeholder={'匹配的知识点。这部分内容会被搜索,请把控内容的质量。总和最多 3000 字。'}
maxLength={3000}
placeholder={`匹配的知识点。这部分内容会被搜索,请把控内容的质量。最多 ${kbDetail.vectorModel.maxToken} 字。`}
maxLength={kbDetail.vectorModel.maxToken}
h={['250px', '500px']}
{...register(`q`, {
required: true
required: true,
onChange(e) {
setQLen(e.target.value.length);
}
})}
/>
<Box position={'absolute'} color={'myGray.500'} right={5} bottom={3} zIndex={99}>
{qLen}
</Box>
</Box>
<Box flex={1} h={['50%', '100%']}>
<Box h={'30px'}></Box>

View File

@@ -154,7 +154,7 @@ const Info = (
<Box flex={['0 0 90px', '0 0 160px']} w={0}>
</Box>
<Box flex={[1, '0 0 300px']}>{getValues('vectorModelName')}</Box>
<Box flex={[1, '0 0 300px']}>{getValues('vectorModel').name}</Box>
</Flex>
<Flex mt={5} w={'100%'} alignItems={'center'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}>

View File

@@ -10,13 +10,15 @@ import InputDataModal, { type FormData } from './InputDataModal';
import { useGlobalStore } from '@/store/global';
import { getErrText } from '@/utils/tools';
import { useToast } from '@/hooks/useToast';
import { vectorModelList } from '@/store/static';
import { customAlphabet } from 'nanoid';
import MyTooltip from '@/components/MyTooltip';
import { QuestionOutlineIcon } from '@chakra-ui/icons';
import { useUserStore } from '@/store/user';
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
const Test = ({ kbId }: { kbId: string }) => {
const { kbDetail } = useUserStore();
const theme = useTheme();
const { toast } = useToast();
const { setLoading } = useGlobalStore();
@@ -31,7 +33,7 @@ const Test = ({ kbId }: { kbId: string }) => {
);
const { mutate, isLoading } = useRequest({
mutationFn: () => searchText({ model: vectorModelList[0].model, kbId, text: inputText.trim() }),
mutationFn: () => searchText({ kbId, text: inputText.trim() }),
onSuccess(res) {
const testItem = {
id: nanoid(),
@@ -75,12 +77,15 @@ const Test = ({ kbId }: { kbId: string }) => {
rows={6}
resize={'none'}
variant={'unstyled'}
maxLength={1000}
maxLength={kbDetail.vectorModel.maxToken}
placeholder="输入需要测试的文本"
value={inputText}
onChange={(e) => setInputText(e.target.value)}
/>
<Flex justifyContent={'flex-end'}>
<Flex alignItems={'center'} justifyContent={'flex-end'}>
<Box mr={3} color={'myGray.500'}>
{inputText.length}
</Box>
<Button isDisabled={inputText === ''} isLoading={isLoading} onClick={mutate}>
</Button>
@@ -177,6 +182,7 @@ const Test = ({ kbId }: { kbId: string }) => {
'repeat(1,1fr)',
'repeat(1,1fr)',
'repeat(1,1fr)',
'repeat(1,1fr)',
'repeat(2,1fr)'
]}
gridGap={4}

View File

@@ -165,12 +165,14 @@ const Detail = ({ kbId, currentTab }: { kbId: string; currentTab: `${TabEnum}` }
</Box>
)}
<Box flex={'1 0 0'} h={'100%'} pb={[4, 0]}>
{currentTab === TabEnum.data && <DataCard kbId={kbId} />}
{currentTab === TabEnum.import && <ImportData kbId={kbId} />}
{currentTab === TabEnum.test && <Test kbId={kbId} />}
{currentTab === TabEnum.info && <Info ref={InfoRef} kbId={kbId} form={form} />}
</Box>
{!!kbDetail._id && (
<Box flex={'1 0 0'} h={'100%'} pb={[4, 0]}>
{currentTab === TabEnum.data && <DataCard kbId={kbId} />}
{currentTab === TabEnum.import && <ImportData kbId={kbId} />}
{currentTab === TabEnum.test && <Test kbId={kbId} />}
{currentTab === TabEnum.info && <Info ref={InfoRef} kbId={kbId} form={form} />}
</Box>
)}
</Box>
</PageContainer>
);

View File

@@ -141,7 +141,7 @@ const Kb = () => {
</Box>
<Flex justifyContent={'flex-end'} alignItems={'center'} fontSize={'sm'}>
<MyIcon mr={1} name="kbTest" w={'12px'} />
<Box color={'myGray.500'}>{kb.vectorModelName}</Box>
<Box color={'myGray.500'}>{kb.vectorModel.name}</Box>
</Flex>
</Card>
))}