Feat: Images dataset collection (#4941)

* New pic (#4858)

* 更新数据集相关类型,添加图像文件ID和预览URL支持;优化数据集导入功能,新增图像数据集处理组件;修复部分国际化文本;更新文件上传逻辑以支持新功能。

* 与原先代码的差别

* 新增 V4.9.10 更新说明,支持 PG 设置`systemEnv.hnswMaxScanTuples`参数,优化 LLM stream 调用超时,修复全文检索多知识库排序问题。同时更新数据集索引,移除 datasetId 字段以简化查询。

* 更换成fileId_image逻辑,并增加训练队列匹配的逻辑

* 新增图片集合判断逻辑,优化预览URL生成流程,确保仅在数据集为图片集合时生成预览URL,并添加相关日志输出以便调试。

* Refactor Docker Compose configuration to comment out exposed ports for production environments, update image versions for pgvector, fastgpt, and mcp_server, and enhance Redis service with a health check. Additionally, standardize dataset collection labels in constants and improve internationalization strings across multiple languages.

* Enhance TrainingStates component by adding internationalization support for the imageParse training mode and update defaultCounts to include imageParse mode in trainingDetail API.

* Enhance dataset import context by adding additional steps for image dataset import process and improve internationalization strings for modal buttons in the useEditTitle hook.

* Update DatasetImportContext to conditionally render MyStep component based on data source type, improving the import process for non-image datasets.

* Refactor image dataset handling by improving internationalization strings, enhancing error messages, and streamlining the preview URL generation process.

* 图片上传到新建的 dataset_collection_images 表,逻辑跟随更改

* 修改了除了controller的其他部分问题

* 把图片数据集的逻辑整合到controller里面

* 补充i18n

* 补充i18n

* resolve评论:主要是上传逻辑的更改和组件复用

* 图片名称的图标显示

* 修改编译报错的命名问题

* 删除不需要的collectionid部分

* 多余文件的处理和改动一个删除按钮

* 除了loading和统一的imageId,其他都resolve掉的

* 处理图标报错

* 复用了MyPhotoView并采用全部替换的方式将imageFileId变成imageId

* 去除不必要文件修改

* 报错和字段修改

* 增加上传成功后删除临时文件的逻辑以及回退一些修改

* 删除path字段,将图片保存到gridfs内,并修改增删等操作的代码

* 修正编译错误

---------

Co-authored-by: archer <545436317@qq.com>

* perf: image dataset

* feat: insert image

* perf: image icon

* fix: training state

---------

Co-authored-by: Zhuangzai fa <143257420+ctrlz526@users.noreply.github.com>
This commit is contained in:
Archer
2025-06-03 16:30:59 +08:00
committed by archer
parent 9fb5d05865
commit 92c38d9d2f
104 changed files with 2341 additions and 693 deletions

View File

@@ -3,38 +3,28 @@ import { Skeleton, type ImageProps } from '@chakra-ui/react';
import CustomImage from '@fastgpt/web/components/common/Image/MyImage';
export const MyImage = (props: ImageProps) => {
const [isLoading, setIsLoading] = useState(true);
const [succeed, setSucceed] = useState(false);
return (
<Skeleton
minH="100px"
isLoaded={!isLoading}
fadeDuration={2}
display={'flex'}
justifyContent={'center'}
my={1}
>
<CustomImage
display={'inline-block'}
borderRadius={'md'}
alt={''}
fallbackSrc={'/imgs/errImg.png'}
fallbackStrategy={'onError'}
cursor={succeed ? 'pointer' : 'default'}
objectFit={'contain'}
loading={'lazy'}
onLoad={() => {
setIsLoading(false);
setSucceed(true);
}}
onError={() => setIsLoading(false)}
onClick={() => {
if (!succeed) return;
window.open(props.src, '_blank');
}}
{...props}
/>
</Skeleton>
<CustomImage
title={'Preview image'}
display={'inline-block'}
borderRadius={'md'}
alt={''}
fallbackSrc={'/imgs/errImg.png'}
fallbackStrategy={'onError'}
cursor={succeed ? 'pointer' : 'default'}
objectFit={'contain'}
loading={'lazy'}
onLoad={() => {
setSucceed(true);
}}
onClick={() => {
if (!succeed) return;
window.open(props.src, '_blank');
}}
{...props}
/>
);
};

View File

@@ -18,7 +18,7 @@ const NextHead = ({ title, icon, desc }: { title?: string; icon?: string; desc?:
name="viewport"
content="width=device-width,initial-scale=1.0,maximum-scale=1.0,minimum-scale=1.0,user-scalable=no, viewport-fit=cover"
/>
<meta httpEquiv="Content-Security-Policy" content="img-src * data:;" />
<meta httpEquiv="Content-Security-Policy" content="img-src * data: blob:;" />
{desc && <meta name="description" content={desc} />}
{icon && <link rel="icon" href={formatIcon} />}
</Head>

View File

@@ -240,7 +240,7 @@ const LexiconConfigModal = ({ appId, onClose }: { appId: string; onClose: () =>
onSuccess() {
setNewData(undefined);
},
errorToast: t('common:error.Create failed')
errorToast: t('common:create_failed')
}
);

View File

@@ -57,11 +57,12 @@ const QuoteList = React.memo(function QuoteList({
return {
...item,
q: currentFilterItem?.q || '',
a: currentFilterItem?.a || ''
a: currentFilterItem?.a || '',
imagePreivewUrl: currentFilterItem?.imagePreivewUrl
};
}
return { ...item, q: item.q || '', a: item.a || '' };
return { ...item, q: item.q || '' };
});
return processedData.sort((a, b) => {
@@ -87,6 +88,7 @@ const QuoteList = React.memo(function QuoteList({
<QuoteItem
quoteItem={item}
canViewSource={showRawSource}
canEditData={showRouteToDatasetDetail}
canEditDataset={showRouteToDatasetDetail}
{...RawSourceBoxProps}
/>

View File

@@ -81,7 +81,9 @@ const ResponseTags = ({
.map((item) => ({
sourceName: item.sourceName,
sourceId: item.sourceId,
icon: getSourceNameIcon({ sourceId: item.sourceId, sourceName: item.sourceName }),
icon: item.imageId
? 'core/dataset/imageFill'
: getSourceNameIcon({ sourceId: item.sourceId, sourceName: item.sourceName }),
collectionId: item.collectionId,
datasetId: item.datasetId
}));

View File

@@ -300,7 +300,7 @@ export const WholeResponseContent = ({
<Row label={t('chat:query_extension_result')} value={`${activeModule?.extensionResult}`} />
{activeModule.quoteList && activeModule.quoteList.length > 0 && (
<Row
label={t('common:core.chat.response.module quoteList')}
label={t('chat:search_results')}
rawDom={<QuoteList chatItemDataId={dataId} rawSearch={activeModule.quoteList} />}
/>
)}

View File

@@ -8,7 +8,11 @@ import { useTranslation } from 'next-i18next';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import dynamic from 'next/dynamic';
import MyBox from '@fastgpt/web/components/common/MyBox';
import { SearchScoreTypeEnum, SearchScoreTypeMap } from '@fastgpt/global/core/dataset/constants';
import {
DatasetCollectionTypeEnum,
SearchScoreTypeEnum,
SearchScoreTypeMap
} from '@fastgpt/global/core/dataset/constants';
import type { readCollectionSourceBody } from '@/pages/api/core/dataset/collection/read';
import Markdown from '@/components/Markdown';
@@ -88,11 +92,13 @@ export const formatScore = (score: ScoreItemType[]) => {
const QuoteItem = ({
quoteItem,
canViewSource,
canEditData,
canEditDataset,
...RawSourceBoxProps
}: {
quoteItem: SearchDataResponseItemType;
canViewSource?: boolean;
canEditData?: boolean;
canEditDataset?: boolean;
} & Omit<readCollectionSourceBody, 'collectionId'>) => {
const { t } = useTranslation();
@@ -206,7 +212,7 @@ const QuoteItem = ({
{...RawSourceBoxProps}
/>
<Box flex={1} />
{quoteItem.id && canEditDataset && (
{quoteItem.id && canEditData && (
<MyTooltip label={t('common:core.dataset.data.Edit')}>
<Box
className="hover-data"
@@ -238,12 +244,13 @@ const QuoteItem = ({
<Link
as={NextLink}
className="hover-data"
visibility={'hidden'}
display={'flex'}
alignItems={'center'}
visibility={'hidden'}
color={'primary.500'}
href={`/dataset/detail?datasetId=${quoteItem.datasetId}&currentTab=dataCard&collectionId=${quoteItem.collectionId}`}
>
{t('chat:to_dataset')}
{t('common:to_dataset')}
<MyIcon name={'common/rightArrowLight'} w={'10px'} />
</Link>
)}

View File

@@ -3,20 +3,22 @@ import { Box, type BoxProps } from '@chakra-ui/react';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import { useTranslation } from 'next-i18next';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils';
import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
import MyIcon from '@fastgpt/web/components/common/Icon';
import type { readCollectionSourceBody } from '@/pages/api/core/dataset/collection/read';
import type { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
type Props = BoxProps &
readCollectionSourceBody & {
collectionType?: DatasetCollectionTypeEnum;
sourceName?: string;
collectionId: string;
sourceId?: string;
canView?: boolean;
};
const RawSourceBox = ({
sourceId,
collectionType,
sourceName = '',
canView = true,
@@ -35,7 +37,10 @@ const RawSourceBox = ({
const canPreview = !!sourceId && canView;
const icon = useMemo(() => getSourceNameIcon({ sourceId, sourceName }), [sourceId, sourceName]);
const icon = useMemo(
() => getCollectionIcon({ type: collectionType, sourceId, name: sourceName }),
[collectionType, sourceId, sourceName]
);
const read = getCollectionSourceAndOpen({
collectionId,
appId,

View File

@@ -34,9 +34,11 @@ export type DatasetDataListItemType = {
_id: string;
datasetId: string;
collectionId: string;
q: string; // embedding content
a: string; // bonus content
q?: string;
a?: string;
imageId?: string;
imageSize?: number;
imagePreviewUrl?: string; //image preview url
chunkIndex?: number;
updated?: boolean;
// indexes: DatasetDataSchemaType['indexes'];
};

View File

@@ -50,7 +50,7 @@ const BackupImportModal = ({
maxCount={1}
fileType="csv"
selectFiles={selectFiles}
setSelectFiles={setSelectFiles}
setSelectFiles={(e) => setSelectFiles(e)}
/>
{/* File render */}
{selectFiles.length > 0 && (

View File

@@ -248,6 +248,26 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => {
});
}
},
...(feConfigs?.isPlus
? [
{
label: (
<Flex>
<MyIcon name={'image'} mr={2} w={'20px'} />
{t('dataset:core.dataset.Image collection')}
</Flex>
),
onClick: () =>
router.replace({
query: {
...router.query,
currentTab: TabEnum.import,
source: ImportDataSourceEnum.imageDataset
}
})
}
]
: []),
{
label: (
<Flex>
@@ -473,7 +493,10 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => {
name={editFolderData.name}
/>
)}
<EditCreateVirtualFileModal iconSrc={'modal/manualDataset'} closeBtnText={''} />
<EditCreateVirtualFileModal
iconSrc={'modal/manualDataset'}
closeBtnText={t('common:Cancel')}
/>
{isOpenFileSourceSelector && <FileSourceSelector onClose={onCloseFileSourceSelector} />}
{isOpenBackupImportModal && (
<BackupImportModal

View File

@@ -421,7 +421,7 @@ const AddTagToCollections = ({
() =>
collectionsList.map((item) => {
const collection = item.data;
const icon = getCollectionIcon(collection.type, collection.name);
const icon = getCollectionIcon({ type: collection.type, name: collection.name });
return {
id: collection._id,
tags: collection.tags,

View File

@@ -35,6 +35,8 @@ import { useForm } from 'react-hook-form';
import type { getTrainingDetailResponse } from '@/pages/api/core/dataset/collection/trainingDetail';
import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination';
import EmptyTip from '@fastgpt/web/components/common/EmptyTip';
import MyImage from '@/components/MyImage';
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
enum TrainingStatus {
NotStart = 'NotStart',
@@ -48,6 +50,8 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes
const { t } = useTranslation();
const isQA = trainingDetail?.trainingType === DatasetCollectionDataProcessModeEnum.qa;
const isImageParse =
trainingDetail?.trainingType === DatasetCollectionDataProcessModeEnum.imageParse;
/*
状态计算
@@ -102,6 +106,18 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes
status: TrainingStatus.Ready,
errorCount: 0
},
...(isImageParse
? [
{
errorCount: trainingDetail.errorCounts.imageParse,
label: t(TrainingProcess.parseImage.label),
statusText: getStatusText(TrainingModeEnum.imageParse),
status: getTrainingStatus({
errorCount: trainingDetail.errorCounts.imageParse
})
}
]
: []),
...(isQA
? [
{
@@ -114,7 +130,7 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes
}
]
: []),
...(trainingDetail?.advancedTraining.imageIndex && !isQA
...(trainingDetail?.advancedTraining.imageIndex
? [
{
errorCount: trainingDetail.errorCounts.image,
@@ -126,7 +142,7 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes
}
]
: []),
...(trainingDetail?.advancedTraining.autoIndexes && !isQA
...(trainingDetail?.advancedTraining.autoIndexes
? [
{
errorCount: trainingDetail.errorCounts.auto,
@@ -159,7 +175,17 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes
];
return states;
}, [trainingDetail, t, isQA]);
}, [
trainingDetail.queuedCounts,
trainingDetail.trainingCounts,
trainingDetail.errorCounts,
trainingDetail?.advancedTraining.imageIndex,
trainingDetail?.advancedTraining.autoIndexes,
trainingDetail.trainedCount,
t,
isImageParse,
isQA
]);
return (
<Flex flexDirection={'column'} gap={6}>
@@ -254,11 +280,20 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes
);
};
const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionId: string }) => {
const ErrorView = ({
datasetId,
collectionId,
refreshTrainingDetail
}: {
datasetId: string;
collectionId: string;
refreshTrainingDetail: () => void;
}) => {
const { t } = useTranslation();
const TrainingText = {
[TrainingModeEnum.chunk]: t('dataset:process.Vectorizing'),
[TrainingModeEnum.qa]: t('dataset:process.Get QA'),
[TrainingModeEnum.imageParse]: t('dataset:process.Image_Index'),
[TrainingModeEnum.image]: t('dataset:process.Image_Index'),
[TrainingModeEnum.auto]: t('dataset:process.Auto_Index')
};
@@ -308,6 +343,7 @@ const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionI
manual: true,
onSuccess: () => {
refreshList();
refreshTrainingDetail();
setEditChunk(undefined);
}
}
@@ -316,6 +352,7 @@ const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionI
if (editChunk) {
return (
<EditView
loading={updateLoading}
editChunk={editChunk}
onCancel={() => setEditChunk(undefined)}
onSave={(data) => {
@@ -401,10 +438,12 @@ const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionI
};
const EditView = ({
loading,
editChunk,
onCancel,
onSave
}: {
loading: boolean;
editChunk: getTrainingDataDetailResponse;
onCancel: () => void;
onSave: (data: { q: string; a?: string }) => void;
@@ -419,20 +458,41 @@ const EditView = ({
return (
<Flex flexDirection={'column'} gap={4}>
{editChunk?.a && <Box>q</Box>}
<MyTextarea {...register('q')} minH={editChunk?.a ? 200 : 400} />
{editChunk?.imagePreviewUrl && (
<Box>
<FormLabel>{t('file:image')}</FormLabel>
<Box w={'100%'} h={'200px'} border={'base'} borderRadius={'md'}>
<MyImage src={editChunk.imagePreviewUrl} alt="image" w={'100%'} h={'100%'} />
</Box>
</Box>
)}
<Box>
{(editChunk?.a || editChunk?.imagePreviewUrl) && (
<FormLabel>
{editChunk?.a
? t('common:dataset_data_input_chunk_content')
: t('common:dataset_data_input_q')}
</FormLabel>
)}
<MyTextarea
{...register('q', { required: true })}
minH={editChunk?.a || editChunk?.imagePreviewUrl ? 200 : 400}
/>
</Box>
{editChunk?.a && (
<>
<Box>a</Box>
<Box>
<Box>{t('common:dataset_data_input_a')}</Box>
<MyTextarea {...register('a')} minH={200} />
</>
</Box>
)}
<Flex justifyContent={'flex-end'} gap={4}>
<Button variant={'outline'} onClick={onCancel}>
{t('common:Cancel')}
</Button>
<Button variant={'primary'} onClick={handleSubmit(onSave)}>
{t('dataset:dataset.ReTrain')}
<Button isLoading={loading} variant={'primary'} onClick={handleSubmit(onSave)}>
{t('common:Confirm')}
</Button>
</Flex>
</Flex>
@@ -453,14 +513,15 @@ const TrainingStates = ({
const { t } = useTranslation();
const [tab, setTab] = useState<typeof defaultTab>(defaultTab);
const { data: trainingDetail, loading } = useRequest2(
() => getDatasetCollectionTrainingDetail(collectionId),
{
pollingInterval: 5000,
pollingWhenHidden: false,
manual: false
}
);
const {
data: trainingDetail,
loading,
runAsync: refreshTrainingDetail
} = useRequest2(() => getDatasetCollectionTrainingDetail(collectionId), {
pollingInterval: 5000,
pollingWhenHidden: false,
manual: false
});
const errorCounts = (Object.values(trainingDetail?.errorCounts || {}) as number[]).reduce(
(acc, count) => acc + count,
@@ -493,7 +554,13 @@ const TrainingStates = ({
]}
/>
{tab === 'states' && trainingDetail && <ProgressView trainingDetail={trainingDetail} />}
{tab === 'errors' && <ErrorView datasetId={datasetId} collectionId={collectionId} />}
{tab === 'errors' && (
<ErrorView
datasetId={datasetId}
collectionId={collectionId}
refreshTrainingDetail={refreshTrainingDetail}
/>
)}
</ModalBody>
</MyModal>
);

View File

@@ -75,7 +75,7 @@ const CollectionCard = () => {
const formatCollections = useMemo(
() =>
collections.map((collection) => {
const icon = getCollectionIcon(collection.type, collection.name);
const icon = getCollectionIcon({ type: collection.type, name: collection.name });
const status = (() => {
if (collection.hasError) {
return {

View File

@@ -1,5 +1,5 @@
import React, { useState, useMemo } from 'react';
import { Box, Card, IconButton, Flex, Button, useTheme } from '@chakra-ui/react';
import { Box, Card, IconButton, Flex, Button, useTheme, Image } from '@chakra-ui/react';
import {
getDatasetDataList,
delOneDatasetDataById,
@@ -24,28 +24,36 @@ import TagsPopOver from './CollectionCard/TagsPopOver';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyDivider from '@fastgpt/web/components/common/MyDivider';
import Markdown from '@/components/Markdown';
import { useMemoizedFn } from 'ahooks';
import { useBoolean, useMemoizedFn } from 'ahooks';
import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination';
import { TabEnum } from './NavBar';
import {
DatasetCollectionDataProcessModeEnum,
DatasetCollectionTypeEnum,
ImportDataSourceEnum
} from '@fastgpt/global/core/dataset/constants';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import TrainingStates from './CollectionCard/TrainingStates';
import { getTextValidLength } from '@fastgpt/global/common/string/utils';
import PopoverConfirm from '@fastgpt/web/components/common/MyPopover/PopoverConfirm';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import MyImage from '@fastgpt/web/components/common/Image/MyImage';
import dynamic from 'next/dynamic';
const InsertImagesModal = dynamic(() => import('./data/InsertImageModal'), {
ssr: false
});
const DataCard = () => {
const theme = useTheme();
const router = useRouter();
const { isPc } = useSystem();
const { collectionId = '', datasetId } = router.query as {
const { feConfigs } = useSystemStore();
const { collectionId = '' } = router.query as {
collectionId: string;
datasetId: string;
};
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const { feConfigs } = useSystemStore();
const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);
const { t } = useTranslation();
const [searchText, setSearchText] = useState('');
@@ -78,21 +86,30 @@ const DataCard = () => {
const [editDataId, setEditDataId] = useState<string>();
// get file info
const { data: collection } = useRequest2(() => getDatasetCollectionById(collectionId), {
refreshDeps: [collectionId],
manual: false,
onError: () => {
router.replace({
query: {
datasetId
}
});
// Get collection info
const { data: collection, runAsync: reloadCollection } = useRequest2(
() => getDatasetCollectionById(collectionId),
{
refreshDeps: [collectionId],
manual: false,
onError: () => {
router.replace({
query: {
datasetId
}
});
}
}
});
);
const canWrite = useMemo(() => datasetDetail.permission.hasWritePer, [datasetDetail]);
const [
isInsertImagesModalOpen,
{ setTrue: openInsertImagesModal, setFalse: closeInsertImagesModal }
] = useBoolean();
const isImageCollection = collection?.type === DatasetCollectionTypeEnum.images;
const onDeleteOneData = useMemoizedFn(async (dataId: string) => {
try {
await delOneDatasetDataById(dataId);
@@ -125,6 +142,7 @@ const DataCard = () => {
>
{collection?._id && (
<RawSourceBox
collectionType={collection.type}
collectionId={collection._id}
{...getCollectionSourceData(collection)}
fontSize={['sm', 'md']}
@@ -158,7 +176,7 @@ const DataCard = () => {
{t('dataset:retain_collection')}
</Button>
)}
{canWrite && (
{canWrite && !isImageCollection && (
<Button
ml={2}
variant={'whitePrimary'}
@@ -171,6 +189,17 @@ const DataCard = () => {
{t('common:dataset.Insert Data')}
</Button>
)}
{canWrite && isImageCollection && (
<Button
ml={2}
variant={'whitePrimary'}
size={['sm', 'md']}
isDisabled={!collection}
onClick={openInsertImagesModal}
>
{t('dataset:insert_images')}
</Button>
)}
</Flex>
<Box justifyContent={'center'} px={6} pos={'relative'} w={'100%'}>
<MyDivider my={'17px'} w={'100%'} />
@@ -236,7 +265,7 @@ const DataCard = () => {
userSelect={'none'}
boxShadow={'none'}
bg={index % 2 === 1 ? 'myGray.50' : 'blue.50'}
border={theme.borders.sm}
border={'sm'}
position={'relative'}
overflow={'hidden'}
_hover={{
@@ -282,17 +311,35 @@ const DataCard = () => {
</Flex>
{/* Data content */}
<Box wordBreak={'break-all'} fontSize={'sm'}>
<Markdown source={item.q} isDisabled />
{!!item.a && (
<>
<MyDivider />
<Markdown source={item.a} isDisabled />
</>
)}
</Box>
{item.imagePreviewUrl ? (
<Box display={['block', 'flex']} alignItems={'center'} gap={[3, 6]}>
<Box flex="1 0 0">
<MyImage
src={item.imagePreviewUrl}
alt={''}
w={'100%'}
h="100%"
maxH={'300px'}
objectFit="contain"
/>
</Box>
<Box flex="1 0 0" maxH={'300px'} overflow={'hidden'} fontSize="sm">
<Markdown source={item.q} isDisabled />
</Box>
</Box>
) : (
<Box wordBreak={'break-all'} fontSize={'sm'}>
<Markdown source={item.q} isDisabled />
{!!item.a && (
<>
<MyDivider />
<Markdown source={item.a} isDisabled />
</>
)}
</Box>
)}
{/* Mask */}
{/* Footer */}
<Flex
className="footer"
position={'absolute'}
@@ -317,17 +364,23 @@ const DataCard = () => {
py={1}
mr={2}
>
<MyIcon
bg={'white'}
color={'myGray.600'}
borderRadius={'sm'}
border={'1px'}
borderColor={'myGray.200'}
name="common/text/t"
w={'14px'}
mr={1}
/>
{getTextValidLength(item.q + item.a || '')}
{item.imageSize ? (
<>{formatFileSize(item.imageSize)}</>
) : (
<>
<MyIcon
bg={'white'}
color={'myGray.600'}
borderRadius={'sm'}
border={'1px'}
borderColor={'myGray.200'}
name="common/text/t"
w={'14px'}
mr={1}
/>
{getTextValidLength((item?.q || '') + (item?.a || ''))}
</>
)}
</Flex>
{canWrite && (
<PopoverConfirm
@@ -362,7 +415,7 @@ const DataCard = () => {
collectionId={collection._id}
dataId={editDataId}
onClose={() => setEditDataId(undefined)}
onSuccess={(data) => {
onSuccess={(data: any) => {
if (editDataId === '') {
refreshList();
return;
@@ -386,9 +439,16 @@ const DataCard = () => {
datasetId={datasetId}
defaultTab={'errors'}
collectionId={errorModalId}
onClose={() => setErrorModalId('')}
onClose={() => {
setErrorModalId('');
refreshList();
reloadCollection();
}}
/>
)}
{isInsertImagesModalOpen && (
<InsertImagesModal collectionId={collectionId} onClose={closeInsertImagesModal} />
)}
</MyBox>
);
};

View File

@@ -173,6 +173,20 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
{
title: t('dataset:import_confirm')
}
],
[ImportDataSourceEnum.imageDataset]: [
{
title: t('dataset:import_select_file')
},
{
title: t('dataset:import_param_setting')
},
{
title: t('dataset:import_data_preview')
},
{
title: t('dataset:import_confirm')
}
]
};
const steps = modeSteps[source];
@@ -238,20 +252,22 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
<Box flex={1} />
</Flex>
{/* step */}
<Box
mt={4}
mb={5}
px={3}
py={[2, 4]}
bg={'myGray.50'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
borderRadius={'md'}
>
<Box maxW={['100%', '900px']} mx={'auto'}>
<MyStep />
{source !== ImportDataSourceEnum.imageDataset && (
<Box
mt={4}
mb={5}
px={3}
py={[2, 4]}
bg={'myGray.50'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
borderRadius={'md'}
>
<Box maxW={['100%', '900px']} mx={'auto'}>
<MyStep />
</Box>
</Box>
</Box>
)}
{children}
</DatasetImportContext.Provider>
);

View File

@@ -7,15 +7,8 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import React, { type DragEvent, useCallback, useMemo, useState } from 'react';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { uploadFile2DB } from '@/web/common/file/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import type { ImportSourceItemType } from '@/web/core/dataset/type';
import { useContextSelector } from 'use-context-selector';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { getErrText } from '@fastgpt/global/common/error/utils';
export type SelectFileItemType = {
fileId: string;
@@ -26,23 +19,18 @@ export type SelectFileItemType = {
const FileSelector = ({
fileType,
selectFiles,
setSelectFiles,
onStartSelect,
onFinishSelect,
onSelectFiles,
...props
}: {
fileType: string;
selectFiles: ImportSourceItemType[];
setSelectFiles: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
onStartSelect: () => void;
onFinishSelect: () => void;
onSelectFiles: (e: SelectFileItemType[]) => any;
} & FlexProps) => {
const { t } = useTranslation();
const { toast } = useToast();
const { feConfigs } = useSystemStore();
const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);
const maxCount = feConfigs?.uploadFileMaxAmount || 1000;
const maxSize = (feConfigs?.uploadFileMaxSize || 1024) * 1024 * 1024;
@@ -65,90 +53,6 @@ const FileSelector = ({
'i'
);
const { runAsync: onSelectFile, loading: isLoading } = useRequest2(
async (files: SelectFileItemType[]) => {
{
await Promise.all(
files.map(async ({ fileId, file }) => {
try {
const { fileId: uploadFileId } = await uploadFile2DB({
file,
bucketName: BucketNameEnum.dataset,
data: {
datasetId
},
percentListen: (e) => {
setSelectFiles((state) =>
state.map((item) =>
item.id === fileId
? {
...item,
uploadedFileRate: item.uploadedFileRate
? Math.max(e, item.uploadedFileRate)
: e
}
: item
)
);
}
});
setSelectFiles((state) =>
state.map((item) =>
item.id === fileId
? {
...item,
dbFileId: uploadFileId,
isUploading: false,
uploadedFileRate: 100
}
: item
)
);
} catch (error) {
setSelectFiles((state) =>
state.map((item) =>
item.id === fileId
? {
...item,
isUploading: false,
errorMsg: getErrText(error)
}
: item
)
);
}
})
);
}
},
{
onBefore([files]) {
onStartSelect();
setSelectFiles((state) => {
const formatFiles = files.map<ImportSourceItemType>((selectFile) => {
const { fileId, file } = selectFile;
return {
id: fileId,
createStatus: 'waiting',
file,
sourceName: file.name,
sourceSize: formatFileSize(file.size),
icon: getFileIcon(file.name),
isUploading: true,
uploadedFileRate: 0
};
});
const results = formatFiles.concat(state).slice(0, maxCount);
return results;
});
},
onFinally() {
onFinishSelect();
}
}
);
const selectFileCallback = useCallback(
(files: SelectFileItemType[]) => {
if (selectFiles.length + files.length > maxCount) {
@@ -160,7 +64,7 @@ const FileSelector = ({
}
// size check
if (!maxSize) {
return onSelectFile(files);
return onSelectFiles(files);
}
const filterFiles = files.filter((item) => item.file.size <= maxSize);
@@ -171,9 +75,9 @@ const FileSelector = ({
});
}
return onSelectFile(filterFiles);
return onSelectFiles(filterFiles);
},
[t, maxCount, maxSize, onSelectFile, selectFiles.length, toast]
[t, maxCount, maxSize, onSelectFiles, selectFiles.length, toast]
);
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
@@ -278,7 +182,6 @@ const FileSelector = ({
return (
<MyBox
isLoading={isLoading}
display={'flex'}
flexDirection={'column'}
alignItems={'center'}

View File

@@ -71,7 +71,7 @@ const CustomTextInput = () => {
<Box maxW={['100%', '800px']}>
<Box display={['block', 'flex']} alignItems={'center'}>
<Box flex={'0 0 120px'} fontSize={'sm'}>
{t('common:core.dataset.collection.Collection name')}
{t('dataset:collection_name')}
</Box>
<Input
flex={'1 0 0'}
@@ -79,7 +79,7 @@ const CustomTextInput = () => {
{...register('name', {
required: true
})}
placeholder={t('common:core.dataset.collection.Collection name')}
placeholder={t('dataset:collection_name')}
bg={'myGray.50'}
/>
</Box>

View File

@@ -1,14 +1,20 @@
import React, { useCallback, useEffect, useMemo, useState } from 'react';
import { type ImportSourceItemType } from '@/web/core/dataset/type.d';
import { Box, Button } from '@chakra-ui/react';
import FileSelector from '../components/FileSelector';
import FileSelector, { type SelectFileItemType } from '../components/FileSelector';
import { useTranslation } from 'next-i18next';
import dynamic from 'next/dynamic';
import Loading from '@fastgpt/web/components/common/MyLoading';
import { RenderUploadFiles } from '../components/RenderFiles';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { uploadFile2DB } from '@/web/common/file/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'));
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
@@ -33,14 +39,16 @@ export default React.memo(FileLocal);
const SelectFile = React.memo(function SelectFile() {
const { t } = useTranslation();
const { goToNext, sources, setSources } = useContextSelector(DatasetImportContext, (v) => v);
const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);
const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
sources.map((source) => ({
isUploading: false,
...source
}))
);
const [uploading, setUploading] = useState(false);
const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]);
useEffect(() => {
@@ -53,15 +61,90 @@ const SelectFile = React.memo(function SelectFile() {
goToNext();
}, [goToNext]);
const { runAsync: onSelectFiles, loading: uploading } = useRequest2(
async (files: SelectFileItemType[]) => {
{
await Promise.all(
files.map(async ({ fileId, file }) => {
try {
const { fileId: uploadFileId } = await uploadFile2DB({
file,
bucketName: BucketNameEnum.dataset,
data: {
datasetId
},
percentListen: (e) => {
setSelectFiles((state) =>
state.map((item) =>
item.id === fileId
? {
...item,
uploadedFileRate: item.uploadedFileRate
? Math.max(e, item.uploadedFileRate)
: e
}
: item
)
);
}
});
setSelectFiles((state) =>
state.map((item) =>
item.id === fileId
? {
...item,
dbFileId: uploadFileId,
isUploading: false,
uploadedFileRate: 100
}
: item
)
);
} catch (error) {
setSelectFiles((state) =>
state.map((item) =>
item.id === fileId
? {
...item,
isUploading: false,
errorMsg: getErrText(error)
}
: item
)
);
}
})
);
}
},
{
onBefore([files]) {
setSelectFiles((state) => {
return [
...state,
...files.map<ImportSourceItemType>((selectFile) => {
const { fileId, file } = selectFile;
return {
id: fileId,
createStatus: 'waiting',
file,
sourceName: file.name,
sourceSize: formatFileSize(file.size),
icon: getFileIcon(file.name),
isUploading: true,
uploadedFileRate: 0
};
})
];
});
}
}
);
return (
<Box>
<FileSelector
fileType={fileType}
selectFiles={selectFiles}
setSelectFiles={setSelectFiles}
onStartSelect={() => setUploading(true)}
onFinishSelect={() => setUploading(false)}
/>
<FileSelector fileType={fileType} selectFiles={selectFiles} onSelectFiles={onSelectFiles} />
{/* render files */}
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} />

View File

@@ -0,0 +1,189 @@
import React, { useState } from 'react';
import { Box, Button, Flex, Input, Image } from '@chakra-ui/react';
import { useTranslation } from 'next-i18next';
import { useRouter } from 'next/router';
import { TabEnum } from '../../NavBar';
import { createImageDatasetCollection } from '@/web/core/dataset/image/api';
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
import { useForm } from 'react-hook-form';
import FileSelector, { type SelectFileItemType } from '../components/FileSelector';
import type { ImportSourceItemType } from '@/web/core/dataset/type';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { useContextSelector } from 'use-context-selector';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { DatasetImportContext } from '../Context';
import MyImage from '@fastgpt/web/components/common/Image/MyImage';
// Accepted image file extensions for the file picker
const fileType = '.jpg, .jpeg, .png';
// Entry component for the image-dataset import flow; all state and logic live in SelectFile
const ImageDataset = () => {
  return <SelectFile />;
};
export default React.memo(ImageDataset);
/**
 * Image-dataset creation form: pick a collection name, select image files
 * (previewed via local object URLs), then upload them all in one request.
 *
 * Fix: object URLs created with URL.createObjectURL are never released by the
 * browser until revoked — revoke them when a file is removed and after a
 * successful create, to avoid leaking memory.
 */
const SelectFile = React.memo(function SelectFile() {
  const { t } = useTranslation();
  const router = useRouter();
  const parentId = useContextSelector(DatasetImportContext, (v) => v.parentId);
  const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);
  const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>([]);
  const [uploadProgress, setUploadProgress] = useState(0);
  const { register, handleSubmit } = useForm({
    defaultValues: {
      name: ''
    }
  });
  // Append newly picked files. Each gets a local object URL (stored in `icon`)
  // so a thumbnail can be rendered before anything is uploaded.
  const onSelectFiles = (files: SelectFileItemType[]) => {
    setSelectFiles((pre) => {
      const formatFiles = Array.from(files).map<ImportSourceItemType>((item) => {
        const previewUrl = URL.createObjectURL(item.file);
        return {
          id: getNanoid(),
          createStatus: 'waiting',
          file: item.file,
          sourceName: item.file.name,
          icon: previewUrl
        };
      });
      return [...pre, ...formatFiles];
    });
  };
  // Remove one file and release its preview object URL (revoking twice is a
  // safe no-op, so this is fine even under React StrictMode double-invoke).
  const onRemoveFile = (index: number) => {
    setSelectFiles((prev) => {
      const target = prev[index];
      if (target?.icon) {
        URL.revokeObjectURL(target.icon);
      }
      return prev.filter((_, i) => i !== index);
    });
  };
  // Upload all selected images and create the image collection in one request.
  const { runAsync: onCreate, loading: creating } = useRequest2(
    async ({ name: collectionName }: { name: string }) => {
      return await createImageDatasetCollection({
        parentId,
        datasetId,
        collectionName,
        // Keep only entries that still hold a File object
        files: selectFiles.flatMap((item) => (item.file ? [item.file] : [])),
        onUploadProgress: setUploadProgress
      });
    },
    {
      manual: true,
      successToast: t('common:create_success'),
      onSuccess() {
        // Release all preview URLs before navigating away
        selectFiles.forEach((item) => {
          if (item.icon) {
            URL.revokeObjectURL(item.icon);
          }
        });
        router.replace({
          query: {
            datasetId: router.query.datasetId,
            currentTab: TabEnum.collectionCard
          }
        });
      }
    }
  );
  return (
    <Flex flexDirection={'column'} maxW={'850px'} mx={'auto'} mt={7}>
      <Flex alignItems="center" width="100%">
        <FormLabel required width={['100px', '140px']}>
          {t('dataset:collection_name')}
        </FormLabel>
        <Input
          flex="0 0 400px"
          bg="myGray.50"
          placeholder={t('dataset:collection_name')}
          {...register('name', { required: true })}
        />
      </Flex>
      <Flex mt={7} alignItems="flex-start" width="100%">
        <FormLabel required width={['100px', '140px']}>
          {t('common:core.dataset.collection.Collection raw text')}
        </FormLabel>
        <Box flex={'1 0 0'}>
          <Box>
            <FileSelector
              fileType={fileType}
              selectFiles={selectFiles}
              onSelectFiles={onSelectFiles}
            />
          </Box>
          {selectFiles.length > 0 && (
            <Flex flexWrap={'wrap'} gap={4} mt={3} width="100%">
              {selectFiles.map((file, index) => (
                <Box
                  key={index}
                  w="100px"
                  h={'100px'}
                  position={'relative'}
                  _hover={{
                    '.close-icon': { display: 'block' }
                  }}
                  bg={'myGray.50'}
                  borderRadius={'md'}
                  border={'base'}
                  borderStyle={'dashed'}
                  p={1}
                >
                  <MyImage
                    src={file.icon}
                    w="100%"
                    h={'100%'}
                    objectFit={'contain'}
                    alt={file.sourceName}
                  />
                  <MyIcon
                    name={'closeSolid'}
                    w={'1rem'}
                    h={'1rem'}
                    color={'myGray.700'}
                    cursor={'pointer'}
                    _hover={{ color: 'red.500' }}
                    position={'absolute'}
                    rounded={'full'}
                    bg={'white'}
                    right={'-8px'}
                    top={'-2px'}
                    onClick={() => onRemoveFile(index)}
                    className="close-icon"
                    display={['', 'none']}
                    zIndex={10}
                  />
                </Box>
              ))}
            </Flex>
          )}
        </Box>
      </Flex>
      <Flex width="100%" justifyContent="flex-end" mt="9">
        <Button isDisabled={selectFiles.length === 0 || creating} onClick={handleSubmit(onCreate)}>
          {creating ? (
            uploadProgress >= 100 ? (
              <Box>{t('dataset:images_creating')}</Box>
            ) : (
              <Box>{t('dataset:uploading_progress', { num: uploadProgress })}</Box>
            )
          ) : selectFiles.length > 0 ? (
            <>
              <Box>
                {t('dataset:confirm_import_images', {
                  num: selectFiles.length
                })}
              </Box>
            </>
          ) : (
            <Box>{t('common:comfirn_create')}</Box>
          )}
        </Button>
      </Flex>
    </Flex>
  );
});

View File

@@ -37,7 +37,7 @@ const ReTraining = () => {
apiFileId: collection.apiFileId,
createStatus: 'waiting',
icon: getCollectionIcon(collection.type, collection.name),
icon: getCollectionIcon({ type: collection.type, name: collection.name }),
id: collection._id,
isUploading: false,
sourceName: collection.name,

View File

@@ -11,6 +11,7 @@ const FileCustomText = dynamic(() => import('./diffSource/FileCustomText'));
const ExternalFileCollection = dynamic(() => import('./diffSource/ExternalFile'));
const APIDatasetCollection = dynamic(() => import('./diffSource/APIDataset'));
const ReTraining = dynamic(() => import('./diffSource/ReTraining'));
const ImageDataset = dynamic(() => import('./diffSource/ImageDataset'));
const ImportDataset = () => {
const importSource = useContextSelector(DatasetImportContext, (v) => v.importSource);
@@ -22,6 +23,8 @@ const ImportDataset = () => {
if (importSource === ImportDataSourceEnum.fileCustom) return FileCustomText;
if (importSource === ImportDataSourceEnum.externalFile) return ExternalFileCollection;
if (importSource === ImportDataSourceEnum.apiDataset) return APIDatasetCollection;
if (importSource === ImportDataSourceEnum.imageDataset) return ImageDataset;
return null;
}, [importSource]);
return ImportComponent ? (

View File

@@ -1,37 +1,39 @@
import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import { Box, Flex, Button, Textarea, ModalFooter, HStack, VStack } from '@chakra-ui/react';
import { type UseFormRegister, useFieldArray, useForm } from 'react-hook-form';
import { Box, Flex, Button, Textarea, ModalFooter, HStack, VStack, Image } from '@chakra-ui/react';
import type { UseFormRegister } from 'react-hook-form';
import { useFieldArray, useForm } from 'react-hook-form';
import {
postInsertData2Dataset,
putDatasetDataById,
getDatasetCollectionById,
getDatasetDataItemById
} from '@/web/core/dataset/api';
import { useToast } from '@fastgpt/web/hooks/useToast';
import MyIcon from '@fastgpt/web/components/common/Icon';
import MyModal from '@fastgpt/web/components/common/MyModal';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import { useTranslation } from 'next-i18next';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils';
import { type DatasetDataIndexItemType } from '@fastgpt/global/core/dataset/type';
import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
import type { DatasetDataIndexItemType } from '@fastgpt/global/core/dataset/type';
import DeleteIcon from '@fastgpt/web/components/common/Icon/delete';
import { defaultCollectionDetail } from '@/web/core/dataset/constants';
import MyBox from '@fastgpt/web/components/common/MyBox';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import styles from './styles.module.scss';
import {
DatasetDataIndexTypeEnum,
getDatasetIndexMapData
} from '@fastgpt/global/core/dataset/data/constants';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import FillRowTabs from '@fastgpt/web/components/common/Tabs/FillRowTabs';
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
import MyIconButton from '@fastgpt/web/components/common/Icon/button';
import MyImage from '@/components/MyImage/index';
export type InputDataType = {
q: string;
a: string;
imagePreivewUrl?: string;
indexes: (Omit<DatasetDataIndexItemType, 'dataId'> & {
dataId?: string; // pg data id
fold: boolean;
@@ -40,7 +42,8 @@ export type InputDataType = {
enum TabEnum {
chunk = 'chunk',
qa = 'qa'
qa = 'qa',
image = 'image'
}
const InputDataModal = ({
@@ -52,17 +55,16 @@ const InputDataModal = ({
}: {
collectionId: string;
dataId?: string;
defaultValue?: { q: string; a?: string };
defaultValue?: { q?: string; a?: string; imagePreivewUrl?: string };
onClose: () => void;
onSuccess: (data: InputDataType & { dataId: string }) => void;
}) => {
const { t } = useTranslation();
const { toast } = useToast();
const { embeddingModelList, defaultModels } = useSystemStore();
const [currentTab, setCurrentTab] = useState(TabEnum.chunk);
const [currentTab, setCurrentTab] = useState<TabEnum>();
const { register, handleSubmit, reset, control } = useForm<InputDataType>();
const { register, handleSubmit, reset, control, watch } = useForm<InputDataType>();
const {
fields: indexes,
prepend: prependIndexes,
@@ -72,16 +74,24 @@ const InputDataModal = ({
control,
name: 'indexes'
});
const imagePreivewUrl = watch('imagePreivewUrl');
const { data: collection = defaultCollectionDetail } = useRequest2(
() => {
return getDatasetCollectionById(collectionId);
},
() => getDatasetCollectionById(collectionId),
{
manual: false,
refreshDeps: [collectionId]
refreshDeps: [collectionId],
onSuccess(res) {
if (res.type === DatasetCollectionTypeEnum.images) {
setCurrentTab(TabEnum.image);
} else {
setCurrentTab(TabEnum.chunk);
}
}
}
);
// Get data
const { loading: isFetchingData } = useRequest2(
async () => {
if (dataId) return getDatasetDataItemById(dataId);
@@ -93,8 +103,9 @@ const InputDataModal = ({
onSuccess(res) {
if (res) {
reset({
q: res.q,
a: res.a,
q: res.q || '',
a: res.a || '',
imagePreivewUrl: res.imagePreivewUrl,
indexes: res.indexes.map((item) => ({
...item,
fold: true
@@ -102,54 +113,32 @@ const InputDataModal = ({
});
} else if (defaultValue) {
reset({
q: defaultValue.q,
a: defaultValue.a
q: defaultValue.q || '',
a: defaultValue.a || '',
imagePreivewUrl: defaultValue.imagePreivewUrl
});
}
if (res?.a || defaultValue?.a) {
setCurrentTab(TabEnum.qa);
}
},
onError(err) {
toast({
status: 'error',
title: t(getErrText(err) as any)
});
onClose();
}
}
);
const maxToken = useMemo(() => {
const vectorModel =
embeddingModelList.find((item) => item.model === collection.dataset.vectorModel) ||
defaultModels.embedding;
return vectorModel?.maxToken || 3000;
}, [collection.dataset.vectorModel, defaultModels.embedding, embeddingModelList]);
// import new data
// Import new data
const { runAsync: sureImportData, loading: isImporting } = useRequest2(
async (e: InputDataType) => {
if (!e.q) {
return Promise.reject(t('common:dataset.data.input is empty'));
}
const totalLength = e.q.length + (e.a?.length || 0);
if (totalLength >= maxToken * 1.4) {
return Promise.reject(t('common:core.dataset.data.Too Long'));
}
const data = { ...e };
const dataId = await postInsertData2Dataset({
const postData: any = {
collectionId: collection._id,
q: e.q,
a: currentTab === TabEnum.qa ? e.a : '',
// Contains no default index
indexes: e.indexes?.filter((item) => !!item.text?.trim())
});
indexes: e.indexes.filter((item) => !!item.text?.trim())
};
const dataId = await postInsertData2Dataset(postData);
return {
...data,
@@ -166,23 +155,26 @@ const InputDataModal = ({
a: '',
indexes: []
});
onSuccess(e);
},
errorToast: t('common:error.unKnow')
errorToast: t('dataset:common.error.unKnow')
}
);
// update
// Update data
const { runAsync: onUpdateData, loading: isUpdating } = useRequest2(
async (e: InputDataType) => {
if (!dataId) return Promise.reject(t('common:error.unKnow'));
await putDatasetDataById({
const updateData: any = {
dataId,
q: e.q,
a: currentTab === TabEnum.qa ? e.a : '',
indexes: e.indexes.filter((item) => !!item.text?.trim())
});
};
await putDatasetDataById(updateData);
return {
dataId,
@@ -202,10 +194,18 @@ const InputDataModal = ({
const isLoading = isFetchingData;
const icon = useMemo(
() => getSourceNameIcon({ sourceName: collection.sourceName, sourceId: collection.sourceId }),
() => getCollectionIcon({ type: collection.type, name: collection.sourceName }),
[collection]
);
const maxToken = useMemo(() => {
const vectorModel =
embeddingModelList.find((item) => item.model === collection.dataset.vectorModel) ||
defaultModels.embedding;
return vectorModel?.maxToken || 2000;
}, [collection.dataset.vectorModel, defaultModels.embedding, embeddingModelList]);
return (
<MyModal
isOpen={true}
@@ -243,17 +243,19 @@ const InputDataModal = ({
>
{/* Tab */}
<Box px={[5, '3.25rem']}>
<FillRowTabs
list={[
{ label: t('common:dataset_data_input_chunk'), value: TabEnum.chunk },
{ label: t('common:dataset_data_input_qa'), value: TabEnum.qa }
]}
py={1}
value={currentTab}
onChange={(e) => {
setCurrentTab(e);
}}
/>
{(currentTab === TabEnum.chunk || currentTab === TabEnum.qa) && (
<FillRowTabs
list={[
{ label: t('common:dataset_data_input_chunk'), value: TabEnum.chunk },
{ label: t('common:dataset_data_input_qa'), value: TabEnum.qa }
]}
py={1}
value={currentTab}
onChange={(e) => {
setCurrentTab(e);
}}
/>
)}
</Box>
<Flex flex={'1 0 0'} h={['auto', '0']} gap={6} flexDir={['column', 'row']} px={[5, '0']}>
@@ -268,45 +270,64 @@ const InputDataModal = ({
w={['100%', 0]}
overflow={['unset', 'auto']}
>
<Flex flexDir={'column'} h={'100%'}>
<FormLabel required mb={1} h={'30px'}>
{currentTab === TabEnum.chunk
? t('common:dataset_data_input_chunk_content')
: t('common:dataset_data_input_q')}
</FormLabel>
<Textarea
resize={'none'}
placeholder={t('common:dataset_data_import_q_placeholder', { maxToken })}
className={styles.scrollbar}
maxLength={maxToken}
flex={'1 0 0'}
tabIndex={1}
_focus={{
borderColor: 'primary.500',
boxShadow: '0px 0px 0px 2.4px rgba(51, 112, 255, 0.15)',
bg: 'white'
}}
bg={'myGray.25'}
borderRadius={'md'}
borderColor={'myGray.200'}
{...register(`q`, {
required: true
})}
/>
<Flex flexDir={'column'} flex={'1 0 0'} h={0}>
{currentTab === TabEnum.image && (
<>
<FormLabel required mb={1} h={'30px'}>
{t('file:image')}
</FormLabel>
<Box flex={'1 0 0'} h={0} w="100%">
<Box height="100%" position="relative" border="base" borderRadius={'md'} p={1}>
<MyImage
src={imagePreivewUrl}
h="100%"
w="100%"
objectFit="contain"
alt={t('file:Image_Preview')}
/>
</Box>
</Box>
</>
)}
{(currentTab === TabEnum.chunk || currentTab === TabEnum.qa) && (
<>
<FormLabel required mb={1} h={'30px'}>
{currentTab === TabEnum.chunk
? t('common:dataset_data_input_chunk_content')
: t('common:dataset_data_input_q')}
</FormLabel>
<Textarea
resize={'none'}
className={styles.scrollbar}
flex={'1 0 0'}
tabIndex={1}
_focus={{
borderColor: 'primary.500',
boxShadow: '0px 0px 0px 2.4px rgba(51, 112, 255, 0.15)',
bg: 'white'
}}
bg={'myGray.25'}
borderRadius={'md'}
borderColor={'myGray.200'}
{...register(`q`, {
required: true
})}
/>
</>
)}
</Flex>
{currentTab === TabEnum.qa && (
<Flex flexDir={'column'} h={'100%'}>
<Flex flexDir={'column'} flex={'1 0 0'}>
<FormLabel required mb={1}>
{t('common:dataset_data_input_a')}
</FormLabel>
<Textarea
resize={'none'}
placeholder={t('common:dataset_data_import_q_placeholder', { maxToken })}
className={styles.scrollbar}
flex={'1 0 0'}
tabIndex={1}
bg={'myGray.25'}
maxLength={maxToken}
borderRadius={'md'}
border={'1.5px solid '}
borderColor={'myGray.200'}
@@ -314,6 +335,27 @@ const InputDataModal = ({
/>
</Flex>
)}
{currentTab === TabEnum.image && (
<Flex flexDir={'column'} flex={'1 0 0'}>
<FormLabel required mb={1}>
{t('file:image_description')}
</FormLabel>
<Textarea
resize={'none'}
placeholder={t('file:image_description_tip')}
className={styles.scrollbar}
flex={'1 0 0'}
tabIndex={1}
bg={'myGray.25'}
borderRadius={'md'}
border={'1.5px solid '}
borderColor={'myGray.200'}
{...register('q', {
required: true
})}
/>
</Flex>
)}
</Flex>
{/* Index */}
<Box

View File

@@ -9,7 +9,8 @@ import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
import {
DatasetCollectionDataProcessModeMap,
DatasetCollectionTypeMap
DatasetCollectionTypeMap,
DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
import MyIcon from '@fastgpt/web/components/common/Icon';
@@ -38,6 +39,7 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
manual: false
}
);
const metadataList = useMemo<{ label?: string; value?: any }[]>(() => {
if (!collection) return [];
@@ -49,13 +51,17 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
value: t(DatasetCollectionTypeMap[collection.type]?.name as any)
},
{
label: t('common:core.dataset.collection.metadata.source name'),
label: t('dataset:collection_name'),
value: collection.file?.filename || collection?.rawLink || collection?.name
},
{
label: t('common:core.dataset.collection.metadata.source size'),
value: collection.file ? formatFileSize(collection.file.length) : '-'
},
...(collection.file
? [
{
label: t('common:core.dataset.collection.metadata.source size'),
value: formatFileSize(collection.file.length)
}
]
: []),
{
label: t('common:core.dataset.collection.metadata.Createtime'),
value: formatTime2YMDHM(collection.createTime)
@@ -64,18 +70,30 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
label: t('common:core.dataset.collection.metadata.Updatetime'),
value: formatTime2YMDHM(collection.updateTime)
},
{
label: t('dataset:collection_metadata_custom_pdf_parse'),
value: collection.customPdfParse ? 'Yes' : 'No'
},
{
label: t('common:core.dataset.collection.metadata.Raw text length'),
value: collection.rawTextLength ?? '-'
},
{
label: t('dataset:collection.training_type'),
value: t(DatasetCollectionDataProcessModeMap[collection.trainingType]?.label as any)
},
...(collection.customPdfParse !== undefined
? [
{
label: t('dataset:collection_metadata_custom_pdf_parse'),
value: collection.customPdfParse ? 'Yes' : 'No'
}
]
: []),
...(collection.rawTextLength !== undefined
? [
{
label: t('common:core.dataset.collection.metadata.Raw text length'),
value: collection.rawTextLength
}
]
: []),
...(DatasetCollectionDataProcessModeMap[collection.trainingType]
? [
{
label: t('dataset:collection.training_type'),
value: t(DatasetCollectionDataProcessModeMap[collection.trainingType]?.label as any)
}
]
: []),
...(collection.imageIndex !== undefined
? [
{
@@ -92,7 +110,7 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
}
]
: []),
...(collection.chunkSize
...(collection.chunkSize !== undefined
? [
{
label: t('dataset:chunk_size'),
@@ -100,7 +118,7 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
}
]
: []),
...(collection.indexSize
...(collection.indexSize !== undefined
? [
{
label: t('dataset:index_size'),
@@ -108,7 +126,7 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
}
]
: []),
...(webSelector
...(webSelector !== undefined
? [
{
label: t('common:core.dataset.collection.metadata.Web page selector'),
@@ -116,16 +134,14 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
}
]
: []),
{
...(collection.tags
? [
{
label: t('dataset:collection_tags'),
value: collection.tags?.join(', ') || '-'
}
]
: [])
}
...(collection.tags
? [
{
label: t('dataset:collection_tags'),
value: collection.tags?.join(', ') || '-'
}
]
: [])
];
}, [collection, t]);

View File

@@ -456,7 +456,7 @@ const TestResults = React.memo(function TestResults({
<Box mt={1} gap={4}>
{datasetTestItem?.results.map((item, index) => (
<Box key={item.id} p={3} borderRadius={'lg'} bg={'myGray.100'} _notLast={{ mb: 2 }}>
<QuoteItem quoteItem={item} canViewSource />
<QuoteItem quoteItem={item} canViewSource canEditData />
</Box>
))}
</Box>

View File

@@ -25,7 +25,7 @@ const FileSelector = ({
}: {
fileType: string;
selectFiles: SelectFileItemType[];
setSelectFiles: React.Dispatch<React.SetStateAction<SelectFileItemType[]>>;
setSelectFiles: (files: SelectFileItemType[]) => void;
maxCount?: number;
} & FlexProps) => {
const { t } = useTranslation();
@@ -62,11 +62,11 @@ const FileSelector = ({
name: file.name,
size: formatFileSize(file.size)
}));
setSelectFiles((state) => {
return [...fileList, ...state].slice(0, maxCount);
});
const newFiles = [...fileList, ...selectFiles].slice(0, maxCount);
setSelectFiles(newFiles);
},
[maxCount, setSelectFiles]
[maxCount, selectFiles, setSelectFiles]
);
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {

View File

@@ -0,0 +1,138 @@
import MyModal from '@fastgpt/web/components/common/MyModal';
import React, { useState } from 'react';
import { useTranslation } from 'next-i18next';
import { Box, Button, Flex, ModalBody, ModalFooter } from '@chakra-ui/react';
import FileSelector, { type SelectFileItemType } from '../components/FileSelector';
import MyImage from '@/components/MyImage';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { insertImagesToCollection } from '@/web/core/dataset/image/api';
// Accepted image file extensions for the file picker
const fileType = '.jpg, .jpeg, .png';
// Selected file plus a local object URL used for the thumbnail preview
type MySelectFileItemType = SelectFileItemType & { previewUrl: string };
/**
 * Modal for appending extra images to an existing image collection.
 * Selected files are previewed via local object URLs and uploaded in bulk.
 *
 * Fix: object URLs created with URL.createObjectURL are never released until
 * revoked — revoke them when a file is removed and after a successful insert,
 * to avoid leaking memory.
 */
const InsertImageModal = ({
  collectionId,
  onClose
}: {
  collectionId: string;
  onClose: () => void;
}) => {
  const { t } = useTranslation();
  const [selectFiles, setSelectFiles] = useState<MySelectFileItemType[]>([]);
  // Append newly picked files, attaching a local object URL for the thumbnail.
  // NOTE(review): FileSelector's `setSelectFiles` contract appears to pass the
  // full merged list, while this handler appends — verify files are not
  // duplicated when selecting in multiple rounds.
  const onSelectFiles = (files: SelectFileItemType[]) => {
    setSelectFiles((pre) => {
      const formatFiles = Array.from(files).map<MySelectFileItemType>((item) => {
        const previewUrl = URL.createObjectURL(item.file);
        return {
          ...item,
          previewUrl
        };
      });
      return [...pre, ...formatFiles];
    });
  };
  // Remove one file and release its preview object URL (revoking twice is a
  // safe no-op, so this is fine even under React StrictMode double-invoke).
  const onRemoveFile = (index: number) => {
    setSelectFiles((prev) => {
      const target = prev[index];
      if (target) {
        URL.revokeObjectURL(target.previewUrl);
      }
      return prev.filter((_, i) => i !== index);
    });
  };
  const [uploadProgress, setUploadProgress] = useState(0);
  // Upload all selected images into the target collection.
  const { runAsync: onInsertImages, loading: inserting } = useRequest2(
    async () => {
      return await insertImagesToCollection({
        collectionId,
        // Keep only entries that still hold a File object
        files: selectFiles.flatMap((item) => (item.file ? [item.file] : [])),
        onUploadProgress: setUploadProgress
      });
    },
    {
      manual: true,
      successToast: t('dataset:insert_images_success'),
      onSuccess() {
        // Release all preview URLs before closing the modal
        selectFiles.forEach((item) => URL.revokeObjectURL(item.previewUrl));
        onClose();
      }
    }
  );
  return (
    <MyModal
      isOpen
      iconSrc="core/dataset/imageFill"
      title={t('dataset:insert_images')}
      maxW={['90vw', '605px']}
    >
      <ModalBody userSelect={'none'}>
        <Box>
          <FileSelector
            fileType={fileType}
            selectFiles={selectFiles}
            setSelectFiles={onSelectFiles}
          />
        </Box>
        {selectFiles.length > 0 && (
          <Flex flexWrap={'wrap'} gap={3} mt={3} width="100%">
            {selectFiles.map((file, index) => (
              <Box
                key={index}
                w="100px"
                h={'100px'}
                position={'relative'}
                _hover={{
                  '.close-icon': { display: 'block' }
                }}
                bg={'myGray.50'}
                borderRadius={'md'}
                border={'base'}
                borderStyle={'dashed'}
                p={1}
              >
                <MyImage src={file.previewUrl} w="100%" h={'100%'} objectFit={'contain'} />
                <MyIcon
                  name={'closeSolid'}
                  w={'1rem'}
                  h={'1rem'}
                  color={'myGray.700'}
                  cursor={'pointer'}
                  _hover={{ color: 'red.500' }}
                  position={'absolute'}
                  rounded={'full'}
                  bg={'white'}
                  right={'-8px'}
                  top={'-2px'}
                  onClick={() => onRemoveFile(index)}
                  className="close-icon"
                  display={['', 'none']}
                  zIndex={10}
                />
              </Box>
            ))}
          </Flex>
        )}
      </ModalBody>
      <ModalFooter>
        <Button isDisabled={inserting} variant={'whitePrimary'} mr={4} onClick={onClose}>
          {t('common:Cancel')}
        </Button>
        <Button
          isDisabled={selectFiles.length === 0 || inserting}
          variant={'primary'}
          onClick={onInsertImages}
        >
          {inserting ? (
            <Box>{t('dataset:uploading_progress', { num: uploadProgress })}</Box>
          ) : (
            <Box>{t('common:Confirm')}</Box>
          )}
        </Button>
      </ModalFooter>
    </MyModal>
  );
};
export default InsertImageModal;

View File

@@ -38,7 +38,7 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const upload = getUploadModel({
maxSize: global.feConfigs?.uploadFileMaxSize
});
const { file, bucketName, metadata, data } = await upload.doUpload<
const { file, bucketName, metadata, data } = await upload.getUploadFile<
UploadChatFileProps | UploadDatasetFileProps
>(req, res);
filePaths.push(file.path);

View File

@@ -15,7 +15,7 @@ export type getResDataQuery = OutLinkChatAuthProps & {
export type getResDataBody = {};
export type getResDataResponse = ChatHistoryItemResType[] | {};
export type getResDataResponse = ChatHistoryItemResType[] | [];
async function handler(
req: ApiRequestProps<getResDataBody, getResDataQuery>,
@@ -23,7 +23,7 @@ async function handler(
): Promise<getResDataResponse> {
const { appId, chatId, dataId, shareId } = req.query;
if (!appId || !chatId || !dataId) {
return {};
return [];
}
const [{ responseDetail }, chatData] = await Promise.all([
@@ -44,10 +44,10 @@ async function handler(
]);
if (chatData?.obj !== ChatRoleEnum.AI) {
return {};
return [];
}
const flowResponses = chatData.responseData ?? {};
const flowResponses = chatData.responseData ?? [];
return req.query.shareId
? filterPublicNodeResponseData({
responseDetail,

View File

@@ -12,6 +12,7 @@ import { quoteDataFieldSelector, type QuoteDataItemType } from '@/service/core/c
import { processChatTimeFilter } from '@/service/core/chat/utils';
import { ChatErrEnum } from '@fastgpt/global/common/error/code/chat';
import { getCollectionWithDataset } from '@fastgpt/service/core/dataset/controller';
import { getFormatDatasetCiteList } from '@fastgpt/service/core/dataset/data/controller';
export type GetCollectionQuoteProps = LinkedPaginationProps & {
chatId: string;
@@ -139,7 +140,7 @@ async function handleInitialLoad({
const hasMoreNext = list.length === pageSize;
return {
list: processChatTimeFilter(list, chatTime),
list: processChatTimeFilter(getFormatDatasetCiteList(list), chatTime),
hasMorePrev: false,
hasMoreNext
};
@@ -164,7 +165,7 @@ async function handleInitialLoad({
const resultList = [...prevList, centerNode, ...nextList];
return {
list: processChatTimeFilter(resultList, chatTime),
list: processChatTimeFilter(getFormatDatasetCiteList(resultList), chatTime),
hasMorePrev,
hasMoreNext
};
@@ -192,7 +193,7 @@ async function handlePaginatedLoad({
? await getPrevNodes(prevId, prevIndex, pageSize, baseMatch)
: await getNextNodes(nextId!, nextIndex!, pageSize, baseMatch);
const processedList = processChatTimeFilter(list, chatTime);
const processedList = processChatTimeFilter(getFormatDatasetCiteList(list), chatTime);
return {
list: processedList,

View File

@@ -5,6 +5,10 @@ import { type ApiRequestProps } from '@fastgpt/service/type/next';
import { quoteDataFieldSelector, type QuoteDataItemType } from '@/service/core/chat/constants';
import { processChatTimeFilter } from '@/service/core/chat/utils';
import { ChatErrEnum } from '@fastgpt/global/common/error/code/chat';
import {
formatDatasetDataValue,
getFormatDatasetCiteList
} from '@fastgpt/service/core/dataset/data/controller';
export type GetQuoteProps = {
datasetDataIdList: string[];
@@ -56,7 +60,10 @@ async function handler(req: ApiRequestProps<GetQuoteProps>): Promise<GetQuotesRe
quoteDataFieldSelector
).lean();
const quoteList = processChatTimeFilter(list, chatItem.time);
// Get image preview url
const formatPreviewUrlList = getFormatDatasetCiteList(list);
const quoteList = processChatTimeFilter(formatPreviewUrlList, chatItem.time);
return quoteList;
}

View File

@@ -25,7 +25,7 @@ async function handler(req: ApiRequestProps<backupBody, backupQuery>, res: ApiRe
const upload = getUploadModel({
maxSize: global.feConfigs?.uploadFileMaxSize
});
const { file, data } = await upload.doUpload<{ datasetId: string }>(req, res);
const { file, data } = await upload.getUploadFile<{ datasetId: string }>(req, res);
filePaths.push(file.path);
if (file.mimetype !== 'text/csv') {

View File

@@ -0,0 +1,104 @@
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import type { ImageCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
DatasetCollectionDataProcessModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { NextAPI } from '@/service/middleware/entry';
import { type ApiRequestProps } from '@fastgpt/service/type/next';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import type { CreateCollectionResponse } from '@/global/core/dataset/api';
import { getUploadModel } from '@fastgpt/service/common/file/multer';
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
import type { NextApiResponse } from 'next';
import { i18nT } from '@fastgpt/web/i18n/utils';
import { authFrequencyLimit } from '@/service/common/frequencyLimit/api';
import { addSeconds } from 'date-fns';
import { createDatasetImage } from '@fastgpt/service/core/dataset/image/controller';
/**
 * Frequency-limit image uploads per team member.
 * Permits up to twice the configured max file amount within a 30s window;
 * no-op when no limit is configured.
 */
const authUploadLimit = (tmbId: string, num: number) => {
  const maxFileAmount = global.feConfigs.uploadFileMaxAmount;
  if (!maxFileAmount) return;

  return authFrequencyLimit({
    eventId: `${tmbId}-uploadfile`,
    maxAmount: maxFileAmount * 2,
    expiredTime: addSeconds(new Date(), 30), // 30s window
    num
  });
};
/**
 * Create an image-type dataset collection from multipart-uploaded image files.
 *
 * Flow: parse the multipart upload → auth dataset write access → rate-limit →
 * require a configured VLM model → persist each image → create the collection
 * and enqueue its training data. Temp files are always removed in `finally`.
 */
async function handler(
  req: ApiRequestProps<ImageCreateDatasetCollectionParams>,
  res: NextApiResponse<any>
): CreateCollectionResponse {
  // Temp paths of files written by multer; cleaned up unconditionally below
  const tempFilePaths: string[] = [];

  try {
    const uploader = getUploadModel({
      maxSize: global.feConfigs?.uploadFileMaxSize
    });

    const {
      files,
      data: { parentId, datasetId, collectionName }
    } = await uploader.getUploadFiles<ImageCreateDatasetCollectionParams>(req, res);
    tempFilePaths.push(...files.map((item) => item.path));

    // Requires write permission on the target dataset
    const { dataset, teamId, tmbId } = await authDataset({
      datasetId,
      per: WritePermissionVal,
      req,
      authToken: true,
      authApiKey: true
    });

    // Per-member upload frequency limit
    await authUploadLimit(tmbId, files.length);

    // Image collections need a VLM model to parse image content
    if (!dataset.vlmModel) {
      return Promise.reject(i18nT('file:Image_dataset_requires_VLM_model_to_be_configured'));
    }

    // 1. Persist every uploaded image and collect the stored ids
    const imageIds = await Promise.all(
      files.map((file) =>
        createDatasetImage({
          teamId,
          datasetId,
          file
        }).then(({ imageId }) => imageId)
      )
    );

    // 2. Create the collection and insert the image data for training
    const { collectionId, insertResults } = await createCollectionAndInsertData({
      dataset,
      imageIds,
      createCollectionParams: {
        parentId,
        teamId,
        tmbId,
        datasetId,
        type: DatasetCollectionTypeEnum.images,
        name: collectionName,
        trainingType: DatasetCollectionDataProcessModeEnum.imageParse
      }
    });

    return {
      collectionId,
      results: insertResults
    };
  } catch (error) {
    return Promise.reject(error);
  } finally {
    removeFilesByPaths(tempFilePaths);
  }
}
// Wrap the handler with the shared API middleware (entry/error handling)
export default NextAPI(handler);
// Disable Next.js's built-in body parser so the multipart stream reaches multer intact
export const config = {
  api: {
    bodyParser: false
  }
};

View File

@@ -21,11 +21,12 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>): CreateCo
const upload = getUploadModel({
maxSize: global.feConfigs?.uploadFileMaxSize
});
const { file, data, bucketName } = await upload.doUpload<FileCreateDatasetCollectionParams>(
req,
res,
BucketNameEnum.dataset
);
const { file, data, bucketName } =
await upload.getUploadFile<FileCreateDatasetCollectionParams>(
req,
res,
BucketNameEnum.dataset
);
filePaths = [file.path];
if (!file || !bucketName) {

View File

@@ -31,7 +31,8 @@ const defaultCounts: Record<TrainingModeEnum, number> = {
qa: 0,
chunk: 0,
image: 0,
auto: 0
auto: 0,
imageParse: 0
};
async function handler(

View File

@@ -1,21 +1,26 @@
import type { NextApiRequest } from 'next';
import { NextAPI } from '@/service/middleware/entry';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { authDatasetData } from '@fastgpt/service/support/permission/dataset/auth';
import type { ApiRequestProps } from '@fastgpt/service/type/next';
export type Response = {
id: string;
q: string;
a: string;
imageId?: string;
source: string;
};
async function handler(req: NextApiRequest) {
const { id: dataId } = req.query as {
id: string;
};
async function handler(
req: ApiRequestProps<
{},
{
id: string;
}
>
) {
const { id: dataId } = req.query;
// 凭证校验
const { datasetData } = await authDatasetData({
req,
authToken: true,

View File

@@ -13,7 +13,7 @@ import { i18nT } from '@fastgpt/web/i18n/utils';
export type GetQuoteDataResponse = {
collection: DatasetCollectionSchemaType;
q: string;
a: string;
a?: string;
};
export type GetQuoteDataProps =

View File

@@ -10,7 +10,7 @@ import { insertData2Dataset } from '@/service/core/dataset/data/controller';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { getCollectionWithDataset } from '@fastgpt/service/core/dataset/controller';
import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
import { type InsertOneDatasetDataProps } from '@/global/core/dataset/api';
import type { InsertOneDatasetDataProps } from '@/global/core/dataset/api';
import { simpleText } from '@fastgpt/global/common/string/tools';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middleware/entry';
@@ -25,11 +25,11 @@ async function handler(req: NextApiRequest) {
const { collectionId, q, a, indexes } = req.body as InsertOneDatasetDataProps;
if (!q) {
Promise.reject(CommonErrEnum.missingParams);
return Promise.reject(CommonErrEnum.missingParams);
}
if (!collectionId) {
Promise.reject(CommonErrEnum.missingParams);
return Promise.reject(CommonErrEnum.missingParams);
}
// 凭证校验
@@ -46,14 +46,12 @@ async function handler(req: NextApiRequest) {
insertLen: 1
});
// auth collection and get dataset
const [
{
dataset: { _id: datasetId, vectorModel, agentModel }
}
] = await Promise.all([getCollectionWithDataset(collectionId)]);
// format data
const formatQ = simpleText(q);
const formatA = simpleText(a);
const formatIndexes = indexes?.map((item) => ({
@@ -61,7 +59,6 @@ async function handler(req: NextApiRequest) {
text: simpleText(item.text)
}));
// token check
const token = await countPromptTokens(formatQ + formatA, '');
const vectorModelData = getEmbeddingModel(vectorModel);
const llmModelData = getLLMModel(agentModel);
@@ -71,7 +68,6 @@ async function handler(req: NextApiRequest) {
return Promise.reject(`Content over max chunk size: ${maxChunkSize}`);
}
// Duplicate data check
await hasSameValue({
teamId,
datasetId,
@@ -99,7 +95,7 @@ async function handler(req: NextApiRequest) {
model: vectorModelData.model
});
(async () => {
(() => {
addOperationLog({
tmbId,
teamId,

View File

@@ -0,0 +1,130 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middleware/entry';
import { authFrequencyLimit } from '@/service/common/frequencyLimit/api';
import { addSeconds } from 'date-fns';
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
import { getUploadModel } from '@fastgpt/service/common/file/multer';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { createDatasetImage } from '@fastgpt/service/core/dataset/image/controller';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getEmbeddingModel, getLLMModel, getVlmModel } from '@fastgpt/service/core/ai/model';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { removeDatasetImageExpiredTime } from '@fastgpt/service/core/dataset/image/utils';
export type insertImagesQuery = {};
export type insertImagesBody = {
collectionId: string;
};
export type insertImagesResponse = {};
const authUploadLimit = (tmbId: string, num: number) => {
if (!global.feConfigs.uploadFileMaxAmount) return;
return authFrequencyLimit({
eventId: `${tmbId}-uploadfile`,
maxAmount: global.feConfigs.uploadFileMaxAmount * 2,
expiredTime: addSeconds(new Date(), 30), // 30s
num
});
};
async function handler(
req: ApiRequestProps<insertImagesBody, insertImagesQuery>,
res: ApiResponseType<any>
): Promise<insertImagesResponse> {
const filePaths: string[] = [];
try {
const upload = getUploadModel({
maxSize: global.feConfigs?.uploadFileMaxSize
});
const {
files,
data: { collectionId }
} = await upload.getUploadFiles<insertImagesBody>(req, res);
filePaths.push(...files.map((item) => item.path));
const { collection, teamId, tmbId } = await authDatasetCollection({
collectionId,
per: WritePermissionVal,
req,
authToken: true,
authApiKey: true
});
const dataset = collection.dataset;
await authUploadLimit(tmbId, files.length);
// 1. Upload images to db
const imageIds = await Promise.all(
files.map(async (file) => {
return (
await createDatasetImage({
teamId,
datasetId: dataset._id,
file
})
).imageId;
})
);
// 2. Insert images to training queue
await mongoSessionRun(async (session) => {
const traingBillId = await (async () => {
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: collection.name,
billSource: UsageSourceEnum.training,
vectorModel: getEmbeddingModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
vllmModel: getVlmModel(dataset.vlmModel)?.name,
session
});
return billId;
})();
await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
vlmModel: dataset.vlmModel,
mode: TrainingModeEnum.imageParse,
billId: traingBillId,
data: imageIds.map((item, index) => ({
imageId: item
})),
session
});
// 3. Clear ttl
await removeDatasetImageExpiredTime({
ids: imageIds,
collectionId,
session
});
});
return {};
} catch (error) {
return Promise.reject(error);
} finally {
removeFilesByPaths(filePaths);
}
}
export default NextAPI(handler);
export const config = {
api: {
bodyParser: false
}
};

View File

@@ -43,7 +43,7 @@ async function handler(req: ApiRequestProps<UpdateDatasetDataProps>) {
model: vectorModel
});
(async () => {
(() => {
addOperationLog({
tmbId,
teamId,
@@ -55,10 +55,6 @@ async function handler(req: ApiRequestProps<UpdateDatasetDataProps>) {
}
});
})();
} else {
// await MongoDatasetData.findByIdAndUpdate(dataId, {
// ...(forbid !== undefined && { forbid })
// });
}
}

View File

@@ -3,10 +3,13 @@ import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { replaceRegChars } from '@fastgpt/global/common/string/tools';
import { NextAPI } from '@/service/middleware/entry';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { type ApiRequestProps } from '@fastgpt/service/type/next';
import { type DatasetDataListItemType } from '@/global/core/dataset/type';
import { type PaginationProps, type PaginationResponse } from '@fastgpt/web/common/fetch/type';
import type { ApiRequestProps } from '@fastgpt/service/type/next';
import type { DatasetDataListItemType } from '@/global/core/dataset/type';
import type { PaginationProps, PaginationResponse } from '@fastgpt/web/common/fetch/type';
import { parsePaginationRequest } from '@fastgpt/service/common/api/pagination';
import { MongoDatasetImageSchema } from '@fastgpt/service/core/dataset/image/schema';
import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
import { getDatasetImagePreviewUrl } from '@fastgpt/service/core/dataset/image/utils';
export type GetDatasetDataListProps = PaginationProps & {
searchText?: string;
@@ -22,7 +25,6 @@ async function handler(
pageSize = Math.min(pageSize, 30);
// 凭证校验
const { teamId, collection } = await authDatasetCollection({
req,
authToken: true,
@@ -44,7 +46,7 @@ async function handler(
};
const [list, total] = await Promise.all([
MongoDatasetData.find(match, '_id datasetId collectionId q a chunkIndex')
MongoDatasetData.find(match, '_id datasetId collectionId q a chunkIndex imageId teamId')
.sort({ chunkIndex: 1, _id: -1 })
.skip(offset)
.limit(pageSize)
@@ -52,8 +54,41 @@ async function handler(
MongoDatasetData.countDocuments(match)
]);
const imageIds = list.map((item) => item.imageId!).filter(Boolean);
const imageSizeMap = new Map<string, number>();
if (imageIds.length > 0) {
const imageInfos = await MongoDatasetImageSchema.find(
{ _id: { $in: imageIds } },
'_id length',
{
...readFromSecondary
}
).lean();
imageInfos.forEach((item) => {
imageSizeMap.set(String(item._id), item.length);
});
}
return {
list,
list: list.map((item) => {
const imageSize = item.imageId ? imageSizeMap.get(String(item.imageId)) : undefined;
const imagePreviewUrl = item.imageId
? getDatasetImagePreviewUrl({
imageId: item.imageId,
teamId,
datasetId: collection.datasetId,
expiredMinutes: 30
})
: undefined;
return {
...item,
imageSize,
imagePreviewUrl
};
}),
total
};
}

View File

@@ -45,6 +45,7 @@ async function handler(req: NextApiRequest) {
datasetId: { $in: datasetIds }
});
// Remove cron job
await Promise.all(
datasets.map((dataset) => {
if (dataset.type === DatasetTypeEnum.websiteDataset)

View File

@@ -0,0 +1,57 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { getDownloadStream, getFileById } from '@fastgpt/service/common/file/gridfs/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import type { ApiRequestProps } from '@fastgpt/service/type/next';
import { authDatasetImagePreviewUrl } from '@fastgpt/service/core/dataset/image/utils';
import { getDatasetImageReadData } from '@fastgpt/service/core/dataset/image/controller';
const previewableExtensions = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp'];
export default async function handler(
req: ApiRequestProps<
{},
{
imageId: string;
token: string;
}
>,
res: NextApiResponse<any>
) {
try {
const { imageId, token } = req.query;
if (!imageId || !token) {
return jsonRes(res, {
code: 401,
error: 'ImageId not found'
});
}
// Verify token and permissions
await authDatasetImagePreviewUrl(token);
const { fileInfo, stream } = await getDatasetImageReadData(imageId);
// Set response headers
res.setHeader('Content-Type', fileInfo.contentType);
res.setHeader('Cache-Control', 'public, max-age=31536000');
res.setHeader('Content-Length', fileInfo.length);
stream.pipe(res);
stream.on('error', (error) => {
if (!res.headersSent) {
res.status(500).end();
}
});
stream.on('end', () => {
res.end();
});
} catch (error) {
return jsonRes(res, {
code: 500,
error
});
}
}

View File

@@ -3,6 +3,7 @@ import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/sch
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { NextAPI } from '@/service/middleware/entry';
import { type ApiRequestProps } from '@fastgpt/service/type/next';
import { getDatasetImagePreviewUrl } from '@fastgpt/service/core/dataset/image/utils';
export type getTrainingDataDetailQuery = {};
@@ -17,8 +18,9 @@ export type getTrainingDataDetailResponse =
_id: string;
datasetId: string;
mode: string;
q: string;
a: string;
q?: string;
a?: string;
imagePreviewUrl?: string;
}
| undefined;
@@ -44,6 +46,14 @@ async function handler(
_id: data._id,
datasetId: data.datasetId,
mode: data.mode,
imagePreviewUrl: data.imageId
? getDatasetImagePreviewUrl({
imageId: data.imageId,
teamId,
datasetId,
expiredMinutes: 30
})
: undefined,
q: data.q,
a: data.a
};

View File

@@ -3,7 +3,7 @@ import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/sch
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { NextAPI } from '@/service/middleware/entry';
import { type ApiRequestProps } from '@fastgpt/service/type/next';
import { addMinutes } from 'date-fns';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
export type updateTrainingDataBody = {
datasetId: string;
@@ -37,21 +37,41 @@ async function handler(
return Promise.reject('data not found');
}
await MongoDatasetTraining.updateOne(
{
teamId,
datasetId,
_id: dataId
},
{
$unset: { errorMsg: '' },
retryCount: 3,
...(q !== undefined && { q }),
...(a !== undefined && { a }),
...(chunkIndex !== undefined && { chunkIndex }),
lockTime: addMinutes(new Date(), -10)
}
);
// Add to chunk
if (data.imageId && q) {
await MongoDatasetTraining.updateOne(
{
teamId,
datasetId,
_id: dataId
},
{
$unset: { errorMsg: '' },
retryCount: 3,
mode: TrainingModeEnum.chunk,
...(q !== undefined && { q }),
...(a !== undefined && { a }),
...(chunkIndex !== undefined && { chunkIndex }),
lockTime: new Date('2000')
}
);
} else {
await MongoDatasetTraining.updateOne(
{
teamId,
datasetId,
_id: dataId
},
{
$unset: { errorMsg: '' },
retryCount: 3,
...(q !== undefined && { q }),
...(a !== undefined && { a }),
...(chunkIndex !== undefined && { chunkIndex }),
lockTime: new Date('2000')
}
);
}
return {};
}

View File

@@ -22,7 +22,7 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let {
file,
data: { appId, duration, shareId, outLinkUid, teamId: spaceTeamId, teamToken }
} = await upload.doUpload<
} = await upload.getUploadFile<
OutLinkChatAuthProps & {
appId: string;
duration: number;

View File

@@ -12,6 +12,7 @@ import { TimerIdEnum } from '@fastgpt/service/common/system/timerLock/constants'
import { addHours } from 'date-fns';
import { getScheduleTriggerApp } from '@/service/core/app/utils';
import { clearExpiredRawTextBufferCron } from '@fastgpt/service/common/buffer/rawText/controller';
import { clearExpiredDatasetImageCron } from '@fastgpt/service/core/dataset/image/controller';
// Try to run train every minute
const setTrainingQueueCron = () => {
@@ -85,4 +86,5 @@ export const startCron = () => {
clearInvalidDataCron();
scheduleTriggerAppCron();
clearExpiredRawTextBufferCron();
clearExpiredDatasetImageCron();
};

View File

@@ -1,11 +1,13 @@
import { type DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type';
export const quoteDataFieldSelector = '_id q a history updateTime chunkIndex';
export const quoteDataFieldSelector =
'_id teamId datasetId q a imageId history updateTime chunkIndex';
export type QuoteDataItemType = {
_id: string;
q: DatasetDataSchemaType['q'];
a: DatasetDataSchemaType['a'];
q: string;
a?: string;
imagePreivewUrl?: string;
history?: DatasetDataSchemaType['history'];
updateTime: DatasetDataSchemaType['updateTime'];
index: DatasetDataSchemaType['chunkIndex'];

View File

@@ -1,19 +1,12 @@
import { type DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type';
import { type QuoteDataItemType } from './constants';
// 获取对话时间时,引用的内容
export function processChatTimeFilter(
dataList: DatasetDataSchemaType[],
dataList: QuoteDataItemType[],
chatTime: Date
): QuoteDataItemType[] {
return dataList.map((item) => {
const defaultItem = {
_id: item._id,
q: item.q,
a: item.a,
updateTime: item.updateTime,
index: item.chunkIndex
};
const defaultItem = item;
if (!item.history) return defaultItem;
@@ -35,11 +28,10 @@ export function processChatTimeFilter(
const latestHistory = history[latestHistoryIndex];
return {
_id: item._id,
...item,
q: latestHistory.q,
a: latestHistory.a,
updateTime: latestHistory.updateTime,
index: item.chunkIndex,
updated: true
};
});

View File

@@ -18,6 +18,7 @@ import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTex
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { countPromptTokens } from '@fastgpt/service/common/string/tiktoken';
import { deleteDatasetImage } from '@fastgpt/service/core/dataset/image/controller';
const formatIndexes = async ({
indexes = [],
@@ -142,7 +143,8 @@ export async function insertData2Dataset({
datasetId,
collectionId,
q,
a = '',
a,
imageId,
chunkIndex = 0,
indexSize = 512,
indexes,
@@ -207,6 +209,7 @@ export async function insertData2Dataset({
tmbId,
datasetId,
collectionId,
imageId,
q,
a,
chunkIndex,
@@ -391,8 +394,16 @@ export async function updateData2Dataset({
export const deleteDatasetData = async (data: DatasetDataItemType) => {
await mongoSessionRun(async (session) => {
// 1. Delete MongoDB data
await MongoDatasetData.deleteOne({ _id: data.id }, { session });
await MongoDatasetDataText.deleteMany({ dataId: data.id }, { session });
// 2. If there are any image files, delete the image records and GridFS file.
if (data.imageId) {
await deleteDatasetImage(data.imageId);
}
// 3. Delete vector data
await deleteDatasetDataVector({
teamId: data.teamId,
idList: data.indexes.map((item) => item.dataId)

View File

@@ -15,6 +15,7 @@ import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { type DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
import type { Document } from '@fastgpt/service/common/mongo';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { getMaxIndexSize } from '@fastgpt/global/core/dataset/training/utils';
const reduceQueue = () => {
global.vectorQueueLen = global.vectorQueueLen > 0 ? global.vectorQueueLen - 1 : 0;
@@ -261,8 +262,9 @@ const insertData = async ({
collectionId: trainingData.collectionId,
q: trainingData.q,
a: trainingData.a,
imageId: trainingData.imageId,
chunkIndex: trainingData.chunkIndex,
indexSize: trainingData.indexSize,
indexSize: trainingData.indexSize || getMaxIndexSize(getEmbeddingModel(trainingData.model)),
indexes: trainingData.indexes,
embeddingModel: trainingData.model,
session

View File

@@ -1,14 +1,14 @@
import { postUploadImg, postUploadFiles } from '@/web/common/file/api';
import { type UploadImgProps } from '@fastgpt/global/common/file/api';
import type { UploadImgProps } from '@fastgpt/global/common/file/api';
import type { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { type preUploadImgProps } from '@fastgpt/global/common/file/api';
import type { preUploadImgProps } from '@fastgpt/global/common/file/api';
import { compressBase64Img, type CompressImgProps } from '@fastgpt/web/common/file/img';
import type { UploadChatFileProps, UploadDatasetFileProps } from '@/pages/api/common/file/upload';
/**
* upload file to mongo gridfs
*/
export const uploadFile2DB = ({
export const uploadFile2DB = async ({
file,
bucketName,
data,
@@ -21,18 +21,21 @@ export const uploadFile2DB = ({
metadata?: Record<string, any>;
percentListen?: (percent: number) => void;
}) => {
const form = new FormData();
form.append('metadata', JSON.stringify(metadata));
form.append('bucketName', bucketName);
form.append('file', file, encodeURIComponent(file.name));
form.append('data', JSON.stringify(data));
const formData = new FormData();
formData.append('metadata', JSON.stringify(metadata));
formData.append('bucketName', bucketName);
formData.append('file', file, encodeURIComponent(file.name));
if (data) {
formData.append('data', JSON.stringify(data));
}
return postUploadFiles(form, (e) => {
const res = await postUploadFiles(formData, (e) => {
if (!e.total) return;
const percent = Math.round((e.loaded / e.total) * 100);
percentListen?.(percent);
});
return res;
};
/**
@@ -74,7 +77,6 @@ export const compressImgFileAndUpload = async ({
resolve(reader.result as string);
};
reader.onerror = (err) => {
console.log(err);
reject('Load image error');
};
});

View File

@@ -66,7 +66,7 @@ const SelectCollections = ({
const formatCollections = useMemo(
() =>
data?.list.map((collection) => {
const icon = getCollectionIcon(collection.type, collection.name);
const icon = getCollectionIcon({ type: collection.type, name: collection.name });
return {
...collection,

View File

@@ -85,6 +85,10 @@ export const TrainingProcess = {
label: i18nT('dataset:process.Parsing'),
value: 'parsing'
},
parseImage: {
label: i18nT('dataset:process.Parse_Image'),
value: 'parseImage'
},
getQA: {
label: i18nT('dataset:process.Get QA'),
value: 'getQA'

View File

@@ -0,0 +1,63 @@
import { POST } from '@/web/common/api/request';
import type { ImageCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
export const createImageDatasetCollection = async ({
files,
onUploadProgress,
...data
}: ImageCreateDatasetCollectionParams & {
onUploadProgress?: (e: number) => void;
files: File[];
}) => {
const formData = new FormData();
files.forEach((file) => {
formData.append('file', file, encodeURIComponent(file.name));
});
formData.append('data', JSON.stringify(data));
return await POST<{ collectionId: string }>('/core/dataset/collection/create/images', formData, {
timeout: 600000,
headers: {
'Content-Type': 'multipart/form-data; charset=utf-8'
},
onUploadProgress: (e) => {
if (!onUploadProgress) return;
if (!e.progress) {
return onUploadProgress(0);
}
const percent = +Math.round(e.progress * 100).toFixed(2);
onUploadProgress(percent);
}
});
};
export const insertImagesToCollection = async ({
files,
collectionId,
onUploadProgress
}: {
onUploadProgress?: (e: number) => void;
files: File[];
collectionId: string;
}) => {
const formData = new FormData();
files.forEach((file) => {
formData.append('file', file, encodeURIComponent(file.name));
});
formData.append('data', JSON.stringify({ collectionId }));
return await POST<{ collectionId: string }>('/core/dataset/data/insertImages', formData, {
timeout: 600000,
headers: {
'Content-Type': 'multipart/form-data; charset=utf-8'
},
onUploadProgress: (e) => {
if (!onUploadProgress) return;
if (!e.progress) {
return onUploadProgress(0);
}
const percent = +Math.round(e.progress * 100).toFixed(2);
onUploadProgress(percent);
}
});
};

View File

@@ -19,8 +19,9 @@ export type ImportSourceItemType = {
sourceSize?: string;
isUploading?: boolean;
uploadedFileRate?: number;
dbFileId?: string; // 存储在数据库里的文件Id,这个 ID 还是图片和集合的 metadata 中 relateId
file?: File;
dbFileId?: string; // 存储在数据库里的文件Id
file?: File; // Local file
// link
link?: string;