Feat: pptx and xlsx loader (#1118)
* perf: plan tip * perf: upload size controller * feat: add image ttl index * feat: new upload file ux * remove file * feat: support read pptx * feat: support xlsx * fix: rerank docker flie
This commit is contained in:
22
projects/app/src/global/core/dataset/api.d.ts
vendored
22
projects/app/src/global/core/dataset/api.d.ts
vendored
@@ -2,6 +2,7 @@ import { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
|
||||
import {
|
||||
DatasetSearchModeEnum,
|
||||
DatasetTypeEnum,
|
||||
ImportDataSourceEnum,
|
||||
TrainingModeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import {
|
||||
@@ -67,3 +68,24 @@ export type SearchTestResponse = {
|
||||
similarity: number;
|
||||
usingQueryExtension: boolean;
|
||||
};
|
||||
|
||||
/* =========== training =========== */
|
||||
export type PostPreviewFilesChunksProps = {
|
||||
type: `${ImportDataSourceEnum}`;
|
||||
sourceId: string;
|
||||
chunkSize: number;
|
||||
overlapRatio: number;
|
||||
customSplitChar?: string;
|
||||
};
|
||||
|
||||
export type PostPreviewFilesChunksResponse = {
|
||||
fileId: string;
|
||||
rawTextLength: number;
|
||||
chunks: string[];
|
||||
}[];
|
||||
export type PostPreviewTableChunksResponse = {
|
||||
fileId: string;
|
||||
totalChunks: number;
|
||||
chunks: { q: string; a: string; chunkIndex: number }[];
|
||||
errorText?: string;
|
||||
}[];
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
/* ================= dataset ===================== */
|
||||
|
||||
/* ================= collection ===================== */
|
||||
|
||||
/* ================= data ===================== */
|
||||
@@ -397,14 +397,22 @@ const PlanUsage = () => {
|
||||
<Box fontWeight={'bold'} fontSize="xl">
|
||||
{t(planName)}
|
||||
</Box>
|
||||
<Flex mt="2" color={'#485264'} fontSize="sm">
|
||||
<Box>{t('support.wallet.Plan expired time')}:</Box>
|
||||
<Box ml={2}>{formatTime2YMD(standardPlan?.expiredTime)}</Box>
|
||||
</Flex>
|
||||
{isFreeTeam && (
|
||||
<Box mt="2" color={'#485264'} fontSize="sm">
|
||||
免费版用户30天无任何使用记录时,系统会自动清理账号知识库。
|
||||
</Box>
|
||||
|
||||
{isFreeTeam ? (
|
||||
<>
|
||||
<Flex mt="2" color={'#485264'} fontSize="sm">
|
||||
<Box>{t('support.wallet.Plan reset time')}:</Box>
|
||||
<Box ml={2}>{formatTime2YMD(standardPlan?.expiredTime)}</Box>
|
||||
</Flex>
|
||||
<Box mt="2" color={'#485264'} fontSize="sm">
|
||||
免费版用户30天无任何使用记录时,系统会自动清理账号知识库。
|
||||
</Box>
|
||||
</>
|
||||
) : (
|
||||
<Flex mt="2" color={'#485264'} fontSize="sm">
|
||||
<Box>{t('support.wallet.Plan expired time')}:</Box>
|
||||
<Box ml={2}>{formatTime2YMD(standardPlan?.expiredTime)}</Box>
|
||||
</Flex>
|
||||
)}
|
||||
</Box>
|
||||
<Button onClick={() => router.push('/price')}>
|
||||
|
||||
@@ -2,51 +2,15 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { addLog } from '@fastgpt/service/common/system/log';
|
||||
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
|
||||
import { addHours } from 'date-fns';
|
||||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||||
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
|
||||
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
|
||||
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
|
||||
|
||||
/*
|
||||
检测无效的数据集图片
|
||||
|
||||
可能异常情况:
|
||||
1. 上传文件过程中,上传了图片,但是最终没有创建数据集。
|
||||
*/
|
||||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||||
|
||||
let deleteImageAmount = 0;
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
try {
|
||||
const {
|
||||
startHour = 72,
|
||||
endHour = 24,
|
||||
limit = 10
|
||||
} = req.body as { startHour?: number; endHour?: number; limit?: number };
|
||||
await authCert({ req, authRoot: true });
|
||||
await connectToDatabase();
|
||||
|
||||
// start: now - maxDay, end: now - 3 day
|
||||
const start = addHours(new Date(), -startHour);
|
||||
const end = addHours(new Date(), -endHour);
|
||||
deleteImageAmount = 0;
|
||||
|
||||
await checkInvalid(start, end, limit);
|
||||
|
||||
jsonRes(res, {
|
||||
data: deleteImageAmount
|
||||
});
|
||||
} catch (error) {
|
||||
addLog.error(`check Invalid user error`, error);
|
||||
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
export async function checkInvalid(start: Date, end: Date, limit = 50) {
|
||||
async function checkInvalidImg(start: Date, end: Date, limit = 50) {
|
||||
const images = await MongoImage.find(
|
||||
{
|
||||
createTime: {
|
||||
@@ -86,3 +50,37 @@ export async function checkInvalid(start: Date, end: Date, limit = 50) {
|
||||
|
||||
console.log(`检测完成,共删除 ${deleteImageAmount} 个无效图片`);
|
||||
}
|
||||
|
||||
/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
await authCert({ req, authRoot: true });
|
||||
|
||||
(async () => {
|
||||
try {
|
||||
console.log('执行脏数据清理任务');
|
||||
const end = addHours(new Date(), -1);
|
||||
const start = addHours(new Date(), -360 * 24);
|
||||
await checkFiles(start, end);
|
||||
await checkInvalidImg(start, end);
|
||||
await checkInvalidCollection(start, end);
|
||||
await checkInvalidVector(start, end);
|
||||
console.log('执行脏数据清理任务完毕');
|
||||
} catch (error) {
|
||||
console.log('执行脏数据清理任务出错了');
|
||||
}
|
||||
})();
|
||||
|
||||
jsonRes(res, {
|
||||
message: 'success'
|
||||
});
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -6,9 +6,52 @@ import { MongoUsage } from '@fastgpt/service/support/wallet/usage/schema';
|
||||
import { connectionMongo } from '@fastgpt/service/common/mongo';
|
||||
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
|
||||
import { addHours } from 'date-fns';
|
||||
import { checkInvalid as checkInvalidImg } from '../timerTask/dataset/checkInvalidDatasetImage';
|
||||
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
|
||||
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
|
||||
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
|
||||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||||
|
||||
let deleteImageAmount = 0;
|
||||
export async function checkInvalidImg(start: Date, end: Date, limit = 50) {
|
||||
const images = await MongoImage.find(
|
||||
{
|
||||
createTime: {
|
||||
$gte: start,
|
||||
$lte: end
|
||||
},
|
||||
'metadata.relatedId': { $exists: true }
|
||||
},
|
||||
'_id teamId metadata'
|
||||
);
|
||||
console.log('total images', images.length);
|
||||
let index = 0;
|
||||
|
||||
for await (const image of images) {
|
||||
try {
|
||||
// 1. 检测是否有对应的集合
|
||||
const collection = await MongoDatasetCollection.findOne(
|
||||
{
|
||||
teamId: image.teamId,
|
||||
'metadata.relatedImgId': image.metadata?.relatedId
|
||||
},
|
||||
'_id'
|
||||
);
|
||||
|
||||
if (!collection) {
|
||||
await image.deleteOne();
|
||||
deleteImageAmount++;
|
||||
}
|
||||
|
||||
index++;
|
||||
|
||||
index % 100 === 0 && console.log(index);
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`检测完成,共删除 ${deleteImageAmount} 个无效图片`);
|
||||
}
|
||||
|
||||
/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
|
||||
@@ -2,13 +2,6 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { MongoUsage } from '@fastgpt/service/support/wallet/usage/schema';
|
||||
import { connectionMongo } from '@fastgpt/service/common/mongo';
|
||||
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
|
||||
import { addHours } from 'date-fns';
|
||||
import { checkInvalid as checkInvalidImg } from '../timerTask/dataset/checkInvalidDatasetImage';
|
||||
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
|
||||
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
|
||||
import { MongoPlugin } from '@fastgpt/service/core/plugin/schema';
|
||||
import { PluginTypeEnum } from '@fastgpt/global/core/plugin/constants';
|
||||
|
||||
|
||||
41
projects/app/src/pages/api/common/file/previewContent.ts
Normal file
41
projects/app/src/pages/api/common/file/previewContent.ts
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
Read db file content and response 3000 words
|
||||
*/
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { authFile } from '@fastgpt/service/support/permission/auth/file';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
const { fileId, csvFormat } = req.body as { fileId: string; csvFormat?: boolean };
|
||||
|
||||
if (!fileId) {
|
||||
throw new Error('fileId is empty');
|
||||
}
|
||||
|
||||
const { teamId } = await authFile({ req, authToken: true, fileId });
|
||||
|
||||
const { rawText } = await readFileContent({
|
||||
teamId,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId,
|
||||
csvFormat
|
||||
});
|
||||
|
||||
jsonRes(res, {
|
||||
data: {
|
||||
previewContent: rawText.slice(0, 3000),
|
||||
totalLength: rawText.length
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -2,9 +2,12 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authFileToken } from '@fastgpt/service/support/permission/controller';
|
||||
import { getDownloadStream, getFileById } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import {
|
||||
getDownloadStream,
|
||||
getFileById,
|
||||
readFileEncode
|
||||
} from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
|
||||
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
@@ -18,8 +21,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
throw new Error('fileId is empty');
|
||||
}
|
||||
|
||||
const [file, encodeStream] = await Promise.all([
|
||||
const [file, encoding, fileStream] = await Promise.all([
|
||||
getFileById({ bucketName, fileId }),
|
||||
readFileEncode({ bucketName, fileId }),
|
||||
getDownloadStream({ bucketName, fileId })
|
||||
]);
|
||||
|
||||
@@ -27,24 +31,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
return Promise.reject(CommonErrEnum.fileNotFound);
|
||||
}
|
||||
|
||||
// get encoding
|
||||
let buffers: Buffer = Buffer.from([]);
|
||||
for await (const chunk of encodeStream) {
|
||||
buffers = Buffer.concat([buffers, chunk]);
|
||||
if (buffers.length > 10) {
|
||||
encodeStream.abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const encoding = detectFileEncoding(buffers);
|
||||
|
||||
res.setHeader('Content-Type', `${file.contentType}; charset=${encoding}`);
|
||||
res.setHeader('Cache-Control', 'public, max-age=3600');
|
||||
res.setHeader('Content-Disposition', `inline; filename="${encodeURIComponent(file.filename)}"`);
|
||||
|
||||
const fileStream = await getDownloadStream({ bucketName, fileId });
|
||||
|
||||
fileStream.pipe(res);
|
||||
|
||||
fileStream.on('error', () => {
|
||||
|
||||
@@ -4,24 +4,22 @@ import { connectToDatabase } from '@/service/mongo';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { uploadFile } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { getUploadModel } from '@fastgpt/service/common/file/multer';
|
||||
|
||||
/**
|
||||
* Creates the multer uploader
|
||||
*/
|
||||
const upload = getUploadModel({
|
||||
maxSize: 500 * 1024 * 1024
|
||||
});
|
||||
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
let filePaths: string[] = [];
|
||||
/* Creates the multer uploader */
|
||||
const upload = getUploadModel({
|
||||
maxSize: (global.feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024
|
||||
});
|
||||
const filePaths: string[] = [];
|
||||
|
||||
try {
|
||||
const { teamId, tmbId } = await authCert({ req, authToken: true });
|
||||
|
||||
await connectToDatabase();
|
||||
const { file, bucketName, metadata } = await upload.doUpload(req, res);
|
||||
|
||||
filePaths = [file.path];
|
||||
await connectToDatabase();
|
||||
filePaths.push(file.path);
|
||||
|
||||
const { teamId, tmbId } = await authCert({ req, authToken: true });
|
||||
|
||||
if (!bucketName) {
|
||||
throw new Error('bucketName is empty');
|
||||
@@ -46,6 +44,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
error
|
||||
});
|
||||
}
|
||||
|
||||
removeFilesByPaths(filePaths);
|
||||
}
|
||||
|
||||
export const config = {
|
||||
|
||||
@@ -12,12 +12,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
|
||||
const { teamId } = await authChatCert({ req, authToken: true });
|
||||
|
||||
const data = await uploadMongoImg({
|
||||
const imgId = await uploadMongoImg({
|
||||
teamId,
|
||||
...body
|
||||
});
|
||||
|
||||
jsonRes(res, { data });
|
||||
jsonRes(res, { data: imgId });
|
||||
} catch (error) {
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
|
||||
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
|
||||
import {
|
||||
DatasetCollectionTypeEnum,
|
||||
TrainingModeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
|
||||
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
|
||||
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
|
||||
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
|
||||
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
|
||||
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
|
||||
import { parseCsvTable2Chunks } from '@fastgpt/service/core/dataset/training/utils';
|
||||
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
const { datasetId, parentId, fileId } = req.body as FileIdCreateDatasetCollectionParams;
|
||||
const trainingType = TrainingModeEnum.chunk;
|
||||
|
||||
try {
|
||||
await connectToDatabase();
|
||||
|
||||
const { teamId, tmbId, dataset } = await authDataset({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
per: 'w',
|
||||
datasetId: datasetId
|
||||
});
|
||||
|
||||
// 1. read file
|
||||
const { rawText, filename } = await readFileContent({
|
||||
teamId,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId
|
||||
});
|
||||
// 2. split chunks
|
||||
const { chunks = [] } = parseCsvTable2Chunks(rawText);
|
||||
|
||||
// 3. auth limit
|
||||
await checkDatasetLimit({
|
||||
teamId,
|
||||
insertLen: predictDataLimitLength(trainingType, chunks)
|
||||
});
|
||||
|
||||
await mongoSessionRun(async (session) => {
|
||||
// 4. create collection
|
||||
const { _id: collectionId } = await createOneCollection({
|
||||
teamId,
|
||||
tmbId,
|
||||
name: filename,
|
||||
parentId,
|
||||
datasetId,
|
||||
type: DatasetCollectionTypeEnum.file,
|
||||
fileId,
|
||||
|
||||
// special metadata
|
||||
trainingType,
|
||||
chunkSize: 0,
|
||||
|
||||
session
|
||||
});
|
||||
|
||||
// 5. create training bill
|
||||
const { billId } = await createTrainingUsage({
|
||||
teamId,
|
||||
tmbId,
|
||||
appName: filename,
|
||||
billSource: UsageSourceEnum.training,
|
||||
vectorModel: getVectorModel(dataset.vectorModel)?.name,
|
||||
agentModel: getLLMModel(dataset.agentModel)?.name,
|
||||
session
|
||||
});
|
||||
|
||||
// 6. insert to training queue
|
||||
await pushDataListToTrainingQueue({
|
||||
teamId,
|
||||
tmbId,
|
||||
datasetId: dataset._id,
|
||||
collectionId,
|
||||
agentModel: dataset.agentModel,
|
||||
vectorModel: dataset.vectorModel,
|
||||
trainingMode: trainingType,
|
||||
billId,
|
||||
data: chunks.map((chunk, index) => ({
|
||||
q: chunk.q,
|
||||
a: chunk.a,
|
||||
chunkIndex: index
|
||||
})),
|
||||
session
|
||||
});
|
||||
|
||||
return collectionId;
|
||||
});
|
||||
|
||||
startTrainingQueue(true);
|
||||
|
||||
jsonRes(res);
|
||||
} catch (error) {
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -1,94 +1,151 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { delFileByFileIdList, uploadFile } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { getUploadModel } from '@fastgpt/service/common/file/multer';
|
||||
import {
|
||||
delFileByFileIdList,
|
||||
readFileContent
|
||||
} from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { FileCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
|
||||
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
|
||||
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
|
||||
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
|
||||
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import {
|
||||
DatasetCollectionTypeEnum,
|
||||
TrainingModeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
|
||||
/**
|
||||
* Creates the multer uploader
|
||||
*/
|
||||
const upload = getUploadModel({
|
||||
maxSize: 500 * 1024 * 1024
|
||||
});
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
|
||||
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
|
||||
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
|
||||
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
|
||||
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
|
||||
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
let filePaths: string[] = [];
|
||||
let fileId: string = '';
|
||||
const { datasetId } = req.query as { datasetId: string };
|
||||
const {
|
||||
fileId,
|
||||
trainingType = TrainingModeEnum.chunk,
|
||||
chunkSize = 512,
|
||||
chunkSplitter,
|
||||
qaPrompt,
|
||||
...body
|
||||
} = req.body as FileIdCreateDatasetCollectionParams;
|
||||
|
||||
try {
|
||||
await connectToDatabase();
|
||||
|
||||
const { teamId, tmbId } = await authDataset({
|
||||
const { teamId, tmbId, dataset } = await authDataset({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
per: 'w',
|
||||
datasetId
|
||||
datasetId: body.datasetId
|
||||
});
|
||||
|
||||
const { file, bucketName, data } = await upload.doUpload<FileCreateDatasetCollectionParams>(
|
||||
req,
|
||||
res
|
||||
);
|
||||
filePaths = [file.path];
|
||||
|
||||
if (!file || !bucketName) {
|
||||
throw new Error('file is empty');
|
||||
}
|
||||
|
||||
const { fileMetadata, collectionMetadata, ...collectionData } = data;
|
||||
|
||||
// upload file and create collection
|
||||
fileId = await uploadFile({
|
||||
// 1. read file
|
||||
const { rawText, filename } = await readFileContent({
|
||||
teamId,
|
||||
tmbId,
|
||||
bucketName,
|
||||
path: file.path,
|
||||
filename: file.originalname,
|
||||
contentType: file.mimetype,
|
||||
metadata: fileMetadata
|
||||
});
|
||||
|
||||
// create collection
|
||||
const { _id: collectionId } = await createOneCollection({
|
||||
...collectionData,
|
||||
metadata: collectionMetadata,
|
||||
teamId,
|
||||
tmbId,
|
||||
type: DatasetCollectionTypeEnum.file,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId
|
||||
});
|
||||
|
||||
jsonRes(res, {
|
||||
data: collectionId
|
||||
// 2. split chunks
|
||||
const { chunks } = splitText2Chunks({
|
||||
text: rawText,
|
||||
chunkLen: chunkSize,
|
||||
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
|
||||
customReg: chunkSplitter ? [chunkSplitter] : []
|
||||
});
|
||||
|
||||
// 3. auth limit
|
||||
await checkDatasetLimit({
|
||||
teamId,
|
||||
insertLen: predictDataLimitLength(trainingType, chunks)
|
||||
});
|
||||
|
||||
await mongoSessionRun(async (session) => {
|
||||
// 4. create collection
|
||||
const { _id: collectionId } = await createOneCollection({
|
||||
...body,
|
||||
teamId,
|
||||
tmbId,
|
||||
type: DatasetCollectionTypeEnum.file,
|
||||
name: filename,
|
||||
fileId,
|
||||
metadata: {
|
||||
relatedImgId: fileId
|
||||
},
|
||||
|
||||
// special metadata
|
||||
trainingType,
|
||||
chunkSize,
|
||||
chunkSplitter,
|
||||
qaPrompt,
|
||||
|
||||
hashRawText: hashStr(rawText),
|
||||
rawTextLength: rawText.length,
|
||||
session
|
||||
});
|
||||
|
||||
// 5. create training bill
|
||||
const { billId } = await createTrainingUsage({
|
||||
teamId,
|
||||
tmbId,
|
||||
appName: filename,
|
||||
billSource: UsageSourceEnum.training,
|
||||
vectorModel: getVectorModel(dataset.vectorModel)?.name,
|
||||
agentModel: getLLMModel(dataset.agentModel)?.name,
|
||||
session
|
||||
});
|
||||
|
||||
// 6. insert to training queue
|
||||
await pushDataListToTrainingQueue({
|
||||
teamId,
|
||||
tmbId,
|
||||
datasetId: dataset._id,
|
||||
collectionId,
|
||||
agentModel: dataset.agentModel,
|
||||
vectorModel: dataset.vectorModel,
|
||||
trainingMode: trainingType,
|
||||
prompt: qaPrompt,
|
||||
billId,
|
||||
data: chunks.map((text, index) => ({
|
||||
q: text,
|
||||
chunkIndex: index
|
||||
})),
|
||||
session
|
||||
});
|
||||
|
||||
// 7. remove related image ttl
|
||||
await MongoImage.updateMany(
|
||||
{
|
||||
teamId,
|
||||
'metadata.relatedId': fileId
|
||||
},
|
||||
{
|
||||
// Remove expiredTime to avoid ttl expiration
|
||||
$unset: {
|
||||
expiredTime: 1
|
||||
}
|
||||
},
|
||||
{
|
||||
session
|
||||
}
|
||||
);
|
||||
|
||||
return collectionId;
|
||||
});
|
||||
|
||||
startTrainingQueue(true);
|
||||
|
||||
jsonRes(res);
|
||||
} catch (error) {
|
||||
if (fileId) {
|
||||
try {
|
||||
await delFileByFileIdList({
|
||||
fileIdList: [fileId],
|
||||
bucketName: BucketNameEnum.dataset
|
||||
});
|
||||
} catch (error) {}
|
||||
}
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
|
||||
removeFilesByPaths(filePaths);
|
||||
}
|
||||
|
||||
export const config = {
|
||||
api: {
|
||||
bodyParser: false
|
||||
}
|
||||
};
|
||||
|
||||
@@ -19,6 +19,7 @@ import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
|
||||
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
|
||||
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
@@ -55,9 +56,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
insertLen: predictDataLimitLength(trainingType, chunks)
|
||||
});
|
||||
|
||||
// 3. create collection and training bill
|
||||
const [{ _id: collectionId }, { billId }] = await Promise.all([
|
||||
createOneCollection({
|
||||
const createResult = await mongoSessionRun(async (session) => {
|
||||
// 3. create collection
|
||||
const { _id: collectionId } = await createOneCollection({
|
||||
...body,
|
||||
teamId,
|
||||
tmbId,
|
||||
@@ -70,34 +71,44 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
qaPrompt,
|
||||
|
||||
hashRawText: hashStr(text),
|
||||
rawTextLength: text.length
|
||||
}),
|
||||
createTrainingUsage({
|
||||
rawTextLength: text.length,
|
||||
session
|
||||
});
|
||||
|
||||
// 4. create training bill
|
||||
const { billId } = await createTrainingUsage({
|
||||
teamId,
|
||||
tmbId,
|
||||
appName: name,
|
||||
billSource: UsageSourceEnum.training,
|
||||
vectorModel: getVectorModel(dataset.vectorModel)?.name,
|
||||
agentModel: getLLMModel(dataset.agentModel)?.name
|
||||
})
|
||||
]);
|
||||
agentModel: getLLMModel(dataset.agentModel)?.name,
|
||||
session
|
||||
});
|
||||
|
||||
// 4. push chunks to training queue
|
||||
const insertResults = await pushDataListToTrainingQueue({
|
||||
teamId,
|
||||
tmbId,
|
||||
collectionId,
|
||||
trainingMode: trainingType,
|
||||
prompt: qaPrompt,
|
||||
billId,
|
||||
data: chunks.map((text, index) => ({
|
||||
q: text,
|
||||
chunkIndex: index
|
||||
}))
|
||||
// 5. push chunks to training queue
|
||||
const insertResults = await pushDataListToTrainingQueue({
|
||||
teamId,
|
||||
tmbId,
|
||||
datasetId: dataset._id,
|
||||
collectionId,
|
||||
agentModel: dataset.agentModel,
|
||||
vectorModel: dataset.vectorModel,
|
||||
trainingMode: trainingType,
|
||||
prompt: qaPrompt,
|
||||
billId,
|
||||
data: chunks.map((text, index) => ({
|
||||
q: text,
|
||||
chunkIndex: index
|
||||
})),
|
||||
session
|
||||
});
|
||||
|
||||
return { collectionId, results: insertResults };
|
||||
});
|
||||
|
||||
jsonRes(res, {
|
||||
data: { collectionId, results: insertResults }
|
||||
data: createResult
|
||||
});
|
||||
} catch (err) {
|
||||
jsonRes(res, {
|
||||
|
||||
@@ -15,7 +15,8 @@ import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/train
|
||||
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
const { collectionId, data } = req.body as PushDatasetDataProps;
|
||||
const body = req.body as PushDatasetDataProps;
|
||||
const { collectionId, data } = body;
|
||||
|
||||
if (!collectionId || !Array.isArray(data)) {
|
||||
throw new Error('collectionId or data is empty');
|
||||
@@ -42,9 +43,12 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
||||
|
||||
jsonRes<PushDatasetDataResponse>(res, {
|
||||
data: await pushDataListToTrainingQueue({
|
||||
...req.body,
|
||||
...body,
|
||||
teamId,
|
||||
tmbId
|
||||
tmbId,
|
||||
datasetId: collection.datasetId._id,
|
||||
agentModel: collection.datasetId.agentModel,
|
||||
vectorModel: collection.datasetId.vectorModel
|
||||
})
|
||||
});
|
||||
} catch (err) {
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { authFile } from '@fastgpt/service/support/permission/auth/file';
|
||||
import { PostPreviewFilesChunksProps } from '@/global/core/dataset/api';
|
||||
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { parseCsvTable2Chunks } from '@fastgpt/service/core/dataset/training/utils';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
|
||||
const { type, sourceId, chunkSize, customSplitChar, overlapRatio } =
|
||||
req.body as PostPreviewFilesChunksProps;
|
||||
|
||||
if (!sourceId) {
|
||||
throw new Error('fileIdList is empty');
|
||||
}
|
||||
if (chunkSize > 30000) {
|
||||
throw new Error('chunkSize is too large, should be less than 30000');
|
||||
}
|
||||
|
||||
const { chunks } = await (async () => {
|
||||
if (type === ImportDataSourceEnum.fileLocal) {
|
||||
const { file, teamId } = await authFile({ req, authToken: true, fileId: sourceId });
|
||||
const fileId = String(file._id);
|
||||
|
||||
const { rawText } = await readFileContent({
|
||||
teamId,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId,
|
||||
csvFormat: true
|
||||
});
|
||||
// split chunks (5 chunk)
|
||||
const sliceRawText = 10 * chunkSize;
|
||||
const { chunks } = splitText2Chunks({
|
||||
text: rawText.slice(0, sliceRawText),
|
||||
chunkLen: chunkSize,
|
||||
overlapRatio,
|
||||
customReg: customSplitChar ? [customSplitChar] : []
|
||||
});
|
||||
|
||||
return {
|
||||
chunks: chunks.map((item) => ({
|
||||
q: item,
|
||||
a: ''
|
||||
}))
|
||||
};
|
||||
}
|
||||
if (type === ImportDataSourceEnum.csvTable) {
|
||||
const { file, teamId } = await authFile({ req, authToken: true, fileId: sourceId });
|
||||
const fileId = String(file._id);
|
||||
const { rawText } = await readFileContent({
|
||||
teamId,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId,
|
||||
csvFormat: false
|
||||
});
|
||||
const { chunks } = parseCsvTable2Chunks(rawText);
|
||||
|
||||
return {
|
||||
chunks: chunks || []
|
||||
};
|
||||
}
|
||||
return { chunks: [] };
|
||||
})();
|
||||
|
||||
jsonRes<{ q: string; a: string }[]>(res, {
|
||||
data: chunks.slice(0, 5)
|
||||
});
|
||||
} catch (error) {
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -32,7 +32,6 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import MyInput from '@/components/MyInput';
|
||||
import dayjs from 'dayjs';
|
||||
import { useRequest } from '@fastgpt/web/hooks/useRequest';
|
||||
import { useLoading } from '@fastgpt/web/hooks/useLoading';
|
||||
import { useRouter } from 'next/router';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import MyMenu from '@/components/MyMenu';
|
||||
@@ -62,11 +61,11 @@ import { useDatasetStore } from '@/web/core/dataset/store/dataset';
|
||||
import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||
import { DatasetCollectionSyncResultEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import MyBox from '@/components/common/MyBox';
|
||||
import { ImportDataSourceEnum } from './Import';
|
||||
import { usePagination } from '@fastgpt/web/hooks/usePagination';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
|
||||
const WebSiteConfigModal = dynamic(() => import('./Import/WebsiteConfig'), {});
|
||||
const FileSourceSelector = dynamic(() => import('./Import/sourceSelector/FileSourceSelector'), {});
|
||||
const FileSourceSelector = dynamic(() => import('./Import/components/FileSourceSelector'), {});
|
||||
|
||||
const CollectionCard = () => {
|
||||
const BoxRef = useRef<HTMLDivElement>(null);
|
||||
@@ -76,14 +75,14 @@ const CollectionCard = () => {
|
||||
const { toast } = useToast();
|
||||
const { parentId = '', datasetId } = router.query as { parentId: string; datasetId: string };
|
||||
const { t } = useTranslation();
|
||||
const { Loading } = useLoading();
|
||||
const { isPc } = useSystemStore();
|
||||
const { userInfo } = useUserStore();
|
||||
const [searchText, setSearchText] = useState('');
|
||||
const { datasetDetail, updateDataset, startWebsiteSync, loadDatasetDetail } = useDatasetStore();
|
||||
|
||||
const { openConfirm: openDeleteConfirm, ConfirmModal: ConfirmDeleteModal } = useConfirm({
|
||||
content: t('dataset.Confirm to delete the file')
|
||||
content: t('dataset.Confirm to delete the file'),
|
||||
type: 'delete'
|
||||
});
|
||||
const { openConfirm: openSyncConfirm, ConfirmModal: ConfirmSyncModal } = useConfirm({
|
||||
content: t('core.dataset.collection.Start Sync Tip')
|
||||
@@ -452,7 +451,7 @@ const CollectionCard = () => {
|
||||
query: {
|
||||
...router.query,
|
||||
currentTab: TabEnum.import,
|
||||
source: ImportDataSourceEnum.tableLocal
|
||||
source: ImportDataSourceEnum.csvTable
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import React, { useContext, useCallback, createContext, useState, useMemo, useEffect } from 'react';
|
||||
import React, { useContext, createContext, useState, useMemo, useEffect } from 'react';
|
||||
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { DatasetItemType } from '@fastgpt/global/core/dataset/type';
|
||||
@@ -8,6 +7,7 @@ import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
|
||||
import { UseFormReturn, useForm } from 'react-hook-form';
|
||||
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
|
||||
type ChunkSizeFieldType = 'embeddingChunkSize';
|
||||
export type FormType = {
|
||||
@@ -29,14 +29,11 @@ type useImportStoreType = {
|
||||
showPromptInput: boolean;
|
||||
sources: ImportSourceItemType[];
|
||||
setSources: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
|
||||
showRePreview: boolean;
|
||||
totalChunkChars: number;
|
||||
totalChunks: number;
|
||||
chunkSize: number;
|
||||
predictPoints: number;
|
||||
chunkOverlapRatio: number;
|
||||
priceTip: string;
|
||||
uploadRate: number;
|
||||
splitSources2Chunks: () => void;
|
||||
importSource: `${ImportDataSourceEnum}`;
|
||||
};
|
||||
const StateContext = createContext<useImportStoreType>({
|
||||
processParamsForm: {} as any,
|
||||
@@ -49,23 +46,22 @@ const StateContext = createContext<useImportStoreType>({
|
||||
showChunkInput: false,
|
||||
showPromptInput: false,
|
||||
chunkSizeField: 'embeddingChunkSize',
|
||||
showRePreview: false,
|
||||
totalChunkChars: 0,
|
||||
totalChunks: 0,
|
||||
chunkSize: 0,
|
||||
predictPoints: 0,
|
||||
chunkOverlapRatio: 0,
|
||||
priceTip: '',
|
||||
uploadRate: 50,
|
||||
splitSources2Chunks: () => {}
|
||||
importSource: ImportDataSourceEnum.fileLocal
|
||||
});
|
||||
|
||||
export const useImportStore = () => useContext(StateContext);
|
||||
|
||||
const Provider = ({
|
||||
importSource,
|
||||
dataset,
|
||||
parentId,
|
||||
children
|
||||
}: {
|
||||
importSource: `${ImportDataSourceEnum}`;
|
||||
dataset: DatasetItemType;
|
||||
parentId?: string;
|
||||
children: React.ReactNode;
|
||||
@@ -86,7 +82,6 @@ const Provider = ({
|
||||
|
||||
const { t } = useTranslation();
|
||||
const [sources, setSources] = useState<ImportSourceItemType[]>([]);
|
||||
const [showRePreview, setShowRePreview] = useState(false);
|
||||
|
||||
// watch form
|
||||
const mode = processParamsForm.watch('mode');
|
||||
@@ -154,68 +149,15 @@ const Provider = ({
|
||||
|
||||
const chunkSize = wayStaticPrams[way].chunkSize;
|
||||
|
||||
useEffect(() => {
|
||||
setShowRePreview(true);
|
||||
}, [mode, way, chunkSize, customSplitChar]);
|
||||
|
||||
const totalChunkChars = useMemo(
|
||||
() => sources.reduce((sum, file) => sum + file.chunkChars, 0),
|
||||
[sources]
|
||||
);
|
||||
const predictPoints = useMemo(() => {
|
||||
const totalTokensPredict = totalChunkChars / 1000;
|
||||
if (mode === TrainingModeEnum.auto) {
|
||||
const price = totalTokensPredict * 1.3 * agentModel.charsPointsPrice;
|
||||
return +price.toFixed(2);
|
||||
}
|
||||
if (mode === TrainingModeEnum.qa) {
|
||||
const price = totalTokensPredict * 1.2 * agentModel.charsPointsPrice;
|
||||
return +price.toFixed(2);
|
||||
}
|
||||
|
||||
return +(totalTokensPredict * vectorModel.charsPointsPrice).toFixed(2);
|
||||
}, [agentModel.charsPointsPrice, mode, totalChunkChars, vectorModel.charsPointsPrice]);
|
||||
const totalChunks = useMemo(
|
||||
() => sources.reduce((sum, file) => sum + file.chunks.length, 0),
|
||||
[sources]
|
||||
);
|
||||
|
||||
const splitSources2Chunks = useCallback(() => {
|
||||
setSources((state) =>
|
||||
state.map((file) => {
|
||||
const { chunks, chars } = splitText2Chunks({
|
||||
text: file.rawText,
|
||||
chunkLen: chunkSize,
|
||||
overlapRatio: selectModelStaticParam.chunkOverlapRatio,
|
||||
customReg: customSplitChar ? [customSplitChar] : []
|
||||
});
|
||||
|
||||
return {
|
||||
...file,
|
||||
chunkChars: chars,
|
||||
chunks: chunks.map((chunk, i) => ({
|
||||
chunkIndex: i,
|
||||
q: chunk,
|
||||
a: ''
|
||||
}))
|
||||
};
|
||||
})
|
||||
);
|
||||
setShowRePreview(false);
|
||||
}, [chunkSize, customSplitChar, selectModelStaticParam.chunkOverlapRatio]);
|
||||
|
||||
const value = {
|
||||
const value: useImportStoreType = {
|
||||
parentId,
|
||||
processParamsForm,
|
||||
...selectModelStaticParam,
|
||||
sources,
|
||||
setSources,
|
||||
showRePreview,
|
||||
totalChunkChars,
|
||||
totalChunks,
|
||||
chunkSize,
|
||||
predictPoints,
|
||||
splitSources2Chunks
|
||||
|
||||
importSource
|
||||
};
|
||||
return <StateContext.Provider value={value}>{children}</StateContext.Provider>;
|
||||
};
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { useEffect, useMemo, useRef, useState } from 'react';
|
||||
import React, { useMemo, useRef, useState } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Flex,
|
||||
@@ -21,11 +21,11 @@ import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
|
||||
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
|
||||
import MyTooltip from '@/components/MyTooltip';
|
||||
import { useImportStore } from '../Provider';
|
||||
import Tag from '@/components/Tag';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import MyModal from '@fastgpt/web/components/common/MyModal';
|
||||
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
|
||||
import Preview from '../components/Preview';
|
||||
import Tag from '@/components/Tag';
|
||||
|
||||
function DataProcess({
|
||||
showPreviewChunks = true,
|
||||
@@ -38,17 +38,11 @@ function DataProcess({
|
||||
const { feConfigs } = useSystemStore();
|
||||
const {
|
||||
processParamsForm,
|
||||
sources,
|
||||
chunkSizeField,
|
||||
minChunkSize,
|
||||
showChunkInput,
|
||||
showPromptInput,
|
||||
maxChunkSize,
|
||||
totalChunkChars,
|
||||
totalChunks,
|
||||
predictPoints,
|
||||
showRePreview,
|
||||
splitSources2Chunks,
|
||||
priceTip
|
||||
} = useImportStore();
|
||||
const { getValues, setValue, register } = processParamsForm;
|
||||
@@ -69,16 +63,10 @@ function DataProcess({
|
||||
});
|
||||
}, [feConfigs?.isPlus]);
|
||||
|
||||
useEffect(() => {
|
||||
if (showPreviewChunks) {
|
||||
splitSources2Chunks();
|
||||
}
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<Box h={'100%'} display={['block', 'flex']} gap={5}>
|
||||
<Box flex={'1 0 0'} maxW={'600px'}>
|
||||
<Flex fontWeight={'bold'} alignItems={'center'}>
|
||||
<Box flex={'1 0 0'} minW={['auto', '540px']} maxW={'600px'}>
|
||||
<Flex alignItems={'center'}>
|
||||
<MyIcon name={'common/settingLight'} w={'20px'} />
|
||||
<Box fontSize={'lg'}>{t('core.dataset.import.Data process params')}</Box>
|
||||
</Flex>
|
||||
@@ -273,34 +261,18 @@ function DataProcess({
|
||||
}}
|
||||
></LeftRadio>
|
||||
</Flex>
|
||||
{showPreviewChunks && (
|
||||
<Flex mt={5} alignItems={'center'} pl={'100px'} gap={3}>
|
||||
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
|
||||
{t('core.dataset.Total chunks', { total: totalChunks })}
|
||||
</Tag>
|
||||
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
|
||||
{t('core.Total chars', { total: totalChunkChars })}
|
||||
</Tag>
|
||||
{feConfigs?.show_pay && (
|
||||
<MyTooltip label={priceTip}>
|
||||
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
|
||||
{t('core.dataset.import.Estimated points', { points: predictPoints })}
|
||||
</Tag>
|
||||
</MyTooltip>
|
||||
)}
|
||||
</Flex>
|
||||
)}
|
||||
<Flex mt={5} gap={3} justifyContent={'flex-end'}>
|
||||
{showPreviewChunks && showRePreview && (
|
||||
<Button variant={'primaryOutline'} onClick={splitSources2Chunks}>
|
||||
{t('core.dataset.import.Re Preview')}
|
||||
</Button>
|
||||
<Flex mt={5} alignItems={'center'} pl={'100px'} gap={3}>
|
||||
{feConfigs?.show_pay && (
|
||||
<MyTooltip label={priceTip}>
|
||||
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
|
||||
{priceTip}
|
||||
</Tag>
|
||||
</MyTooltip>
|
||||
)}
|
||||
</Flex>
|
||||
<Flex mt={5} gap={3} justifyContent={'flex-end'}>
|
||||
<Button
|
||||
onClick={() => {
|
||||
if (showRePreview) {
|
||||
splitSources2Chunks();
|
||||
}
|
||||
goToNext();
|
||||
}}
|
||||
>
|
||||
@@ -308,7 +280,9 @@ function DataProcess({
|
||||
</Button>
|
||||
</Flex>
|
||||
</Box>
|
||||
<Preview sources={sources} showPreviewChunks={showPreviewChunks} />
|
||||
<Box flex={'1 0 0'} w={'0'}>
|
||||
<Preview showPreviewChunks={showPreviewChunks} />
|
||||
</Box>
|
||||
|
||||
{isOpenCustomPrompt && (
|
||||
<PromptTextarea
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import React from 'react';
|
||||
import { useImportStore } from '../Provider';
|
||||
import Preview from '../components/Preview';
|
||||
import { Box, Button, Flex } from '@chakra-ui/react';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
@@ -12,12 +11,11 @@ const PreviewData = ({
|
||||
goToNext: () => void;
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const { sources, setSources } = useImportStore();
|
||||
|
||||
return (
|
||||
<Flex flexDirection={'column'} h={'100%'}>
|
||||
<Box flex={'1 0 0 '}>
|
||||
<Preview showPreviewChunks={showPreviewChunks} sources={sources} />
|
||||
<Preview showPreviewChunks={showPreviewChunks} />
|
||||
</Box>
|
||||
<Flex mt={2} justifyContent={'flex-end'}>
|
||||
<Button onClick={goToNext}>{t('common.Next Step')}</Button>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { useEffect, useState } from 'react';
|
||||
import React from 'react';
|
||||
import {
|
||||
Box,
|
||||
TableContainer,
|
||||
@@ -8,164 +8,109 @@ import {
|
||||
Th,
|
||||
Td,
|
||||
Tbody,
|
||||
Progress,
|
||||
Flex,
|
||||
Button
|
||||
} from '@chakra-ui/react';
|
||||
import { useImportStore, type FormType } from '../Provider';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useRequest } from '@fastgpt/web/hooks/useRequest';
|
||||
import { postCreateTrainingUsage } from '@/web/support/wallet/usage/api';
|
||||
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
|
||||
import { chunksUpload, fileCollectionCreate } from '@/web/core/dataset/utils';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
import { useRouter } from 'next/router';
|
||||
import { TabEnum } from '../../../index';
|
||||
import { postCreateDatasetLinkCollection, postDatasetCollection } from '@/web/core/dataset/api';
|
||||
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { checkTeamDatasetSizeLimit } from '@/web/support/user/team/api';
|
||||
import {
|
||||
postCreateDatasetCsvTableCollection,
|
||||
postCreateDatasetFileCollection,
|
||||
postCreateDatasetLinkCollection,
|
||||
postCreateDatasetTextCollection
|
||||
} from '@/web/core/dataset/api';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
import Tag from '@/components/Tag';
|
||||
|
||||
const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
|
||||
const Upload = () => {
|
||||
const { t } = useTranslation();
|
||||
const { toast } = useToast();
|
||||
const router = useRouter();
|
||||
const { datasetDetail } = useDatasetStore();
|
||||
const { parentId, sources, processParamsForm, chunkSize, totalChunks, uploadRate } =
|
||||
const { importSource, parentId, sources, setSources, processParamsForm, chunkSize } =
|
||||
useImportStore();
|
||||
const [uploadList, setUploadList] = useState<
|
||||
(ImportSourceItemType & {
|
||||
uploadedFileRate: number;
|
||||
uploadedChunksRate: number;
|
||||
})[]
|
||||
>([]);
|
||||
|
||||
const { handleSubmit } = processParamsForm;
|
||||
|
||||
const { mutate: startUpload, isLoading } = useRequest({
|
||||
mutationFn: async ({ mode, customSplitChar, qaPrompt, webSelector }: FormType) => {
|
||||
if (uploadList.length === 0) return;
|
||||
|
||||
await checkTeamDatasetSizeLimit(totalChunks);
|
||||
|
||||
let totalInsertion = 0;
|
||||
if (sources.length === 0) return;
|
||||
const filterWaitingSources = sources.filter((item) => item.createStatus === 'waiting');
|
||||
|
||||
// Batch create collection and upload chunks
|
||||
for await (const item of uploadList) {
|
||||
// create collection
|
||||
const collectionId = await (async () => {
|
||||
const commonParams = {
|
||||
parentId,
|
||||
trainingType: mode,
|
||||
datasetId: datasetDetail._id,
|
||||
chunkSize,
|
||||
chunkSplitter: customSplitChar,
|
||||
qaPrompt,
|
||||
|
||||
name: item.sourceName,
|
||||
rawTextLength: item.rawText.length,
|
||||
hashRawText: hashStr(item.rawText)
|
||||
};
|
||||
if (item.file) {
|
||||
return fileCollectionCreate({
|
||||
file: item.file,
|
||||
data: {
|
||||
...commonParams,
|
||||
collectionMetadata: {
|
||||
relatedImgId: item.id
|
||||
for await (const item of filterWaitingSources) {
|
||||
setSources((state) =>
|
||||
state.map((source) =>
|
||||
source.id === item.id
|
||||
? {
|
||||
...source,
|
||||
createStatus: 'creating'
|
||||
}
|
||||
},
|
||||
percentListen: (e) => {
|
||||
setUploadList((state) =>
|
||||
state.map((uploadItem) =>
|
||||
uploadItem.id === item.id
|
||||
? {
|
||||
...uploadItem,
|
||||
uploadedFileRate: e
|
||||
}
|
||||
: uploadItem
|
||||
)
|
||||
);
|
||||
}
|
||||
});
|
||||
} else if (item.link) {
|
||||
const { collectionId } = await postCreateDatasetLinkCollection({
|
||||
...commonParams,
|
||||
link: item.link,
|
||||
metadata: {
|
||||
webPageSelector: webSelector
|
||||
}
|
||||
});
|
||||
setUploadList((state) =>
|
||||
state.map((uploadItem) =>
|
||||
uploadItem.id === item.id
|
||||
? {
|
||||
...uploadItem,
|
||||
uploadedFileRate: 100
|
||||
}
|
||||
: uploadItem
|
||||
)
|
||||
);
|
||||
return collectionId;
|
||||
} else if (item.rawText) {
|
||||
// manual collection
|
||||
return postDatasetCollection({
|
||||
...commonParams,
|
||||
type: DatasetCollectionTypeEnum.virtual
|
||||
});
|
||||
}
|
||||
return '';
|
||||
})();
|
||||
: source
|
||||
)
|
||||
);
|
||||
|
||||
if (!collectionId) continue;
|
||||
if (item.link) continue;
|
||||
// create collection
|
||||
const commonParams = {
|
||||
parentId,
|
||||
trainingType: mode,
|
||||
datasetId: datasetDetail._id,
|
||||
chunkSize,
|
||||
chunkSplitter: customSplitChar,
|
||||
qaPrompt,
|
||||
|
||||
const billId = await postCreateTrainingUsage({
|
||||
name: item.sourceName,
|
||||
datasetId: datasetDetail._id
|
||||
});
|
||||
name: item.sourceName
|
||||
};
|
||||
if (importSource === ImportDataSourceEnum.fileLocal && item.dbFileId) {
|
||||
await postCreateDatasetFileCollection({
|
||||
...commonParams,
|
||||
fileId: item.dbFileId
|
||||
});
|
||||
} else if (importSource === ImportDataSourceEnum.fileLink && item.link) {
|
||||
await postCreateDatasetLinkCollection({
|
||||
...commonParams,
|
||||
link: item.link,
|
||||
metadata: {
|
||||
webPageSelector: webSelector
|
||||
}
|
||||
});
|
||||
} else if (importSource === ImportDataSourceEnum.fileCustom && item.rawText) {
|
||||
// manual collection
|
||||
await postCreateDatasetTextCollection({
|
||||
...commonParams,
|
||||
text: item.rawText
|
||||
});
|
||||
} else if (importSource === ImportDataSourceEnum.csvTable && item.dbFileId) {
|
||||
await postCreateDatasetCsvTableCollection({
|
||||
...commonParams,
|
||||
fileId: item.dbFileId
|
||||
});
|
||||
}
|
||||
|
||||
// upload chunks
|
||||
const chunks = item.chunks;
|
||||
const { insertLen } = await chunksUpload({
|
||||
collectionId,
|
||||
billId,
|
||||
trainingMode: mode,
|
||||
chunks,
|
||||
rate: uploadRate,
|
||||
onUploading: (e) => {
|
||||
setUploadList((state) =>
|
||||
state.map((uploadItem) =>
|
||||
uploadItem.id === item.id
|
||||
? {
|
||||
...uploadItem,
|
||||
uploadedChunksRate: e
|
||||
}
|
||||
: uploadItem
|
||||
)
|
||||
);
|
||||
},
|
||||
prompt: qaPrompt
|
||||
});
|
||||
totalInsertion += insertLen;
|
||||
setSources((state) =>
|
||||
state.map((source) =>
|
||||
source.id === item.id
|
||||
? {
|
||||
...source,
|
||||
createStatus: 'finish'
|
||||
}
|
||||
: source
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
return totalInsertion;
|
||||
},
|
||||
onSuccess(num) {
|
||||
if (showPreviewChunks) {
|
||||
toast({
|
||||
title: t('core.dataset.import.Import Success Tip', { num }),
|
||||
status: 'success'
|
||||
});
|
||||
} else {
|
||||
toast({
|
||||
title: t('core.dataset.import.Upload success'),
|
||||
status: 'success'
|
||||
});
|
||||
}
|
||||
onSuccess() {
|
||||
toast({
|
||||
title: t('core.dataset.import.Import success'),
|
||||
status: 'success'
|
||||
});
|
||||
|
||||
// close import page
|
||||
router.replace({
|
||||
@@ -175,21 +120,21 @@ const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
|
||||
}
|
||||
});
|
||||
},
|
||||
onError() {
|
||||
setSources((state) =>
|
||||
state.map((source) =>
|
||||
source.createStatus === 'creating'
|
||||
? {
|
||||
...source,
|
||||
createStatus: 'waiting'
|
||||
}
|
||||
: source
|
||||
)
|
||||
);
|
||||
},
|
||||
errorToast: t('common.file.Upload failed')
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
setUploadList(
|
||||
sources.map((item) => {
|
||||
return {
|
||||
...item,
|
||||
uploadedFileRate: item.file ? 0 : -1,
|
||||
uploadedChunksRate: 0
|
||||
};
|
||||
})
|
||||
);
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<Box>
|
||||
<TableContainer>
|
||||
@@ -199,85 +144,35 @@ const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
|
||||
<Th borderLeftRadius={'md'} overflow={'hidden'} borderBottom={'none'} py={4}>
|
||||
{t('core.dataset.import.Source name')}
|
||||
</Th>
|
||||
{showPreviewChunks ? (
|
||||
<>
|
||||
<Th borderBottom={'none'} py={4}>
|
||||
{t('core.dataset.Chunk amount')}
|
||||
</Th>
|
||||
<Th borderBottom={'none'} py={4}>
|
||||
{t('core.dataset.import.Upload file progress')}
|
||||
</Th>
|
||||
<Th borderRightRadius={'md'} overflow={'hidden'} borderBottom={'none'} py={4}>
|
||||
{t('core.dataset.import.Data file progress')}
|
||||
</Th>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<Th borderBottom={'none'} py={4}>
|
||||
{t('core.dataset.import.Upload status')}
|
||||
</Th>
|
||||
</>
|
||||
)}
|
||||
<Th borderBottom={'none'} py={4}>
|
||||
{t('core.dataset.import.Upload status')}
|
||||
</Th>
|
||||
</Tr>
|
||||
</Thead>
|
||||
<Tbody>
|
||||
{uploadList.map((item) => (
|
||||
{sources.map((item) => (
|
||||
<Tr key={item.id}>
|
||||
<Td display={'flex'} alignItems={'center'}>
|
||||
<MyIcon name={item.icon as any} w={'16px'} mr={1} />
|
||||
{item.sourceName}
|
||||
<Td>
|
||||
<Flex alignItems={'center'}>
|
||||
<MyIcon name={item.icon as any} w={'16px'} mr={1} />
|
||||
<Box whiteSpace={'wrap'} maxW={'30vw'}>
|
||||
{item.sourceName}
|
||||
</Box>
|
||||
</Flex>
|
||||
</Td>
|
||||
<Td>
|
||||
<Box display={'inline-block'}>
|
||||
{item.createStatus === 'waiting' && (
|
||||
<Tag colorSchema={'gray'}>{t('common.Waiting')}</Tag>
|
||||
)}
|
||||
{item.createStatus === 'creating' && (
|
||||
<Tag colorSchema={'blue'}>{t('common.Creating')}</Tag>
|
||||
)}
|
||||
{item.createStatus === 'finish' && (
|
||||
<Tag colorSchema={'green'}>{t('common.Finish')}</Tag>
|
||||
)}
|
||||
</Box>
|
||||
</Td>
|
||||
{showPreviewChunks ? (
|
||||
<>
|
||||
<Td>{item.chunks.length}</Td>
|
||||
<Td>
|
||||
{item.uploadedFileRate === -1 ? (
|
||||
'-'
|
||||
) : (
|
||||
<Flex alignItems={'center'} fontSize={'xs'}>
|
||||
<Progress
|
||||
value={item.uploadedFileRate}
|
||||
h={'6px'}
|
||||
w={'100%'}
|
||||
maxW={'210px'}
|
||||
size="sm"
|
||||
borderRadius={'20px'}
|
||||
colorScheme={'blue'}
|
||||
bg="myGray.200"
|
||||
hasStripe
|
||||
isAnimated
|
||||
mr={2}
|
||||
/>
|
||||
{`${item.uploadedFileRate}%`}
|
||||
</Flex>
|
||||
)}
|
||||
</Td>
|
||||
<Td>
|
||||
<Flex alignItems={'center'} fontSize={'xs'}>
|
||||
<Progress
|
||||
value={item.uploadedChunksRate}
|
||||
h={'6px'}
|
||||
w={'100%'}
|
||||
maxW={'210px'}
|
||||
size="sm"
|
||||
borderRadius={'20px'}
|
||||
colorScheme={'purple'}
|
||||
bg="myGray.200"
|
||||
hasStripe
|
||||
isAnimated
|
||||
mr={2}
|
||||
/>
|
||||
{`${item.uploadedChunksRate}%`}
|
||||
</Flex>
|
||||
</Td>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<Td color={item.uploadedFileRate === 100 ? 'green.600' : 'myGray.600'}>
|
||||
{item.uploadedFileRate === 100 ? t('common.Finish') : t('common.Waiting')}
|
||||
</Td>
|
||||
</>
|
||||
)}
|
||||
</Tr>
|
||||
))}
|
||||
</Tbody>
|
||||
@@ -286,8 +181,8 @@ const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
|
||||
|
||||
<Flex justifyContent={'flex-end'} mt={4}>
|
||||
<Button isLoading={isLoading} onClick={handleSubmit((data) => startUpload(data))}>
|
||||
{uploadList.length > 0
|
||||
? `${t('core.dataset.import.Total files', { total: uploadList.length })} | `
|
||||
{sources.length > 0
|
||||
? `${t('core.dataset.import.Total files', { total: sources.length })} | `
|
||||
: ''}
|
||||
{t('core.dataset.import.Start upload')}
|
||||
</Button>
|
||||
|
||||
@@ -0,0 +1,296 @@
|
||||
import MyBox from '@/components/common/MyBox';
|
||||
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
import { Box, FlexProps } from '@chakra-ui/react';
|
||||
import { formatFileSize } from '@fastgpt/global/common/file/tools';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import React, { DragEvent, useCallback, useMemo, useState } from 'react';
|
||||
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
||||
import { useRequest } from '@fastgpt/web/hooks/useRequest';
|
||||
import { getFileIcon } from '@fastgpt/global/common/file/icon';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import { uploadFile2DB } from '@/web/common/file/controller';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
|
||||
/**
 * A file picked in the selector before it is uploaded: a generated client-side
 * id, the folder path it was dragged from (empty when chosen via the dialog),
 * and the browser File object itself.
 */
export type SelectFileItemType = {
  fileId: string;
  folderPath: string;
  file: File;
};
||||
|
||||
const FileSelector = ({
|
||||
fileType,
|
||||
selectFiles,
|
||||
setSelectFiles,
|
||||
onStartSelect,
|
||||
onFinishSelect,
|
||||
...props
|
||||
}: {
|
||||
fileType: string;
|
||||
selectFiles: ImportSourceItemType[];
|
||||
setSelectFiles: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
|
||||
onStartSelect: () => void;
|
||||
onFinishSelect: () => void;
|
||||
} & FlexProps) => {
|
||||
const { t } = useTranslation();
|
||||
const { toast } = useToast();
|
||||
const { feConfigs } = useSystemStore();
|
||||
|
||||
const maxCount = feConfigs?.uploadFileMaxAmount || 1000;
|
||||
const maxSize = (feConfigs?.uploadFileMaxSize || 1024) * 1024 * 1024;
|
||||
|
||||
const { File, onOpen } = useSelectFile({
|
||||
fileType,
|
||||
multiple: true,
|
||||
maxCount
|
||||
});
|
||||
const [isDragging, setIsDragging] = useState(false);
|
||||
const isMaxSelected = useMemo(
|
||||
() => selectFiles.length >= maxCount,
|
||||
[maxCount, selectFiles.length]
|
||||
);
|
||||
|
||||
const filterTypeReg = new RegExp(
|
||||
`(${fileType
|
||||
.split(',')
|
||||
.map((item) => item.trim())
|
||||
.join('|')})$`,
|
||||
'i'
|
||||
);
|
||||
|
||||
const { mutate: onSelectFile, isLoading } = useRequest({
|
||||
mutationFn: async (files: SelectFileItemType[]) => {
|
||||
{
|
||||
onStartSelect();
|
||||
setSelectFiles((state) => {
|
||||
const formatFiles = files.map<ImportSourceItemType>((selectFile) => {
|
||||
const { fileId, file } = selectFile;
|
||||
|
||||
return {
|
||||
id: fileId,
|
||||
createStatus: 'waiting',
|
||||
file,
|
||||
sourceName: file.name,
|
||||
sourceSize: formatFileSize(file.size),
|
||||
icon: getFileIcon(file.name),
|
||||
isUploading: true,
|
||||
uploadedFileRate: 0
|
||||
};
|
||||
});
|
||||
const results = formatFiles.concat(state).slice(0, maxCount);
|
||||
return results;
|
||||
});
|
||||
try {
|
||||
// upload file
|
||||
await Promise.all(
|
||||
files.map(async ({ fileId, file }) => {
|
||||
const uploadFileId = await uploadFile2DB({
|
||||
file,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
percentListen: (e) => {
|
||||
setSelectFiles((state) =>
|
||||
state.map((item) =>
|
||||
item.id === fileId
|
||||
? {
|
||||
...item,
|
||||
uploadedFileRate: e
|
||||
}
|
||||
: item
|
||||
)
|
||||
);
|
||||
}
|
||||
});
|
||||
setSelectFiles((state) =>
|
||||
state.map((item) =>
|
||||
item.id === fileId
|
||||
? {
|
||||
...item,
|
||||
dbFileId: uploadFileId,
|
||||
isUploading: false
|
||||
}
|
||||
: item
|
||||
)
|
||||
);
|
||||
})
|
||||
);
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
onFinishSelect();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
const selectFileCallback = useCallback(
|
||||
(files: SelectFileItemType[]) => {
|
||||
if (selectFiles.length + files.length > maxCount) {
|
||||
files = files.slice(0, maxCount - selectFiles.length);
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: t('common.file.Some file count exceeds limit', { maxCount })
|
||||
});
|
||||
}
|
||||
// size check
|
||||
if (!maxSize) {
|
||||
return onSelectFile(files);
|
||||
}
|
||||
const filterFiles = files.filter((item) => item.file.size <= maxSize);
|
||||
|
||||
if (filterFiles.length < files.length) {
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: t('common.file.Some file size exceeds limit', { maxSize: formatFileSize(maxSize) })
|
||||
});
|
||||
}
|
||||
|
||||
return onSelectFile(filterFiles);
|
||||
},
|
||||
[maxCount, maxSize, onSelectFile, selectFiles.length, t, toast]
|
||||
);
|
||||
|
||||
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
|
||||
e.preventDefault();
|
||||
setIsDragging(true);
|
||||
};
|
||||
|
||||
const handleDragLeave = (e: DragEvent<HTMLDivElement>) => {
|
||||
e.preventDefault();
|
||||
setIsDragging(false);
|
||||
};
|
||||
|
||||
const handleDrop = async (e: DragEvent<HTMLDivElement>) => {
|
||||
e.preventDefault();
|
||||
setIsDragging(false);
|
||||
|
||||
const items = e.dataTransfer.items;
|
||||
const fileList: SelectFileItemType[] = [];
|
||||
|
||||
if (e.dataTransfer.items.length <= 1) {
|
||||
const traverseFileTree = async (item: any) => {
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
if (item.isFile) {
|
||||
item.file((file: File) => {
|
||||
const folderPath = (item.fullPath || '').split('/').slice(2, -1).join('/');
|
||||
|
||||
if (filterTypeReg.test(file.name)) {
|
||||
fileList.push({
|
||||
fileId: getNanoid(),
|
||||
folderPath,
|
||||
file
|
||||
});
|
||||
}
|
||||
resolve();
|
||||
});
|
||||
} else if (item.isDirectory) {
|
||||
const dirReader = item.createReader();
|
||||
dirReader.readEntries(async (entries: any[]) => {
|
||||
for (let i = 0; i < entries.length; i++) {
|
||||
await traverseFileTree(entries[i]);
|
||||
}
|
||||
resolve();
|
||||
});
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
for await (const item of items) {
|
||||
await traverseFileTree(item.webkitGetAsEntry());
|
||||
}
|
||||
} else {
|
||||
const files = Array.from(e.dataTransfer.files);
|
||||
let isErr = files.some((item) => item.type === '');
|
||||
if (isErr) {
|
||||
return toast({
|
||||
title: t('file.upload error description'),
|
||||
status: 'error'
|
||||
});
|
||||
}
|
||||
|
||||
fileList.push(
|
||||
...files
|
||||
.filter((item) => filterTypeReg.test(item.name))
|
||||
.map((file) => ({
|
||||
fileId: getNanoid(),
|
||||
folderPath: '',
|
||||
file
|
||||
}))
|
||||
);
|
||||
}
|
||||
|
||||
selectFileCallback(fileList.slice(0, maxCount));
|
||||
};
|
||||
|
||||
return (
|
||||
<MyBox
|
||||
isLoading={isLoading}
|
||||
display={'flex'}
|
||||
flexDirection={'column'}
|
||||
alignItems={'center'}
|
||||
justifyContent={'center'}
|
||||
px={3}
|
||||
py={[4, 7]}
|
||||
borderWidth={'1.5px'}
|
||||
borderStyle={'dashed'}
|
||||
borderRadius={'md'}
|
||||
{...(isMaxSelected
|
||||
? {}
|
||||
: {
|
||||
cursor: 'pointer',
|
||||
_hover: {
|
||||
bg: 'primary.50',
|
||||
borderColor: 'primary.600'
|
||||
},
|
||||
borderColor: isDragging ? 'primary.600' : 'borderColor.high',
|
||||
onDragEnter: handleDragEnter,
|
||||
onDragOver: (e) => e.preventDefault(),
|
||||
onDragLeave: handleDragLeave,
|
||||
onDrop: handleDrop,
|
||||
onClick: onOpen
|
||||
})}
|
||||
{...props}
|
||||
>
|
||||
<MyIcon name={'common/uploadFileFill'} w={'32px'} />
|
||||
{isMaxSelected ? (
|
||||
<>
|
||||
<Box color={'myGray.500'} fontSize={'xs'}>
|
||||
已达到最大文件数量
|
||||
</Box>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<Box fontWeight={'bold'}>
|
||||
{isDragging
|
||||
? t('file.Release the mouse to upload the file')
|
||||
: t('common.file.Select and drag file tip')}
|
||||
</Box>
|
||||
{/* file type */}
|
||||
<Box color={'myGray.500'} fontSize={'xs'}>
|
||||
{t('common.file.Support file type', { fileType })}
|
||||
</Box>
|
||||
<Box color={'myGray.500'} fontSize={'xs'}>
|
||||
{/* max count */}
|
||||
{maxCount && t('common.file.Support max count', { maxCount })}
|
||||
{/* max size */}
|
||||
{maxSize && t('common.file.Support max size', { maxSize: formatFileSize(maxSize) })}
|
||||
</Box>
|
||||
|
||||
<File
|
||||
onSelect={(files) =>
|
||||
selectFileCallback(
|
||||
files.map((file) => ({
|
||||
fileId: getNanoid(),
|
||||
folderPath: '',
|
||||
file
|
||||
}))
|
||||
)
|
||||
}
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
</MyBox>
|
||||
);
|
||||
};
|
||||
|
||||
export default React.memo(FileSelector);
|
||||
@@ -3,9 +3,9 @@ import MyModal from '@fastgpt/web/components/common/MyModal';
|
||||
import { ModalBody, ModalFooter, Button } from '@chakra-ui/react';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
|
||||
import { ImportDataSourceEnum } from '..';
|
||||
import { useRouter } from 'next/router';
|
||||
import { TabEnum } from '../../..';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
|
||||
const FileModeSelector = ({ onClose }: { onClose: () => void }) => {
|
||||
const { t } = useTranslation();
|
||||
@@ -1,132 +1,94 @@
|
||||
import React, { useMemo, useState } from 'react';
|
||||
import { Box, Flex } from '@chakra-ui/react';
|
||||
import React, { useState } from 'react';
|
||||
import { Box, Flex, IconButton } from '@chakra-ui/react';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
|
||||
import RowTabs from '@fastgpt/web/components/common/Tabs/RowTabs';
|
||||
import { useImportStore } from '../Provider';
|
||||
import MyMenu from '@/components/MyMenu';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
import dynamic from 'next/dynamic';
|
||||
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
|
||||
const PreviewChunks = dynamic(() => import('./PreviewChunks'));
|
||||
|
||||
enum PreviewListEnum {
|
||||
chunks = 'chunks',
|
||||
sources = 'sources'
|
||||
}
|
||||
|
||||
const Preview = ({
|
||||
sources,
|
||||
showPreviewChunks
|
||||
}: {
|
||||
sources: ImportSourceItemType[];
|
||||
showPreviewChunks: boolean;
|
||||
}) => {
|
||||
const Preview = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
|
||||
const { t } = useTranslation();
|
||||
const [previewListType, setPreviewListType] = useState(
|
||||
showPreviewChunks ? PreviewListEnum.chunks : PreviewListEnum.sources
|
||||
);
|
||||
|
||||
const chunks = useMemo(() => {
|
||||
const oneSourceChunkLength = Math.max(4, Math.floor(50 / sources.length));
|
||||
return sources
|
||||
.map((source) =>
|
||||
source.chunks.slice(0, oneSourceChunkLength).map((chunk, i) => ({
|
||||
...chunk,
|
||||
index: i + 1,
|
||||
sourceName: source.sourceName,
|
||||
sourceIcon: source.icon
|
||||
}))
|
||||
)
|
||||
.flat();
|
||||
}, [sources]);
|
||||
const { sources } = useImportStore();
|
||||
const [previewRawTextSource, setPreviewRawTextSource] = useState<ImportSourceItemType>();
|
||||
const [previewChunkSource, setPreviewChunkSource] = useState<ImportSourceItemType>();
|
||||
|
||||
return (
|
||||
<Box h={'100%'} display={['block', 'flex']} flexDirection={'column'} flex={'1 0 0'}>
|
||||
<Box>
|
||||
<RowTabs
|
||||
list={[
|
||||
...(showPreviewChunks
|
||||
? [
|
||||
{
|
||||
icon: 'common/viewLight',
|
||||
label: t('core.dataset.import.Preview chunks'),
|
||||
value: PreviewListEnum.chunks
|
||||
<Box h={'100%'} display={['block', 'flex']} flexDirection={'column'}>
|
||||
<Flex alignItems={'center'}>
|
||||
<MyIcon name={'core/dataset/fileCollection'} w={'20px'} />
|
||||
<Box fontSize={'lg'}>{t('core.dataset.import.Sources list')}</Box>
|
||||
</Flex>
|
||||
<Box mt={3} flex={'1 0 0'} width={'100%'} overflow={'auto'}>
|
||||
{sources.map((source) => (
|
||||
<Flex
|
||||
key={source.id}
|
||||
bg={'white'}
|
||||
p={4}
|
||||
borderRadius={'md'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={'borderColor.low'}
|
||||
boxShadow={'2'}
|
||||
mb={3}
|
||||
alignItems={'center'}
|
||||
>
|
||||
<MyIcon name={source.icon as any} w={'16px'} />
|
||||
<Box mx={1} flex={'1 0 0'} w={0} className="textEllipsis">
|
||||
{source.sourceName}
|
||||
</Box>
|
||||
{showPreviewChunks && (
|
||||
<Box fontSize={'xs'} color={'myGray.600'}>
|
||||
<MyMenu
|
||||
Button={
|
||||
<IconButton
|
||||
icon={<MyIcon name={'common/viewLight'} w={'14px'} p={2} />}
|
||||
aria-label={''}
|
||||
size={'sm'}
|
||||
variant={'whitePrimary'}
|
||||
/>
|
||||
}
|
||||
]
|
||||
: []),
|
||||
{
|
||||
icon: 'core/dataset/fileCollection',
|
||||
label: t('core.dataset.import.Sources list'),
|
||||
value: PreviewListEnum.sources
|
||||
}
|
||||
]}
|
||||
value={previewListType}
|
||||
onChange={(e) => setPreviewListType(e as PreviewListEnum)}
|
||||
/>
|
||||
</Box>
|
||||
<Box mt={3} flex={'1 0 0'} overflow={'auto'}>
|
||||
{previewListType === PreviewListEnum.chunks ? (
|
||||
<>
|
||||
{chunks.map((chunk, i) => (
|
||||
<Box
|
||||
key={i}
|
||||
p={4}
|
||||
bg={'white'}
|
||||
mb={3}
|
||||
borderRadius={'md'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={'borderColor.low'}
|
||||
boxShadow={'2'}
|
||||
whiteSpace={'pre-wrap'}
|
||||
>
|
||||
<Flex mb={1} alignItems={'center'} fontSize={'sm'}>
|
||||
<Box
|
||||
flexShrink={0}
|
||||
px={1}
|
||||
color={'primary.600'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={'primary.200'}
|
||||
bg={'primary.50'}
|
||||
borderRadius={'sm'}
|
||||
>
|
||||
# {chunk.index}
|
||||
</Box>
|
||||
<Flex ml={2} fontWeight={'bold'} alignItems={'center'} gap={1}>
|
||||
<MyIcon name={chunk.sourceIcon as any} w={'14px'} />
|
||||
{chunk.sourceName}
|
||||
</Flex>
|
||||
</Flex>
|
||||
<Box fontSize={'xs'} whiteSpace={'pre-wrap'} wordBreak={'break-all'}>
|
||||
<Box color={'myGray.900'}>{chunk.q}</Box>
|
||||
<Box color={'myGray.500'}>{chunk.a}</Box>
|
||||
</Box>
|
||||
menuList={[
|
||||
{
|
||||
label: (
|
||||
<Flex alignItems={'center'}>
|
||||
<MyIcon name={'core/dataset/fileCollection'} w={'14px'} mr={2} />
|
||||
{t('core.dataset.import.Preview raw text')}
|
||||
</Flex>
|
||||
),
|
||||
onClick: () => setPreviewRawTextSource(source)
|
||||
},
|
||||
{
|
||||
label: (
|
||||
<Flex alignItems={'center'}>
|
||||
<MyIcon name={'core/dataset/splitLight'} w={'14px'} mr={2} />
|
||||
{t('core.dataset.import.Preview chunks')}
|
||||
</Flex>
|
||||
),
|
||||
onClick: () => setPreviewChunkSource(source)
|
||||
}
|
||||
]}
|
||||
/>
|
||||
</Box>
|
||||
))}
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
{sources.map((source) => (
|
||||
<Flex
|
||||
key={source.id}
|
||||
bg={'white'}
|
||||
p={4}
|
||||
borderRadius={'md'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={'borderColor.low'}
|
||||
boxShadow={'2'}
|
||||
mb={3}
|
||||
>
|
||||
<MyIcon name={source.icon as any} w={'16px'} />
|
||||
<Box mx={1} flex={'1 0 0'} className="textEllipsis">
|
||||
{source.sourceName}
|
||||
</Box>
|
||||
{showPreviewChunks && (
|
||||
<Box>
|
||||
{t('core.dataset.import.File chunk amount', { amount: source.chunks.length })}
|
||||
</Box>
|
||||
)}
|
||||
</Flex>
|
||||
))}
|
||||
</>
|
||||
)}
|
||||
)}
|
||||
</Flex>
|
||||
))}
|
||||
</Box>
|
||||
{!!previewRawTextSource && (
|
||||
<PreviewRawText
|
||||
previewSource={previewRawTextSource}
|
||||
onClose={() => setPreviewRawTextSource(undefined)}
|
||||
/>
|
||||
)}
|
||||
{!!previewChunkSource && (
|
||||
<PreviewChunks
|
||||
previewSource={previewChunkSource}
|
||||
onClose={() => setPreviewChunkSource(undefined)}
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
import React, { useMemo } from 'react';
|
||||
import { Box } from '@chakra-ui/react';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
|
||||
import { getPreviewChunks } from '@/web/core/dataset/api';
|
||||
import { useImportStore } from '../Provider';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
|
||||
const PreviewChunks = ({
|
||||
previewSource,
|
||||
onClose
|
||||
}: {
|
||||
previewSource: ImportSourceItemType;
|
||||
onClose: () => void;
|
||||
}) => {
|
||||
const { toast } = useToast();
|
||||
const { importSource, chunkSize, chunkOverlapRatio, processParamsForm } = useImportStore();
|
||||
|
||||
const { data = [], isLoading } = useQuery(
|
||||
['previewSource'],
|
||||
() => {
|
||||
if (
|
||||
importSource === ImportDataSourceEnum.fileLocal ||
|
||||
importSource === ImportDataSourceEnum.csvTable ||
|
||||
importSource === ImportDataSourceEnum.fileLink
|
||||
) {
|
||||
return getPreviewChunks({
|
||||
type: importSource,
|
||||
sourceId: previewSource.dbFileId || previewSource.link || '',
|
||||
chunkSize,
|
||||
overlapRatio: chunkOverlapRatio,
|
||||
customSplitChar: processParamsForm.getValues('customSplitChar')
|
||||
});
|
||||
} else if (importSource === ImportDataSourceEnum.fileCustom) {
|
||||
const customSplitChar = processParamsForm.getValues('customSplitChar');
|
||||
const { chunks } = splitText2Chunks({
|
||||
text: previewSource.rawText || '',
|
||||
chunkLen: chunkSize,
|
||||
overlapRatio: chunkOverlapRatio,
|
||||
customReg: customSplitChar ? [customSplitChar] : []
|
||||
});
|
||||
return chunks.map((chunk) => ({
|
||||
q: chunk,
|
||||
a: ''
|
||||
}));
|
||||
}
|
||||
return [];
|
||||
},
|
||||
{
|
||||
onError(err) {
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: getErrText(err)
|
||||
});
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
return (
|
||||
<MyRightDrawer
|
||||
onClose={onClose}
|
||||
iconSrc={previewSource.icon}
|
||||
title={previewSource.sourceName}
|
||||
isLoading={isLoading}
|
||||
maxW={['90vw', '40vw']}
|
||||
>
|
||||
{data.map((item, index) => (
|
||||
<Box
|
||||
key={index}
|
||||
whiteSpace={'pre-wrap'}
|
||||
fontSize={'sm'}
|
||||
p={4}
|
||||
bg={index % 2 === 0 ? 'white' : 'myWhite.600'}
|
||||
mb={3}
|
||||
borderRadius={'md'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={'borderColor.low'}
|
||||
boxShadow={'2'}
|
||||
_notLast={{
|
||||
mb: 2
|
||||
}}
|
||||
>
|
||||
<Box color={'myGray.900'}>{item.q}</Box>
|
||||
<Box color={'myGray.500'}>{item.a}</Box>
|
||||
</Box>
|
||||
))}
|
||||
</MyRightDrawer>
|
||||
);
|
||||
};
|
||||
|
||||
export default React.memo(PreviewChunks);
|
||||
@@ -1,28 +1,73 @@
|
||||
import React from 'react';
|
||||
import MyModal from '@fastgpt/web/components/common/MyModal';
|
||||
import { ModalBody } from '@chakra-ui/react';
|
||||
|
||||
export type PreviewRawTextProps = {
|
||||
icon: string;
|
||||
title: string;
|
||||
rawText: string;
|
||||
};
|
||||
import { Box } from '@chakra-ui/react';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import { getPreviewFileContent } from '@/web/common/file/api';
|
||||
import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
|
||||
import { useImportStore } from '../Provider';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
|
||||
const PreviewRawText = ({
|
||||
icon,
|
||||
title,
|
||||
rawText,
|
||||
previewSource,
|
||||
onClose
|
||||
}: PreviewRawTextProps & {
|
||||
}: {
|
||||
previewSource: ImportSourceItemType;
|
||||
onClose: () => void;
|
||||
}) => {
|
||||
const { toast } = useToast();
|
||||
const { importSource } = useImportStore();
|
||||
|
||||
const { data, isLoading } = useQuery(
|
||||
['previewSource', previewSource?.dbFileId],
|
||||
() => {
|
||||
if (importSource === ImportDataSourceEnum.fileLocal && previewSource.dbFileId) {
|
||||
return getPreviewFileContent({
|
||||
fileId: previewSource.dbFileId,
|
||||
csvFormat: true
|
||||
});
|
||||
}
|
||||
if (importSource === ImportDataSourceEnum.csvTable && previewSource.dbFileId) {
|
||||
return getPreviewFileContent({
|
||||
fileId: previewSource.dbFileId,
|
||||
csvFormat: false
|
||||
});
|
||||
}
|
||||
if (importSource === ImportDataSourceEnum.fileCustom) {
|
||||
return {
|
||||
previewContent: (previewSource.rawText || '').slice(0, 3000)
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
previewContent: ''
|
||||
};
|
||||
},
|
||||
{
|
||||
onError(err) {
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: getErrText(err)
|
||||
});
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
const rawText = data?.previewContent || '';
|
||||
|
||||
return (
|
||||
<MyModal isOpen onClose={onClose} iconSrc={icon} title={title}>
|
||||
<ModalBody whiteSpace={'pre-wrap'} overflowY={'auto'}>
|
||||
<MyRightDrawer
|
||||
onClose={onClose}
|
||||
iconSrc={previewSource.icon}
|
||||
title={previewSource.sourceName}
|
||||
isLoading={isLoading}
|
||||
>
|
||||
<Box whiteSpace={'pre-wrap'} overflowY={'auto'} fontSize={'sm'}>
|
||||
{rawText}
|
||||
</ModalBody>
|
||||
</MyModal>
|
||||
</Box>
|
||||
</MyRightDrawer>
|
||||
);
|
||||
};
|
||||
|
||||
export default PreviewRawText;
|
||||
export default React.memo(PreviewRawText);
|
||||
|
||||
@@ -0,0 +1,119 @@
|
||||
import React, { useState } from 'react';
|
||||
import {
|
||||
Flex,
|
||||
TableContainer,
|
||||
Table,
|
||||
Thead,
|
||||
Tr,
|
||||
Th,
|
||||
Td,
|
||||
Tbody,
|
||||
Progress,
|
||||
IconButton
|
||||
} from '@chakra-ui/react';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import MyTooltip from '@/components/MyTooltip';
|
||||
import dynamic from 'next/dynamic';
|
||||
|
||||
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
|
||||
|
||||
export const RenderUploadFiles = ({
|
||||
files,
|
||||
setFiles,
|
||||
showPreviewContent
|
||||
}: {
|
||||
files: ImportSourceItemType[];
|
||||
setFiles: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
|
||||
showPreviewContent?: boolean;
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const [previewFile, setPreviewFile] = useState<ImportSourceItemType>();
|
||||
|
||||
return files.length > 0 ? (
|
||||
<>
|
||||
<TableContainer mt={5}>
|
||||
<Table variant={'simple'} fontSize={'sm'} draggable={false}>
|
||||
<Thead draggable={false}>
|
||||
<Tr bg={'myGray.100'} mb={2}>
|
||||
<Th borderLeftRadius={'md'} borderBottom={'none'} py={4}>
|
||||
{t('common.file.File Name')}
|
||||
</Th>
|
||||
<Th borderBottom={'none'} py={4}>
|
||||
{t('core.dataset.import.Upload file progress')}
|
||||
</Th>
|
||||
<Th borderBottom={'none'} py={4}>
|
||||
{t('common.file.File Size')}
|
||||
</Th>
|
||||
<Th borderRightRadius={'md'} borderBottom={'none'} py={4}>
|
||||
{t('common.Action')}
|
||||
</Th>
|
||||
</Tr>
|
||||
</Thead>
|
||||
<Tbody>
|
||||
{files.map((item) => (
|
||||
<Tr key={item.id}>
|
||||
<Td>
|
||||
<Flex alignItems={'center'}>
|
||||
<MyIcon name={item.icon as any} w={'16px'} mr={1} />
|
||||
{item.sourceName}
|
||||
</Flex>
|
||||
</Td>
|
||||
<Td>
|
||||
<Flex alignItems={'center'} fontSize={'xs'}>
|
||||
<Progress
|
||||
value={item.uploadedFileRate}
|
||||
h={'6px'}
|
||||
w={'100%'}
|
||||
maxW={'210px'}
|
||||
size="sm"
|
||||
borderRadius={'20px'}
|
||||
colorScheme={(item.uploadedFileRate || 0) >= 100 ? 'green' : 'blue'}
|
||||
bg="myGray.200"
|
||||
hasStripe
|
||||
isAnimated
|
||||
mr={2}
|
||||
/>
|
||||
{`${item.uploadedFileRate}%`}
|
||||
</Flex>
|
||||
</Td>
|
||||
<Td>{item.sourceSize}</Td>
|
||||
<Td>
|
||||
{!item.isUploading && (
|
||||
<Flex alignItems={'center'} gap={4}>
|
||||
{showPreviewContent && (
|
||||
<MyTooltip label={t('core.dataset.import.Preview raw text')}>
|
||||
<IconButton
|
||||
variant={'whitePrimary'}
|
||||
size={'sm'}
|
||||
icon={<MyIcon name={'common/viewLight'} w={'18px'} />}
|
||||
aria-label={''}
|
||||
onClick={() => setPreviewFile(item)}
|
||||
/>
|
||||
</MyTooltip>
|
||||
)}
|
||||
|
||||
<IconButton
|
||||
variant={'grayDanger'}
|
||||
size={'sm'}
|
||||
icon={<MyIcon name={'delete'} w={'14px'} />}
|
||||
aria-label={''}
|
||||
onClick={() => {
|
||||
setFiles((state) => state.filter((file) => file.id !== item.id));
|
||||
}}
|
||||
/>
|
||||
</Flex>
|
||||
)}
|
||||
</Td>
|
||||
</Tr>
|
||||
))}
|
||||
</Tbody>
|
||||
</Table>
|
||||
</TableContainer>
|
||||
{!!previewFile && (
|
||||
<PreviewRawText previewSource={previewFile} onClose={() => setPreviewFile(undefined)} />
|
||||
)}
|
||||
</>
|
||||
) : null;
|
||||
};
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { useEffect } from 'react';
|
||||
import React, { useCallback, useEffect } from 'react';
|
||||
import { ImportDataComponentProps } from '@/web/core/dataset/type.d';
|
||||
|
||||
import dynamic from 'next/dynamic';
|
||||
@@ -19,7 +19,7 @@ const CustomTet = ({ activeStep, goToNext }: ImportDataComponentProps) => {
|
||||
<>
|
||||
{activeStep === 0 && <CustomTextInput goToNext={goToNext} />}
|
||||
{activeStep === 1 && <DataProcess showPreviewChunks goToNext={goToNext} />}
|
||||
{activeStep === 2 && <Upload showPreviewChunks />}
|
||||
{activeStep === 2 && <Upload />}
|
||||
</>
|
||||
);
|
||||
};
|
||||
@@ -36,6 +36,24 @@ const CustomTextInput = ({ goToNext }: { goToNext: () => void }) => {
|
||||
}
|
||||
});
|
||||
|
||||
const onSubmit = useCallback(
|
||||
(data: { name: string; value: string }) => {
|
||||
const fileId = getNanoid(32);
|
||||
|
||||
setSources([
|
||||
{
|
||||
id: fileId,
|
||||
createStatus: 'waiting',
|
||||
rawText: data.value,
|
||||
sourceName: data.name,
|
||||
icon: 'file/fill/manual'
|
||||
}
|
||||
]);
|
||||
goToNext();
|
||||
},
|
||||
[goToNext, setSources]
|
||||
);
|
||||
|
||||
useEffect(() => {
|
||||
const source = sources[0];
|
||||
if (source) {
|
||||
@@ -78,25 +96,7 @@ const CustomTextInput = ({ goToNext }: { goToNext: () => void }) => {
|
||||
/>
|
||||
</Box>
|
||||
<Flex mt={5} justifyContent={'flex-end'}>
|
||||
<Button
|
||||
onClick={handleSubmit((data) => {
|
||||
const fileId = getNanoid(32);
|
||||
|
||||
setSources([
|
||||
{
|
||||
id: fileId,
|
||||
rawText: data.value,
|
||||
chunks: [],
|
||||
chunkChars: 0,
|
||||
sourceName: data.name,
|
||||
icon: 'file/fill/manual'
|
||||
}
|
||||
]);
|
||||
goToNext();
|
||||
})}
|
||||
>
|
||||
{t('common.Next Step')}
|
||||
</Button>
|
||||
<Button onClick={handleSubmit((data) => onSubmit(data))}>{t('common.Next Step')}</Button>
|
||||
</Flex>
|
||||
</Box>
|
||||
);
|
||||
|
||||
@@ -23,7 +23,7 @@ const LinkCollection = ({ activeStep, goToNext }: ImportDataComponentProps) => {
|
||||
<>
|
||||
{activeStep === 0 && <CustomLinkImport goToNext={goToNext} />}
|
||||
{activeStep === 1 && <DataProcess showPreviewChunks={false} goToNext={goToNext} />}
|
||||
{activeStep === 2 && <Upload showPreviewChunks={false} />}
|
||||
{activeStep === 2 && <Upload />}
|
||||
</>
|
||||
);
|
||||
};
|
||||
@@ -128,10 +128,8 @@ const CustomLinkImport = ({ goToNext }: { goToNext: () => void }) => {
|
||||
setSources(
|
||||
newLinkList.map((link) => ({
|
||||
id: getNanoid(32),
|
||||
createStatus: 'waiting',
|
||||
link,
|
||||
rawText: '',
|
||||
chunks: [],
|
||||
chunkChars: 0,
|
||||
sourceName: link,
|
||||
icon: LinkCollectionIcon
|
||||
}))
|
||||
|
||||
@@ -1,41 +1,27 @@
|
||||
import React, { useEffect, useMemo, useState } from 'react';
|
||||
import { ImportDataComponentProps } from '@/web/core/dataset/type.d';
|
||||
import { Box, Button, Flex } from '@chakra-ui/react';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
|
||||
import FileSelector, { type SelectFileItemType } from '@/web/core/dataset/components/FileSelector';
|
||||
import { getFileIcon } from '@fastgpt/global/common/file/icon';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { formatFileSize } from '@fastgpt/global/common/file/tools';
|
||||
import React, { useCallback, useEffect, useMemo, useState } from 'react';
|
||||
import { ImportDataComponentProps, ImportSourceItemType } from '@/web/core/dataset/type.d';
|
||||
import { Box, Button } from '@chakra-ui/react';
|
||||
import FileSelector from '../components/FileSelector';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
||||
import { useRequest } from '@fastgpt/web/hooks/useRequest';
|
||||
import { readFileRawContent } from '@fastgpt/web/common/file/read';
|
||||
import { getUploadBase64ImgController } from '@/web/common/file/controller';
|
||||
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
|
||||
import MyTooltip from '@/components/MyTooltip';
|
||||
import type { PreviewRawTextProps } from '../components/PreviewRawText';
|
||||
import { useImportStore } from '../Provider';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
|
||||
import dynamic from 'next/dynamic';
|
||||
import Loading from '@fastgpt/web/components/common/MyLoading';
|
||||
import { RenderUploadFiles } from '../components/RenderFiles';
|
||||
|
||||
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'), {
|
||||
loading: () => <Loading fixed={false} />
|
||||
});
|
||||
const Upload = dynamic(() => import('../commonProgress/Upload'));
|
||||
const PreviewRawText = dynamic(() => import('../components/PreviewRawText'));
|
||||
|
||||
type FileItemType = ImportSourceItemType & { file: File };
|
||||
const fileType = '.txt, .docx, .csv, .pdf, .md, .html';
|
||||
const maxSelectFileCount = 1000;
|
||||
const fileType = '.txt, .docx, .csv, .xlsx, .pdf, .md, .html, .pptx';
|
||||
|
||||
const FileLocal = ({ activeStep, goToNext }: ImportDataComponentProps) => {
|
||||
return (
|
||||
<>
|
||||
{activeStep === 0 && <SelectFile goToNext={goToNext} />}
|
||||
{activeStep === 1 && <DataProcess showPreviewChunks goToNext={goToNext} />}
|
||||
{activeStep === 2 && <Upload showPreviewChunks />}
|
||||
{activeStep === 2 && <Upload />}
|
||||
</>
|
||||
);
|
||||
};
|
||||
@@ -44,135 +30,47 @@ export default React.memo(FileLocal);
|
||||
|
||||
const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () => void }) {
|
||||
const { t } = useTranslation();
|
||||
const { feConfigs } = useSystemStore();
|
||||
const { sources, setSources } = useImportStore();
|
||||
// @ts-ignore
|
||||
const [selectFiles, setSelectFiles] = useState<FileItemType[]>(sources);
|
||||
const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
|
||||
sources.map((source) => ({
|
||||
isUploading: false,
|
||||
...source
|
||||
}))
|
||||
);
|
||||
const [uploading, setUploading] = useState(false);
|
||||
const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]);
|
||||
|
||||
const [previewRaw, setPreviewRaw] = useState<PreviewRawTextProps>();
|
||||
|
||||
useEffect(() => {
|
||||
setSources(successFiles);
|
||||
}, [successFiles]);
|
||||
}, [setSources, successFiles]);
|
||||
|
||||
const { mutate: onSelectFile, isLoading } = useRequest({
|
||||
mutationFn: async (files: SelectFileItemType[]) => {
|
||||
{
|
||||
for await (const selectFile of files) {
|
||||
const { file, folderPath } = selectFile;
|
||||
const relatedId = getNanoid(32);
|
||||
|
||||
const { rawText } = await (() => {
|
||||
try {
|
||||
return readFileRawContent({
|
||||
file,
|
||||
uploadBase64Controller: (base64Img) =>
|
||||
getUploadBase64ImgController({
|
||||
base64Img,
|
||||
type: MongoImageTypeEnum.collectionImage,
|
||||
metadata: {
|
||||
relatedId
|
||||
}
|
||||
})
|
||||
});
|
||||
} catch (error) {
|
||||
return { rawText: '' };
|
||||
}
|
||||
})();
|
||||
|
||||
const item: FileItemType = {
|
||||
id: relatedId,
|
||||
file,
|
||||
rawText,
|
||||
chunks: [],
|
||||
chunkChars: 0,
|
||||
sourceFolderPath: folderPath,
|
||||
sourceName: file.name,
|
||||
sourceSize: formatFileSize(file.size),
|
||||
icon: getFileIcon(file.name),
|
||||
errorMsg: rawText.length === 0 ? t('common.file.Empty file tip') : ''
|
||||
};
|
||||
|
||||
setSelectFiles((state) => {
|
||||
const results = [item].concat(state).slice(0, maxSelectFileCount);
|
||||
return results;
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
const onclickNext = useCallback(() => {
|
||||
// filter uploaded files
|
||||
setSelectFiles((state) => state.filter((item) => (item.uploadedFileRate || 0) >= 100));
|
||||
goToNext();
|
||||
}, [goToNext]);
|
||||
|
||||
return (
|
||||
<Box>
|
||||
<FileSelector
|
||||
isLoading={isLoading}
|
||||
fileType={fileType}
|
||||
multiple
|
||||
maxCount={maxSelectFileCount}
|
||||
maxSize={(feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024}
|
||||
onSelectFile={onSelectFile}
|
||||
selectFiles={selectFiles}
|
||||
setSelectFiles={setSelectFiles}
|
||||
onStartSelect={() => setUploading(true)}
|
||||
onFinishSelect={() => setUploading(false)}
|
||||
/>
|
||||
|
||||
{/* render files */}
|
||||
<Flex my={4} flexWrap={'wrap'} gap={5} alignItems={'center'}>
|
||||
{selectFiles.map((item) => (
|
||||
<MyTooltip key={item.id} label={t('core.dataset.import.Preview raw text')}>
|
||||
<Flex
|
||||
alignItems={'center'}
|
||||
px={4}
|
||||
py={3}
|
||||
borderRadius={'md'}
|
||||
bg={'myGray.100'}
|
||||
cursor={'pointer'}
|
||||
onClick={() =>
|
||||
setPreviewRaw({
|
||||
icon: item.icon,
|
||||
title: item.sourceName,
|
||||
rawText: item.rawText.slice(0, 10000)
|
||||
})
|
||||
}
|
||||
>
|
||||
<MyIcon name={item.icon as any} w={'16px'} />
|
||||
<Box ml={1} mr={3}>
|
||||
{item.sourceName}
|
||||
</Box>
|
||||
<Box mr={1} fontSize={'xs'} color={'myGray.500'}>
|
||||
{item.sourceSize}
|
||||
{item.rawText.length > 0 && (
|
||||
<>,{t('common.Number of words', { amount: item.rawText.length })}</>
|
||||
)}
|
||||
</Box>
|
||||
{item.errorMsg && (
|
||||
<MyTooltip label={item.errorMsg}>
|
||||
<MyIcon name={'common/errorFill'} w={'14px'} mr={3} />
|
||||
</MyTooltip>
|
||||
)}
|
||||
<MyIcon
|
||||
name={'common/closeLight'}
|
||||
w={'14px'}
|
||||
color={'myGray.500'}
|
||||
cursor={'pointer'}
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
setSelectFiles((state) => state.filter((file) => file.id !== item.id));
|
||||
}}
|
||||
/>
|
||||
</Flex>
|
||||
</MyTooltip>
|
||||
))}
|
||||
</Flex>
|
||||
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} showPreviewContent />
|
||||
|
||||
<Box textAlign={'right'}>
|
||||
<Button isDisabled={successFiles.length === 0 || isLoading} onClick={goToNext}>
|
||||
<Box textAlign={'right'} mt={5}>
|
||||
<Button isDisabled={successFiles.length === 0 || uploading} onClick={onclickNext}>
|
||||
{selectFiles.length > 0
|
||||
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
|
||||
: ''}
|
||||
{t('common.Next Step')}
|
||||
</Button>
|
||||
</Box>
|
||||
|
||||
{previewRaw && <PreviewRawText {...previewRaw} onClose={() => setPreviewRaw(undefined)} />}
|
||||
</Box>
|
||||
);
|
||||
});
|
||||
|
||||
@@ -1,108 +1,62 @@
|
||||
import React, { useEffect, useMemo, useState } from 'react';
|
||||
import { ImportDataComponentProps } from '@/web/core/dataset/type.d';
|
||||
import { Box, Button, Flex } from '@chakra-ui/react';
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
|
||||
import FileSelector, { type SelectFileItemType } from '@/web/core/dataset/components/FileSelector';
|
||||
import { getFileIcon } from '@fastgpt/global/common/file/icon';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { formatFileSize } from '@fastgpt/global/common/file/tools';
|
||||
import { ImportDataComponentProps, ImportSourceItemType } from '@/web/core/dataset/type.d';
|
||||
import { Box, Button } from '@chakra-ui/react';
|
||||
import FileSelector from '../components/FileSelector';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
||||
import { useRequest } from '@fastgpt/web/hooks/useRequest';
|
||||
import MyTooltip from '@/components/MyTooltip';
|
||||
import { useImportStore } from '../Provider';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
|
||||
import dynamic from 'next/dynamic';
|
||||
import { fileDownload } from '@/web/common/file/utils';
|
||||
import { readCsvContent } from '@fastgpt/web/common/file/read/csv';
|
||||
import { RenderUploadFiles } from '../components/RenderFiles';
|
||||
|
||||
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
|
||||
const Upload = dynamic(() => import('../commonProgress/Upload'));
|
||||
|
||||
type FileItemType = ImportSourceItemType & { file: File };
|
||||
const fileType = '.csv';
|
||||
const maxSelectFileCount = 1000;
|
||||
|
||||
const FileLocal = ({ activeStep, goToNext }: ImportDataComponentProps) => {
|
||||
return (
|
||||
<>
|
||||
{activeStep === 0 && <SelectFile goToNext={goToNext} />}
|
||||
{activeStep === 1 && <PreviewData showPreviewChunks goToNext={goToNext} />}
|
||||
{activeStep === 2 && <Upload showPreviewChunks />}
|
||||
{activeStep === 2 && <Upload />}
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
export default React.memo(FileLocal);
|
||||
|
||||
const csvTemplate = `index,content
|
||||
"必填内容","可选内容。CSV 中请注意内容不能包含双引号,双引号是列分割符号"
|
||||
const csvTemplate = `"第一列内容","第二列内容"
|
||||
"必填列","可选列。CSV 中请注意内容不能包含双引号,双引号是列分割符号"
|
||||
"只会讲第一和第二列内容导入,其余列会被忽略",""
|
||||
"结合人工智能的演进历程,AIGC的发展大致可以分为三个阶段,即:早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期),以及快速发展展阶段(21世纪10年代中期至今)。",""
|
||||
"AIGC发展分为几个阶段?","早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期)、快速发展展阶段(21世纪10年代中期至今)"`;
|
||||
|
||||
const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () => void }) {
|
||||
const { t } = useTranslation();
|
||||
const { feConfigs } = useSystemStore();
|
||||
const { sources, setSources } = useImportStore();
|
||||
// @ts-ignore
|
||||
const [selectFiles, setSelectFiles] = useState<FileItemType[]>(sources);
|
||||
const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
|
||||
sources.map((source) => ({
|
||||
isUploading: false,
|
||||
...source
|
||||
}))
|
||||
);
|
||||
const [uploading, setUploading] = useState(false);
|
||||
|
||||
const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]);
|
||||
|
||||
useEffect(() => {
|
||||
setSources(successFiles);
|
||||
}, [successFiles]);
|
||||
|
||||
const { mutate: onSelectFile, isLoading } = useRequest({
|
||||
mutationFn: async (files: SelectFileItemType[]) => {
|
||||
{
|
||||
for await (const selectFile of files) {
|
||||
const { file, folderPath } = selectFile;
|
||||
const { header, data } = await readCsvContent({ file });
|
||||
|
||||
const filterData: FileItemType['chunks'] = data
|
||||
.filter((item) => item[0])
|
||||
.map((item) => ({
|
||||
q: item[0] || '',
|
||||
a: item[1] || '',
|
||||
chunkIndex: 0
|
||||
}));
|
||||
|
||||
const item: FileItemType = {
|
||||
id: getNanoid(32),
|
||||
file,
|
||||
rawText: '',
|
||||
chunks: filterData,
|
||||
chunkChars: 0,
|
||||
sourceFolderPath: folderPath,
|
||||
sourceName: file.name,
|
||||
sourceSize: formatFileSize(file.size),
|
||||
icon: getFileIcon(file.name),
|
||||
errorMsg:
|
||||
header[0] !== 'index' || header[1] !== 'content' || filterData.length === 0
|
||||
? t('core.dataset.import.Csv format error')
|
||||
: ''
|
||||
};
|
||||
|
||||
setSelectFiles((state) => {
|
||||
const results = [item].concat(state).slice(0, 10);
|
||||
return results;
|
||||
});
|
||||
}
|
||||
}
|
||||
},
|
||||
errorToast: t('common.file.Select failed')
|
||||
});
|
||||
|
||||
return (
|
||||
<Box>
|
||||
<FileSelector
|
||||
multiple
|
||||
maxCount={maxSelectFileCount}
|
||||
maxSize={(feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024}
|
||||
isLoading={isLoading}
|
||||
fileType={fileType}
|
||||
onSelectFile={onSelectFile}
|
||||
selectFiles={selectFiles}
|
||||
setSelectFiles={setSelectFiles}
|
||||
onStartSelect={() => setUploading(true)}
|
||||
onFinishSelect={() => setUploading(false)}
|
||||
/>
|
||||
|
||||
<Box
|
||||
@@ -122,43 +76,16 @@ const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () =
|
||||
</Box>
|
||||
|
||||
{/* render files */}
|
||||
<Flex my={4} flexWrap={'wrap'} gap={5} alignItems={'center'}>
|
||||
{selectFiles.map((item) => (
|
||||
<Flex
|
||||
key={item.id}
|
||||
alignItems={'center'}
|
||||
px={4}
|
||||
py={2}
|
||||
borderRadius={'md'}
|
||||
bg={'myGray.100'}
|
||||
>
|
||||
<MyIcon name={item.icon as any} w={'16px'} />
|
||||
<Box ml={1} mr={3}>
|
||||
{item.sourceName}
|
||||
</Box>
|
||||
<Box mr={1} fontSize={'xs'} color={'myGray.500'}>
|
||||
{item.sourceSize}
|
||||
</Box>
|
||||
{item.errorMsg && (
|
||||
<MyTooltip label={item.errorMsg}>
|
||||
<MyIcon name={'common/errorFill'} w={'14px'} mr={3} />
|
||||
</MyTooltip>
|
||||
)}
|
||||
<MyIcon
|
||||
name={'common/closeLight'}
|
||||
w={'14px'}
|
||||
color={'myGray.500'}
|
||||
cursor={'pointer'}
|
||||
onClick={() => {
|
||||
setSelectFiles((state) => state.filter((file) => file.id !== item.id));
|
||||
}}
|
||||
/>
|
||||
</Flex>
|
||||
))}
|
||||
</Flex>
|
||||
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} />
|
||||
|
||||
<Box textAlign={'right'}>
|
||||
<Button isDisabled={successFiles.length === 0 || isLoading} onClick={goToNext}>
|
||||
<Box textAlign={'right'} mt={5}>
|
||||
<Button
|
||||
isDisabled={successFiles.length === 0 || uploading}
|
||||
onClick={() => {
|
||||
setSelectFiles((state) => state.filter((item) => !item.errorMsg));
|
||||
goToNext();
|
||||
}}
|
||||
>
|
||||
{selectFiles.length > 0
|
||||
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
|
||||
: ''}
|
||||
|
||||
@@ -6,22 +6,15 @@ import { useRouter } from 'next/router';
|
||||
import { TabEnum } from '../../index';
|
||||
import { useMyStep } from '@fastgpt/web/hooks/useStep';
|
||||
import dynamic from 'next/dynamic';
|
||||
import Provider from './Provider';
|
||||
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import Provider from './Provider';
|
||||
|
||||
const FileLocal = dynamic(() => import('./diffSource/FileLocal'));
|
||||
const FileLink = dynamic(() => import('./diffSource/FileLink'));
|
||||
const FileCustomText = dynamic(() => import('./diffSource/FileCustomText'));
|
||||
const TableLocal = dynamic(() => import('./diffSource/TableLocal'));
|
||||
|
||||
export enum ImportDataSourceEnum {
|
||||
fileLocal = 'fileLocal',
|
||||
fileLink = 'fileLink',
|
||||
fileCustom = 'fileCustom',
|
||||
|
||||
tableLocal = 'tableLocal'
|
||||
}
|
||||
|
||||
const ImportDataset = () => {
|
||||
const { t } = useTranslation();
|
||||
const router = useRouter();
|
||||
@@ -65,7 +58,7 @@ const ImportDataset = () => {
|
||||
title: t('core.dataset.import.Upload data')
|
||||
}
|
||||
],
|
||||
[ImportDataSourceEnum.tableLocal]: [
|
||||
[ImportDataSourceEnum.csvTable]: [
|
||||
{
|
||||
title: t('core.dataset.import.Select file')
|
||||
},
|
||||
@@ -88,7 +81,7 @@ const ImportDataset = () => {
|
||||
if (source === ImportDataSourceEnum.fileLocal) return FileLocal;
|
||||
if (source === ImportDataSourceEnum.fileLink) return FileLink;
|
||||
if (source === ImportDataSourceEnum.fileCustom) return FileCustomText;
|
||||
if (source === ImportDataSourceEnum.tableLocal) return TableLocal;
|
||||
if (source === ImportDataSourceEnum.csvTable) return TableLocal;
|
||||
}, [source]);
|
||||
|
||||
return ImportComponent ? (
|
||||
@@ -142,7 +135,7 @@ const ImportDataset = () => {
|
||||
<MyStep />
|
||||
</Box>
|
||||
</Box>
|
||||
<Provider dataset={datasetDetail} parentId={parentId}>
|
||||
<Provider dataset={datasetDetail} parentId={parentId} importSource={source}>
|
||||
<Box flex={'1 0 0'} overflow={'auto'} position={'relative'}>
|
||||
<ImportComponent activeStep={activeStep} goToNext={goToNext} />
|
||||
</Box>
|
||||
|
||||
7
projects/app/src/pages/dataset/detail/components/Import/type.d.ts
vendored
Normal file
7
projects/app/src/pages/dataset/detail/components/Import/type.d.ts
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
import { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
|
||||
export type UploadFileItemType = ImportSourceItemType & {
|
||||
file?: File;
|
||||
isUploading: boolean;
|
||||
uploadedFileRate: number;
|
||||
};
|
||||
@@ -1,19 +1,5 @@
|
||||
import React, { useEffect, useMemo, useState } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Textarea,
|
||||
Button,
|
||||
Flex,
|
||||
useTheme,
|
||||
useDisclosure,
|
||||
Table,
|
||||
Thead,
|
||||
Tbody,
|
||||
Tr,
|
||||
Th,
|
||||
Td,
|
||||
TableContainer
|
||||
} from '@chakra-ui/react';
|
||||
import { Box, Textarea, Button, Flex, useTheme, useDisclosure } from '@chakra-ui/react';
|
||||
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
|
||||
import { useSearchTestStore, SearchTestStoreItemType } from '@/web/core/dataset/store/searchTest';
|
||||
import { postSearchText } from '@/web/core/dataset/api';
|
||||
@@ -36,10 +22,7 @@ import { useForm } from 'react-hook-form';
|
||||
import MySelect from '@fastgpt/web/components/common/MySelect';
|
||||
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
||||
import { fileDownload } from '@/web/common/file/utils';
|
||||
import { readCsvContent } from '@fastgpt/web/common/file/read/csv';
|
||||
import { delay } from '@fastgpt/global/common/system/utils';
|
||||
import QuoteItem from '@/components/core/dataset/QuoteItem';
|
||||
import { ModuleInputKeyEnum } from '@fastgpt/global/core/module/constants';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import SearchParamsTip from '@/components/core/dataset/SearchParamsTip';
|
||||
|
||||
@@ -134,34 +117,6 @@ const Test = ({ datasetId }: { datasetId: string }) => {
|
||||
});
|
||||
}
|
||||
});
|
||||
// const { mutate: onFileTest, isLoading: fileTestIsLoading } = useRequest({
|
||||
// mutationFn: async ({ searchParams }: FormType) => {
|
||||
// if (!selectFile) return Promise.reject('File is not selected');
|
||||
// const { data } = await readCsvContent({ file: selectFile });
|
||||
// const testList = data.slice(0, 100);
|
||||
// const results: SearchTestResponse[] = [];
|
||||
|
||||
// for await (const item of testList) {
|
||||
// try {
|
||||
// const result = await postSearchText({ datasetId, text: item[0].trim(), ...searchParams });
|
||||
// results.push(result);
|
||||
// } catch (error) {
|
||||
// await delay(500);
|
||||
// }
|
||||
// }
|
||||
|
||||
// return results;
|
||||
// },
|
||||
// onSuccess(res: SearchTestResponse[]) {
|
||||
// console.log(res);
|
||||
// },
|
||||
// onError(err) {
|
||||
// toast({
|
||||
// title: getErrText(err),
|
||||
// status: 'error'
|
||||
// });
|
||||
// }
|
||||
// });
|
||||
|
||||
const onSelectFile = async (files: File[]) => {
|
||||
const file = files[0];
|
||||
|
||||
@@ -101,7 +101,9 @@ const Standard = ({
|
||||
{t('support.wallet.subscription.Sub plan')}
|
||||
</Box>
|
||||
<Box mt={8} mb={10} color={'myGray.500'} fontSize={'lg'}>
|
||||
{t('support.wallet.subscription.Sub plan tip')}
|
||||
{t('support.wallet.subscription.Sub plan tip', {
|
||||
title: feConfigs?.systemTitle
|
||||
})}
|
||||
</Box>
|
||||
<Box>
|
||||
<RowTabs
|
||||
|
||||
@@ -13,7 +13,7 @@ import { checkTeamAiPointsAndLock } from './utils';
|
||||
import { checkInvalidChunkAndLock } from '@fastgpt/service/core/dataset/training/utils';
|
||||
import { addMinutes } from 'date-fns';
|
||||
import { countGptMessagesTokens } from '@fastgpt/global/common/string/tiktoken';
|
||||
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
|
||||
import { pushDataListToTrainingQueueByCollectionId } from '@fastgpt/service/core/dataset/training/controller';
|
||||
|
||||
const reduceQueue = () => {
|
||||
global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0;
|
||||
@@ -128,7 +128,7 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
|
||||
});
|
||||
|
||||
// get vector and insert
|
||||
const { insertLen } = await pushDataListToTrainingQueue({
|
||||
const { insertLen } = await pushDataListToTrainingQueueByCollectionId({
|
||||
teamId: data.teamId,
|
||||
tmbId: data.tmbId,
|
||||
collectionId: data.collectionId,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { GET, POST, PUT, DELETE } from '@/web/common/api/request';
|
||||
import { GET, POST } from '@/web/common/api/request';
|
||||
import type { UploadImgProps } from '@fastgpt/global/common/file/api.d';
|
||||
import { AxiosProgressEvent } from 'axios';
|
||||
|
||||
@@ -8,10 +8,16 @@ export const postUploadFiles = (
|
||||
data: FormData,
|
||||
onUploadProgress: (progressEvent: AxiosProgressEvent) => void
|
||||
) =>
|
||||
POST<string[]>('/common/file/upload', data, {
|
||||
POST<string>('/common/file/upload', data, {
|
||||
timeout: 480000,
|
||||
onUploadProgress,
|
||||
headers: {
|
||||
'Content-Type': 'multipart/form-data; charset=utf-8'
|
||||
}
|
||||
});
|
||||
|
||||
export const getPreviewFileContent = (data: { fileId: string; csvFormat: boolean }) =>
|
||||
POST<{
|
||||
previewContent: string;
|
||||
totalLength: number;
|
||||
}>('/common/file/previewContent', data);
|
||||
|
||||
@@ -7,13 +7,13 @@ import { compressBase64Img, type CompressImgProps } from '@fastgpt/web/common/fi
|
||||
/**
|
||||
* upload file to mongo gridfs
|
||||
*/
|
||||
export const uploadFiles = ({
|
||||
files,
|
||||
export const uploadFile2DB = ({
|
||||
file,
|
||||
bucketName,
|
||||
metadata = {},
|
||||
percentListen
|
||||
}: {
|
||||
files: File[];
|
||||
file: File;
|
||||
bucketName: `${BucketNameEnum}`;
|
||||
metadata?: Record<string, any>;
|
||||
percentListen?: (percent: number) => void;
|
||||
@@ -21,9 +21,7 @@ export const uploadFiles = ({
|
||||
const form = new FormData();
|
||||
form.append('metadata', JSON.stringify(metadata));
|
||||
form.append('bucketName', bucketName);
|
||||
files.forEach((file) => {
|
||||
form.append('file', file, encodeURIComponent(file.name));
|
||||
});
|
||||
form.append('file', file, encodeURIComponent(file.name));
|
||||
return postUploadFiles(form, (e) => {
|
||||
if (!e.total) return;
|
||||
|
||||
|
||||
@@ -23,14 +23,18 @@ export const useSelectFile = (props?: {
|
||||
accept={fileType}
|
||||
multiple={multiple}
|
||||
onChange={(e) => {
|
||||
if (!e.target.files || e.target.files?.length === 0) return;
|
||||
if (e.target.files.length > maxCount) {
|
||||
return toast({
|
||||
const files = e.target.files;
|
||||
if (!files || files?.length === 0) return;
|
||||
|
||||
let fileList = Array.from(files);
|
||||
if (fileList.length > maxCount) {
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: t('common.file.Select file amount limit', { max: maxCount })
|
||||
});
|
||||
fileList = fileList.slice(0, maxCount);
|
||||
}
|
||||
onSelect(Array.from(e.target.files), openSign.current);
|
||||
onSelect(fileList, openSign.current);
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
|
||||
@@ -77,15 +77,15 @@ export const useSpeech = (props?: OutLinkChatAuthProps) => {
|
||||
let options = {};
|
||||
if (MediaRecorder.isTypeSupported('audio/webm')) {
|
||||
options = { type: 'audio/webm' };
|
||||
} else if (MediaRecorder.isTypeSupported('video/mp4')) {
|
||||
options = { type: 'video/mp4' };
|
||||
} else if (MediaRecorder.isTypeSupported('video/mp3')) {
|
||||
options = { type: 'video/mp3' };
|
||||
} else {
|
||||
console.error('no suitable mimetype found for this device');
|
||||
}
|
||||
const blob = new Blob(chunks, options);
|
||||
const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
|
||||
|
||||
formData.append('file', blob, 'recording.mp4');
|
||||
formData.append('file', blob, 'recording.mp3');
|
||||
formData.append(
|
||||
'data',
|
||||
JSON.stringify({
|
||||
|
||||
@@ -8,13 +8,19 @@ import type {
|
||||
} from '@/global/core/api/datasetReq.d';
|
||||
import type {
|
||||
CreateDatasetCollectionParams,
|
||||
CsvTableCreateDatasetCollectionParams,
|
||||
DatasetUpdateBody,
|
||||
FileIdCreateDatasetCollectionParams,
|
||||
LinkCreateDatasetCollectionParams,
|
||||
PostWebsiteSyncParams
|
||||
PostWebsiteSyncParams,
|
||||
TextCreateDatasetCollectionParams
|
||||
} from '@fastgpt/global/core/dataset/api.d';
|
||||
import type {
|
||||
GetTrainingQueueProps,
|
||||
GetTrainingQueueResponse,
|
||||
PostPreviewFilesChunksProps,
|
||||
PostPreviewFilesChunksResponse,
|
||||
PostPreviewTableChunksResponse,
|
||||
SearchTestProps,
|
||||
SearchTestResponse
|
||||
} from '@/global/core/dataset/api.d';
|
||||
@@ -23,10 +29,7 @@ import type {
|
||||
CreateDatasetParams,
|
||||
InsertOneDatasetDataProps
|
||||
} from '@/global/core/dataset/api.d';
|
||||
import type {
|
||||
PushDatasetDataProps,
|
||||
PushDatasetDataResponse
|
||||
} from '@fastgpt/global/core/dataset/api.d';
|
||||
import type { PushDatasetDataResponse } from '@fastgpt/global/core/dataset/api.d';
|
||||
import type { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
|
||||
import {
|
||||
DatasetCollectionSyncResultEnum,
|
||||
@@ -75,8 +78,14 @@ export const getDatasetCollectionById = (id: string) =>
|
||||
GET<DatasetCollectionItemType>(`/core/dataset/collection/detail`, { id });
|
||||
export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
|
||||
POST<string>(`/core/dataset/collection/create`, data);
|
||||
export const postCreateDatasetFileCollection = (data: FileIdCreateDatasetCollectionParams) =>
|
||||
POST<{ collectionId: string }>(`/core/dataset/collection/create/file`, data);
|
||||
export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
|
||||
POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);
|
||||
export const postCreateDatasetTextCollection = (data: TextCreateDatasetCollectionParams) =>
|
||||
POST<{ collectionId: string }>(`/core/dataset/collection/create/text`, data);
|
||||
export const postCreateDatasetCsvTableCollection = (data: CsvTableCreateDatasetCollectionParams) =>
|
||||
POST<{ collectionId: string }>(`/core/dataset/collection/create/csvTable`, data);
|
||||
|
||||
export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
|
||||
POST(`/core/dataset/collection/update`, data);
|
||||
@@ -95,12 +104,6 @@ export const getDatasetDataList = (data: GetDatasetDataListProps) =>
|
||||
export const getDatasetDataItemById = (id: string) =>
|
||||
GET<DatasetDataItemType>(`/core/dataset/data/detail`, { id });
|
||||
|
||||
/**
|
||||
* push data to training queue
|
||||
*/
|
||||
export const postChunks2Dataset = (data: PushDatasetDataProps) =>
|
||||
POST<PushDatasetDataResponse>(`/core/dataset/data/pushData`, data);
|
||||
|
||||
/**
|
||||
* insert one data to dataset (immediately insert)
|
||||
*/
|
||||
@@ -122,6 +125,8 @@ export const delOneDatasetDataById = (id: string) =>
|
||||
/* get length of system training queue */
|
||||
export const getTrainingQueueLen = (data: GetTrainingQueueProps) =>
|
||||
GET<GetTrainingQueueResponse>(`/core/dataset/training/getQueueLen`, data);
|
||||
export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
|
||||
POST<{ q: string; a: string }[]>('/core/dataset/file/getPreviewChunks', data);
|
||||
|
||||
/* ================== file ======================== */
|
||||
export const getFileViewUrl = (fileId: string) =>
|
||||
|
||||
@@ -1,200 +0,0 @@
|
||||
import MyBox from '@/components/common/MyBox';
|
||||
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
import { Box, FlexProps } from '@chakra-ui/react';
|
||||
import { formatFileSize } from '@fastgpt/global/common/file/tools';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import React, { DragEvent, useCallback, useState } from 'react';
|
||||
|
||||
export type SelectFileItemType = {
|
||||
folderPath: string;
|
||||
file: File;
|
||||
};
|
||||
|
||||
const FileSelector = ({
|
||||
fileType,
|
||||
multiple,
|
||||
maxCount,
|
||||
maxSize,
|
||||
isLoading,
|
||||
onSelectFile,
|
||||
...props
|
||||
}: {
|
||||
fileType: string;
|
||||
multiple?: boolean;
|
||||
maxCount?: number;
|
||||
maxSize?: number;
|
||||
isLoading?: boolean;
|
||||
onSelectFile: (e: SelectFileItemType[]) => any;
|
||||
} & FlexProps) => {
|
||||
const { t } = useTranslation();
|
||||
const { toast } = useToast();
|
||||
const { File, onOpen } = useSelectFile({
|
||||
fileType,
|
||||
multiple,
|
||||
maxCount
|
||||
});
|
||||
const [isDragging, setIsDragging] = useState(false);
|
||||
|
||||
const filterTypeReg = new RegExp(
|
||||
`(${fileType
|
||||
.split(',')
|
||||
.map((item) => item.trim())
|
||||
.join('|')})$`,
|
||||
'i'
|
||||
);
|
||||
|
||||
const selectFileCallback = useCallback(
|
||||
(files: SelectFileItemType[]) => {
|
||||
// size check
|
||||
if (!maxSize) {
|
||||
return onSelectFile(files);
|
||||
}
|
||||
const filterFiles = files.filter((item) => item.file.size <= maxSize);
|
||||
|
||||
if (filterFiles.length < files.length) {
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: t('common.file.Some file size exceeds limit', { maxSize: formatFileSize(maxSize) })
|
||||
});
|
||||
}
|
||||
|
||||
return onSelectFile(filterFiles);
|
||||
},
|
||||
[maxSize, onSelectFile, t, toast]
|
||||
);
|
||||
|
||||
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
|
||||
e.preventDefault();
|
||||
setIsDragging(true);
|
||||
};
|
||||
|
||||
const handleDragLeave = (e: DragEvent<HTMLDivElement>) => {
|
||||
e.preventDefault();
|
||||
setIsDragging(false);
|
||||
};
|
||||
|
||||
const handleDrop = async (e: DragEvent<HTMLDivElement>) => {
|
||||
e.preventDefault();
|
||||
setIsDragging(false);
|
||||
|
||||
const items = e.dataTransfer.items;
|
||||
const fileList: SelectFileItemType[] = [];
|
||||
|
||||
if (e.dataTransfer.items.length <= 1) {
|
||||
const traverseFileTree = async (item: any) => {
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
if (item.isFile) {
|
||||
item.file((file: File) => {
|
||||
const folderPath = (item.fullPath || '').split('/').slice(2, -1).join('/');
|
||||
|
||||
if (filterTypeReg.test(file.name)) {
|
||||
fileList.push({
|
||||
folderPath,
|
||||
file
|
||||
});
|
||||
}
|
||||
resolve();
|
||||
});
|
||||
} else if (item.isDirectory) {
|
||||
const dirReader = item.createReader();
|
||||
dirReader.readEntries(async (entries: any[]) => {
|
||||
for (let i = 0; i < entries.length; i++) {
|
||||
await traverseFileTree(entries[i]);
|
||||
}
|
||||
resolve();
|
||||
});
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
for await (const item of items) {
|
||||
await traverseFileTree(item.webkitGetAsEntry());
|
||||
}
|
||||
} else {
|
||||
const files = Array.from(e.dataTransfer.files);
|
||||
let isErr = files.some((item) => item.type === '');
|
||||
if (isErr) {
|
||||
return toast({
|
||||
title: t('file.upload error description'),
|
||||
status: 'error'
|
||||
});
|
||||
}
|
||||
|
||||
fileList.push(
|
||||
...files
|
||||
.filter((item) => filterTypeReg.test(item.name))
|
||||
.map((file) => ({
|
||||
folderPath: '',
|
||||
file
|
||||
}))
|
||||
);
|
||||
}
|
||||
|
||||
selectFileCallback(fileList.slice(0, maxCount));
|
||||
};
|
||||
|
||||
return (
|
||||
<MyBox
|
||||
isLoading={isLoading}
|
||||
display={'flex'}
|
||||
flexDirection={'column'}
|
||||
alignItems={'center'}
|
||||
justifyContent={'center'}
|
||||
px={3}
|
||||
py={[4, 7]}
|
||||
borderWidth={'1.5px'}
|
||||
borderStyle={'dashed'}
|
||||
borderRadius={'md'}
|
||||
cursor={'pointer'}
|
||||
_hover={{
|
||||
bg: 'primary.50',
|
||||
borderColor: 'primary.600'
|
||||
}}
|
||||
{...(isDragging
|
||||
? {
|
||||
borderColor: 'primary.600'
|
||||
}
|
||||
: {
|
||||
borderColor: 'borderColor.high'
|
||||
})}
|
||||
{...props}
|
||||
onDragEnter={handleDragEnter}
|
||||
onDragOver={(e) => e.preventDefault()}
|
||||
onDragLeave={handleDragLeave}
|
||||
onDrop={handleDrop}
|
||||
onClick={onOpen}
|
||||
>
|
||||
<MyIcon name={'common/uploadFileFill'} w={'32px'} />
|
||||
<Box fontWeight={'bold'}>
|
||||
{isDragging
|
||||
? t('file.Release the mouse to upload the file')
|
||||
: t('common.file.Select and drag file tip')}
|
||||
</Box>
|
||||
{/* file type */}
|
||||
<Box color={'myGray.500'} fontSize={'xs'}>
|
||||
{t('common.file.Support file type', { fileType })}
|
||||
</Box>
|
||||
<Box color={'myGray.500'} fontSize={'xs'}>
|
||||
{/* max count */}
|
||||
{maxCount && t('common.file.Support max count', { maxCount })}
|
||||
{/* max size */}
|
||||
{maxSize && t('common.file.Support max size', { maxSize: formatFileSize(maxSize) })}
|
||||
</Box>
|
||||
|
||||
<File
|
||||
onSelect={(files) =>
|
||||
selectFileCallback(
|
||||
files.map((file) => ({
|
||||
folderPath: '',
|
||||
file
|
||||
}))
|
||||
)
|
||||
}
|
||||
/>
|
||||
</MyBox>
|
||||
);
|
||||
};
|
||||
|
||||
export default React.memo(FileSelector);
|
||||
24
projects/app/src/web/core/dataset/type.d.ts
vendored
24
projects/app/src/web/core/dataset/type.d.ts
vendored
@@ -1,6 +1,6 @@
|
||||
import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { ImportProcessWayEnum } from './constants';
|
||||
import { ImportProcessWayEnum, ImportSourceTypeEnum } from './constants';
|
||||
import { UseFormReturn } from 'react-hook-form';
|
||||
|
||||
export type ImportDataComponentProps = {
|
||||
@@ -10,19 +10,27 @@ export type ImportDataComponentProps = {
|
||||
|
||||
export type ImportSourceItemType = {
|
||||
id: string;
|
||||
rawText: string;
|
||||
chunks: PushDatasetDataChunkProps[];
|
||||
chunkChars: number;
|
||||
sourceFolderPath?: string;
|
||||
sourceName: string;
|
||||
sourceSize?: string;
|
||||
icon: string;
|
||||
|
||||
createStatus: 'waiting' | 'creating' | 'finish';
|
||||
metadata?: Record<string, any>;
|
||||
errorMsg?: string;
|
||||
|
||||
// source
|
||||
sourceName: string;
|
||||
sourceSize?: string;
|
||||
icon: string;
|
||||
|
||||
// file
|
||||
isUploading?: boolean;
|
||||
uploadedFileRate?: number;
|
||||
dbFileId?: string; // 存储在数据库里的文件Id,这个 ID 还是图片和集合的 metadata 中 relateId
|
||||
file?: File;
|
||||
|
||||
// link
|
||||
link?: string;
|
||||
|
||||
// custom text
|
||||
rawText?: string;
|
||||
};
|
||||
|
||||
export type ImportSourceParamsType = UseFormReturn<
|
||||
|
||||
@@ -1,95 +1,5 @@
|
||||
import { getFileViewUrl, postChunks2Dataset } from '@/web/core/dataset/api';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { delay } from '@fastgpt/global/common/system/utils';
|
||||
import { getFileViewUrl } from '@/web/core/dataset/api';
|
||||
import { strIsLink } from '@fastgpt/global/common/string/tools';
|
||||
import type {
|
||||
FileCreateDatasetCollectionParams,
|
||||
PushDatasetDataChunkProps
|
||||
} from '@fastgpt/global/core/dataset/api.d';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { POST } from '@/web/common/api/request';
|
||||
|
||||
/* upload a file to create collection */
|
||||
export const fileCollectionCreate = ({
|
||||
file,
|
||||
metadata = {},
|
||||
data,
|
||||
percentListen
|
||||
}: {
|
||||
file: File;
|
||||
metadata?: Record<string, any>;
|
||||
data: FileCreateDatasetCollectionParams;
|
||||
percentListen: (percent: number) => void;
|
||||
}) => {
|
||||
const form = new FormData();
|
||||
form.append('data', JSON.stringify(data));
|
||||
form.append('metadata', JSON.stringify(metadata));
|
||||
form.append('bucketName', BucketNameEnum.dataset);
|
||||
form.append('file', file, encodeURIComponent(file.name));
|
||||
|
||||
return POST<string>(`/core/dataset/collection/create/file?datasetId=${data.datasetId}`, form, {
|
||||
timeout: 480000,
|
||||
onUploadProgress: (e) => {
|
||||
if (!e.total) return;
|
||||
|
||||
const percent = Math.round((e.loaded / e.total) * 100);
|
||||
percentListen && percentListen(percent);
|
||||
},
|
||||
headers: {
|
||||
'Content-Type': 'multipart/form-data; charset=utf-8'
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
export async function chunksUpload({
|
||||
billId,
|
||||
collectionId,
|
||||
trainingMode,
|
||||
chunks,
|
||||
prompt,
|
||||
rate = 50,
|
||||
onUploading
|
||||
}: {
|
||||
billId: string;
|
||||
collectionId: string;
|
||||
trainingMode: `${TrainingModeEnum}`;
|
||||
chunks: PushDatasetDataChunkProps[];
|
||||
prompt?: string;
|
||||
rate?: number;
|
||||
onUploading?: (rate: number) => void;
|
||||
}) {
|
||||
async function upload(data: PushDatasetDataChunkProps[]) {
|
||||
return postChunks2Dataset({
|
||||
collectionId,
|
||||
trainingMode,
|
||||
data,
|
||||
prompt,
|
||||
billId
|
||||
});
|
||||
}
|
||||
|
||||
let successInsert = 0;
|
||||
let retryTimes = 10;
|
||||
for (let i = 0; i < chunks.length; i += rate) {
|
||||
try {
|
||||
const uploadChunks = chunks.slice(i, i + rate);
|
||||
const { insertLen } = await upload(uploadChunks);
|
||||
if (onUploading) {
|
||||
onUploading(Math.round(((i + uploadChunks.length) / chunks.length) * 100));
|
||||
}
|
||||
successInsert += insertLen;
|
||||
} catch (error) {
|
||||
if (retryTimes === 0) {
|
||||
return Promise.reject(error);
|
||||
}
|
||||
await delay(1000);
|
||||
retryTimes--;
|
||||
i -= rate;
|
||||
}
|
||||
}
|
||||
|
||||
return { insertLen: successInsert };
|
||||
}
|
||||
|
||||
export async function getFileAndOpen(fileId: string) {
|
||||
if (strIsLink(fileId)) {
|
||||
|
||||
Reference in New Issue
Block a user