feat: Text check before synchronization (#689)
* fix: icon
* fix: web selector
* fix: web selector
* perf: link sync
* dev doc
* chomd doc
* perf: git intro
* 466 intro
* intro img
* add json editor (#5)
* team limit
* websync limit
* json editor
* text editor
* perf: search test
* change cq value type
* doc
* intro img

---------

Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
@@ -50,7 +50,8 @@ const defaultFeConfigs: FastGPTFeConfigsType = {
|
||||
concatMd:
|
||||
'* 项目开源地址: [FastGPT GitHub](https://github.com/labring/FastGPT)\n* 交流群: ',
|
||||
limit: {
|
||||
exportLimitMinutes: 0
|
||||
exportDatasetLimitMinutes: 0,
|
||||
websiteSyncLimitMinuted: 0
|
||||
},
|
||||
scripts: [],
|
||||
favicon: '/favicon.ico'
|
||||
|
||||
@@ -1,73 +0,0 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { MongoUser } from '@fastgpt/service/support/user/schema';
|
||||
import { addLog } from '@fastgpt/service/common/system/log';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
||||
import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller';
|
||||
|
||||
/**
 * Next.js API handler that checks whether the caller is currently allowed to
 * export the dataset named by the `datasetId` query parameter.
 *
 * Flow: connect to the database, require `datasetId`, authorize the caller
 * through `authDataset` (token auth, `per: 'w'`), then run `limitCheck`.
 * On success it answers with an empty `jsonRes`; any thrown or rejected value
 * is reported through `jsonRes` with code 500 after setting HTTP status 500.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
let { datasetId } = req.query as {
|
||||
datasetId: string;
|
||||
};
|
||||
|
||||
// The cast above is compile-time only, so the value is still validated at runtime.
if (!datasetId) {
|
||||
throw new Error('缺少参数');
|
||||
}
|
||||
|
||||
// Credential check
|
||||
const { userId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });
|
||||
|
||||
// Rejects (and therefore jumps to the catch below) when a limit is hit.
await limitCheck({
|
||||
datasetId,
|
||||
userId
|
||||
});
|
||||
|
||||
jsonRes(res);
|
||||
} catch (err) {
|
||||
res.status(500);
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error: err
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Verifies that `userId` may export the dataset tree rooted at `datasetId`.
 *
 * Two checks run in order:
 *  1. Time limit: the user document must have no `limit.exportKbTime`, or one
 *     that is at least `global.feConfigs.limit.exportLimitMinutes` minutes old
 *     (treated as 0 when unset).
 *  2. Size limit: the number of `MongoDatasetData` documents belonging to the
 *     ids returned by `findDatasetIdTreeByTopDatasetId` must not exceed 100000.
 *
 * @param datasetId - top dataset id whose id tree is counted
 * @param userId - `_id` of the user document carrying `limit.exportKbTime`
 * @returns a promise that resolves with no value when both checks pass and
 *          rejects with a string message (not an `Error`) when one fails
 */
export async function limitCheck({ datasetId, userId }: { datasetId: string; userId: string }) {
|
||||
const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId);
|
||||
|
||||
// Cut-off timestamp: an export recorded after this moment is still too recent.
const limitMinutesAgo = new Date(
|
||||
Date.now() - (global.feConfigs?.limit?.exportLimitMinutes || 0) * 60 * 1000
|
||||
);
|
||||
|
||||
// auth export times
// The query only matches when the user is outside the limit window, so a null
// result means "still limited" (it is also null if no user has this _id).
|
||||
const authTimes = await MongoUser.findOne(
|
||||
{
|
||||
_id: userId,
|
||||
$or: [
|
||||
{ 'limit.exportKbTime': { $exists: false } },
|
||||
{ 'limit.exportKbTime': { $lte: limitMinutesAgo } }
|
||||
]
|
||||
},
|
||||
'_id limit'
|
||||
);
|
||||
|
||||
if (!authTimes) {
|
||||
const minutes = `${global.feConfigs?.limit?.exportLimitMinutes || 0} 分钟`;
|
||||
return Promise.reject(`上次导出未到 ${minutes},每 ${minutes}仅可导出一次。`);
|
||||
}
|
||||
|
||||
// auth max data
|
||||
const total = await MongoDatasetData.countDocuments({
|
||||
datasetId: { $in: exportIds }
|
||||
});
|
||||
|
||||
addLog.info(`export datasets: ${datasetId}`, { total });
|
||||
|
||||
// Hard cap of 100000 data documents per export.
if (total > 100000) {
|
||||
return Promise.reject('数据量超出 10 万,无法导出');
|
||||
}
|
||||
}
|
||||
@@ -2,14 +2,20 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { loadingOneChunkCollection } from '@fastgpt/service/core/dataset/collection/utils';
|
||||
import {
|
||||
getCollectionAndRawText,
|
||||
reloadCollectionChunks
|
||||
} from '@fastgpt/service/core/dataset/collection/utils';
|
||||
import { delCollectionRelevantData } from '@fastgpt/service/core/dataset/data/controller';
|
||||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||||
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
|
||||
import {
|
||||
DatasetCollectionSyncResultEnum,
|
||||
DatasetCollectionTypeEnum
|
||||
} from '@fastgpt/global/core/dataset/constant';
|
||||
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
|
||||
import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
|
||||
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
|
||||
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
|
||||
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
@@ -32,6 +38,18 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
return Promise.reject(DatasetErrEnum.unLinkCollection);
|
||||
}
|
||||
|
||||
const { rawText, isSameRawText } = await getCollectionAndRawText({
|
||||
collection
|
||||
});
|
||||
|
||||
if (isSameRawText) {
|
||||
return jsonRes(res, {
|
||||
data: DatasetCollectionSyncResultEnum.sameRaw
|
||||
});
|
||||
}
|
||||
|
||||
/* Not the same original text, create and reload */
|
||||
|
||||
const vectorModelData = getVectorModel(collection.datasetId.vectorModel);
|
||||
const agentModelData = getQAModel(collection.datasetId.agentModel);
|
||||
// create training bill
|
||||
@@ -45,26 +63,27 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
});
|
||||
|
||||
// create a collection and delete old
|
||||
const { _id } = await MongoDatasetCollection.create({
|
||||
parentId: collection.parentId,
|
||||
const _id = await createOneCollection({
|
||||
teamId: collection.teamId,
|
||||
tmbId: collection.tmbId,
|
||||
parentId: collection.parentId,
|
||||
datasetId: collection.datasetId._id,
|
||||
type: collection.type,
|
||||
name: collection.name,
|
||||
createTime: collection.createTime,
|
||||
type: collection.type,
|
||||
trainingType: collection.trainingType,
|
||||
chunkSize: collection.chunkSize,
|
||||
fileId: collection.fileId,
|
||||
rawLink: collection.rawLink,
|
||||
metadata: collection.metadata
|
||||
metadata: collection.metadata,
|
||||
createTime: collection.createTime
|
||||
});
|
||||
|
||||
// start load
|
||||
await loadingOneChunkCollection({
|
||||
await reloadCollectionChunks({
|
||||
collectionId: _id,
|
||||
tmbId,
|
||||
billId
|
||||
billId,
|
||||
rawText
|
||||
});
|
||||
|
||||
// delete old collection
|
||||
@@ -73,7 +92,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
fileIds: collection.fileId ? [collection.fileId] : []
|
||||
});
|
||||
|
||||
jsonRes(res);
|
||||
jsonRes(res, {
|
||||
data: DatasetCollectionSyncResultEnum.success
|
||||
});
|
||||
} catch (err) {
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes, responseWriteController } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { MongoUser } from '@fastgpt/service/support/user/schema';
|
||||
import { addLog } from '@fastgpt/service/common/system/log';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
||||
import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller';
|
||||
import { limitCheck } from './checkExportLimit';
|
||||
import { withNextCors } from '@fastgpt/service/common/middle/cors';
|
||||
import {
|
||||
checkExportDatasetLimit,
|
||||
updateExportDatasetLimit
|
||||
} from '@fastgpt/service/support/user/utils';
|
||||
|
||||
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
@@ -21,11 +23,11 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
||||
}
|
||||
|
||||
// 凭证校验
|
||||
const { userId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });
|
||||
const { teamId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });
|
||||
|
||||
await limitCheck({
|
||||
userId,
|
||||
datasetId
|
||||
await checkExportDatasetLimit({
|
||||
teamId,
|
||||
limitMinutes: global.feConfigs?.limit?.exportDatasetLimitMinutes
|
||||
});
|
||||
|
||||
const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId);
|
||||
@@ -43,7 +45,9 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
||||
datasetId: { $in: exportIds }
|
||||
},
|
||||
'q a'
|
||||
).cursor();
|
||||
)
|
||||
.limit(50000)
|
||||
.cursor();
|
||||
|
||||
const write = responseWriteController({
|
||||
res,
|
||||
@@ -59,12 +63,10 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
||||
write(`\n"${q}","${a}"`);
|
||||
});
|
||||
|
||||
cursor.on('end', async () => {
|
||||
cursor.on('end', () => {
|
||||
cursor.close();
|
||||
res.end();
|
||||
await MongoUser.findByIdAndUpdate(userId, {
|
||||
'limit.exportKbTime': new Date()
|
||||
});
|
||||
updateExportDatasetLimit(teamId);
|
||||
});
|
||||
|
||||
cursor.on('error', (err) => {
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { checkExportDatasetLimit } from '@fastgpt/service/support/user/utils';
|
||||
|
||||
/**
 * Next.js API handler that runs the export-limit check for the dataset named
 * by the `datasetId` query parameter.
 *
 * Flow: connect to the database, require `datasetId`, authorize the caller
 * through `authDataset` (token auth, `per: 'w'`) to obtain `teamId`, then call
 * `checkExportDatasetLimit` with the configured `exportDatasetLimitMinutes`.
 * On success it answers with an empty `jsonRes`; any thrown or rejected value
 * is reported through `jsonRes` with code 500 after setting HTTP status 500.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
const { datasetId } = req.query as {
|
||||
datasetId: string;
|
||||
};
|
||||
|
||||
// The cast above is compile-time only, so the value is still validated at runtime.
if (!datasetId) {
|
||||
throw new Error('datasetId is required');
|
||||
}
|
||||
|
||||
// Credential check
|
||||
const { teamId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });
|
||||
|
||||
// NOTE(review): presumably rejects when the team exported too recently;
// the helper lives in support/user/utils and is not visible here. Confirm.
await checkExportDatasetLimit({
|
||||
teamId,
|
||||
limitMinutes: global.feConfigs?.limit?.exportDatasetLimitMinutes
|
||||
});
|
||||
|
||||
jsonRes(res);
|
||||
} catch (err) {
|
||||
res.status(500);
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error: err
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { checkWebSyncLimit } from '@fastgpt/service/support/user/utils';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
|
||||
/**
 * Next.js API handler that runs the website-sync limit check for the caller's
 * team.
 *
 * Flow: connect to the database, authenticate the caller through `authCert`
 * (token auth) to obtain `teamId`, then call `checkWebSyncLimit` with the
 * configured limit. On success it answers with an empty `jsonRes`; any thrown
 * or rejected value is reported through `jsonRes` with code 500 after setting
 * HTTP status 500.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
|
||||
// Credential check
|
||||
const { teamId } = await authCert({ req, authToken: true });
|
||||
|
||||
// NOTE(review): the config key is read as `websiteSyncLimitMinuted` (sic);
// keep the spelling in sync with the config definition if it is ever renamed.
await checkWebSyncLimit({
|
||||
teamId,
|
||||
limitMinutes: global.feConfigs?.limit?.websiteSyncLimitMinuted
|
||||
});
|
||||
|
||||
jsonRes(res);
|
||||
} catch (err) {
|
||||
res.status(500);
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error: err
|
||||
});
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user