feat: Text check before synchronization (#689)

* fix: icon

* fix: web selector

* fix: web selector

* perf: link sync

* dev doc

* chmod doc

* perf: git intro

* 466 intro

* intro img

* add json editor (#5)

* team limit

* websync limit

* json editor

* text editor

* perf: search test

* change cq value type

* doc

* intro img

---------

Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
Archer
2024-01-04 23:19:24 +08:00
committed by GitHub
parent c2abbb579f
commit 828829011a
64 changed files with 1789 additions and 1489 deletions

View File

@@ -50,7 +50,8 @@ const defaultFeConfigs: FastGPTFeConfigsType = {
concatMd:
'* 项目开源地址: [FastGPT GitHub](https://github.com/labring/FastGPT)\n* 交流群: ![](https://doc.fastgpt.in/wechat-fastgpt.webp)',
limit: {
exportLimitMinutes: 0
exportDatasetLimitMinutes: 0,
websiteSyncLimitMinuted: 0
},
scripts: [],
favicon: '/favicon.ico'

View File

@@ -1,73 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoUser } from '@fastgpt/service/support/user/schema';
import { addLog } from '@fastgpt/service/common/system/log';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller';
/**
 * API handler: pre-flight check of the dataset-export rate limit.
 * Authenticates the caller (write permission on the dataset required),
 * then delegates to limitCheck. Responds 200 on success, 500 with the
 * error otherwise.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();

    const { datasetId } = req.query as { datasetId: string };
    if (!datasetId) {
      throw new Error('缺少参数');
    }

    // 凭证校验 — write permission on the target dataset
    const { userId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });

    await limitCheck({ datasetId, userId });

    jsonRes(res);
  } catch (err) {
    res.status(500);
    jsonRes(res, { code: 500, error: err });
  }
}
/**
 * Rejects when the user exported too recently (configured cooldown in
 * minutes) or when the dataset tree contains more rows than can be
 * exported at once. Resolves (returns undefined) when the export may proceed.
 */
export async function limitCheck({ datasetId, userId }: { datasetId: string; userId: string }) {
  const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId);

  const limitMinutes = global.feConfigs?.limit?.exportLimitMinutes || 0;
  const limitMinutesAgo = new Date(Date.now() - limitMinutes * 60 * 1000);

  // auth export times — the user qualifies when they have never exported,
  // or their last export happened before the cooldown window
  const authTimes = await MongoUser.findOne(
    {
      _id: userId,
      $or: [
        { 'limit.exportKbTime': { $exists: false } },
        { 'limit.exportKbTime': { $lte: limitMinutesAgo } }
      ]
    },
    '_id limit'
  );

  if (!authTimes) {
    const minutes = `${limitMinutes} 分钟`;
    return Promise.reject(`上次导出未到 ${minutes},每 ${minutes}仅可导出一次。`);
  }

  // auth max data — count rows across the whole dataset subtree
  const total = await MongoDatasetData.countDocuments({ datasetId: { $in: exportIds } });
  addLog.info(`export datasets: ${datasetId}`, { total });
  if (total > 100000) {
    return Promise.reject('数据量超出 10 万,无法导出');
  }
}

View File

@@ -2,14 +2,20 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { loadingOneChunkCollection } from '@fastgpt/service/core/dataset/collection/utils';
import {
getCollectionAndRawText,
reloadCollectionChunks
} from '@fastgpt/service/core/dataset/collection/utils';
import { delCollectionRelevantData } from '@fastgpt/service/core/dataset/data/controller';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
import {
DatasetCollectionSyncResultEnum,
DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constant';
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -32,6 +38,18 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
return Promise.reject(DatasetErrEnum.unLinkCollection);
}
const { rawText, isSameRawText } = await getCollectionAndRawText({
collection
});
if (isSameRawText) {
return jsonRes(res, {
data: DatasetCollectionSyncResultEnum.sameRaw
});
}
/* Not the same original text, create and reload */
const vectorModelData = getVectorModel(collection.datasetId.vectorModel);
const agentModelData = getQAModel(collection.datasetId.agentModel);
// create training bill
@@ -45,26 +63,27 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
});
// create a collection and delete old
const { _id } = await MongoDatasetCollection.create({
parentId: collection.parentId,
const _id = await createOneCollection({
teamId: collection.teamId,
tmbId: collection.tmbId,
parentId: collection.parentId,
datasetId: collection.datasetId._id,
type: collection.type,
name: collection.name,
createTime: collection.createTime,
type: collection.type,
trainingType: collection.trainingType,
chunkSize: collection.chunkSize,
fileId: collection.fileId,
rawLink: collection.rawLink,
metadata: collection.metadata
metadata: collection.metadata,
createTime: collection.createTime
});
// start load
await loadingOneChunkCollection({
await reloadCollectionChunks({
collectionId: _id,
tmbId,
billId
billId,
rawText
});
// delete old collection
@@ -73,7 +92,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
fileIds: collection.fileId ? [collection.fileId] : []
});
jsonRes(res);
jsonRes(res, {
data: DatasetCollectionSyncResultEnum.success
});
} catch (err) {
jsonRes(res, {
code: 500,

View File

@@ -1,13 +1,15 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes, responseWriteController } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoUser } from '@fastgpt/service/support/user/schema';
import { addLog } from '@fastgpt/service/common/system/log';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller';
import { limitCheck } from './checkExportLimit';
import { withNextCors } from '@fastgpt/service/common/middle/cors';
import {
checkExportDatasetLimit,
updateExportDatasetLimit
} from '@fastgpt/service/support/user/utils';
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -21,11 +23,11 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
}
// 凭证校验
const { userId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });
const { teamId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });
await limitCheck({
userId,
datasetId
await checkExportDatasetLimit({
teamId,
limitMinutes: global.feConfigs?.limit?.exportDatasetLimitMinutes
});
const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId);
@@ -43,7 +45,9 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
datasetId: { $in: exportIds }
},
'q a'
).cursor();
)
.limit(50000)
.cursor();
const write = responseWriteController({
res,
@@ -59,12 +63,10 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
write(`\n"${q}","${a}"`);
});
cursor.on('end', async () => {
cursor.on('end', () => {
cursor.close();
res.end();
await MongoUser.findByIdAndUpdate(userId, {
'limit.exportKbTime': new Date()
});
updateExportDatasetLimit(teamId);
});
cursor.on('error', (err) => {

View File

@@ -0,0 +1,34 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { checkExportDatasetLimit } from '@fastgpt/service/support/user/utils';
/**
 * API handler: check whether the caller's team is still within the
 * dataset-export rate limit. Requires write permission on the dataset.
 * Responds 200 when the limit check passes, 500 with the error otherwise.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();

    const { datasetId } = req.query as { datasetId: string };
    if (!datasetId) {
      throw new Error('datasetId is required');
    }

    // 凭证校验 — write permission on the target dataset
    const { teamId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });

    await checkExportDatasetLimit({
      teamId,
      limitMinutes: global.feConfigs?.limit?.exportDatasetLimitMinutes
    });

    jsonRes(res);
  } catch (err) {
    res.status(500);
    jsonRes(res, { code: 500, error: err });
  }
}

View File

@@ -0,0 +1,27 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { checkWebSyncLimit } from '@fastgpt/service/support/user/utils';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
/**
 * API handler: check whether the caller's team is still within the
 * website-sync rate limit. Only a valid auth token is required.
 * Responds 200 when the limit check passes, 500 with the error otherwise.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();

    // 凭证校验
    const { teamId } = await authCert({ req, authToken: true });

    await checkWebSyncLimit({
      teamId,
      // NOTE(review): "Minuted" matches the config key used elsewhere in this commit
      limitMinutes: global.feConfigs?.limit?.websiteSyncLimitMinuted
    });

    jsonRes(res);
  } catch (err) {
    res.status(500);
    jsonRes(res, { code: 500, error: err });
  }
}