4.6.3-website dataset (#532)

This commit is contained in:
Archer
2023-12-03 20:45:57 +08:00
committed by GitHub
parent b916183848
commit a9ae270335
122 changed files with 3793 additions and 1360 deletions

View File

@@ -1,7 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { delay } from '@/utils/tools';
import { delay } from '@fastgpt/global/common/system/utils';
import { PgClient } from '@fastgpt/service/common/pg';
import {
DatasetDataIndexTypeEnum,

View File

@@ -1,12 +1,9 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { delay } from '@/utils/tools';
import { delay } from '@fastgpt/global/common/system/utils';
import { PgClient } from '@fastgpt/service/common/pg';
import {
DatasetDataIndexTypeEnum,
PgDatasetTableName
} from '@fastgpt/global/core/dataset/constant';
import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';

View File

@@ -8,7 +8,7 @@ import {
} from '@fastgpt/service/support/user/team/controller';
import { MongoUser } from '@fastgpt/service/support/user/schema';
import { UserModelSchema } from '@fastgpt/global/support/user/type';
import { delay } from '@/utils/tools';
import { delay } from '@fastgpt/global/common/system/utils';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { PermissionTypeEnum } from '@fastgpt/global/support/permission/constant';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';

View File

@@ -1,7 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { delay } from '@/utils/tools';
import { delay } from '@fastgpt/global/common/system/utils';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoApp } from '@fastgpt/service/core/app/schema';
import { FlowNodeInputTypeEnum, FlowNodeTypeEnum } from '@fastgpt/global/core/module/node/constant';

View File

@@ -1,7 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { delay } from '@/utils/tools';
import { delay } from '@fastgpt/global/common/system/utils';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { jiebaSplit } from '@/service/core/dataset/utils';
@@ -17,10 +17,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
console.log(
'total',
await MongoDatasetData.countDocuments({ fullTextToken: { $exists: false } })
await MongoDatasetData.countDocuments({
fullTextToken: { $exists: false },
updateTime: { $lt: new Date() }
})
);
await initFullTextToken(limit);
await initFullTextToken(limit, new Date());
jsonRes(res, {
message: 'success'
@@ -34,9 +37,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
});
}
}
export async function initFullTextToken(limit = 50): Promise<any> {
export async function initFullTextToken(limit = 50, endDate: Date): Promise<any> {
try {
const dataList = await MongoDatasetData.find({ fullTextToken: { $exists: false } }, '_id q a')
const dataList = await MongoDatasetData.find(
{ fullTextToken: { $exists: false }, updateTime: { $lt: endDate } },
'_id q a'
)
.limit(limit)
.lean();
if (dataList.length === 0) return;
@@ -56,9 +62,9 @@ export async function initFullTextToken(limit = 50): Promise<any> {
success += result.filter((item) => item.status === 'fulfilled').length;
console.log(`success: ${success}`);
return initFullTextToken(limit);
return initFullTextToken(limit, endDate);
} catch (error) {
await delay(1000);
return initFullTextToken(limit);
return initFullTextToken(limit, endDate);
}
}

View File

@@ -0,0 +1,62 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { delay } from '@fastgpt/global/common/system/utils';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { jiebaSplit } from '@/service/core/dataset/utils';
let success = 0;
/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
const { limit = 50 } = req.body as { limit: number };
await authCert({ req, authRoot: true });
await connectToDatabase();
success = 0;
console.log('total', await MongoDatasetData.countDocuments({ inited: { $exists: false } }));
await initFullTextToken(limit);
jsonRes(res, {
message: 'success'
});
} catch (error) {
console.log(error);
jsonRes(res, {
code: 500,
error
});
}
}
export async function initFullTextToken(limit = 50): Promise<any> {
try {
const dataList = await MongoDatasetData.find({ inited: { $exists: false } }, '_id q a')
.limit(limit)
.lean();
if (dataList.length === 0) return;
const result = await Promise.allSettled(
dataList.map((item) => {
const text = item.q + (item.a || '');
const tokens = jiebaSplit({ text });
return MongoDatasetData.findByIdAndUpdate(item._id, {
$set: {
inited: true,
fullTextToken: tokens
}
});
})
);
success += result.filter((item) => item.status === 'fulfilled').length;
console.log(`success: ${success}`);
return initFullTextToken(limit);
} catch (error) {
await delay(1000);
return initFullTextToken(limit);
}
}

View File

@@ -4,7 +4,8 @@ import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import { DatasetStatusEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
let success = 0;
/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
@@ -15,32 +16,85 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
await connectToDatabase();
success = 0;
await MongoDatasetCollection.updateMany({}, [
await MongoDatasetCollection.updateMany({ createTime: { $exists: false } }, [
{
$set: {
createTime: '$updateTime'
}
}
]);
await MongoDatasetCollection.updateMany({ trainingType: { $exists: false } }, [
{
$set: {
createTime: '$updateTime',
trainingType: {
$cond: {
if: { $ifNull: ['$a', false] },
then: TrainingModeEnum.qa,
else: TrainingModeEnum.chunk
}
},
chunkSize: 0,
fileId: '$metadata.fileId',
}
}
}
]);
await MongoDatasetCollection.updateMany({ chunkSize: { $exists: false } }, [
{
$set: {
chunkSize: 0
}
}
]);
await MongoDatasetCollection.updateMany({ fileId: { $exists: false } }, [
{
$set: {
fileId: '$metadata.fileId'
}
}
]);
await MongoDatasetCollection.updateMany({ rawLink: { $exists: false } }, [
{
$set: {
rawLink: '$metadata.rawLink'
}
}
]);
await MongoDatasetData.updateMany(
{},
{ chunkIndex: { $exists: false } },
{
chunkIndex: 0
}
);
await MongoDatasetData.updateMany(
{ updateTime: { $exists: false } },
{
chunkIndex: 0,
updateTime: new Date()
}
);
await MongoDataset.updateMany(
{ status: { $exists: false } },
{
$set: {
status: DatasetStatusEnum.active
}
}
);
// dataset tags to intro
await MongoDataset.updateMany({ tags: { $exists: true } }, [
{
$set: {
intro: {
$reduce: {
input: '$tags',
initialValue: '',
in: { $concat: ['$$value', ' ', '$$this'] }
}
}
}
}
]);
jsonRes(res, {
message: 'success'
});

View File

@@ -0,0 +1,92 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { delFileById, getGFSCollection } from '@fastgpt/service/common/file/gridfs/controller';
import { addLog } from '@fastgpt/service/common/mongo/controller';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { delay } from '@fastgpt/global/common/system/utils';
/*
check dataset.files data. If there is no match in dataset.collections, delete it
*/
let deleteFileAmount = 0;
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
const {
startDay = 10,
endDay = 3,
limit = 30
} = req.body as { startDay?: number; endDay?: number; limit?: number };
await authCert({ req, authRoot: true });
await connectToDatabase();
// start: now - maxDay, end: now - 3 day
const start = new Date(Date.now() - startDay * 24 * 60 * 60 * 1000);
const end = new Date(Date.now() - endDay * 24 * 60 * 60 * 1000);
deleteFileAmount = 0;
checkFiles(start, end, limit);
jsonRes(res, {
message: 'success'
});
} catch (error) {
addLog.error(`check valid dataset files error`, error);
jsonRes(res, {
code: 500,
error
});
}
}
export async function checkFiles(start: Date, end: Date, limit: number) {
const collection = getGFSCollection('dataset');
const where = {
uploadDate: { $gte: start, $lte: end }
};
// 1. get all _id
const ids = await collection
.find(where, {
projection: {
_id: 1
}
})
.toArray();
console.log('total files', ids.length);
for (let i = 0; i < limit; i++) {
check(i);
}
async function check(index: number): Promise<any> {
const id = ids[index];
if (!id) {
console.log(`检测完成,共删除 ${deleteFileAmount} 个无效文件`);
return;
}
try {
const { _id } = id;
// 2. find fileId in dataset.collections
const hasCollection = await MongoDatasetCollection.countDocuments({ fileId: _id });
// 3. if not found, delete file
if (hasCollection === 0) {
await delFileById({ bucketName: 'dataset', fileId: String(_id) });
console.log('delete file', _id);
deleteFileAmount++;
}
index % 100 === 0 && console.log(index);
return check(index + limit);
} catch (error) {
console.log(error);
await delay(2000);
return check(index);
}
}
}

View File

@@ -2,8 +2,8 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authFileToken } from '@fastgpt/service/support/permission/controller';
import jschardet from 'jschardet';
import { getDownloadBuf, getFileById } from '@fastgpt/service/common/file/gridfs/controller';
import { detect } from 'jschardet';
import { getDownloadStream, getFileById } from '@fastgpt/service/common/file/gridfs/controller';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -11,24 +11,43 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const { token } = req.query as { token: string };
const { fileId, teamId, tmbId, bucketName } = await authFileToken(token);
const { fileId, bucketName } = await authFileToken(token);
if (!fileId) {
throw new Error('fileId is empty');
}
const [file, buffer] = await Promise.all([
const [file, encodeStream] = await Promise.all([
getFileById({ bucketName, fileId }),
getDownloadBuf({ bucketName, fileId })
getDownloadStream({ bucketName, fileId })
]);
const encoding = jschardet.detect(buffer)?.encoding;
// get encoding
let buffers: Buffer = Buffer.from([]);
for await (const chunk of encodeStream) {
buffers = Buffer.concat([buffers, chunk]);
if (buffers.length > 10) {
encodeStream.abort();
break;
}
}
const encoding = detect(buffers)?.encoding || 'utf-8';
res.setHeader('Content-Type', `${file.contentType}; charset=${encoding}`);
res.setHeader('Cache-Control', 'public, max-age=3600');
res.setHeader('Content-Disposition', `inline; filename="${encodeURIComponent(file.filename)}"`);
res.end(buffer);
const fileStream = await getDownloadStream({ bucketName, fileId });
fileStream.pipe(res);
fileStream.on('error', () => {
res.status(500).end();
});
fileStream.on('end', () => {
res.end();
});
} catch (error) {
jsonRes(res, {
code: 500,

View File

@@ -52,12 +52,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
/* start process */
const { responseData } = await dispatchModules({
res,
appId,
modules,
variables,
teamId,
tmbId,
user,
appId,
modules,
variables,
params: {
history,
userChatInput: prompt

View File

@@ -2,10 +2,11 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d';
import { getVectorModel } from '@/service/core/ai/model';
import type { DatasetListItemType } from '@fastgpt/global/core/dataset/type.d';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant';
/* get all dataset by teamId or tmbId */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
@@ -16,18 +17,23 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const datasets = await MongoDataset.find({
...mongoRPermission({ teamId, tmbId, role }),
type: 'dataset'
type: { $ne: DatasetTypeEnum.folder }
}).lean();
const data = datasets.map((item) => ({
...item,
_id: item._id,
parentId: item.parentId,
avatar: item.avatar,
name: item.name,
intro: item.intro,
type: item.type,
permission: item.permission,
vectorModel: getVectorModel(item.vectorModel),
agentModel: getQAModel(item.agentModel),
canWrite: String(item.tmbId) === tmbId,
isOwner: teamOwner || String(item.tmbId) === tmbId
}));
jsonRes<DatasetItemType[]>(res, {
jsonRes<DatasetListItemType[]>(res, {
data
});
} catch (err) {

View File

@@ -4,15 +4,10 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type { CreateDatasetCollectionParams } from '@/global/core/api/datasetReq.d';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import {
TrainingModeEnum,
DatasetCollectionTypeEnum,
DatasetCollectionTrainingModeEnum
} from '@fastgpt/global/core/dataset/constant';
import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/user';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -42,68 +37,3 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
});
}
}
export async function createOneCollection({
name,
parentId,
datasetId,
type,
trainingType = DatasetCollectionTrainingModeEnum.manual,
chunkSize = 0,
fileId,
rawLink,
teamId,
tmbId
}: CreateDatasetCollectionParams & { teamId: string; tmbId: string }) {
const { _id } = await MongoDatasetCollection.create({
name,
teamId,
tmbId,
datasetId,
parentId: parentId || null,
type,
trainingType,
chunkSize,
fileId,
rawLink
});
// create default collection
if (type === DatasetCollectionTypeEnum.folder) {
await createDefaultCollection({
datasetId,
parentId: _id,
teamId,
tmbId
});
}
return _id;
}
// create default collection
export function createDefaultCollection({
name = '手动录入',
datasetId,
parentId,
teamId,
tmbId
}: {
name?: '手动录入' | '手动标注';
datasetId: string;
parentId?: string;
teamId: string;
tmbId: string;
}) {
return MongoDatasetCollection.create({
name,
teamId,
tmbId,
datasetId,
parentId,
type: DatasetCollectionTypeEnum.virtual,
trainingType: DatasetCollectionTrainingModeEnum.manual,
chunkSize: 0,
updateTime: new Date('2099')
});
}

View File

@@ -1,13 +1,10 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { findCollectionAndChild } from '@fastgpt/service/core/dataset/collection/utils';
import { delDataByCollectionId } from '@/service/core/dataset/data/controller';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { delCollectionRelevantData } from '@fastgpt/service/core/dataset/data/controller';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { delFileById } from '@fastgpt/service/common/file/gridfs/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -19,7 +16,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
throw new Error('CollectionIdId is required');
}
const { teamId } = await authDatasetCollection({
await authDatasetCollection({
req,
authToken: true,
collectionId,
@@ -30,26 +27,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const collections = await findCollectionAndChild(collectionId, '_id metadata');
const delIdList = collections.map((item) => item._id);
// delete training data
await MongoDatasetTraining.deleteMany({
collectionId: { $in: delIdList },
teamId
// delete
await delCollectionRelevantData({
collectionIds: delIdList,
fileIds: collections.map((item) => String(item.metadata?.fileId)).filter(Boolean)
});
// delete pg data
await delDataByCollectionId({ collectionIds: delIdList });
// delete file
await Promise.all(
collections.map((collection) => {
if (!collection?.fileId) return;
return delFileById({
bucketName: BucketNameEnum.dataset,
fileId: collection.fileId
});
})
);
// delete collection
await MongoDatasetCollection.deleteMany({
_id: { $in: delIdList }

View File

@@ -6,7 +6,10 @@ import { Types } from '@fastgpt/service/common/mongo';
import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';
import type { GetDatasetCollectionsProps } from '@/global/core/api/datasetReq';
import { PagingData } from '@/types';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import {
DatasetColCollectionName,
MongoDatasetCollection
} from '@fastgpt/service/core/dataset/collection/schema';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
import { startQueue } from '@/service/utils/tools';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
@@ -45,7 +48,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// not count data amount
if (simple) {
const collections = await MongoDatasetCollection.find(match, '_id name type parentId')
const collections = await MongoDatasetCollection.find(match, '_id parentId type name')
.sort({
updateTime: -1
})
@@ -72,6 +75,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
{
$match: match
},
// count training data
{
$lookup: {
from: DatasetTrainingCollectionName,
@@ -89,6 +93,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
as: 'trainings'
}
},
// count collection total data
{
$lookup: {
from: DatasetDataCollectionName,
@@ -106,7 +111,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
as: 'datas'
}
},
// 统计子集合的数量和子训练的数量
{
$project: {
_id: 1,
@@ -114,6 +118,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
tmbId: 1,
name: 1,
type: 1,
status: 1,
updateTime: 1,
dataAmount: { $size: '$datas' },
trainingAmount: { $size: '$trainings' },

View File

@@ -3,20 +3,20 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import type { CreateDatasetParams } from '@/global/core/dataset/api.d';
import { createDefaultCollection } from './collection/create';
import { createDefaultCollection } from '@fastgpt/service/core/dataset/collection/controller';
import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/user';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const {
parentId,
name,
tags,
type,
avatar,
vectorModel = global.vectorModels[0].model,
agentModel,
parentId,
type
agentModel
} = req.body as CreateDatasetParams;
// 凭证校验
@@ -26,7 +26,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
name,
teamId,
tmbId,
tags,
vectorModel,
agentModel,
avatar,
@@ -34,11 +33,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
type
});
await createDefaultCollection({
datasetId: _id,
teamId,
tmbId
});
if (type === DatasetTypeEnum.dataset) {
await createDefaultCollection({
datasetId: _id,
teamId,
tmbId
});
}
jsonRes(res, { data: _id });
} catch (err) {

View File

@@ -3,7 +3,7 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { withNextCors } from '@fastgpt/service/common/middle/cors';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { deleteDataByDataId } from '@/service/core/dataset/data/controller';
import { delDatasetDataByDataId } from '@fastgpt/service/core/dataset/data/controller';
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -19,7 +19,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
// 凭证校验
await authDatasetData({ req, authToken: true, dataId, per: 'w' });
await deleteDataByDataId(dataId);
await delDatasetDataByDataId(dataId);
jsonRes(res, {
data: 'success'

View File

@@ -1,13 +1,10 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { delDatasetFiles } from '@fastgpt/service/core/dataset/file/controller';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { delDataByDatasetId } from '@/service/core/dataset/data/controller';
import { delDatasetRelevantData } from '@fastgpt/service/core/dataset/data/controller';
import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -25,21 +22,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const deletedIds = await findDatasetIdTreeByTopDatasetId(id);
// delete training data(There could be a training mission)
await MongoDatasetTraining.deleteMany({
datasetId: { $in: deletedIds }
});
// delete all dataset.data and pg data
await delDataByDatasetId({ datasetIds: deletedIds });
// delete related files
await delDatasetFiles({ datasetId: id });
// delete collections
await MongoDatasetCollection.deleteMany({
datasetId: { $in: deletedIds }
});
await delDatasetRelevantData({ datasetIds: deletedIds });
// delete dataset data
await MongoDataset.deleteMany({

View File

@@ -1,12 +1,12 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d';
import type { DatasetListItemType } from '@fastgpt/global/core/dataset/type.d';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { getVectorModel } from '@/service/core/ai/model';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -27,16 +27,21 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
.lean();
const data = await Promise.all(
datasets.map(async (item) => ({
...item,
vectorModel: getVectorModel(item.vectorModel),
agentModel: getQAModel(item.agentModel),
datasets.map<DatasetListItemType>((item) => ({
_id: item._id,
parentId: item.parentId,
avatar: item.avatar,
name: item.name,
intro: item.intro,
type: item.type,
permission: item.permission,
canWrite,
isOwner: teamOwner || String(item.tmbId) === tmbId
isOwner: teamOwner || String(item.tmbId) === tmbId,
vectorModel: getVectorModel(item.vectorModel)
}))
);
jsonRes<DatasetItemType[]>(res, {
jsonRes<DatasetListItemType[]>(res, {
data
});
} catch (err) {

View File

@@ -44,7 +44,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
});
// push bill
pushGenerateVectorBill({
const { total } = pushGenerateVectorBill({
teamId,
tmbId,
tokenLen: tokenLen,
@@ -54,11 +54,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
if (apikey) {
updateApiKeyUsage({
apikey,
usage: countModelPrice({
model: dataset.vectorModel,
tokens: tokenLen,
type: ModelTypeEnum.vector
})
usage: total
});
}

View File

@@ -2,14 +2,14 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import type { DatasetUpdateParams } from '@/global/core/api/datasetReq.d';
import type { DatasetUpdateBody } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { id, parentId, name, avatar, tags, permission, agentModel } =
req.body as DatasetUpdateParams;
const { id, parentId, name, avatar, tags, permission, agentModel, websiteConfig, status } =
req.body as DatasetUpdateBody;
if (!id) {
throw new Error('缺少参数');
@@ -28,7 +28,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
...(avatar && { avatar }),
...(tags && { tags }),
...(permission && { permission }),
...(agentModel && { agentModel: agentModel.model })
...(agentModel && { agentModel: agentModel.model }),
...(websiteConfig && { websiteConfig }),
...(status && { status })
}
);

View File

@@ -1,73 +0,0 @@
// pages/api/fetchContent.ts
import { NextApiRequest, NextApiResponse } from 'next';
import axios from 'axios';
import { JSDOM } from 'jsdom';
import { Readability } from '@mozilla/readability';
import { jsonRes } from '@fastgpt/service/common/response';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import type { FetchResultItem } from '@fastgpt/global/common/plugin/types/pluginRes.d';
import { simpleText } from '@fastgpt/global/common/string/tools';
import { connectToDatabase } from '@/service/mongo';
export type UrlFetchResponse = FetchResultItem[];
const fetchContent = async (req: NextApiRequest, res: NextApiResponse) => {
try {
await connectToDatabase();
let { urlList = [] } = req.body as { urlList: string[] };
if (!urlList || urlList.length === 0) {
throw new Error('urlList is empty');
}
await authCert({ req, authToken: true });
urlList = urlList.filter((url) => /^(http|https):\/\/[^ "]+$/.test(url));
const response = (
await Promise.allSettled(
urlList.map(async (url) => {
try {
const fetchRes = await axios.get(url, {
timeout: 30000
});
const dom = new JSDOM(fetchRes.data, {
url,
contentType: 'text/html'
});
const reader = new Readability(dom.window.document);
const article = reader.parse();
const content = article?.textContent || '';
return {
url,
content: simpleText(`${article?.title}\n${content}`)
};
} catch (error) {
return {
url,
content: ''
};
}
})
)
)
.filter((item) => item.status === 'fulfilled')
.map((item: any) => item.value)
.filter((item) => item.content);
jsonRes<UrlFetchResponse>(res, {
data: response
});
} catch (error: any) {
jsonRes(res, {
code: 500,
error: error
});
}
};
export default fetchContent;

View File

@@ -24,13 +24,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
source: BillSourceEnum.training,
list: [
{
moduleName: '索引生成',
moduleName: 'wallet.moduleName.index',
model: vectorModelData.name,
amount: 0,
tokenLen: 0
},
{
moduleName: 'QA 拆分',
moduleName: 'wallet.moduleName.qa',
model: agentModelData.name,
amount: 0,
tokenLen: 0

View File

@@ -0,0 +1,34 @@
// pages/api/fetchContent.ts
import { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { connectToDatabase } from '@/service/mongo';
import { UrlFetchParams, UrlFetchResponse } from '@fastgpt/global/common/file/api.d';
import { urlsFetch } from '@fastgpt/global/common/file/tools';
const fetchContent = async (req: NextApiRequest, res: NextApiResponse) => {
try {
await connectToDatabase();
let { urlList = [], selector } = req.body as UrlFetchParams;
if (!urlList || urlList.length === 0) {
throw new Error('urlList is empty');
}
await authCert({ req, authToken: true });
jsonRes<UrlFetchResponse>(res, {
data: await urlsFetch({
urlList,
selector
})
});
} catch (error: any) {
jsonRes(res, {
code: 500,
error: error
});
}
};
export default fetchContent;

View File

@@ -14,10 +14,10 @@ import { getChatHistory } from './getHistory';
import { saveChat } from '@/service/utils/chat/saveChat';
import { responseWrite } from '@fastgpt/service/common/response';
import { pushChatBill } from '@/service/support/wallet/bill/push';
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
import { authOutLinkChat } from '@/service/support/permission/auth/outLink';
import { pushResult2Remote, updateOutLinkUsage } from '@fastgpt/service/support/outLink/tools';
import requestIp from 'request-ip';
import { getBillSourceByAuthType } from '@fastgpt/global/support/wallet/bill/tools';
import { selectShareResponse } from '@/utils/service/core/chat';
import { updateApiKeyUsage } from '@fastgpt/service/support/openapi/tools';
@@ -276,11 +276,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
appId: app._id,
teamId: user.team.teamId,
tmbId: user.team.tmbId,
source: (() => {
if (authType === 'apikey') return BillSourceEnum.api;
if (shareId) return BillSourceEnum.shareLink;
return BillSourceEnum.fastgpt;
})(),
source: getBillSourceByAuthType({ shareId, authType }),
response: responseData
});

View File

@@ -6,6 +6,8 @@ import { pushGenerateVectorBill } from '@/service/support/wallet/bill/push';
import { connectToDatabase } from '@/service/mongo';
import { authTeamBalance } from '@/service/support/permission/auth/bill';
import { getVectorsByText, GetVectorProps } from '@/service/core/ai/vector';
import { updateApiKeyUsage } from '@fastgpt/service/support/openapi/tools';
import { getBillSourceByAuthType } from '@fastgpt/global/support/wallet/bill/tools';
type Props = GetVectorProps & {
billId?: string;
@@ -15,24 +17,21 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
try {
let { input, model, billId } = req.body as Props;
await connectToDatabase();
const { teamId, tmbId } = await authCert({ req, authToken: true, authApiKey: true });
if (!Array.isArray(input) || typeof input !== 'string') {
if (!Array.isArray(input) && typeof input !== 'string') {
throw new Error('input is nor array or string');
}
const { teamId, tmbId, apikey, authType } = await authCert({
req,
authToken: true,
authApiKey: true
});
await authTeamBalance(teamId);
const { tokenLen, vectors } = await getVectorsByText({ input, model });
pushGenerateVectorBill({
teamId,
tmbId,
tokenLen: tokenLen,
model,
billId
});
jsonRes(res, {
data: {
object: 'list',
@@ -48,6 +47,22 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
}
}
});
const { total } = pushGenerateVectorBill({
teamId,
tmbId,
tokenLen,
model,
billId,
source: getBillSourceByAuthType({ authType })
});
if (apikey) {
updateApiKeyUsage({
apikey,
usage: total
});
}
} catch (err) {
console.log(err);
jsonRes(res, {

View File

@@ -7,12 +7,13 @@ import { connectToDatabase } from '@/service/mongo';
import { authTeamBalance } from '@/service/support/permission/auth/bill';
import { PostReRankProps, PostReRankResponse } from '@fastgpt/global/core/ai/api';
import { reRankRecall } from '@/service/core/ai/rerank';
import { updateApiKeyUsage } from '@fastgpt/service/support/openapi/tools';
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let { query, inputs } = req.body as PostReRankProps;
try {
await connectToDatabase();
const { teamId, tmbId } = await authCert({
const { teamId, tmbId, apikey } = await authCert({
req,
authApiKey: true
});
@@ -23,12 +24,19 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
const result = await reRankRecall({ query, inputs });
pushReRankBill({
const { total } = pushReRankBill({
teamId,
tmbId,
source: 'api'
});
if (apikey) {
updateApiKeyUsage({
apikey,
usage: total
});
}
jsonRes<PostReRankResponse>(res, {
data: result
});