4.6.7 first pr (#726)
This commit is contained in:
@@ -3,7 +3,7 @@ import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { uploadFile } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { getUploadModel, removeFilesByPaths } from '@fastgpt/service/common/file/upload/multer';
|
||||
import { getUploadModel } from '@fastgpt/service/common/file/multer';
|
||||
|
||||
/**
|
||||
* Creates the multer uploader
|
||||
@@ -16,12 +16,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
let filePaths: string[] = [];
|
||||
|
||||
try {
|
||||
const { userId, teamId, tmbId } = await authCert({ req, authToken: true });
|
||||
|
||||
const { files, bucketName, metadata } = await upload.doUpload(req, res);
|
||||
|
||||
filePaths = files.map((file) => file.path);
|
||||
|
||||
await connectToDatabase();
|
||||
const { userId, teamId, tmbId } = await authCert({ req, authToken: true });
|
||||
|
||||
if (!bucketName) {
|
||||
throw new Error('bucketName is empty');
|
||||
@@ -53,8 +54,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
error
|
||||
});
|
||||
}
|
||||
|
||||
removeFilesByPaths(filePaths);
|
||||
}
|
||||
|
||||
export const config = {
|
||||
|
||||
@@ -8,15 +8,13 @@ import { UploadImgProps } from '@fastgpt/global/common/file/api';
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
const { base64Img, expiredTime, metadata, shareId } = req.body as UploadImgProps;
|
||||
const { shareId, ...body } = req.body as UploadImgProps;
|
||||
|
||||
const { teamId } = await authCertOrShareId({ req, shareId, authToken: true });
|
||||
|
||||
const data = await uploadMongoImg({
|
||||
teamId,
|
||||
base64Img,
|
||||
expiredTime,
|
||||
metadata
|
||||
...body
|
||||
});
|
||||
|
||||
jsonRes(res, { data });
|
||||
|
||||
@@ -59,39 +59,44 @@ const defaultFeConfigs: FastGPTFeConfigsType = {
|
||||
};
|
||||
|
||||
export async function getInitConfig() {
|
||||
if (global.systemInitd) return;
|
||||
global.systemInitd = true;
|
||||
|
||||
try {
|
||||
if (global.feConfigs) return;
|
||||
await connectToDatabase();
|
||||
|
||||
initGlobal();
|
||||
await initSystemConfig();
|
||||
await Promise.all([
|
||||
initGlobal(),
|
||||
initSystemConfig(),
|
||||
getSimpleModeTemplates(),
|
||||
getSystemVersion(),
|
||||
getSystemPlugin()
|
||||
]);
|
||||
|
||||
console.log({
|
||||
simpleModeTemplates: global.simpleModeTemplates,
|
||||
communityPlugins: global.communityPlugins
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Load init config error', error);
|
||||
global.systemInitd = false;
|
||||
|
||||
if (!global.feConfigs) {
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
await getSimpleModeTemplates();
|
||||
}
|
||||
|
||||
getSystemVersion();
|
||||
getSystemPlugin();
|
||||
export function initGlobal() {
|
||||
if (global.communityPlugins) return;
|
||||
|
||||
console.log({
|
||||
feConfigs: global.feConfigs,
|
||||
systemEnv: global.systemEnv,
|
||||
chatModels: global.chatModels,
|
||||
qaModels: global.qaModels,
|
||||
cqModels: global.cqModels,
|
||||
extractModels: global.extractModels,
|
||||
qgModels: global.qgModels,
|
||||
vectorModels: global.vectorModels,
|
||||
reRankModels: global.reRankModels,
|
||||
audioSpeechModels: global.audioSpeechModels,
|
||||
whisperModel: global.whisperModel,
|
||||
simpleModeTemplates: global.simpleModeTemplates,
|
||||
communityPlugins: global.communityPlugins
|
||||
});
|
||||
global.communityPlugins = [];
|
||||
global.simpleModeTemplates = [];
|
||||
global.qaQueueLen = global.qaQueueLen ?? 0;
|
||||
global.vectorQueueLen = global.vectorQueueLen ?? 0;
|
||||
// init tikToken
|
||||
getTikTokenEnc();
|
||||
initHttpAgent();
|
||||
}
|
||||
|
||||
export async function initSystemConfig() {
|
||||
@@ -137,19 +142,24 @@ export async function initSystemConfig() {
|
||||
global.reRankModels = config.reRankModels;
|
||||
global.audioSpeechModels = config.audioSpeechModels;
|
||||
global.whisperModel = config.whisperModel;
|
||||
}
|
||||
|
||||
export function initGlobal() {
|
||||
global.communityPlugins = [];
|
||||
global.simpleModeTemplates = [];
|
||||
global.qaQueueLen = global.qaQueueLen ?? 0;
|
||||
global.vectorQueueLen = global.vectorQueueLen ?? 0;
|
||||
// init tikToken
|
||||
getTikTokenEnc();
|
||||
initHttpAgent();
|
||||
console.log({
|
||||
feConfigs: global.feConfigs,
|
||||
systemEnv: global.systemEnv,
|
||||
chatModels: global.chatModels,
|
||||
qaModels: global.qaModels,
|
||||
cqModels: global.cqModels,
|
||||
extractModels: global.extractModels,
|
||||
qgModels: global.qgModels,
|
||||
vectorModels: global.vectorModels,
|
||||
reRankModels: global.reRankModels,
|
||||
audioSpeechModels: global.audioSpeechModels,
|
||||
whisperModel: global.whisperModel
|
||||
});
|
||||
}
|
||||
|
||||
export function getSystemVersion() {
|
||||
if (global.systemVersion) return;
|
||||
try {
|
||||
if (process.env.NODE_ENV === 'development') {
|
||||
global.systemVersion = process.env.npm_package_version || '0.0.0';
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { initSystemConfig } from './getInitData';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
await authCert({ req, authRoot: true });
|
||||
await initSystemConfig();
|
||||
|
||||
console.log(`refresh config`);
|
||||
console.log({
|
||||
chatModels: global.chatModels,
|
||||
qaModels: global.qaModels,
|
||||
cqModels: global.cqModels,
|
||||
extractModels: global.extractModels,
|
||||
qgModels: global.qgModels,
|
||||
vectorModels: global.vectorModels,
|
||||
reRankModels: global.reRankModels,
|
||||
audioSpeechModels: global.audioSpeechModels,
|
||||
whisperModel: global.whisperModel,
|
||||
feConfigs: global.feConfigs,
|
||||
systemEnv: global.systemEnv
|
||||
});
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
jsonRes(res);
|
||||
}
|
||||
@@ -29,6 +29,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
|
||||
await MongoChatItem.findOneAndUpdate(
|
||||
{
|
||||
chatId,
|
||||
dataId: chatItemId
|
||||
},
|
||||
{
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
Create one dataset collection
|
||||
*/
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import type { LinkCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
|
||||
import { TrainingModeEnum, DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
|
||||
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
|
||||
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
|
||||
import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
|
||||
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
|
||||
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
|
||||
import { reloadCollectionChunks } from '@fastgpt/service/core/dataset/collection/utils';
|
||||
import { startQueue } from '@/service/utils/tools';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
const {
|
||||
link,
|
||||
trainingType = TrainingModeEnum.chunk,
|
||||
chunkSize = 512,
|
||||
chunkSplitter,
|
||||
qaPrompt,
|
||||
...body
|
||||
} = req.body as LinkCreateDatasetCollectionParams;
|
||||
|
||||
const { teamId, tmbId, dataset } = await authDataset({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
datasetId: body.datasetId,
|
||||
per: 'w'
|
||||
});
|
||||
|
||||
// 1. check dataset limit
|
||||
await checkDatasetLimit({
|
||||
teamId,
|
||||
freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
|
||||
insertLen: predictDataLimitLength(trainingType, new Array(10))
|
||||
});
|
||||
|
||||
// 2. create collection
|
||||
const collectionId = await createOneCollection({
|
||||
...body,
|
||||
name: link,
|
||||
teamId,
|
||||
tmbId,
|
||||
type: DatasetCollectionTypeEnum.link,
|
||||
|
||||
trainingType,
|
||||
chunkSize,
|
||||
chunkSplitter,
|
||||
qaPrompt,
|
||||
|
||||
rawLink: link
|
||||
});
|
||||
|
||||
// 3. create bill and start sync
|
||||
const { billId } = await createTrainingBill({
|
||||
teamId,
|
||||
tmbId,
|
||||
appName: 'core.dataset.collection.Sync Collection',
|
||||
billSource: BillSourceEnum.training,
|
||||
vectorModel: getVectorModel(dataset.vectorModel).name,
|
||||
agentModel: getQAModel(dataset.agentModel).name
|
||||
});
|
||||
await reloadCollectionChunks({
|
||||
collectionId,
|
||||
tmbId,
|
||||
billId
|
||||
});
|
||||
|
||||
startQueue();
|
||||
|
||||
jsonRes(res, {
|
||||
data: { collectionId }
|
||||
});
|
||||
} catch (err) {
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error: err
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
Create one dataset collection
|
||||
*/
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import type { TextCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
|
||||
import { TrainingModeEnum, DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
|
||||
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
|
||||
import { pushDataToDatasetCollection } from '@/service/core/dataset/data/controller';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
const {
|
||||
text,
|
||||
trainingType = TrainingModeEnum.chunk,
|
||||
chunkSize = 512,
|
||||
chunkSplitter,
|
||||
qaPrompt,
|
||||
...body
|
||||
} = req.body as TextCreateDatasetCollectionParams;
|
||||
|
||||
const { teamId, tmbId } = await authDataset({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
datasetId: body.datasetId,
|
||||
per: 'w'
|
||||
});
|
||||
|
||||
// 1. split text to chunks
|
||||
const { chunks } = splitText2Chunks({
|
||||
text,
|
||||
chunkLen: chunkSize,
|
||||
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
|
||||
customReg: chunkSplitter ? [chunkSplitter] : [],
|
||||
countTokens: false
|
||||
});
|
||||
|
||||
// 2. check dataset limit
|
||||
await checkDatasetLimit({
|
||||
teamId,
|
||||
freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
|
||||
insertLen: predictDataLimitLength(trainingType, chunks)
|
||||
});
|
||||
|
||||
// 3. create collection
|
||||
const collectionId = await createOneCollection({
|
||||
...body,
|
||||
teamId,
|
||||
tmbId,
|
||||
type: DatasetCollectionTypeEnum.virtual,
|
||||
|
||||
trainingType,
|
||||
chunkSize,
|
||||
chunkSplitter,
|
||||
qaPrompt,
|
||||
|
||||
hashRawText: hashStr(text),
|
||||
rawTextLength: text.length
|
||||
});
|
||||
|
||||
// 4. push chunks to training queue
|
||||
const insertResults = await pushDataToDatasetCollection({
|
||||
teamId,
|
||||
tmbId,
|
||||
collectionId,
|
||||
trainingMode: trainingType,
|
||||
data: chunks.map((text, index) => ({
|
||||
q: text,
|
||||
chunkIndex: index
|
||||
}))
|
||||
});
|
||||
|
||||
jsonRes(res, {
|
||||
data: { collectionId, results: insertResults }
|
||||
});
|
||||
} catch (err) {
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error: err
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -5,7 +5,6 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
|
||||
import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/user';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
|
||||
|
||||
@@ -14,13 +13,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
await connectToDatabase();
|
||||
const body = req.body as CreateDatasetCollectionParams;
|
||||
|
||||
// auth. not visitor and dataset is public
|
||||
const { teamId, tmbId } = await authUserNotVisitor({ req, authToken: true });
|
||||
await authDataset({
|
||||
const { teamId, tmbId } = await authDataset({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
datasetId: body.datasetId,
|
||||
per: 'r'
|
||||
per: 'w'
|
||||
});
|
||||
|
||||
jsonRes(res, {
|
||||
|
||||
@@ -4,13 +4,12 @@ import { connectToDatabase } from '@/service/mongo';
|
||||
import { findCollectionAndChild } from '@fastgpt/service/core/dataset/collection/utils';
|
||||
import { delCollectionRelevantData } from '@fastgpt/service/core/dataset/data/controller';
|
||||
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
|
||||
const { collectionId } = req.query as { collectionId: string };
|
||||
const { id: collectionId } = req.query as { id: string };
|
||||
|
||||
if (!collectionId) {
|
||||
throw new Error('CollectionIdId is required');
|
||||
@@ -19,6 +18,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
await authDatasetCollection({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
collectionId,
|
||||
per: 'w'
|
||||
});
|
||||
|
||||
@@ -22,6 +22,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
const { collection, canWrite } = await authDatasetCollection({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
collectionId: id,
|
||||
per: 'r'
|
||||
});
|
||||
|
||||
@@ -11,7 +11,6 @@ import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant
|
||||
import { startQueue } from '@/service/utils/tools';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { DatasetDataCollectionName } from '@fastgpt/service/core/dataset/data/schema';
|
||||
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
@@ -27,12 +26,19 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
simple = false
|
||||
} = req.body as GetDatasetCollectionsProps;
|
||||
searchText = searchText?.replace(/'/g, '');
|
||||
pageSize = Math.min(pageSize, 30);
|
||||
|
||||
// auth dataset and get my role
|
||||
const { tmbId } = await authDataset({ req, authToken: true, datasetId, per: 'r' });
|
||||
const { canWrite } = await authUserRole({ req, authToken: true });
|
||||
const { teamId, tmbId, canWrite } = await authDataset({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
datasetId,
|
||||
per: 'r'
|
||||
});
|
||||
|
||||
const match = {
|
||||
teamId: new Types.ObjectId(teamId),
|
||||
datasetId: new Types.ObjectId(datasetId),
|
||||
parentId: parentId ? new Types.ObjectId(parentId) : null,
|
||||
...(selectFolder ? { type: DatasetCollectionTypeEnum.folder } : {}),
|
||||
@@ -85,9 +91,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
}
|
||||
}
|
||||
},
|
||||
{ $project: { _id: 1 } }
|
||||
{ $count: 'count' }
|
||||
],
|
||||
as: 'trainings'
|
||||
as: 'trainingCount'
|
||||
}
|
||||
},
|
||||
// count collection total data
|
||||
@@ -103,9 +109,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
}
|
||||
}
|
||||
},
|
||||
{ $project: { _id: 1 } }
|
||||
{ $count: 'count' }
|
||||
],
|
||||
as: 'datas'
|
||||
as: 'dataCount'
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -117,10 +123,14 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
type: 1,
|
||||
status: 1,
|
||||
updateTime: 1,
|
||||
dataAmount: { $size: '$datas' },
|
||||
trainingAmount: { $size: '$trainings' },
|
||||
fileId: 1,
|
||||
rawLink: 1
|
||||
rawLink: 1,
|
||||
dataAmount: {
|
||||
$ifNull: [{ $arrayElemAt: ['$dataCount.count', 0] }, 0]
|
||||
},
|
||||
trainingAmount: {
|
||||
$ifNull: [{ $arrayElemAt: ['$trainingCount.count', 0] }, 0]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -144,7 +154,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
);
|
||||
|
||||
if (data.find((item) => item.trainingAmount > 0)) {
|
||||
startQueue(1);
|
||||
startQueue();
|
||||
}
|
||||
|
||||
// count collections
|
||||
|
||||
@@ -38,7 +38,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
return Promise.reject(DatasetErrEnum.unLinkCollection);
|
||||
}
|
||||
|
||||
const { rawText, isSameRawText } = await getCollectionAndRawText({
|
||||
const { title, rawText, isSameRawText } = await getCollectionAndRawText({
|
||||
collection
|
||||
});
|
||||
|
||||
@@ -68,7 +68,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
tmbId: collection.tmbId,
|
||||
parentId: collection.parentId,
|
||||
datasetId: collection.datasetId._id,
|
||||
name: collection.name,
|
||||
name: title || collection.name,
|
||||
type: collection.type,
|
||||
trainingType: collection.trainingType,
|
||||
chunkSize: collection.chunkSize,
|
||||
|
||||
@@ -16,7 +16,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
}
|
||||
|
||||
// 凭证校验
|
||||
await authDatasetCollection({ req, authToken: true, collectionId: id, per: 'w' });
|
||||
await authDatasetCollection({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
collectionId: id,
|
||||
per: 'w'
|
||||
});
|
||||
|
||||
const updateFields: Record<string, any> = {
|
||||
...(parentId !== undefined && { parentId: parentId || null }),
|
||||
|
||||
@@ -16,12 +16,28 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
type,
|
||||
avatar,
|
||||
vectorModel = global.vectorModels[0].model,
|
||||
agentModel
|
||||
agentModel = global.qaModels[0].model
|
||||
} = req.body as CreateDatasetParams;
|
||||
|
||||
// 凭证校验
|
||||
// auth
|
||||
const { teamId, tmbId } = await authUserNotVisitor({ req, authToken: true });
|
||||
|
||||
// check model valid
|
||||
const vectorModelStore = global.vectorModels.find((item) => item.model === vectorModel);
|
||||
const agentModelStore = global.qaModels.find((item) => item.model === agentModel);
|
||||
if (!vectorModelStore || !agentModelStore) {
|
||||
throw new Error('vectorModel or qaModel is invalid');
|
||||
}
|
||||
|
||||
// check limit
|
||||
const authCount = await MongoDataset.countDocuments({
|
||||
teamId,
|
||||
type: DatasetTypeEnum.dataset
|
||||
});
|
||||
if (authCount >= 50) {
|
||||
throw new Error('每个团队上限 50 个知识库');
|
||||
}
|
||||
|
||||
const { _id } = await MongoDataset.create({
|
||||
name,
|
||||
teamId,
|
||||
|
||||
@@ -8,8 +8,8 @@ import { delDatasetDataByDataId } from '@fastgpt/service/core/dataset/data/contr
|
||||
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
const { dataId } = req.query as {
|
||||
dataId: string;
|
||||
const { id: dataId } = req.query as {
|
||||
id: string;
|
||||
};
|
||||
|
||||
if (!dataId) {
|
||||
@@ -17,9 +17,18 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
||||
}
|
||||
|
||||
// 凭证校验
|
||||
await authDatasetData({ req, authToken: true, dataId, per: 'w' });
|
||||
const { datasetData } = await authDatasetData({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
dataId,
|
||||
per: 'w'
|
||||
});
|
||||
|
||||
await delDatasetDataByDataId(dataId);
|
||||
await delDatasetDataByDataId({
|
||||
collectionId: datasetData.collectionId,
|
||||
mongoDataId: dataId
|
||||
});
|
||||
|
||||
jsonRes(res, {
|
||||
data: 'success'
|
||||
|
||||
@@ -13,12 +13,18 @@ export type Response = {
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
const { dataId } = req.query as {
|
||||
dataId: string;
|
||||
const { id: dataId } = req.query as {
|
||||
id: string;
|
||||
};
|
||||
|
||||
// 凭证校验
|
||||
const { datasetData } = await authDatasetData({ req, authToken: true, dataId, per: 'r' });
|
||||
const { datasetData } = await authDatasetData({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
dataId,
|
||||
per: 'r'
|
||||
});
|
||||
|
||||
jsonRes(res, {
|
||||
data: datasetData
|
||||
|
||||
@@ -16,6 +16,7 @@ import { authTeamBalance } from '@/service/support/permission/auth/bill';
|
||||
import { pushGenerateVectorBill } from '@/service/support/wallet/bill/push';
|
||||
import { InsertOneDatasetDataProps } from '@/global/core/dataset/api';
|
||||
import { simpleText } from '@fastgpt/global/common/string/tools';
|
||||
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
|
||||
|
||||
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
@@ -39,6 +40,12 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
||||
per: 'w'
|
||||
});
|
||||
|
||||
await checkDatasetLimit({
|
||||
teamId,
|
||||
freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
|
||||
insertLen: 1
|
||||
});
|
||||
|
||||
// auth collection and get dataset
|
||||
const [
|
||||
{
|
||||
|
||||
@@ -17,8 +17,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
collectionId
|
||||
} = req.body as GetDatasetDataListProps;
|
||||
|
||||
pageSize = Math.min(pageSize, 30);
|
||||
|
||||
// 凭证校验
|
||||
await authDatasetCollection({ req, authToken: true, collectionId, per: 'r' });
|
||||
await authDatasetCollection({ req, authToken: true, authApiKey: true, collectionId, per: 'r' });
|
||||
|
||||
searchText = searchText.replace(/'/g, '');
|
||||
|
||||
@@ -32,7 +34,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
};
|
||||
|
||||
const [data, total] = await Promise.all([
|
||||
MongoDatasetData.find(match, '_id datasetId collectionId q a chunkIndex indexes')
|
||||
MongoDatasetData.find(match, '_id datasetId collectionId q a chunkIndex')
|
||||
.sort({ chunkIndex: 1, updateTime: -1 })
|
||||
.skip((pageNum - 1) * pageSize)
|
||||
.limit(pageSize)
|
||||
|
||||
@@ -2,38 +2,30 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
|
||||
import { withNextCors } from '@fastgpt/service/common/middle/cors';
|
||||
import { TrainingModeEnum, TrainingTypeMap } from '@fastgpt/global/core/dataset/constant';
|
||||
import { startQueue } from '@/service/utils/tools';
|
||||
import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
|
||||
import type { PushDataResponse } from '@/global/core/api/datasetRes.d';
|
||||
import type { PushDatasetDataProps } from '@/global/core/dataset/api.d';
|
||||
import { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
|
||||
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
|
||||
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { getCollectionWithDataset } from '@fastgpt/service/core/dataset/controller';
|
||||
import { simpleText } from '@fastgpt/global/common/string/tools';
|
||||
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
|
||||
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
|
||||
import { pushDataToDatasetCollection } from '@/service/core/dataset/data/controller';
|
||||
|
||||
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
const { collectionId, data, mode = TrainingModeEnum.chunk } = req.body as PushDatasetDataProps;
|
||||
const { collectionId, data } = req.body as PushDatasetDataProps;
|
||||
|
||||
if (!collectionId || !Array.isArray(data)) {
|
||||
throw new Error('collectionId or data is empty');
|
||||
}
|
||||
|
||||
if (!TrainingTypeMap[mode]) {
|
||||
throw new Error(`Mode is not ${Object.keys(TrainingTypeMap).join(', ')}`);
|
||||
}
|
||||
|
||||
if (data.length > 200) {
|
||||
throw new Error('Data is too long, max 200');
|
||||
}
|
||||
|
||||
// 凭证校验
|
||||
const { teamId, tmbId } = await authDatasetCollection({
|
||||
const { teamId, tmbId, collection } = await authDatasetCollection({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
@@ -41,6 +33,13 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
||||
per: 'w'
|
||||
});
|
||||
|
||||
// auth dataset limit
|
||||
await checkDatasetLimit({
|
||||
teamId,
|
||||
freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
|
||||
insertLen: predictDataLimitLength(collection.trainingType, data)
|
||||
});
|
||||
|
||||
jsonRes<PushDataResponse>(res, {
|
||||
data: await pushDataToDatasetCollection({
|
||||
...req.body,
|
||||
@@ -56,141 +55,6 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
||||
}
|
||||
});
|
||||
|
||||
export async function pushDataToDatasetCollection({
|
||||
teamId,
|
||||
tmbId,
|
||||
collectionId,
|
||||
data,
|
||||
mode,
|
||||
prompt,
|
||||
billId
|
||||
}: {
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
} & PushDatasetDataProps): Promise<PushDataResponse> {
|
||||
const { datasetId, model, maxToken, weight } = await checkModelValid({
|
||||
mode,
|
||||
collectionId
|
||||
});
|
||||
|
||||
// format q and a, remove empty char
|
||||
data.forEach((item) => {
|
||||
item.q = simpleText(item.q);
|
||||
item.a = simpleText(item.a);
|
||||
|
||||
item.indexes = item.indexes
|
||||
?.map((index) => {
|
||||
return {
|
||||
...index,
|
||||
text: simpleText(index.text)
|
||||
};
|
||||
})
|
||||
.filter(Boolean);
|
||||
});
|
||||
|
||||
// filter repeat or equal content
|
||||
const set = new Set();
|
||||
const filterResult: Record<string, PushDatasetDataChunkProps[]> = {
|
||||
success: [],
|
||||
overToken: [],
|
||||
repeat: [],
|
||||
error: []
|
||||
};
|
||||
|
||||
data.forEach((item) => {
|
||||
if (!item.q) {
|
||||
filterResult.error.push(item);
|
||||
return;
|
||||
}
|
||||
|
||||
const text = item.q + item.a;
|
||||
|
||||
// count q token
|
||||
const token = countPromptTokens(item.q);
|
||||
|
||||
if (token > maxToken) {
|
||||
filterResult.overToken.push(item);
|
||||
return;
|
||||
}
|
||||
|
||||
if (set.has(text)) {
|
||||
console.log('repeat', item);
|
||||
filterResult.repeat.push(item);
|
||||
} else {
|
||||
filterResult.success.push(item);
|
||||
set.add(text);
|
||||
}
|
||||
});
|
||||
|
||||
// 插入记录
|
||||
const insertRes = await MongoDatasetTraining.insertMany(
|
||||
filterResult.success.map((item, i) => ({
|
||||
teamId,
|
||||
tmbId,
|
||||
datasetId,
|
||||
collectionId,
|
||||
billId,
|
||||
mode,
|
||||
prompt,
|
||||
model,
|
||||
q: item.q,
|
||||
a: item.a,
|
||||
chunkIndex: item.chunkIndex ?? i,
|
||||
weight: weight ?? 0,
|
||||
indexes: item.indexes
|
||||
}))
|
||||
);
|
||||
|
||||
insertRes.length > 0 && startQueue();
|
||||
delete filterResult.success;
|
||||
|
||||
return {
|
||||
insertLen: insertRes.length,
|
||||
...filterResult
|
||||
};
|
||||
}
|
||||
|
||||
export async function checkModelValid({
|
||||
mode,
|
||||
collectionId
|
||||
}: {
|
||||
mode: `${TrainingModeEnum}`;
|
||||
collectionId: string;
|
||||
}) {
|
||||
const {
|
||||
datasetId: { _id: datasetId, vectorModel, agentModel }
|
||||
} = await getCollectionWithDataset(collectionId);
|
||||
|
||||
if (mode === TrainingModeEnum.chunk) {
|
||||
if (!collectionId) return Promise.reject(`CollectionId is empty`);
|
||||
const vectorModelData = getVectorModel(vectorModel);
|
||||
if (!vectorModelData) {
|
||||
return Promise.reject(`Model ${vectorModel} is inValid`);
|
||||
}
|
||||
|
||||
return {
|
||||
datasetId,
|
||||
maxToken: vectorModelData.maxToken * 1.5,
|
||||
model: vectorModelData.model,
|
||||
weight: vectorModelData.weight
|
||||
};
|
||||
}
|
||||
|
||||
if (mode === TrainingModeEnum.qa) {
|
||||
const qaModelData = getQAModel(agentModel);
|
||||
if (!qaModelData) {
|
||||
return Promise.reject(`Model ${agentModel} is inValid`);
|
||||
}
|
||||
return {
|
||||
datasetId,
|
||||
maxToken: qaModelData.maxContext * 0.8,
|
||||
model: qaModelData.model,
|
||||
weight: 0
|
||||
};
|
||||
}
|
||||
return Promise.reject(`Mode ${mode} is inValid`);
|
||||
}
|
||||
|
||||
export const config = {
|
||||
api: {
|
||||
bodyParser: {
|
||||
|
||||
@@ -11,7 +11,7 @@ import { UpdateDatasetDataProps } from '@/global/core/dataset/api';
|
||||
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
const { id, q = '', a, indexes } = req.body as UpdateDatasetDataProps;
|
||||
const { id, q = '', a, indexes = [] } = req.body as UpdateDatasetDataProps;
|
||||
|
||||
// auth data permission
|
||||
const {
|
||||
@@ -23,6 +23,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
||||
} = await authDatasetData({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
dataId: id,
|
||||
per: 'w'
|
||||
});
|
||||
|
||||
@@ -20,6 +20,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
const { dataset, canWrite, isOwner } = await authDataset({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
datasetId,
|
||||
per: 'r'
|
||||
});
|
||||
|
||||
@@ -15,7 +15,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
// 凭证校验
|
||||
const { teamId, tmbId, teamOwner, role, canWrite } = await authUserRole({
|
||||
req,
|
||||
authToken: true
|
||||
authToken: true,
|
||||
authApiKey: true
|
||||
});
|
||||
|
||||
const datasets = await MongoDataset.find({
|
||||
|
||||
@@ -3,14 +3,11 @@ import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { request } from '@fastgpt/service/common/api/plusRequest';
|
||||
import type { Method } from 'axios';
|
||||
import { setCookie } from '@fastgpt/service/support/permission/controller';
|
||||
import { getInitConfig } from '../common/system/getInitData';
|
||||
import { FastGPTProUrl } from '@fastgpt/service/common/system/constants';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
try {
|
||||
if (!FastGPTProUrl) {
|
||||
await getInitConfig();
|
||||
}
|
||||
await connectToDatabase();
|
||||
|
||||
const method = (req.method || 'POST') as Method;
|
||||
const { path = [], ...query } = req.query as any;
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { getTeamDatasetValidSub } from '@fastgpt/service/support/wallet/sub/utils';
|
||||
import { getVectorCountByTeamId } from '@fastgpt/service/common/vectorStore/controller';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
|
||||
// 凭证校验
|
||||
const { teamId } = await authCert({
|
||||
req,
|
||||
authToken: true
|
||||
});
|
||||
|
||||
const [{ sub, maxSize }, usedSize] = await Promise.all([
|
||||
getTeamDatasetValidSub({
|
||||
teamId,
|
||||
freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize
|
||||
}),
|
||||
getVectorCountByTeamId(teamId)
|
||||
]);
|
||||
|
||||
jsonRes(res, {
|
||||
data: {
|
||||
sub,
|
||||
maxSize,
|
||||
usedSize
|
||||
}
|
||||
});
|
||||
} catch (err) {
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error: err
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -2,7 +2,8 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { withNextCors } from '@fastgpt/service/common/middle/cors';
|
||||
import { getUploadModel, removeFilesByPaths } from '@fastgpt/service/common/file/upload/multer';
|
||||
import { getUploadModel } from '@fastgpt/service/common/file/multer';
|
||||
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
|
||||
import fs from 'fs';
|
||||
import { getAIApi } from '@fastgpt/service/core/ai/config';
|
||||
import { pushWhisperBill } from '@/service/support/wallet/bill/push';
|
||||
|
||||
@@ -35,19 +35,17 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
||||
|
||||
const { tokens, vectors } = await getVectorsByText({ input: query, model });
|
||||
|
||||
jsonRes(res, {
|
||||
data: {
|
||||
object: 'list',
|
||||
data: vectors.map((item, index) => ({
|
||||
object: 'embedding',
|
||||
index: index,
|
||||
embedding: item
|
||||
})),
|
||||
model,
|
||||
usage: {
|
||||
prompt_tokens: tokens,
|
||||
total_tokens: tokens
|
||||
}
|
||||
res.json({
|
||||
object: 'list',
|
||||
data: vectors.map((item, index) => ({
|
||||
object: 'embedding',
|
||||
index: index,
|
||||
embedding: item
|
||||
})),
|
||||
model,
|
||||
usage: {
|
||||
prompt_tokens: tokens,
|
||||
total_tokens: tokens
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user