4.6.7 first pr (#726)

This commit is contained in:
Archer
2024-01-10 23:35:04 +08:00
committed by GitHub
parent 414b693303
commit 006ad17c6a
186 changed files with 2996 additions and 1838 deletions

View File

@@ -3,7 +3,7 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { uploadFile } from '@fastgpt/service/common/file/gridfs/controller';
import { getUploadModel, removeFilesByPaths } from '@fastgpt/service/common/file/upload/multer';
import { getUploadModel } from '@fastgpt/service/common/file/multer';
/**
* Creates the multer uploader
@@ -16,12 +16,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
let filePaths: string[] = [];
try {
const { userId, teamId, tmbId } = await authCert({ req, authToken: true });
const { files, bucketName, metadata } = await upload.doUpload(req, res);
filePaths = files.map((file) => file.path);
await connectToDatabase();
const { userId, teamId, tmbId } = await authCert({ req, authToken: true });
if (!bucketName) {
throw new Error('bucketName is empty');
@@ -53,8 +54,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
error
});
}
removeFilesByPaths(filePaths);
}
export const config = {

View File

@@ -8,15 +8,13 @@ import { UploadImgProps } from '@fastgpt/global/common/file/api';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
await connectToDatabase();
const { base64Img, expiredTime, metadata, shareId } = req.body as UploadImgProps;
const { shareId, ...body } = req.body as UploadImgProps;
const { teamId } = await authCertOrShareId({ req, shareId, authToken: true });
const data = await uploadMongoImg({
teamId,
base64Img,
expiredTime,
metadata
...body
});
jsonRes(res, { data });

View File

@@ -59,39 +59,44 @@ const defaultFeConfigs: FastGPTFeConfigsType = {
};
export async function getInitConfig() {
if (global.systemInitd) return;
global.systemInitd = true;
try {
if (global.feConfigs) return;
await connectToDatabase();
initGlobal();
await initSystemConfig();
await Promise.all([
initGlobal(),
initSystemConfig(),
getSimpleModeTemplates(),
getSystemVersion(),
getSystemPlugin()
]);
console.log({
simpleModeTemplates: global.simpleModeTemplates,
communityPlugins: global.communityPlugins
});
} catch (error) {
console.error('Load init config error', error);
global.systemInitd = false;
if (!global.feConfigs) {
exit(1);
}
}
await getSimpleModeTemplates();
}
getSystemVersion();
getSystemPlugin();
export function initGlobal() {
if (global.communityPlugins) return;
console.log({
feConfigs: global.feConfigs,
systemEnv: global.systemEnv,
chatModels: global.chatModels,
qaModels: global.qaModels,
cqModels: global.cqModels,
extractModels: global.extractModels,
qgModels: global.qgModels,
vectorModels: global.vectorModels,
reRankModels: global.reRankModels,
audioSpeechModels: global.audioSpeechModels,
whisperModel: global.whisperModel,
simpleModeTemplates: global.simpleModeTemplates,
communityPlugins: global.communityPlugins
});
global.communityPlugins = [];
global.simpleModeTemplates = [];
global.qaQueueLen = global.qaQueueLen ?? 0;
global.vectorQueueLen = global.vectorQueueLen ?? 0;
// init tikToken
getTikTokenEnc();
initHttpAgent();
}
export async function initSystemConfig() {
@@ -137,19 +142,24 @@ export async function initSystemConfig() {
global.reRankModels = config.reRankModels;
global.audioSpeechModels = config.audioSpeechModels;
global.whisperModel = config.whisperModel;
}
/**
 * Initialize process-wide global state used by the system init pipeline.
 * Side effects only: resets caches, seeds queue counters, warms the tokenizer
 * encoder and HTTP agent, then logs the currently loaded config globals.
 */
export function initGlobal() {
  // Reset plugin/template caches; they are re-populated later in the init flow.
  global.communityPlugins = [];
  global.simpleModeTemplates = [];
  // Keep existing queue counters across re-init (?? preserves a prior 0).
  global.qaQueueLen = global.qaQueueLen ?? 0;
  global.vectorQueueLen = global.vectorQueueLen ?? 0;
  // init tikToken
  getTikTokenEnc();
  initHttpAgent();
  // Startup diagnostics: dump the model/config globals as loaded so far.
  // NOTE(review): these may still be undefined if initSystemConfig has not
  // run yet — confirm call order in getInitConfig.
  console.log({
    feConfigs: global.feConfigs,
    systemEnv: global.systemEnv,
    chatModels: global.chatModels,
    qaModels: global.qaModels,
    cqModels: global.cqModels,
    extractModels: global.extractModels,
    qgModels: global.qgModels,
    vectorModels: global.vectorModels,
    reRankModels: global.reRankModels,
    audioSpeechModels: global.audioSpeechModels,
    whisperModel: global.whisperModel
  });
}
export function getSystemVersion() {
if (global.systemVersion) return;
try {
if (process.env.NODE_ENV === 'development') {
global.systemVersion = process.env.npm_package_version || '0.0.0';

View File

@@ -1,31 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { initSystemConfig } from './getInitData';
/**
 * Root-only endpoint that reloads the system configuration into the process
 * globals and logs the result. Always answers with an empty success payload,
 * even when the refresh itself fails (errors are only logged).
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
    // Only the root credential may trigger a config refresh.
    await authCert({ req, authRoot: true });
    await initSystemConfig();
    console.log(`refresh config`);
    // Snapshot of the freshly loaded globals, for operator diagnostics.
    const snapshot = {
      chatModels: global.chatModels,
      qaModels: global.qaModels,
      cqModels: global.cqModels,
      extractModels: global.extractModels,
      qgModels: global.qgModels,
      vectorModels: global.vectorModels,
      reRankModels: global.reRankModels,
      audioSpeechModels: global.audioSpeechModels,
      whisperModel: global.whisperModel,
      feConfigs: global.feConfigs,
      systemEnv: global.systemEnv
    };
    console.log(snapshot);
  } catch (error) {
    console.log(error);
  }
  jsonRes(res);
}

View File

@@ -29,6 +29,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
await MongoChatItem.findOneAndUpdate(
{
chatId,
dataId: chatItemId
},
{

View File

@@ -0,0 +1,88 @@
/*
  Create one link-type dataset collection from a web link and start syncing it
*/
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type { LinkCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import { TrainingModeEnum, DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
import { reloadCollectionChunks } from '@fastgpt/service/core/dataset/collection/utils';
import { startQueue } from '@/service/utils/tools';
/**
 * Create a link-type dataset collection and kick off its synchronization.
 * Flow: auth (write) -> dataset size limit check -> create collection ->
 * create training bill -> reload chunks -> wake the training queue.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();

    const {
      link,
      trainingType = TrainingModeEnum.chunk,
      chunkSize = 512,
      chunkSplitter,
      qaPrompt,
      ...createParams
    } = req.body as LinkCreateDatasetCollectionParams;

    // Write permission on the target dataset is required (token or api key).
    const { teamId, tmbId, dataset } = await authDataset({
      req,
      authToken: true,
      authApiKey: true,
      datasetId: createParams.datasetId,
      per: 'w'
    });

    // 1. check dataset limit. The real chunk count is unknown before the link
    // is crawled, so a 10-item placeholder drives the estimate.
    await checkDatasetLimit({
      teamId,
      freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
      insertLen: predictDataLimitLength(trainingType, new Array(10))
    });

    // 2. create collection (named after the link itself)
    const collectionId = await createOneCollection({
      ...createParams,
      name: link,
      teamId,
      tmbId,
      type: DatasetCollectionTypeEnum.link,
      trainingType,
      chunkSize,
      chunkSplitter,
      qaPrompt,
      rawLink: link
    });

    // 3. create bill and start sync
    const { billId } = await createTrainingBill({
      teamId,
      tmbId,
      appName: 'core.dataset.collection.Sync Collection',
      billSource: BillSourceEnum.training,
      vectorModel: getVectorModel(dataset.vectorModel).name,
      agentModel: getQAModel(dataset.agentModel).name
    });
    await reloadCollectionChunks({ collectionId, tmbId, billId });

    startQueue();

    jsonRes(res, { data: { collectionId } });
  } catch (err) {
    jsonRes(res, { code: 500, error: err });
  }
}

View File

@@ -0,0 +1,90 @@
/*
  Create one virtual dataset collection from a block of raw text and queue it for training
*/
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type { TextCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import { TrainingModeEnum, DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataToDatasetCollection } from '@/service/core/dataset/data/controller';
import { hashStr } from '@fastgpt/global/common/string/tools';
/**
 * Create a virtual dataset collection from raw text and push its chunks to
 * the training queue.
 * Flow: auth (write) -> split text -> dataset size limit check ->
 * create collection -> enqueue chunks. Responds with the new collection id
 * and the queue insert results.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();

    const {
      text,
      trainingType = TrainingModeEnum.chunk,
      chunkSize = 512,
      chunkSplitter,
      qaPrompt,
      ...createParams
    } = req.body as TextCreateDatasetCollectionParams;

    // Write permission on the target dataset is required (token or api key).
    const { teamId, tmbId } = await authDataset({
      req,
      authToken: true,
      authApiKey: true,
      datasetId: createParams.datasetId,
      per: 'w'
    });

    // 1. split text to chunks. Chunk mode keeps a 20% overlap between
    // adjacent chunks; QA mode uses none.
    const { chunks } = splitText2Chunks({
      text,
      chunkLen: chunkSize,
      overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
      customReg: chunkSplitter ? [chunkSplitter] : [],
      countTokens: false
    });

    // 2. check dataset limit against the actual chunk count
    await checkDatasetLimit({
      teamId,
      freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
      insertLen: predictDataLimitLength(trainingType, chunks)
    });

    // 3. create collection, recording a hash of the raw text for later
    // change detection
    const collectionId = await createOneCollection({
      ...createParams,
      teamId,
      tmbId,
      type: DatasetCollectionTypeEnum.virtual,
      trainingType,
      chunkSize,
      chunkSplitter,
      qaPrompt,
      hashRawText: hashStr(text),
      rawTextLength: text.length
    });

    // 4. push chunks to training queue
    const insertResults = await pushDataToDatasetCollection({
      teamId,
      tmbId,
      collectionId,
      trainingMode: trainingType,
      data: chunks.map((chunkText, chunkIndex) => ({
        q: chunkText,
        chunkIndex
      }))
    });

    jsonRes(res, {
      data: { collectionId, results: insertResults }
    });
  } catch (err) {
    jsonRes(res, { code: 500, error: err });
  }
}

View File

@@ -5,7 +5,6 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/user';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
@@ -14,13 +13,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
await connectToDatabase();
const body = req.body as CreateDatasetCollectionParams;
// auth. not visitor and dataset is public
const { teamId, tmbId } = await authUserNotVisitor({ req, authToken: true });
await authDataset({
const { teamId, tmbId } = await authDataset({
req,
authToken: true,
authApiKey: true,
datasetId: body.datasetId,
per: 'r'
per: 'w'
});
jsonRes(res, {

View File

@@ -4,13 +4,12 @@ import { connectToDatabase } from '@/service/mongo';
import { findCollectionAndChild } from '@fastgpt/service/core/dataset/collection/utils';
import { delCollectionRelevantData } from '@fastgpt/service/core/dataset/data/controller';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { collectionId } = req.query as { collectionId: string };
const { id: collectionId } = req.query as { id: string };
if (!collectionId) {
throw new Error('CollectionIdId is required');
@@ -19,6 +18,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
collectionId,
per: 'w'
});

View File

@@ -22,6 +22,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const { collection, canWrite } = await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
collectionId: id,
per: 'r'
});

View File

@@ -11,7 +11,6 @@ import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant
import { startQueue } from '@/service/utils/tools';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { DatasetDataCollectionName } from '@fastgpt/service/core/dataset/data/schema';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -27,12 +26,19 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
simple = false
} = req.body as GetDatasetCollectionsProps;
searchText = searchText?.replace(/'/g, '');
pageSize = Math.min(pageSize, 30);
// auth dataset and get my role
const { tmbId } = await authDataset({ req, authToken: true, datasetId, per: 'r' });
const { canWrite } = await authUserRole({ req, authToken: true });
const { teamId, tmbId, canWrite } = await authDataset({
req,
authToken: true,
authApiKey: true,
datasetId,
per: 'r'
});
const match = {
teamId: new Types.ObjectId(teamId),
datasetId: new Types.ObjectId(datasetId),
parentId: parentId ? new Types.ObjectId(parentId) : null,
...(selectFolder ? { type: DatasetCollectionTypeEnum.folder } : {}),
@@ -85,9 +91,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
}
}
},
{ $project: { _id: 1 } }
{ $count: 'count' }
],
as: 'trainings'
as: 'trainingCount'
}
},
// count collection total data
@@ -103,9 +109,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
}
}
},
{ $project: { _id: 1 } }
{ $count: 'count' }
],
as: 'datas'
as: 'dataCount'
}
},
{
@@ -117,10 +123,14 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
type: 1,
status: 1,
updateTime: 1,
dataAmount: { $size: '$datas' },
trainingAmount: { $size: '$trainings' },
fileId: 1,
rawLink: 1
rawLink: 1,
dataAmount: {
$ifNull: [{ $arrayElemAt: ['$dataCount.count', 0] }, 0]
},
trainingAmount: {
$ifNull: [{ $arrayElemAt: ['$trainingCount.count', 0] }, 0]
}
}
},
{
@@ -144,7 +154,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
);
if (data.find((item) => item.trainingAmount > 0)) {
startQueue(1);
startQueue();
}
// count collections

View File

@@ -38,7 +38,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
return Promise.reject(DatasetErrEnum.unLinkCollection);
}
const { rawText, isSameRawText } = await getCollectionAndRawText({
const { title, rawText, isSameRawText } = await getCollectionAndRawText({
collection
});
@@ -68,7 +68,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
tmbId: collection.tmbId,
parentId: collection.parentId,
datasetId: collection.datasetId._id,
name: collection.name,
name: title || collection.name,
type: collection.type,
trainingType: collection.trainingType,
chunkSize: collection.chunkSize,

View File

@@ -16,7 +16,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
}
// Credential check
await authDatasetCollection({ req, authToken: true, collectionId: id, per: 'w' });
await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
collectionId: id,
per: 'w'
});
const updateFields: Record<string, any> = {
...(parentId !== undefined && { parentId: parentId || null }),

View File

@@ -16,12 +16,28 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
type,
avatar,
vectorModel = global.vectorModels[0].model,
agentModel
agentModel = global.qaModels[0].model
} = req.body as CreateDatasetParams;
// 凭证校验
// auth
const { teamId, tmbId } = await authUserNotVisitor({ req, authToken: true });
// check model valid
const vectorModelStore = global.vectorModels.find((item) => item.model === vectorModel);
const agentModelStore = global.qaModels.find((item) => item.model === agentModel);
if (!vectorModelStore || !agentModelStore) {
throw new Error('vectorModel or qaModel is invalid');
}
// check limit
const authCount = await MongoDataset.countDocuments({
teamId,
type: DatasetTypeEnum.dataset
});
if (authCount >= 50) {
throw new Error('每个团队上限 50 个知识库');
}
const { _id } = await MongoDataset.create({
name,
teamId,

View File

@@ -8,8 +8,8 @@ import { delDatasetDataByDataId } from '@fastgpt/service/core/dataset/data/contr
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { dataId } = req.query as {
dataId: string;
const { id: dataId } = req.query as {
id: string;
};
if (!dataId) {
@@ -17,9 +17,18 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
}
// Credential check
await authDatasetData({ req, authToken: true, dataId, per: 'w' });
const { datasetData } = await authDatasetData({
req,
authToken: true,
authApiKey: true,
dataId,
per: 'w'
});
await delDatasetDataByDataId(dataId);
await delDatasetDataByDataId({
collectionId: datasetData.collectionId,
mongoDataId: dataId
});
jsonRes(res, {
data: 'success'

View File

@@ -13,12 +13,18 @@ export type Response = {
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { dataId } = req.query as {
dataId: string;
const { id: dataId } = req.query as {
id: string;
};
// 凭证校验
const { datasetData } = await authDatasetData({ req, authToken: true, dataId, per: 'r' });
const { datasetData } = await authDatasetData({
req,
authToken: true,
authApiKey: true,
dataId,
per: 'r'
});
jsonRes(res, {
data: datasetData

View File

@@ -16,6 +16,7 @@ import { authTeamBalance } from '@/service/support/permission/auth/bill';
import { pushGenerateVectorBill } from '@/service/support/wallet/bill/push';
import { InsertOneDatasetDataProps } from '@/global/core/dataset/api';
import { simpleText } from '@fastgpt/global/common/string/tools';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -39,6 +40,12 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
per: 'w'
});
await checkDatasetLimit({
teamId,
freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
insertLen: 1
});
// auth collection and get dataset
const [
{

View File

@@ -17,8 +17,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
collectionId
} = req.body as GetDatasetDataListProps;
pageSize = Math.min(pageSize, 30);
// 凭证校验
await authDatasetCollection({ req, authToken: true, collectionId, per: 'r' });
await authDatasetCollection({ req, authToken: true, authApiKey: true, collectionId, per: 'r' });
searchText = searchText.replace(/'/g, '');
@@ -32,7 +34,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
};
const [data, total] = await Promise.all([
MongoDatasetData.find(match, '_id datasetId collectionId q a chunkIndex indexes')
MongoDatasetData.find(match, '_id datasetId collectionId q a chunkIndex')
.sort({ chunkIndex: 1, updateTime: -1 })
.skip((pageNum - 1) * pageSize)
.limit(pageSize)

View File

@@ -2,38 +2,30 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { withNextCors } from '@fastgpt/service/common/middle/cors';
import { TrainingModeEnum, TrainingTypeMap } from '@fastgpt/global/core/dataset/constant';
import { startQueue } from '@/service/utils/tools';
import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
import type { PushDataResponse } from '@/global/core/api/datasetRes.d';
import type { PushDatasetDataProps } from '@/global/core/dataset/api.d';
import { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { getCollectionWithDataset } from '@fastgpt/service/core/dataset/controller';
import { simpleText } from '@fastgpt/global/common/string/tools';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataToDatasetCollection } from '@/service/core/dataset/data/controller';
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { collectionId, data, mode = TrainingModeEnum.chunk } = req.body as PushDatasetDataProps;
const { collectionId, data } = req.body as PushDatasetDataProps;
if (!collectionId || !Array.isArray(data)) {
throw new Error('collectionId or data is empty');
}
if (!TrainingTypeMap[mode]) {
throw new Error(`Mode is not ${Object.keys(TrainingTypeMap).join(', ')}`);
}
if (data.length > 200) {
throw new Error('Data is too long, max 200');
}
// Credential check
const { teamId, tmbId } = await authDatasetCollection({
const { teamId, tmbId, collection } = await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
@@ -41,6 +33,13 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
per: 'w'
});
// auth dataset limit
await checkDatasetLimit({
teamId,
freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
insertLen: predictDataLimitLength(collection.trainingType, data)
});
jsonRes<PushDataResponse>(res, {
data: await pushDataToDatasetCollection({
...req.body,
@@ -56,141 +55,6 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
}
});
/**
 * Normalize, deduplicate and enqueue dataset chunks as training records.
 *
 * Steps:
 *  1. resolve model/limits for the given mode via checkModelValid;
 *  2. sanitize q/a/index text in place (mutates the caller's `data`);
 *  3. partition items into success / overToken / repeat / error buckets;
 *  4. bulk-insert the successful items into the training collection and
 *     wake the queue if anything was inserted.
 *
 * Returns the inserted count plus the rejected buckets (the `success`
 * bucket is deleted before returning so it never appears in the response).
 */
export async function pushDataToDatasetCollection({
  teamId,
  tmbId,
  collectionId,
  data,
  mode,
  prompt,
  billId
}: {
  teamId: string;
  tmbId: string;
} & PushDatasetDataProps): Promise<PushDataResponse> {
  // Resolve dataset id, model name and per-item token budget for this mode.
  const { datasetId, model, maxToken, weight } = await checkModelValid({
    mode,
    collectionId
  });
  // format q and a, remove empty char
  data.forEach((item) => {
    item.q = simpleText(item.q);
    item.a = simpleText(item.a);
    item.indexes = item.indexes
      ?.map((index) => {
        return {
          ...index,
          text: simpleText(index.text)
        };
      })
      .filter(Boolean);
  });
  // filter repeat or equal content
  // Dedup key is q + a; NOTE(review): `a` may be undefined here, which would
  // concatenate as "undefined" — presumably harmless for dedup, but confirm.
  const set = new Set();
  const filterResult: Record<string, PushDatasetDataChunkProps[]> = {
    success: [],
    overToken: [],
    repeat: [],
    error: []
  };
  data.forEach((item) => {
    // Items with an empty question are unusable.
    if (!item.q) {
      filterResult.error.push(item);
      return;
    }
    const text = item.q + item.a;
    // count q token
    const token = countPromptTokens(item.q);
    if (token > maxToken) {
      filterResult.overToken.push(item);
      return;
    }
    if (set.has(text)) {
      console.log('repeat', item);
      filterResult.repeat.push(item);
    } else {
      filterResult.success.push(item);
      set.add(text);
    }
  });
  // insert records
  const insertRes = await MongoDatasetTraining.insertMany(
    filterResult.success.map((item, i) => ({
      teamId,
      tmbId,
      datasetId,
      collectionId,
      billId,
      mode,
      prompt,
      model,
      q: item.q,
      a: item.a,
      // Fall back to the array position when the caller gave no chunk index.
      chunkIndex: item.chunkIndex ?? i,
      weight: weight ?? 0,
      indexes: item.indexes
    }))
  );
  // Only wake the training queue when something was actually inserted.
  insertRes.length > 0 && startQueue();
  // Drop the success bucket so the spread below only exposes rejections.
  delete filterResult.success;
  return {
    insertLen: insertRes.length,
    ...filterResult
  };
}
/**
 * Resolve the dataset id and the model configuration (name, token budget,
 * weight) to use for a given training mode.
 *
 * @param mode training mode; only chunk and qa are supported
 * @param collectionId id of the dataset collection being trained
 * @returns { datasetId, maxToken, model, weight } for the selected mode
 * @rejects with a message string when the collection id is empty, the model
 *          is unknown, or the mode is unsupported
 */
export async function checkModelValid({
  mode,
  collectionId
}: {
  mode: `${TrainingModeEnum}`;
  collectionId: string;
}) {
  // Fix: validate before use. The original only checked this inside the
  // chunk branch, after collectionId had already been passed to the query
  // below — making the guard dead for the query and absent for qa mode.
  if (!collectionId) return Promise.reject(`CollectionId is empty`);

  const {
    datasetId: { _id: datasetId, vectorModel, agentModel }
  } = await getCollectionWithDataset(collectionId);

  if (mode === TrainingModeEnum.chunk) {
    const vectorModelData = getVectorModel(vectorModel);
    if (!vectorModelData) {
      return Promise.reject(`Model ${vectorModel} is inValid`);
    }
    return {
      datasetId,
      // Allow headroom over the embedding model's nominal max token count.
      maxToken: vectorModelData.maxToken * 1.5,
      model: vectorModelData.model,
      weight: vectorModelData.weight
    };
  }

  if (mode === TrainingModeEnum.qa) {
    const qaModelData = getQAModel(agentModel);
    if (!qaModelData) {
      return Promise.reject(`Model ${agentModel} is inValid`);
    }
    return {
      datasetId,
      // Reserve part of the QA model's context window for generated output.
      maxToken: qaModelData.maxContext * 0.8,
      model: qaModelData.model,
      weight: 0
    };
  }

  return Promise.reject(`Mode ${mode} is inValid`);
}
export const config = {
api: {
bodyParser: {

View File

@@ -11,7 +11,7 @@ import { UpdateDatasetDataProps } from '@/global/core/dataset/api';
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { id, q = '', a, indexes } = req.body as UpdateDatasetDataProps;
const { id, q = '', a, indexes = [] } = req.body as UpdateDatasetDataProps;
// auth data permission
const {
@@ -23,6 +23,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
} = await authDatasetData({
req,
authToken: true,
authApiKey: true,
dataId: id,
per: 'w'
});

View File

@@ -20,6 +20,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const { dataset, canWrite, isOwner } = await authDataset({
req,
authToken: true,
authApiKey: true,
datasetId,
per: 'r'
});

View File

@@ -15,7 +15,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// 凭证校验
const { teamId, tmbId, teamOwner, role, canWrite } = await authUserRole({
req,
authToken: true
authToken: true,
authApiKey: true
});
const datasets = await MongoDataset.find({

View File

@@ -3,14 +3,11 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { request } from '@fastgpt/service/common/api/plusRequest';
import type { Method } from 'axios';
import { setCookie } from '@fastgpt/service/support/permission/controller';
import { getInitConfig } from '../common/system/getInitData';
import { FastGPTProUrl } from '@fastgpt/service/common/system/constants';
import { connectToDatabase } from '@/service/mongo';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
if (!FastGPTProUrl) {
await getInitConfig();
}
await connectToDatabase();
const method = (req.method || 'POST') as Method;
const { path = [], ...query } = req.query as any;

View File

@@ -0,0 +1,39 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { getTeamDatasetValidSub } from '@fastgpt/service/support/wallet/sub/utils';
import { getVectorCountByTeamId } from '@fastgpt/service/common/vectorStore/controller';
/**
 * Report the team's dataset-store subscription alongside its current vector
 * store usage.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();

    // Credential check: a login token is required.
    const { teamId } = await authCert({
      req,
      authToken: true
    });

    // Fetch the valid subscription and the stored vector count in parallel.
    const [subInfo, usedSize] = await Promise.all([
      getTeamDatasetValidSub({
        teamId,
        freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize
      }),
      getVectorCountByTeamId(teamId)
    ]);

    jsonRes(res, {
      data: {
        sub: subInfo.sub,
        maxSize: subInfo.maxSize,
        usedSize
      }
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}

View File

@@ -2,7 +2,8 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { withNextCors } from '@fastgpt/service/common/middle/cors';
import { getUploadModel, removeFilesByPaths } from '@fastgpt/service/common/file/upload/multer';
import { getUploadModel } from '@fastgpt/service/common/file/multer';
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
import fs from 'fs';
import { getAIApi } from '@fastgpt/service/core/ai/config';
import { pushWhisperBill } from '@/service/support/wallet/bill/push';

View File

@@ -35,19 +35,17 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
const { tokens, vectors } = await getVectorsByText({ input: query, model });
jsonRes(res, {
data: {
object: 'list',
data: vectors.map((item, index) => ({
object: 'embedding',
index: index,
embedding: item
})),
model,
usage: {
prompt_tokens: tokens,
total_tokens: tokens
}
res.json({
object: 'list',
data: vectors.map((item, index) => ({
object: 'embedding',
index: index,
embedding: item
})),
model,
usage: {
prompt_tokens: tokens,
total_tokens: tokens
}
});