This commit is contained in:
Archer
2023-11-15 11:36:25 +08:00
committed by GitHub
parent 592e1a93a2
commit bfd8be5df0
181 changed files with 2499 additions and 1552 deletions

View File

@@ -0,0 +1,169 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { delay } from '@/utils/tools';
import { PgClient } from '@fastgpt/service/common/pg';
import {
DatasetDataIndexTypeEnum,
PgDatasetTableName
} from '@fastgpt/global/core/dataset/constant';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { getUserDefaultTeam } from '@fastgpt/service/support/user/team/controller';
// Module-level progress counter shared by the migration helpers below;
// reset at the start of each handler invocation and incremented per migrated row.
let success = 0;
/* Migrate rows from pg into the mongo `dataset.datas` collection and store the id mapping back in pg. */
/**
 * Migration endpoint: moves rows from the pg vector table into the mongo
 * `dataset.datas` collection and writes each new mongo _id back to pg
 * (column `data_id`) so the two stores stay mapped.
 *
 * Body: `{ limit?: number }` — number of concurrent migration chains (default 50).
 * Requires root credentials. The response is sent as soon as the migration
 * chains are started; progress is reported via server logs (see message).
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    const { limit = 50 } = req.body as { limit: number };
    // `limit` comes from the request body unvalidated: clamp it to a small
    // positive integer so a bad value cannot spawn unbounded concurrency.
    const concurrency = Math.max(1, Math.min(Math.floor(Number(limit) || 50), 1000));

    await authCert({ req, authRoot: true });
    await connectToDatabase();
    success = 0;

    // Best-effort schema migration. Each statement may fail independently
    // (e.g. the column already exists on a re-run), which is acceptable —
    // allSettled collects all outcomes without rejecting.
    try {
      await Promise.allSettled([
        PgClient.query(`ALTER TABLE ${PgDatasetTableName} ADD COLUMN data_id VARCHAR(50);`),
        PgClient.query(`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN q DROP NOT NULL;`), // q can null
        PgClient.query(`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN a DROP NOT NULL;`), // a can null
        PgClient.query(
          `ALTER TABLE ${PgDatasetTableName} ALTER COLUMN team_id TYPE VARCHAR(50) USING team_id::VARCHAR(50);`
        ), // team_id varchar
        PgClient.query(
          `ALTER TABLE ${PgDatasetTableName} ALTER COLUMN tmb_id TYPE VARCHAR(50) USING tmb_id::VARCHAR(50);`
        ), // tmb_id varchar
        PgClient.query(`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN team_id SET NOT NULL;`), // team_id not null
        PgClient.query(`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN tmb_id SET NOT NULL;`), // tmb_id not null
        PgClient.query(`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN dataset_id SET NOT NULL;`), // dataset_id not null
        PgClient.query(`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN collection_id SET NOT NULL;`) // collection_id not null
      ]);
    } catch (error) {}

    // Backfill team/tmb ids first — the row migration below copies them.
    await initPgData();

    jsonRes(res, {
      data: await init(concurrency),
      message:
        '初始化任务已开始,请注意日志进度。可通过 select count(id) from modeldata where data_id is null; 检查是否完全初始化,如果结果为 0 ,则完全初始化。'
    });
  } catch (error) {
    console.log(error);
    jsonRes(res, {
      code: 500,
      error
    });
  }
}
/** Shape of a row selected from the pg dataset table during migration. */
type PgItemType = {
  id: string;
  q: string; // chunk/question text
  a: string; // answer text; empty for plain chunks (see index type selection below)
  dataset_id: string;
  collection_id: string;
  team_id: string; // trimmed before use — NOTE(review): old CHAR column may pad values; confirm
  tmb_id: string; // trimmed before use, same as team_id
};
/**
 * Backfill pg rows whose `team_id` is the literal string 'null': resolve each
 * user's default team and write the real team/tmb ids back to pg.
 *
 * Must complete before the row migration (`init`) runs, because that step
 * copies `team_id`/`tmb_id` into mongo — so all worker chains are awaited.
 */
async function initPgData() {
  // Number of user chains processed concurrently.
  const limit = 10;
  const { rows } = await PgClient.query<{ user_id: string }>(`
  SELECT DISTINCT user_id FROM ${PgDatasetTableName} WHERE team_id='null';
`);
  console.log('init pg', rows.length);

  // Local counter (intentionally shadows the module-level `success`): counts
  // users backfilled in this phase only.
  let success = 0;

  // Fix: the original fired `init(i)` without awaiting, so this function
  // resolved before the backfill finished and errors in the chains surfaced
  // as unhandled rejections. Await every chain instead.
  await Promise.all(Array.from({ length: limit }, (_, i) => init(i)));

  async function init(index: number): Promise<any> {
    const userId = rows[index]?.user_id;
    if (!userId) return; // index past the end of the list — chain finished

    try {
      const tmb = await getUserDefaultTeam({ userId });
      // update pg
      await PgClient.query(
        `Update ${PgDatasetTableName} set team_id = '${tmb.teamId}', tmb_id = '${tmb.tmbId}' where user_id = '${userId}' AND team_id='null';`
      );
      console.log(++success);
      // Fix: return the continuation so completion and failures propagate up
      // the chain (matches the `return initData(...)` pattern used below).
      return init(index + limit);
    } catch (error) {
      // Users without a default team cannot be backfilled — skip them.
      if (error === 'default team not exist') {
        return;
      }
      console.log(error);
      await delay(1000);
      return init(index); // retry the same index after a pause
    }
  }
}
/**
 * Migrate every pg row whose `data_id` is NULL into the mongo
 * `dataset.datas` collection, then write the new mongo _id back to pg.
 *
 * Starts `limit` concurrent chains over the id list and returns WITHOUT
 * awaiting them — the migration deliberately continues in the background and
 * progress is reported via console logs (the handler's response message tells
 * the operator how to check completion in pg).
 *
 * @param limit number of concurrent `initData` chains to start
 */
async function init(limit: number): Promise<any> {
  // Snapshot of all unmigrated row ids; each chain walks this list with
  // stride `limit` starting at its own offset.
  const { rows: idList } = await PgClient.query<{ id: string }>(
    `SELECT id FROM ${PgDatasetTableName} WHERE data_id IS NULL`
  );

  console.log('totalCount', idList.length);
  if (idList.length === 0) return;

  // Fire-and-forget: chains are intentionally not awaited (see doc above).
  for (let i = 0; i < limit; i++) {
    initData(i);
  }

  async function initData(index: number): Promise<any> {
    const dataId = idList[index]?.id;
    // Index past the end of the snapshot — this chain is finished.
    if (!dataId) {
      console.log('done');
      return;
    }

    // Fetch the full row for this id (the columns copied into mongo).
    const { rows } = await PgClient.query<PgItemType>(
      `SELECT id,q,a,dataset_id,collection_id,team_id,tmb_id FROM ${PgDatasetTableName} WHERE id=${dataId};`
    );
    const data = rows[0];
    // Row vanished between the snapshot and now (e.g. deleted/migrated
    // concurrently) — treat like end-of-list.
    if (!data) {
      console.log('done');
      return;
    }

    // Holds the created mongo _id so a failed pg update can be rolled back.
    let id = '';
    try {
      // Create the mongo document. team/tmb ids are trimmed — NOTE(review):
      // presumably to drop CHAR-column padding from the old schema; confirm.
      const { _id } = await MongoDatasetData.create({
        teamId: data.team_id.trim(),
        tmbId: data.tmb_id.trim(),
        datasetId: data.dataset_id,
        collectionId: data.collection_id,
        q: data.q,
        a: data.a,
        indexes: [
          {
            // Rows with an answer become qa indexes; plain chunks are the
            // default index type.
            defaultIndex: !data.a,
            type: data.a ? DatasetDataIndexTypeEnum.qa : DatasetDataIndexTypeEnum.chunk,
            dataId: data.id,
            text: data.q
          }
        ]
      });
      id = _id;
      // Write the mapping back to pg so this row is not selected again.
      await PgClient.query(
        `UPDATE ${PgDatasetTableName} SET data_id='${String(_id)}' WHERE id=${dataId};`
      );
      console.log(++success);

      // Continue this chain at the next stride position.
      return initData(index + limit);
    } catch (error) {
      console.log(error);
      console.log(data);

      // Roll back the mongo document if it was created but the pg update
      // failed, so a retry does not duplicate it. Best-effort: a rollback
      // failure is swallowed and the retry proceeds anyway.
      try {
        if (id) {
          await MongoDatasetData.findByIdAndDelete(id);
        }
      } catch (error) {}

      await delay(500);
      return initData(index); // retry the same index after a pause
    }
  }
}

View File

@@ -4,21 +4,14 @@ import { connectToDatabase } from '@/service/mongo';
import { MongoBill } from '@fastgpt/service/support/wallet/bill/schema';
import {
createDefaultTeam,
getTeamInfoByTmbId
getUserDefaultTeam
} from '@fastgpt/service/support/user/team/controller';
import { MongoUser } from '@fastgpt/service/support/user/schema';
import { UserModelSchema } from '@fastgpt/global/support/user/type';
import { delay } from '@/utils/tools';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import {
DatasetCollectionSchemaType,
DatasetSchemaType,
DatasetTrainingSchemaType
} from '@fastgpt/global/core/dataset/type';
import { PermissionTypeEnum } from '@fastgpt/global/support/permission/constant';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { connectionMongo } from '@fastgpt/service/common/mongo';
import { Types } from 'mongoose';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { PgClient } from '@fastgpt/service/common/pg';
import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant';
@@ -30,6 +23,7 @@ import { MongoChatItem } from '@fastgpt/service/core/chat/chatItemSchema';
import { MongoPlugin } from '@fastgpt/service/core/plugin/schema';
import { POST } from '@fastgpt/service/common/api/plusRequest';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { getGFSCollection } from '@fastgpt/service/common/file/gridfs/controller';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
@@ -182,7 +176,7 @@ async function initMongoTeamId(limit: number) {
async function init(userId: string): Promise<any> {
try {
const tmb = await getTeamInfoByTmbId({ userId });
const tmb = await getUserDefaultTeam({ userId });
await schema.updateMany(
{
@@ -225,7 +219,7 @@ async function initDatasetAndApp() {
}
async function initCollectionFileTeam(limit: number) {
/* init user default Team */
const DatasetFile = connectionMongo.connection.db.collection(`dataset.files`);
const DatasetFile = getGFSCollection('dataset');
const matchWhere = {
$or: [{ 'metadata.teamId': { $exists: false } }, { 'metadata.teamId': null }]
};
@@ -264,7 +258,7 @@ async function initCollectionFileTeam(limit: number) {
async function init(userId: string): Promise<any> {
try {
const tmb = await getTeamInfoByTmbId({
const tmb = await getUserDefaultTeam({
userId
});
@@ -295,8 +289,8 @@ async function initPgData() {
// add column
try {
await Promise.all([
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ADD COLUMN team_id CHAR(50);`),
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ADD COLUMN tmb_id CHAR(50);`),
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ADD COLUMN team_id VARCHAR(50);`),
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ADD COLUMN tmb_id VARCHAR(50);`),
PgClient.query(`ALTER TABLE ${PgDatasetTableName} ALTER COLUMN user_id DROP NOT NULL;`)
]);
} catch (error) {
@@ -316,7 +310,7 @@ async function initPgData() {
const userId = rows[index]?.user_id;
if (!userId) return;
try {
const tmb = await getTeamInfoByTmbId({ userId });
const tmb = await getUserDefaultTeam({ userId });
// update pg
await PgClient.query(
`Update ${PgDatasetTableName} set team_id = '${tmb.teamId}', tmb_id = '${tmb.tmbId}' where user_id = '${userId}' AND team_id IS NULL;`

View File

@@ -80,8 +80,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
try {
await connectToDatabase();
const { userId, teamId, tmbId } = await authCert({ req, authToken: true });
console.log(req.body);
const { files, bucketName, metadata } = await upload.doUpload(req, res);
const upLoadResults = await Promise.all(

View File

@@ -4,15 +4,16 @@ import { connectToDatabase } from '@/service/mongo';
import type { CreateQuestionGuideParams } from '@/global/core/ai/api.d';
import { pushQuestionGuideBill } from '@/service/support/wallet/bill/push';
import { createQuestionGuide } from '@fastgpt/service/core/ai/functions/createQuestionGuide';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { authCertAndShareId } from '@fastgpt/service/support/permission/auth/common';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { messages } = req.body as CreateQuestionGuideParams;
const { tmbId, teamId } = await authCert({
const { messages, shareId } = req.body as CreateQuestionGuideParams;
const { tmbId, teamId } = await authCertAndShareId({
req,
authToken: true
authToken: true,
shareId
});
const qgModel = global.qgModels[0];

View File

@@ -3,10 +3,11 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { getVectorModel } from '@/service/core/ai/model';
import type { DatasetItemType } from '@/types/core/dataset';
import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
/* get all dataset by teamId or tmbId */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
@@ -20,7 +21,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const data = datasets.map((item) => ({
...item.toJSON(),
tags: item.tags.join(' '),
vectorModel: getVectorModel(item.vectorModel),
canWrite: String(item.tmbId) === tmbId,
isOwner: teamOwner || String(item.tmbId) === tmbId

View File

@@ -30,15 +30,15 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const collections = await findCollectionAndChild(collectionId, '_id metadata');
const delIdList = collections.map((item) => item._id);
// delete pg data
await delDataByCollectionId({ collectionIds: delIdList });
// delete training data
await MongoDatasetTraining.deleteMany({
datasetCollectionId: { $in: delIdList },
collectionId: { $in: delIdList },
teamId
});
// delete pg data
await delDataByCollectionId({ collectionIds: delIdList });
// delete file
await Promise.all(
collections.map((collection) => {

View File

@@ -27,8 +27,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
jsonRes<DatasetCollectionItemType>(res, {
data: {
...collection,
datasetId: collection.datasetId._id,
canWrite
canWrite,
sourceName: collection?.name,
sourceId: collection?.metadata?.fileId || collection?.metadata?.rawLink
}
});
} catch (err) {

View File

@@ -3,15 +3,15 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { DatasetTrainingCollectionName } from '@fastgpt/service/core/dataset/training/schema';
import { Types } from '@fastgpt/service/common/mongo';
import type { DatasetCollectionsListItemType } from '@/global/core/dataset/response';
import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';
import type { GetDatasetCollectionsProps } from '@/global/core/api/datasetReq';
import { PagingData } from '@/types';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { countCollectionData } from '@/service/core/dataset/data/utils';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
import { startQueue } from '@/service/utils/tools';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { getTeamInfoByTmbId } from '@fastgpt/service/support/user/team/controller';
import { DatasetDataCollectionName } from '@fastgpt/service/core/dataset/data/schema';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -30,7 +30,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// auth dataset and get my role
const { tmbId } = await authDataset({ req, authToken: true, datasetId, per: 'r' });
const { canWrite } = await getTeamInfoByTmbId({ tmbId });
const { canWrite } = await authUserRole({ req, authToken: true });
const match = {
datasetId: new Types.ObjectId(datasetId),
@@ -59,7 +59,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
...item,
dataAmount: 0,
trainingAmount: 0,
canWrite // admin or owner can write
canWrite // admin or team owner can write
}))
),
total: await MongoDatasetCollection.countDocuments(match)
@@ -67,51 +67,75 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
});
}
const collections: DatasetCollectionsListItemType[] = await MongoDatasetCollection.aggregate([
{
$match: match
},
{
$lookup: {
from: DatasetTrainingCollectionName,
localField: '_id',
foreignField: 'datasetCollectionId',
as: 'trainings_amount'
const [collections, total]: [DatasetCollectionsListItemType[], number] = await Promise.all([
MongoDatasetCollection.aggregate([
{
$match: match
},
{
$lookup: {
from: DatasetTrainingCollectionName,
let: { id: '$_id' },
pipeline: [
{
$match: {
$expr: {
$eq: ['$collectionId', '$$id']
}
}
},
{ $project: { _id: 1 } }
],
as: 'trainings'
}
},
{
$lookup: {
from: DatasetDataCollectionName,
let: { id: '$_id' },
pipeline: [
{
$match: {
$expr: {
$eq: ['$collectionId', '$$id']
}
}
},
{ $project: { _id: 1 } }
],
as: 'datas'
}
},
// 统计子集合的数量和子训练的数量
{
$project: {
_id: 1,
parentId: 1,
tmbId: 1,
name: 1,
type: 1,
updateTime: 1,
trainingAmount: { $size: '$trainings' },
dataAmount: { $size: '$datas' },
metadata: 1
}
},
{
$sort: { updateTime: -1 }
},
{
$skip: (pageNum - 1) * pageSize
},
{
$limit: pageSize
}
},
// 统计子集合的数量和子训练的数量
{
$project: {
_id: 1,
parentId: 1,
tmbId: 1,
name: 1,
type: 1,
updateTime: 1,
trainingAmount: { $size: '$trainings_amount' },
metadata: 1
}
},
{
$sort: { updateTime: -1 }
},
{
$skip: (pageNum - 1) * pageSize
},
{
$limit: pageSize
}
]),
MongoDatasetCollection.countDocuments(match)
]);
const counts = await countCollectionData({
collectionIds: collections.map((item) => item._id),
datasetId
});
const data = await Promise.all(
collections.map(async (item, i) => ({
...item,
dataAmount: item.type === DatasetCollectionTypeEnum.folder ? undefined : counts[i],
canWrite: String(item.tmbId) === tmbId || canWrite
}))
);
@@ -126,7 +150,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
pageNum,
pageSize,
data,
total: await MongoDatasetCollection.countDocuments(match)
total
}
});
} catch (err) {

View File

@@ -1,7 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type { DatasetPathItemType } from '@/types/core/dataset';
import type { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder/type.d';
import { getDatasetCollectionPaths } from '@fastgpt/service/core/dataset/collection/utils';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
@@ -22,7 +22,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
parentId
});
jsonRes<DatasetPathItemType[]>(res, {
jsonRes<ParentTreePathItemType[]>(res, {
data: paths
});
} catch (err) {

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import type { CreateDatasetParams } from '@/global/core/api/datasetReq.d';
import type { CreateDatasetParams } from '@/global/core/dataset/api.d';
import { createDefaultCollection } from './collection/create';
import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/user';

View File

@@ -1,10 +1,9 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { PgClient } from '@fastgpt/service/common/pg';
import { withNextCors } from '@fastgpt/service/common/middle/cors';
import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { deleteDataByDataId } from '@/service/core/dataset/data/controller';
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -20,11 +19,11 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
// 凭证校验
await authDatasetData({ req, authToken: true, dataId, per: 'w' });
await PgClient.delete(PgDatasetTableName, {
where: [['id', dataId]]
});
await deleteDataByDataId(dataId);
jsonRes(res);
jsonRes(res, {
data: 'success'
});
} catch (err) {
console.log(err);
jsonRes(res, {

View File

@@ -13,12 +13,9 @@ export type Response = {
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
let { dataId } = req.query as {
const { dataId } = req.query as {
dataId: string;
};
if (!dataId) {
throw new Error('缺少参数');
}
// 凭证校验
const { datasetData } = await authDatasetData({ req, authToken: true, dataId, per: 'r' });

View File

@@ -1,132 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoUser } from '@fastgpt/service/support/user/schema';
import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant';
import { findAllChildrenIds } from '../delete';
import QueryStream from 'pg-query-stream';
import { PgClient } from '@fastgpt/service/common/pg';
import { addLog } from '@fastgpt/service/common/mongo/controller';
import { responseWriteController } from '@fastgpt/service/common/response';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
let { datasetId } = req.query as {
datasetId: string;
};
if (!datasetId || !global.pgClient) {
throw new Error('缺少参数');
}
// 凭证校验
const { userId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });
const exportIds = [datasetId, ...(await findAllChildrenIds(datasetId))];
const limitMinutesAgo = new Date(
Date.now() - (global.feConfigs?.limit?.exportLimitMinutes || 0) * 60 * 1000
);
// auth export times
const authTimes = await MongoUser.findOne(
{
_id: userId,
$or: [
{ 'limit.exportKbTime': { $exists: false } },
{ 'limit.exportKbTime': { $lte: limitMinutesAgo } }
]
},
'_id limit'
);
if (!authTimes) {
const minutes = `${global.feConfigs?.limit?.exportLimitMinutes || 0} 分钟`;
throw new Error(`上次导出未到 ${minutes},每 ${minutes}仅可导出一次。`);
}
const { rows } = await PgClient.query(
`SELECT count(id) FROM ${PgDatasetTableName} where dataset_id IN (${exportIds
.map((id) => `'${id}'`)
.join(',')})`
);
const total = rows?.[0]?.count || 0;
addLog.info(`export datasets: ${userId}`, { total });
if (total > 100000) {
throw new Error('数据量超出 10 万,无法导出');
}
// connect pg
global.pgClient.connect((err, client, done) => {
if (err) {
console.error(err);
res.end('Error connecting to database');
return;
}
if (!client) return;
// create pg select stream
const query = new QueryStream(
`SELECT q, a FROM ${PgDatasetTableName} where dataset_id IN (${exportIds
.map((id) => `'${id}'`)
.join(',')})`
);
const stream = client.query(query);
res.setHeader('Content-Type', 'text/csv; charset=utf-8');
res.setHeader('Content-Disposition', 'attachment; filename=dataset.csv; ');
const write = responseWriteController({
res,
readStream: stream
});
write('index,content');
// parse data every row
stream.on('data', ({ q, a }: { q: string; a: string }) => {
if (res.closed) {
return stream.destroy();
}
q = q.replace(/"/g, '""');
a = a.replace(/"/g, '""');
// source = source?.replace(/"/g, '""');
write(`\n"${q}","${a || ''}"`);
});
// finish
stream.on('end', async () => {
try {
// update export time
await MongoUser.findByIdAndUpdate(userId, {
'limit.exportKbTime': new Date()
});
} catch (error) {}
// close response
done();
res.end();
});
stream.on('error', (err) => {
done(err);
res.end('Error exporting data');
});
});
} catch (err) {
res.status(500);
jsonRes(res, {
code: 500,
error: err
});
}
}
export const config = {
api: {
responseLimit: '100mb'
}
};

View File

@@ -6,8 +6,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { withNextCors } from '@fastgpt/service/common/middle/cors';
import { SetOneDatasetDataProps } from '@/global/core/api/datasetReq';
import { countPromptTokens } from '@/global/common/tiktoken';
import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
import { getVectorModel } from '@/service/core/ai/model';
import { hasSameValue } from '@/service/core/dataset/data/utils';
import { insertData2Dataset } from '@/service/core/dataset/data/controller';
@@ -15,11 +14,12 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { getCollectionWithDataset } from '@fastgpt/service/core/dataset/controller';
import { authTeamBalance } from '@/service/support/permission/auth/bill';
import { pushGenerateVectorBill } from '@/service/support/wallet/bill/push';
import { InsertOneDatasetDataProps } from '@/global/core/dataset/api';
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { collectionId, q, a } = req.body as SetOneDatasetDataProps;
const { collectionId, q, a, indexes } = req.body as InsertOneDatasetDataProps;
if (!q) {
return Promise.reject('q is required');
@@ -38,8 +38,6 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
per: 'w'
});
await authTeamBalance(teamId);
// auth collection and get dataset
const [
{
@@ -68,11 +66,12 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
const { insertId, tokenLen } = await insertData2Dataset({
teamId,
tmbId,
datasetId,
collectionId,
q: formatQ,
a: formatA,
collectionId,
datasetId,
model: vectorModel
model: vectorModel,
indexes
});
pushGenerateVectorBill({

View File

@@ -1,11 +1,11 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { PgClient } from '@fastgpt/service/common/pg';
import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant';
import type { DatasetDataListItemType } from '@/global/core/dataset/response.d';
import type { DatasetDataListItemType } from '@/global/core/dataset/type.d';
import type { GetDatasetDataListProps } from '@/global/core/api/datasetReq';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { PagingData } from '@/types';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -22,30 +22,29 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
searchText = searchText.replace(/'/g, '');
const where: any = [
['collection_id', collectionId],
searchText ? `AND (q ILIKE '%${searchText}%' OR a ILIKE '%${searchText}%')` : ''
];
const match = {
collectionId,
...(searchText
? {
$or: [{ q: new RegExp(searchText, 'i') }, { a: new RegExp(searchText, 'i') }]
}
: {})
};
const [searchRes, total] = await Promise.all([
PgClient.select<DatasetDataListItemType>(PgDatasetTableName, {
fields: ['id', 'q', 'a'],
where,
order: [{ field: 'id', mode: 'DESC' }],
limit: pageSize,
offset: pageSize * (pageNum - 1)
}),
PgClient.count(PgDatasetTableName, {
fields: ['id'],
where
})
const [data, total] = await Promise.all([
MongoDatasetData.find(match, '_id datasetId collectionId q a indexes')
.sort({ _id: -1 })
.skip((pageNum - 1) * pageSize)
.limit(pageSize)
.lean(),
MongoDatasetData.countDocuments(match)
]);
jsonRes(res, {
jsonRes<PagingData<DatasetDataListItemType>>(res, {
data: {
pageNum,
pageSize,
data: searchRes.rows,
data,
total
}
});

View File

@@ -4,32 +4,27 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { withNextCors } from '@fastgpt/service/common/middle/cors';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import { TrainingModeEnum, TrainingTypeMap } from '@fastgpt/global/core/dataset/constant';
import { startQueue } from '@/service/utils/tools';
import { DatasetChunkItemType } from '@fastgpt/global/core/dataset/type';
import { countPromptTokens } from '@/global/common/tiktoken';
import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
import type { PushDataResponse } from '@/global/core/api/datasetRes.d';
import type { PushDataProps } from '@/global/core/api/datasetReq.d';
import type { PushDatasetDataProps } from '@/global/core/dataset/api.d';
import { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
import { getVectorModel } from '@/service/core/ai/model';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { getCollectionWithDataset } from '@fastgpt/service/core/dataset/controller';
const modeMap = {
[TrainingModeEnum.index]: true,
[TrainingModeEnum.qa]: true
};
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { collectionId, data, mode = TrainingModeEnum.index } = req.body as PushDataProps;
const { collectionId, data, mode = TrainingModeEnum.chunk } = req.body as PushDatasetDataProps;
if (!collectionId || !Array.isArray(data)) {
throw new Error('collectionId or data is empty');
}
if (modeMap[mode] === undefined) {
throw new Error('Mode is not index or qa');
if (!TrainingTypeMap[mode]) {
throw new Error(`Mode is not ${Object.keys(TrainingTypeMap).join(', ')}`);
}
if (data.length > 200) {
@@ -68,8 +63,8 @@ export async function pushDataToDatasetCollection({
mode,
prompt,
billId
}: { teamId: string; tmbId: string } & PushDataProps): Promise<PushDataResponse> {
// get vector model
}: { teamId: string; tmbId: string } & PushDatasetDataProps): Promise<PushDataResponse> {
// get dataset vector model
const {
datasetId: { _id: datasetId, vectorModel }
} = await getCollectionWithDataset(collectionId);
@@ -77,7 +72,7 @@ export async function pushDataToDatasetCollection({
const vectorModelData = getVectorModel(vectorModel);
const modeMap = {
[TrainingModeEnum.index]: {
[TrainingModeEnum.chunk]: {
maxToken: vectorModelData.maxToken * 1.5,
model: vectorModelData.model
},
@@ -89,13 +84,12 @@ export async function pushDataToDatasetCollection({
// filter repeat or equal content
const set = new Set();
const filterResult: Record<string, DatasetChunkItemType[]> = {
const filterResult: Record<string, PushDatasetDataChunkProps[]> = {
success: [],
overToken: [],
repeat: [],
error: []
};
await Promise.all(
data.map(async (item) => {
if (!item.q) {
@@ -128,13 +122,14 @@ export async function pushDataToDatasetCollection({
teamId,
tmbId,
datasetId,
datasetCollectionId: collectionId,
collectionId,
billId,
mode,
prompt,
model: modeMap[mode].model,
q: item.q,
a: item.a
a: item.a,
indexes: item.indexes
}))
);

View File

@@ -2,52 +2,50 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { withNextCors } from '@fastgpt/service/common/middle/cors';
import { connectToDatabase } from '@/service/mongo';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import type { SetOneDatasetDataProps } from '@/global/core/api/datasetReq.d';
import { updateData2Dataset } from '@/service/core/dataset/data/controller';
import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { authTeamBalance } from '@/service/support/permission/auth/bill';
import { pushGenerateVectorBill } from '@/service/support/wallet/bill/push';
import { UpdateDatasetDataProps } from '@/global/core/dataset/api';
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { id, collectionId, q = '', a } = req.body as SetOneDatasetDataProps;
if (!id || !collectionId) {
throw new Error('缺少参数');
}
const { id, q = '', a, indexes } = req.body as UpdateDatasetDataProps;
// auth data permission
const { datasetData, teamId, tmbId } = await authDatasetData({
const {
collection: {
datasetId: { vectorModel }
},
teamId,
tmbId
} = await authDatasetData({
req,
authToken: true,
dataId: id,
per: 'w'
});
// auth team balance
await authTeamBalance(teamId);
// auth user and get kb
const dataset = await MongoDataset.findById(datasetData.datasetId, 'vectorModel');
if (!dataset) {
throw new Error("Can't find database");
}
const { tokenLen } = await updateData2Dataset({
dataId: id,
q,
a,
model: dataset.vectorModel
indexes,
model: vectorModel
});
pushGenerateVectorBill({
teamId,
tmbId,
tokenLen: tokenLen,
model: dataset.vectorModel
});
if (tokenLen) {
pushGenerateVectorBill({
teamId,
tmbId,
tokenLen: tokenLen,
model: vectorModel
});
}
jsonRes(res);
} catch (err) {

View File

@@ -3,12 +3,12 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { PgClient } from '@fastgpt/service/common/pg';
import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant';
import { delDatasetFiles } from '@fastgpt/service/core/dataset/file/controller';
import { Types } from '@fastgpt/service/common/mongo';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { delDataByDatasetId } from '@/service/core/dataset/data/controller';
import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -24,17 +24,15 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// auth owner
await authDataset({ req, authToken: true, datasetId: id, per: 'owner' });
const deletedIds = [id, ...(await findAllChildrenIds(id))];
const deletedIds = await findDatasetIdTreeByTopDatasetId(id);
// delete training data
// delete training data(There could be a training mission)
await MongoDatasetTraining.deleteMany({
datasetId: { $in: deletedIds.map((id) => new Types.ObjectId(id)) }
datasetId: { $in: deletedIds }
});
// delete all pg data
await PgClient.delete(PgDatasetTableName, {
where: [`dataset_id IN (${deletedIds.map((id) => `'${id}'`).join(',')})`]
});
// delete all dataset.data and pg data
await delDataByDatasetId({ datasetIds: deletedIds });
// delete related files
await delDatasetFiles({ datasetId: id });
@@ -57,17 +55,3 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
});
}
}
/**
 * Recursively collect the ids of every descendant dataset under `id`.
 *
 * @param id - root dataset id whose subtree is searched (the root itself is NOT included).
 * @returns all child / grandchild / … dataset ids as strings, parents before their descendants.
 *
 * Note: the original implementation awaited each child's subtree sequentially
 * (N+1 sequential queries); the recursive calls are independent, so they are
 * issued in parallel here. `Promise.all` preserves order, so the returned id
 * ordering is identical to the sequential version.
 */
export async function findAllChildrenIds(id: string) {
  // direct children of this node
  const children = await MongoDataset.find({ parentId: id });
  const directIds = children.map((child) => String(child._id));

  // recurse into each child's subtree concurrently
  const subtreeIds = await Promise.all(children.map((child) => findAllChildrenIds(child._id)));

  return directIds.concat(...subtreeIds);
}

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { getVectorModel } from '@/service/core/ai/model';
import { DatasetItemType } from '@/types/core/dataset';
import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
@@ -27,7 +27,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
jsonRes<DatasetItemType>(res, {
data: {
...dataset,
tags: dataset.tags.join(' '),
vectorModel: getVectorModel(dataset.vectorModel),
canWrite,
isOwner

View File

@@ -0,0 +1,117 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoUser } from '@fastgpt/service/support/user/schema';
import { addLog } from '@fastgpt/service/common/mongo/controller';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller';
import { Readable } from 'stream';
import type { Cursor } from '@fastgpt/service/common/mongo';
/**
 * Export all q/a rows of a dataset (and every descendant dataset) as a CSV download.
 *
 * Flow: auth (write permission) → rate-limit check against the user's last export
 * time → row-count guard (max 100k) → stream the Mongo cursor straight into the
 * HTTP response as CSV so the full result set is never held in memory.
 *
 * @throws Error when datasetId is missing, the export rate limit has not elapsed,
 *         or the dataset tree exceeds 100k rows.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
    let { datasetId } = req.query as {
      datasetId: string;
    };

    if (!datasetId || !global.pgClient) {
      throw new Error('缺少参数');
    }

    // 凭证校验 — requires write ('w') permission on the dataset
    const { userId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });

    // export includes the dataset itself plus all of its child datasets
    const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId);

    const limitMinutesAgo = new Date(
      Date.now() - (global.feConfigs?.limit?.exportLimitMinutes || 0) * 60 * 1000
    );

    // auth export times: the user may export at most once per configured window;
    // matches users with no recorded export time or one older than the window
    const authTimes = await MongoUser.findOne(
      {
        _id: userId,
        $or: [
          { 'limit.exportKbTime': { $exists: false } },
          { 'limit.exportKbTime': { $lte: limitMinutesAgo } }
        ]
      },
      '_id limit'
    );

    if (!authTimes) {
      const minutes = `${global.feConfigs?.limit?.exportLimitMinutes || 0} 分钟`;
      throw new Error(`上次导出未到 ${minutes},每 ${minutes}仅可导出一次。`);
    }

    // auth max data: refuse oversized exports up front
    const total = await MongoDatasetData.countDocuments({
      datasetId: { $in: exportIds }
    });

    addLog.info(`export datasets: ${datasetId}`, { total });

    if (total > 100000) {
      throw new Error('数据量超出 10 万,无法导出');
    }

    res.setHeader('Content-Type', 'text/csv; charset=utf-8;');
    res.setHeader('Content-Disposition', 'attachment; filename=dataset.csv; ');

    // projection is 'q a' only — type reflects exactly what the cursor yields
    const cursor = MongoDatasetData.find<{
      _id: string;
      q?: string;
      a?: string;
    }>(
      {
        datasetId: { $in: exportIds }
      },
      'q a'
    ).cursor();

    // Bridge the Mongo cursor into a Readable that emits CSV text.
    function cursorToReadableStream(cursor: Cursor) {
      const readable = new Readable({
        objectMode: true,
        read() {}
      });

      // BOM so Excel detects UTF-8; header row
      readable.push(`\uFEFFindex,content`);

      cursor.on('data', (doc) => {
        // FIX: `a` (and potentially `q`) can be undefined — calling .replace on
        // undefined threw a TypeError before the old `|| ''` fallback could apply.
        // Guard first, then escape embedded quotes per CSV convention.
        const q = doc.q ? doc.q.replace(/"/g, '""') : '';
        const a = doc.a ? doc.a.replace(/"/g, '""') : '';
        readable.push(`\n"${q}","${a}"`);
      });

      cursor.on('end', async () => {
        readable.push(null);
        cursor.close();
        // record export time only after the full stream completed
        await MongoUser.findByIdAndUpdate(userId, {
          'limit.exportKbTime': new Date()
        });
      });

      return readable;
    }

    // @ts-ignore — mongoose Cursor type does not line up with the local Cursor alias
    const stream = cursorToReadableStream(cursor);

    stream.pipe(res);
  } catch (err) {
    res.status(500);
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}
// Next.js API route config: raise the response size cap so large CSV exports
// (up to 100k rows) are not truncated by the default 4mb limit.
export const config = {
  api: {
    responseLimit: '100mb'
  }
};

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { getVectorModel } from '@/service/core/ai/model';
import type { DatasetItemType } from '@/types/core/dataset';
import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
@@ -27,7 +27,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const data = await Promise.all(
datasets.map(async (item) => ({
...item.toJSON(),
tags: item.tags.join(' '),
vectorModel: getVectorModel(item.vectorModel),
canWrite,
isOwner: teamOwner || String(item.tmbId) === tmbId

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import type { DatasetPathItemType } from '@/types/core/dataset';
import type { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder/type.d';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
@@ -19,7 +19,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
await authDataset({ req, authToken: true, datasetId: parentId, per: 'r' });
jsonRes<DatasetPathItemType[]>(res, {
jsonRes<ParentTreePathItemType[]>(res, {
data: await getParents(parentId)
});
} catch (err) {
@@ -30,7 +30,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
}
}
async function getParents(parentId?: string): Promise<DatasetPathItemType[]> {
async function getParents(parentId?: string): Promise<ParentTreePathItemType[]> {
if (!parentId) {
return [];
}

View File

@@ -8,7 +8,7 @@ import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { authTeamBalance } from '@/service/support/permission/auth/bill';
import { pushGenerateVectorBill } from '@/service/support/wallet/bill/push';
import { countModelPrice } from '@/service/support/wallet/bill/utils';
import { searchDatasetData } from '@/service/core/dataset/data/utils';
import { searchDatasetData } from '@/service/core/dataset/data/pg';
import { updateApiKeyUsage } from '@fastgpt/service/support/openapi/tools';
import { ModelTypeEnum } from '@/service/core/ai/model';
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';

View File

@@ -27,8 +27,11 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
url,
data,
{
// @ts-ignore
headers: req.headers
headers: {
...req.headers,
// @ts-ignore
rootkey: undefined
}
},
method
);

View File

@@ -4,7 +4,7 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { readFileSync } from 'fs';
import type { InitDateResponse } from '@/global/common/api/systemRes';
import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
import { getTikTokenEnc } from '@/global/common/tiktoken';
import { getTikTokenEnc } from '@fastgpt/global/common/string/tiktoken';
import { initHttpAgent } from '@fastgpt/service/common/middle/httpAgent';
import {
defaultChatModels,
@@ -98,7 +98,9 @@ export function getInitConfig() {
global.systemEnv = res.SystemParams
? { ...defaultSystemEnv, ...res.SystemParams }
: defaultSystemEnv;
global.feConfigs = res.FeConfig ? { ...defaultFeConfigs, ...res.FeConfig } : defaultFeConfigs;
global.feConfigs = res.FeConfig
? { ...defaultFeConfigs, ...res.FeConfig, isPlus: !!res.SystemParams?.pluginBaseUrl }
: defaultFeConfigs;
global.chatModels = res.ChatModels || defaultChatModels;
global.qaModels = res.QAModels || defaultQAModels;

View File

@@ -8,10 +8,10 @@ import { getUserDetail } from '@/service/support/user/controller';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
await connectToDatabase();
const { userId, tmbId } = await authCert({ req, authToken: true });
const { tmbId } = await authCert({ req, authToken: true });
jsonRes(res, {
data: await getUserDetail({ tmbId, userId })
data: await getUserDetail({ tmbId })
});
} catch (err) {
jsonRes(res, {

View File

@@ -170,7 +170,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
// auth app, get history
const { history } = await getChatHistory({ chatId, tmbId: user.team.tmbId });
const isOwner = !shareId && String(user.team.tmbId) === String(app.tmbId);
const isAppOwner = !shareId && String(user.team.tmbId) === String(app.tmbId);
/* format prompts */
const prompts = history.concat(gptMessage2ChatType(messages));
@@ -208,7 +208,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
teamId: user.team.teamId,
tmbId: user.team.tmbId,
variables,
isOwner, // owner update use time
updateUseTime: isAppOwner, // owner update use time
shareId,
source: (() => {
if (shareId) {