V4.8.15 feature (#3331)

* feat: add customize toolkit (#3205)

* chaoyang

* fix-auth

* add toolkit

* add order

* plugin usage

* fix

* delete console:

* Fix: Fix fullscreen preview top positioning and improve Markdown rendering logic (#3247)

* 完成任务:修复全屏预览顶部固定问题,优化 Markdown 渲染逻辑

* 有问题修改

* 问题再修改

* 修正问题

* fix: plugin standalone display issue (#3254)

* 4.8.15 test (#3246)

* o1 config

* perf: system plugin code

* 调整系统插件代码。增加html 渲染安全配置。 (#3258)

* perf: base64 picker

* perf: list app or dataset

* perf: plugin config code

* 小窗适配等问题 (#3257)

* 小窗适配等问题

* git问题

* 小窗剩余问题

* feat: system plugin auth and lock version (#3265)

* feat: system plugin auth and lock version

* update comment

* 4.8.15 test (#3267)

* tmp log

* perf: login direct

* perf: iframe html code

* remove log

* fix: plugin standalone display (#3277)

* refactor: 页面拆分&i18n拆分 (#3281)

* refactor: account组件拆成独立页面

* script: 新增i18n json文件创建脚本

* refactor: 页面i18n拆分

* i18n: add en&hant

* 4.8.15 test (#3285)

* tmp log

* remove log

* fix: watch avatar refresh

* perf: i18n code

* fix(plugin): use intro instead of userguide (#3290)

* Universal SSO (#3292)

* tmp log

* remove log

* feat: common oauth

* readme

* perf: sso provider

* remove sso code

* perf: refresh plugins

* feat: add api dataset (#3272)

* add api-dataset

* fix api-dataset

* fix api dataset

* fix ts

* perf: create collection code (#3301)

* tmp log

* remove log

* perf: i18n change

* update version doc

* feat: question guide from chatId

* perf: create collection code

* fix: request api

* fix: request api

* fix: tts auth and response type (#3303)

* perf: md splitter

* fix: tts auth and response type

* fix: api file dataset (#3307)

* perf: api dataset init (#3310)

* perf: collection schema

* perf: api dataset init

* refactor: 团队管理独立页面 (#3302)

* ui: 团队管理独立页面

* 代码优化

* fix

* perf: sync collection and ui check (#3314)

* perf: sync collection

* remove script

* perf: update api server

* perf: api dataset parent

* perf: team ui

* perf: team 18n

* update team ui

* perf: ui check

* perf: i18n

* fix: debug variables & cronjob & system plugin callback load (#3315)

* fix: debug variables & cronjob & system plugin callback load

* fix type

* fix

* fix

* fix: plugin dataset quote;perf: system variables init (#3316)

* fix: plugin dataset quote

* perf: system variables init

* perf: node templates ui;fix: dataset import ui (#3318)

* fix: dataset import ui

* perf: node templates ui

* perf: ui refresh

* feat:套餐改名和套餐跳转配置 (#3309)

* fixing: except Sidebar

* 去除了多余的代码

* 修正了套餐说明的代码

* 修正了误删除的show_git代码

* 修正了名字部分等代码

* 修正了问题,遗留了其他和ui讨论不一致的部分

* 4.8.15 test (#3319)

* remove log

* perf: bill ui

* perf: bill ui

* perf: log

* html渲染文档 (#3270)

* html渲染文档

* 文档有点小问题

* feat: doc (#3322)

* 集合重训练 (#3282)

* rebaser

* 一点补充

* 小问题

* 其他问题修正,删除集合保留文件的参数还没找到...

* reTraining

* delete useless

* 删除了一行错误代码

* 集合重训练部分

* fixing

* 删除console代码

* feat: navbar item config (#3326)

* perf: custom navbar code;perf: retraining code;feat: api dataset and dataset api doc (#3329)

* feat: api dataset and dataset api doc

* perf: retraining code

* perf: custom navbar code

* fix: ts (#3330)

* fix: ts

* fix: ts

* retraining ui

* perf: api collection filter

* perf: retraining button

---------

Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Jiangween <145003935+Jiangween@users.noreply.github.com>
Co-authored-by: papapatrick <109422393+Patrickill@users.noreply.github.com>
This commit is contained in:
Archer
2024-12-06 10:56:53 +08:00
committed by GitHub
parent b188544386
commit 1aebe5f185
307 changed files with 7383 additions and 3981 deletions

View File

@@ -7,42 +7,62 @@ import { NextAPI } from '@/service/middleware/entry';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { OwnerPermissionVal } from '@fastgpt/global/support/permission/constant';
import {
OwnerPermissionVal,
WritePermissionVal
} from '@fastgpt/global/support/permission/constant';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
export type PreviewContextProps = {
datasetId: string;
type: DatasetSourceReadTypeEnum;
sourceId: string;
isQAImport?: boolean;
selector?: string;
externalFileId?: string;
};
async function handler(req: ApiRequestProps<PreviewContextProps>, res: NextApiResponse<any>) {
const { type, sourceId, isQAImport, selector } = req.body;
const { type, sourceId, isQAImport, selector, datasetId, externalFileId } = req.body;
if (!sourceId) {
throw new Error('fileId is empty');
}
const { teamId } = await (async () => {
const { teamId, apiServer } = await (async () => {
if (type === DatasetSourceReadTypeEnum.fileLocal) {
return authCollectionFile({
const res = await authCollectionFile({
req,
authToken: true,
authApiKey: true,
fileId: sourceId,
per: OwnerPermissionVal
});
return {
teamId: res.teamId
};
}
return authCert({ req, authApiKey: true, authToken: true });
const { dataset } = await authDataset({
req,
authApiKey: true,
authToken: true,
datasetId,
per: WritePermissionVal
});
return {
teamId: dataset.teamId,
apiServer: dataset.apiServer
};
})();
const rawText = await readDatasetSourceRawText({
teamId,
type,
sourceId: sourceId,
sourceId,
isQAImport,
selector
selector,
apiServer,
externalFileId
});
return {

View File

@@ -3,11 +3,19 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { pushQuestionGuideUsage } from '@/service/support/wallet/usage/push';
import { createQuestionGuide } from '@fastgpt/service/core/ai/functions/createQuestionGuide';
import { authChatCert } from '@/service/support/permission/auth/chat';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middleware/entry';
import { OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat';
import { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
import { AuthModeType } from '@fastgpt/service/support/permission/type';
import { AuthUserTypeEnum } from '@fastgpt/global/support/permission/constant';
import { authOutLinkValid } from '@fastgpt/service/support/permission/publish/authLink';
import { authOutLinkInit } from '@/service/support/permission/auth/outLink';
import { authTeamSpaceToken } from '@/service/support/permission/auth/team';
import { MongoTeamMember } from '@fastgpt/service/support/user/team/teamMemberSchema';
import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant';
import { ChatErrEnum } from '@fastgpt/global/common/error/code/chat';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
async function handler(
req: ApiRequestProps<
@@ -52,3 +60,62 @@ async function handler(
}
export default NextAPI(handler);
/*
  Abandoned
  Different chat source
  1. token (header)
  2. apikey (header)
  3. share page (body: shareId outLinkUid)
  4. team chat page (body: teamId teamToken)
*/
async function authChatCert(props: AuthModeType): Promise<{
  teamId: string;
  tmbId: string;
  authType: AuthUserTypeEnum;
  apikey: string;
  isOwner: boolean;
  canWrite: boolean;
  outLinkUid?: string;
}> {
  const { teamId, teamToken, shareId, outLinkUid } = props.req.body as OutLinkChatAuthProps;

  // Flags shared by every anonymous (share-link / team-domain) visitor:
  // no apikey, not an owner, no write access.
  const guestAccess = {
    apikey: '',
    isOwner: false,
    canWrite: false
  };

  // Case 3 — share page: validate the share link, then init/verify the visitor uid.
  if (shareId && outLinkUid) {
    const { outLinkConfig } = await authOutLinkValid({ shareId });
    const { uid } = await authOutLinkInit({
      outLinkUid,
      tokenUrl: outLinkConfig.limit?.hookUrl
    });

    return {
      ...guestAccess,
      teamId: String(outLinkConfig.teamId),
      tmbId: String(outLinkConfig.tmbId),
      authType: AuthUserTypeEnum.outLink,
      outLinkUid: uid
    };
  }

  // Case 4 — team chat page: verify the team space token, then impersonate
  // the team owner's member id for the rest of the request.
  if (teamId && teamToken) {
    const { uid } = await authTeamSpaceToken({ teamId, teamToken });
    const ownerMember = await MongoTeamMember.findOne(
      { teamId, role: TeamMemberRoleEnum.owner },
      'tmbId'
    ).lean();
    if (!ownerMember) return Promise.reject(ChatErrEnum.unAuthChat);

    return {
      ...guestAccess,
      teamId,
      tmbId: String(ownerMember._id),
      authType: AuthUserTypeEnum.teamDomain,
      outLinkUid: uid
    };
  }

  // Cases 1/2 — fall back to header-based token / apikey auth.
  return authCert(props);
}

View File

@@ -1,47 +1,59 @@
import type { NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type { CreateQuestionGuideParams } from '@/global/core/ai/api.d';
import { pushQuestionGuideUsage } from '@/service/support/wallet/usage/push';
import { createQuestionGuide } from '@fastgpt/service/core/ai/functions/createQuestionGuide';
import { authChatCrud } from '@/service/support/permission/auth/chat';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middleware/entry';
import { OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat';
import { getChatItems } from '@fastgpt/service/core/chat/controller';
import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt';
export type CreateQuestionGuideParams = OutLinkChatAuthProps & {
appId: string;
chatId: string;
};
async function handler(req: ApiRequestProps<CreateQuestionGuideParams>, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { messages } = req.body;
const { appId, chatId } = req.body;
const { tmbId, teamId } = await authChatCrud({
const [{ tmbId, teamId }] = await Promise.all([
authChatCrud({
req,
authToken: true,
authApiKey: true,
...req.body
});
})
]);
const qgModel = global.llmModels[0];
// Auth app and get questionGuide config
const { result, tokens } = await createQuestionGuide({
messages,
model: qgModel.model
});
// Get histories
const { histories } = await getChatItems({
appId,
chatId,
offset: 0,
limit: 6,
field: 'obj value time'
});
const messages = chats2GPTMessages({ messages: histories, reserveId: false });
jsonRes(res, {
data: result
});
const qgModel = global.llmModels[0];
pushQuestionGuideUsage({
tokens,
teamId,
tmbId
});
} catch (err) {
jsonRes(res, {
code: 500,
error: err
});
}
const { result, tokens } = await createQuestionGuide({
messages,
model: qgModel.model
});
jsonRes(res, {
data: result
});
pushQuestionGuideUsage({
tokens,
teamId,
tmbId
});
}
export default NextAPI(handler);

View File

@@ -28,33 +28,51 @@ export type ListAppBody = {
async function handler(req: ApiRequestProps<ListAppBody>): Promise<AppListItemType[]> {
const { parentId, type, getRecentlyChat, searchKey } = req.body;
// 凭证校验
const {
app: ParentApp,
tmbId,
teamId,
permission: myPer
} = await (async () => {
if (parentId) {
return await authApp({
req,
authToken: true,
authApiKey: true,
appId: parentId,
per: ReadPermissionVal
// Auth user permission
const [{ tmbId, teamId, permission: teamPer }] = await Promise.all([
authUserPer({
req,
authToken: true,
authApiKey: true,
per: ReadPermissionVal
}),
...(parentId
? [
authApp({
req,
authToken: true,
authApiKey: true,
appId: parentId,
per: ReadPermissionVal
})
]
: [])
]);
// Get team all app permissions
const [perList, myGroupMap] = await Promise.all([
MongoResourcePermission.find({
resourceType: PerResourceTypeEnum.app,
teamId,
resourceId: {
$exists: true
}
}).lean(),
getGroupsByTmbId({
tmbId,
teamId
}).then((item) => {
const map = new Map<string, 1>();
item.forEach((item) => {
map.set(String(item._id), 1);
});
} else {
return {
...(await authUserPer({
req,
authToken: true,
authApiKey: true,
per: ReadPermissionVal
})),
app: undefined
};
}
})();
return map;
})
]);
// Get my permissions
const myPerList = perList.filter(
(item) => String(item.tmbId) === String(tmbId) || myGroupMap.has(String(item.groupId))
);
const findAppsQuery = (() => {
const searchMatch = searchKey
@@ -65,10 +83,15 @@ async function handler(req: ApiRequestProps<ListAppBody>): Promise<AppListItemTy
]
}
: {};
// Filter apps by permission, if not owner, only get apps that I have permission to access
const appIdQuery = teamPer.isOwner
? {}
: { _id: { $in: myPerList.map((item) => item.resourceId) } };
if (getRecentlyChat) {
return {
// get all chat app
...appIdQuery,
teamId,
type: { $in: [AppTypeEnum.workflow, AppTypeEnum.simple, AppTypeEnum.plugin] },
...searchMatch
@@ -77,63 +100,46 @@ async function handler(req: ApiRequestProps<ListAppBody>): Promise<AppListItemTy
if (searchKey) {
return {
...appIdQuery,
teamId,
...searchMatch
};
}
return {
...appIdQuery,
teamId,
...(type && (Array.isArray(type) ? { type: { $in: type } } : { type })),
...parseParentIdInMongo(parentId)
};
})();
const limit = (() => {
if (getRecentlyChat) return 15;
if (searchKey) return 20;
return 1000;
})();
/* temp: get all apps and per */
const myGroupIds = (
await getGroupsByTmbId({
tmbId,
teamId
const myApps = await MongoApp.find(
findAppsQuery,
'_id parentId avatar type name intro tmbId updateTime pluginData inheritPermission'
)
.sort({
updateTime: -1
})
).map((item) => String(item._id));
.limit(limit)
.lean();
const [myApps, perList] = await Promise.all([
MongoApp.find(
findAppsQuery,
'_id parentId avatar type name intro tmbId updateTime pluginData inheritPermission'
)
.sort({
updateTime: -1
})
.limit(searchKey ? 20 : 1000)
.lean(),
MongoResourcePermission.find({
resourceType: PerResourceTypeEnum.app,
teamId,
resourceId: {
$exists: true
}
}).lean()
]);
// Filter apps by permission
const filterApps = myApps
// Add app permission and filter apps by read permission
const formatApps = myApps
.map((app) => {
const { Per, privateApp } = (() => {
const myPerList = perList.filter(
(item) =>
String(item.tmbId) === String(tmbId) || myGroupIds.includes(String(item.groupId))
);
const getPer = (appId: string) => {
const tmbPer = myPerList.find(
(item) => String(item.resourceId) === appId && !!item.tmbId
)?.permission;
const groupPer = getGroupPer(
myPerList
.filter(
(item) =>
String(item.resourceId) === appId && myGroupIds.includes(String(item.groupId))
)
.filter((item) => String(item.resourceId) === appId && !!item.groupId)
.map((item) => item.permission)
);
@@ -143,15 +149,15 @@ async function handler(req: ApiRequestProps<ListAppBody>): Promise<AppListItemTy
return {
Per: new AppPermission({
per: tmbPer ?? groupPer ?? AppDefaultPermissionVal,
isOwner: String(app.tmbId) === String(tmbId) || myPer.isOwner
isOwner: String(app.tmbId) === String(tmbId) || teamPer.isOwner
}),
privateApp: AppFolderTypeList.includes(app.type) ? clbCount <= 1 : clbCount === 0
};
};
// Inherit app
if (app.inheritPermission && ParentApp && !AppFolderTypeList.includes(app.type)) {
return getPer(String(ParentApp._id));
if (app.inheritPermission && parentId && !AppFolderTypeList.includes(app.type)) {
return getPer(String(parentId));
} else {
return getPer(String(app._id));
}
@@ -165,9 +171,7 @@ async function handler(req: ApiRequestProps<ListAppBody>): Promise<AppListItemTy
})
.filter((app) => app.permission.hasReadPer);
const sliceApps = getRecentlyChat ? filterApps.slice(0, 15) : filterApps;
return sliceApps.map((app) => ({
return formatApps.map((app) => ({
_id: app._id,
tmbId: app.tmbId,
avatar: app.avatar,

View File

@@ -2,12 +2,12 @@ import type { NextApiResponse } from 'next';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { NodeTemplateListItemType } from '@fastgpt/global/core/workflow/type/node.d';
import { NextAPI } from '@/service/middleware/entry';
import { getSystemPlugins } from '@/service/core/app/plugin';
import { FlowNodeTemplateTypeEnum } from '@fastgpt/global/core/workflow/constants';
import { getSystemPluginCb, getSystemPlugins } from '@/service/core/app/plugin';
import { FlowNodeTypeEnum } from '@fastgpt/global/core/workflow/node/constant';
import { ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { replaceRegChars } from '@fastgpt/global/common/string/tools';
import { FlowNodeTemplateTypeEnum } from '@fastgpt/global/core/workflow/constants';
export type GetSystemPluginTemplatesBody = {
searchKey?: string;
@@ -24,6 +24,10 @@ async function handler(
const formatParentId = parentId || null;
// Make sure system plugin callbacks are loaded
if (!global.systemPluginCb || Object.keys(global.systemPluginCb).length === 0)
await getSystemPluginCb();
return getSystemPlugins().then((res) =>
res
// Just show the active plugins
@@ -39,7 +43,9 @@ async function handler(
intro: plugin.intro,
isTool: plugin.isTool,
currentCost: plugin.currentCost,
author: plugin.author
hasTokenFee: plugin.hasTokenFee,
author: plugin.author,
instructions: plugin.userGuide
}))
.filter((item) => {
if (searchKey) {

View File

@@ -6,8 +6,7 @@ import { NextAPI } from '@/service/middleware/entry';
import {
ManagePermissionVal,
PerResourceTypeEnum,
ReadPermissionVal,
WritePermissionVal
ReadPermissionVal
} from '@fastgpt/global/support/permission/constant';
import { parseParentIdInMongo } from '@fastgpt/global/common/parentFolder/utils';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
@@ -16,7 +15,7 @@ import {
syncChildrenPermission,
syncCollaborators
} from '@fastgpt/service/support/permission/inheritPermission';
import { AppFolderTypeList } from '@fastgpt/global/core/app/constants';
import { AppFolderTypeList, AppTypeEnum } from '@fastgpt/global/core/app/constants';
import { ClientSession } from 'mongoose';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { getResourceClbsAndGroups } from '@fastgpt/service/support/permission/controller';
@@ -91,7 +90,10 @@ async function handler(req: ApiRequestProps<AppUpdateBody, AppUpdateQuery>) {
const onUpdate = async (session?: ClientSession) => {
// format nodes data
// 1. dataset search limit, less than model quoteMaxToken
const { nodes: formatNodes } = beforeUpdateAppFormat({ nodes });
const { nodes: formatNodes } = beforeUpdateAppFormat({
nodes,
isPlugin: app.type === AppTypeEnum.plugin
});
return MongoApp.findByIdAndUpdate(
appId,

View File

@@ -9,6 +9,7 @@ import { getNextTimeByCronStringAndTimezone } from '@fastgpt/global/common/strin
import { PostPublishAppProps } from '@/global/core/app/api';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { AppTypeEnum } from '@fastgpt/global/core/app/constants';
async function handler(
req: ApiRequestProps<PostPublishAppProps>,
@@ -17,9 +18,12 @@ async function handler(
const { appId } = req.query as { appId: string };
const { nodes = [], edges = [], chatConfig, isPublish, versionName } = req.body;
const { tmbId } = await authApp({ appId, req, per: WritePermissionVal, authToken: true });
const { app, tmbId } = await authApp({ appId, req, per: WritePermissionVal, authToken: true });
const { nodes: formatNodes } = beforeUpdateAppFormat({ nodes });
const { nodes: formatNodes } = beforeUpdateAppFormat({
nodes,
isPlugin: app.type === AppTypeEnum.plugin
});
await mongoSessionRun(async (session) => {
// create version histories

View File

@@ -8,7 +8,6 @@ import { authChatCrud } from '@/service/support/permission/auth/chat';
import { authType2UsageSource } from '@/service/support/wallet/usage/utils';
import { getAudioSpeechModel } from '@fastgpt/service/core/ai/model';
import { MongoTTSBuffer } from '@fastgpt/service/common/buffer/tts/schema';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
/*
@@ -93,4 +92,5 @@ async function handler(req: ApiRequestProps<GetChatSpeechProps>, res: NextApiRes
}
}
export default NextAPI(handler);
// 不能使用 NextApiResponse
export default handler;

View File

@@ -0,0 +1,36 @@
import { NextAPI } from '@/service/middleware/entry';
import { ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import { APIFileItem } from '@fastgpt/global/core/dataset/apiDataset';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { useApiDatasetRequest } from '@fastgpt/service/core/dataset/apiDataset/api';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { NextApiRequest } from 'next';
export type GetApiDatasetFileListProps = {
searchKey?: string;
parentId?: ParentIdType;
datasetId: string;
};
export type GetApiDatasetFileListResponse = APIFileItem[];
async function handler(req: NextApiRequest) {
let { searchKey = '', parentId = null, datasetId } = req.body;
const { dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
datasetId,
per: ReadPermissionVal
});
const apiServer = dataset.apiServer;
if (!apiServer) {
return Promise.reject('apiServer is required');
}
return useApiDatasetRequest({ apiServer }).listFiles({ searchKey, parentId });
}
export default NextAPI(handler);

View File

@@ -0,0 +1,40 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middleware/entry';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
export type listExistIdQuery = {
datasetId: string;
};
export type listExistIdBody = {};
export type listExistIdResponse = string[];
async function handler(
req: ApiRequestProps<listExistIdBody, listExistIdQuery>,
res: ApiResponseType<any>
): Promise<listExistIdResponse> {
const { datasetId } = req.query;
const { dataset } = await authDataset({
req,
datasetId,
per: ReadPermissionVal,
authToken: true,
authApiKey: true
});
const collections = await MongoDatasetCollection.find(
{
teamId: dataset.teamId,
datasetId: dataset._id
},
'_id apiFileId'
).lean();
return collections.map((col) => col.apiFileId).filter(Boolean) as string[];
}
export default NextAPI(handler);

View File

@@ -0,0 +1,88 @@
import type { NextApiRequest } from 'next';
import type { ApiDatasetCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import {
TrainingModeEnum,
DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { NextAPI } from '@/service/middleware/entry';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { CreateCollectionResponse } from '@/global/core/dataset/api';
import { readApiServerFileContent } from '@fastgpt/service/core/dataset/read';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
/**
 * Create a dataset collection from a file that lives on the dataset's
 * external API server, then insert its content for training.
 *
 * Rejects when the dataset has no apiServer configured, when no apiFileId
 * is supplied, or when a collection for the same apiFileId already exists.
 */
async function handler(req: NextApiRequest): CreateCollectionResponse {
  const {
    name,
    apiFileId,
    trainingType = TrainingModeEnum.chunk,
    chunkSize = 512,
    chunkSplitter,
    qaPrompt,
    ...body
  } = req.body as ApiDatasetCreateDatasetCollectionParams;

  // Creating a collection requires write permission on the dataset.
  const { teamId, tmbId, dataset } = await authDataset({
    req,
    authToken: true,
    authApiKey: true,
    datasetId: body.datasetId,
    per: WritePermissionVal
  });

  const apiServer = dataset.apiServer;
  if (!apiServer) {
    return Promise.reject('Api server not found');
  }
  if (!apiFileId) {
    return Promise.reject('ApiFileId not found');
  }

  // Guard against importing the same remote file twice into one dataset.
  const existingCollection = await MongoDatasetCollection.findOne(
    {
      teamId,
      datasetId: dataset._id,
      apiFileId
    },
    '_id'
  ).lean();
  if (existingCollection) {
    return Promise.reject(DatasetErrEnum.sameApiCollection);
  }

  // Pull the raw text of the remote file before creating the collection.
  const rawText = await readApiServerFileContent({
    apiServer,
    apiFileId,
    teamId
  });

  const { collectionId, insertResults } = await createCollectionAndInsertData({
    dataset,
    rawText,
    relatedId: apiFileId,
    createCollectionParams: {
      ...body,
      teamId,
      tmbId,
      type: DatasetCollectionTypeEnum.apiFile,
      name,
      trainingType,
      chunkSize,
      chunkSplitter,
      qaPrompt,
      apiFileId,
      metadata: {
        relatedImgId: apiFileId
      }
    }
  });

  return { collectionId, results: insertResults };
}

export default NextAPI(handler);

View File

@@ -2,23 +2,16 @@ import type { NextApiRequest } from 'next';
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { NextAPI } from '@/service/middleware/entry';
import { CreateCollectionResponse } from '@/global/core/dataset/api';
import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
async function handler(req: NextApiRequest): CreateCollectionResponse {
const { datasetId, parentId, fileId, ...body } = req.body as FileIdCreateDatasetCollectionParams;
@@ -39,21 +32,11 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
isQAImport: true
});
// 2. split chunks
const chunks = rawText2Chunks({
const { collectionId, insertResults } = await createCollectionAndInsertData({
dataset,
rawText,
isQAImport: true
});
// 3. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
return mongoSessionRun(async (session) => {
// 4. create collection
const { _id: collectionId } = await createOneCollection({
isQAImport: true,
createCollectionParams: {
...body,
teamId,
tmbId,
@@ -65,41 +48,13 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
// special metadata
trainingType,
chunkSize: 0,
session
});
// 5. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: filename,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 6. insert to training queue
const insertResult = await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
billId,
data: chunks.map((chunk, index) => ({
q: chunk.q,
a: chunk.a,
chunkIndex: index
})),
session
});
return { collectionId, results: insertResult };
chunkSize: 0
}
});
// remove buffer
await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
return { collectionId, results: insertResults };
}
export default NextAPI(handler);

View File

@@ -1,28 +1,22 @@
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { CreateCollectionResponse } from '@/global/core/dataset/api';
async function handler(req: ApiRequestProps<FileIdCreateDatasetCollectionParams>) {
async function handler(
req: ApiRequestProps<FileIdCreateDatasetCollectionParams>
): CreateCollectionResponse {
const {
fileId,
trainingType = TrainingModeEnum.chunk,
@@ -32,7 +26,6 @@ async function handler(req: ApiRequestProps<FileIdCreateDatasetCollectionParams>
...body
} = req.body;
const start = Date.now();
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
@@ -48,23 +41,10 @@ async function handler(req: ApiRequestProps<FileIdCreateDatasetCollectionParams>
fileId
});
// 2. split chunks
const chunks = rawText2Chunks({
const { collectionId, insertResults } = await createCollectionAndInsertData({
dataset,
rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 3. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
await mongoSessionRun(async (session) => {
// 4. create collection
const { _id: collectionId } = await createOneCollection({
createCollectionParams: {
...body,
teamId,
tmbId,
@@ -79,63 +59,19 @@ async function handler(req: ApiRequestProps<FileIdCreateDatasetCollectionParams>
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
qaPrompt
},
hashRawText: hashStr(rawText),
rawTextLength: rawText.length,
session
});
// 5. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: filename,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 6. insert to training queue
await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((item, index) => ({
...item,
chunkIndex: index
})),
session
});
// 7. remove related image ttl
await MongoImage.updateMany(
{
teamId,
'metadata.relatedId': fileId
},
{
// Remove expiredTime to avoid ttl expiration
$unset: {
expiredTime: 1
}
},
{
session
}
);
// remove buffer
await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
return collectionId;
relatedId: fileId
});
// remove buffer
await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
return {
collectionId,
results: insertResults
};
}
export default NextAPI(handler);

View File

@@ -1,21 +1,16 @@
import type { NextApiRequest } from 'next';
import type { LinkCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import {
TrainingModeEnum,
DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { reloadCollectionChunks } from '@fastgpt/service/core/dataset/collection/utils';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { NextAPI } from '@/service/middleware/entry';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { CreateCollectionResponse } from '@/global/core/dataset/api';
import { urlsFetch } from '@fastgpt/service/common/string/cheerio';
import { hashStr } from '@fastgpt/global/common/string/tools';
async function handler(req: NextApiRequest): CreateCollectionResponse {
const {
@@ -35,59 +30,45 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
per: WritePermissionVal
});
// 1. check dataset limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, new Array(10))
const result = await urlsFetch({
urlList: [link],
selector: body?.metadata?.webPageSelector
});
const { title = link, content = '' } = result[0];
return mongoSessionRun(async (session) => {
// 2. create collection
const collection = await createOneCollection({
if (!content) {
return Promise.reject('Can not fetch content from link');
}
const { collectionId, insertResults } = await createCollectionAndInsertData({
dataset,
rawText: content,
createCollectionParams: {
...body,
name: link,
name: title,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.link,
metadata: {
relatedImgId: link,
webPageSelector: body?.metadata?.webPageSelector
},
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
rawLink: link,
session
});
rawLink: link
},
// 3. create bill and start sync
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: 'core.dataset.collection.Sync Collection',
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel).name,
agentModel: getLLMModel(dataset.agentModel).name,
session
});
// load
const result = await reloadCollectionChunks({
collection: {
...collection.toObject(),
datasetId: dataset
},
tmbId,
billId,
session
});
return {
collectionId: collection._id,
results: {
insertLen: result.insertLen
}
};
relatedId: link
});
return {
collectionId,
results: insertResults
};
}
export default NextAPI(handler);

View File

@@ -4,22 +4,10 @@ import { getUploadModel } from '@fastgpt/service/common/file/multer';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { FileCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { getNanoid, hashStr } from '@fastgpt/global/common/string/tools';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getDatasetModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { readRawTextByLocalFile } from '@fastgpt/service/common/file/read/utils';
import { NextAPI } from '@/service/middleware/entry';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
@@ -52,12 +40,6 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>): CreateCo
datasetId: data.datasetId
});
const {
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt
} = data;
const { fileMetadata, collectionMetadata, ...collectionData } = data;
const collectionName = file.originalname;
@@ -89,84 +71,22 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>): CreateCo
// 3. delete tmp file
removeFilesByPaths(filePaths);
// 4. split raw text to chunks
const { chunks } = splitText2Chunks({
text: rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 5. check dataset limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
// 6. create collection and training bill
const { collectionId, insertResults } = await mongoSessionRun(async (session) => {
const { _id: collectionId } = await createOneCollection({
const { collectionId, insertResults } = await createCollectionAndInsertData({
dataset,
rawText,
relatedId: fileId,
createCollectionParams: {
...collectionData,
name: collectionName,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
fileId,
rawTextLength: rawText.length,
hashRawText: hashStr(rawText),
metadata: {
...collectionMetadata,
relatedImgId
},
session
});
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: collectionName,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getDatasetModel(dataset.agentModel)?.name
});
// 7. push chunks to training queue
const insertResults = await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((text, index) => ({
q: text,
chunkIndex: index
}))
});
// 8. remove image expired time
await MongoImage.updateMany(
{
teamId,
'metadata.relatedId': relatedImgId
},
{
// Remove expiredTime to avoid ttl expiration
$unset: {
expiredTime: 1
}
},
{
session
}
);
return {
collectionId,
insertResults
};
}
});
return { collectionId, results: insertResults };

View File

@@ -0,0 +1,131 @@
import { reTrainingDatasetFileCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
DatasetSourceReadTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { delOnlyCollection } from '@fastgpt/service/core/dataset/collection/controller';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { i18nT } from '@fastgpt/web/i18n/utils';
type RetrainingCollectionResponse = {
collectionId: string;
};
// Fetch an existing collection and retrain it: re-read the raw source,
// re-create the collection with the (possibly new) chunking params, then
// delete the old collection document.
async function handler(
  req: ApiRequestProps<reTrainingDatasetFileCollectionParams>
): Promise<RetrainingCollectionResponse> {
  const {
    collectionId,
    trainingType = TrainingModeEnum.chunk,
    chunkSize = 512,
    chunkSplitter,
    qaPrompt
  } = req.body;

  if (!collectionId) {
    return Promise.reject(CommonErrEnum.missingParams);
  }

  // Credential check
  // NOTE(review): retraining re-creates and then deletes the collection, yet only
  // Read permission is required here — the sibling sync endpoint requires Write.
  // Confirm whether this should be WritePermissionVal.
  const { collection } = await authDatasetCollection({
    req,
    authToken: true,
    authApiKey: true,
    collectionId: collectionId,
    per: ReadPermissionVal
  });

  // Resolve how to re-read the raw source for each supported collection type;
  // unsupported types are rejected with an i18n error.
  const sourceReadType = await (async () => {
    if (collection.type === DatasetCollectionTypeEnum.link) {
      if (!collection.rawLink) return Promise.reject('rawLink is missing');
      return {
        type: DatasetSourceReadTypeEnum.link,
        sourceId: collection.rawLink,
        selector: collection.metadata?.webPageSelector
      };
    }
    if (collection.type === DatasetCollectionTypeEnum.file) {
      if (!collection.fileId) return Promise.reject('fileId is missing');
      return {
        type: DatasetSourceReadTypeEnum.fileLocal,
        sourceId: collection.fileId
      };
    }
    if (collection.type === DatasetCollectionTypeEnum.apiFile) {
      if (!collection.apiFileId) return Promise.reject('apiFileId is missing');
      return {
        type: DatasetSourceReadTypeEnum.apiFile,
        sourceId: collection.apiFileId,
        apiServer: collection.datasetId.apiServer
      };
    }
    if (collection.type === DatasetCollectionTypeEnum.externalFile) {
      // Fix: the guard checks externalFileUrl, so the error message must name it too
      // (previously said 'externalFileId is missing').
      if (!collection.externalFileUrl) return Promise.reject('externalFileUrl is missing');
      return {
        type: DatasetSourceReadTypeEnum.externalFile,
        sourceId: collection.externalFileUrl,
        externalFileId: collection.externalFileId
      };
    }
    return Promise.reject(i18nT('dataset:collection_not_support_retraining'));
  })();

  const rawText = await readDatasetSourceRawText({
    teamId: collection.teamId,
    ...sourceReadType
  });

  return mongoSessionRun(async (session) => {
    // Fix: rename to avoid shadowing the request's collectionId — the value
    // returned here is the id of the NEW collection.
    const { collectionId: newCollectionId } = await createCollectionAndInsertData({
      dataset: collection.datasetId,
      rawText,
      // NOTE(review): `session` is not forwarded to createCollectionAndInsertData,
      // so the create step may run outside this transaction — confirm whether the
      // helper accepts a session parameter.
      createCollectionParams: {
        teamId: collection.teamId,
        tmbId: collection.tmbId,
        datasetId: collection.datasetId._id,
        name: collection.name,
        type: collection.type,
        fileId: collection.fileId,
        rawLink: collection.rawLink,
        externalFileId: collection.externalFileId,
        externalFileUrl: collection.externalFileUrl,
        apiFileId: collection.apiFileId,
        hashRawText: hashStr(rawText),
        rawTextLength: rawText.length,
        tags: collection.tags,
        createTime: collection.createTime,
        parentId: collection.parentId,
        // special metadata
        trainingType,
        chunkSize,
        chunkSplitter,
        qaPrompt,
        metadata: collection.metadata
      }
    });
    // Delete only the old collection document; its related sources are reused
    // by the newly created collection.
    await delOnlyCollection({
      collections: [collection],
      session
    });
    return { collectionId: newCollectionId };
  });
}
export default NextAPI(handler);

View File

@@ -1,20 +1,12 @@
import type { NextApiRequest } from 'next';
import type { TextCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import {
TrainingModeEnum,
DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { NextAPI } from '@/service/middleware/entry';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { CreateCollectionResponse } from '@/global/core/dataset/api';
@@ -38,23 +30,10 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
per: WritePermissionVal
});
// 1. split text to chunks
const { chunks } = splitText2Chunks({
text,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 2. check dataset limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
const createResult = await mongoSessionRun(async (session) => {
// 3. create collection
const { _id: collectionId } = await createOneCollection({
const { collectionId, insertResults } = await createCollectionAndInsertData({
dataset,
rawText: text,
createCollectionParams: {
...body,
teamId,
tmbId,
@@ -64,46 +43,14 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
hashRawText: hashStr(text),
rawTextLength: text.length,
session
});
// 4. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: name,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 5. push chunks to training queue
const insertResults = await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((text, index) => ({
q: text,
chunkIndex: index
})),
session
});
return { collectionId, results: insertResults };
qaPrompt
}
});
return createResult;
return {
collectionId,
results: insertResults
};
}
export const config = {

View File

@@ -1,6 +1,6 @@
import type { NextApiRequest } from 'next';
import { findCollectionAndChild } from '@fastgpt/service/core/dataset/collection/utils';
import { delCollectionAndRelatedSources } from '@fastgpt/service/core/dataset/collection/controller';
import { delCollection } from '@fastgpt/service/core/dataset/collection/controller';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { NextAPI } from '@/service/middleware/entry';
@@ -32,8 +32,9 @@ async function handler(req: NextApiRequest) {
// delete
await mongoSessionRun((session) =>
delCollectionAndRelatedSources({
delCollection({
collections,
delRelatedSource: true,
session
})
);

View File

@@ -11,6 +11,7 @@ import { MongoChatItem } from '@fastgpt/service/core/chat/chatItemSchema';
import { AIChatItemType, ChatHistoryItemResType } from '@fastgpt/global/core/chat/type';
import { authChatCrud } from '@/service/support/permission/auth/chat';
import { getCollectionWithDataset } from '@fastgpt/service/core/dataset/controller';
import { useApiDatasetRequest } from '@fastgpt/service/core/dataset/apiDataset/api';
export type readCollectionSourceQuery = {};
@@ -145,6 +146,14 @@ async function handler(
if (collection.type === DatasetCollectionTypeEnum.link && collection.rawLink) {
return collection.rawLink;
}
if (collection.type === DatasetCollectionTypeEnum.apiFile && collection.apiFileId) {
const apiServer = collection.datasetId.apiServer;
if (!apiServer) return Promise.reject('apiServer not found');
return useApiDatasetRequest({ apiServer }).getFilePreviewUrl({
apiFileId: collection.apiFileId
});
}
if (collection.type === DatasetCollectionTypeEnum.externalFile) {
if (collection.externalFileId && collection.datasetId.externalReadUrl) {
return collection.datasetId.externalReadUrl.replace(

View File

@@ -0,0 +1,37 @@
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { NextAPI } from '@/service/middleware/entry';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { syncCollection } from '@fastgpt/service/core/dataset/collection/utils';
/*
Collection sync
1. Check collection type: link, api dataset collection
2. Get collection and raw text
3. Check whether the original text is the same: skip if same
4. Create new collection
5. Delete old collection
*/
export type CollectionSyncBody = {
collectionId: string;
};
async function handler(req: ApiRequestProps<CollectionSyncBody>) {
const { collectionId } = req.body;
if (!collectionId) {
return Promise.reject(CommonErrEnum.missingParams);
}
const { collection } = await authDatasetCollection({
req,
authToken: true,
collectionId,
per: WritePermissionVal
});
return syncCollection(collection);
}
export default NextAPI(handler);

View File

@@ -1,106 +0,0 @@
import type { NextApiRequest } from 'next';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import {
getCollectionAndRawText,
reloadCollectionChunks
} from '@fastgpt/service/core/dataset/collection/utils';
import { delCollectionAndRelatedSources } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionSyncResultEnum,
DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { NextAPI } from '@/service/middleware/entry';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
// Sync a link-type collection: re-fetch the linked page, and if the raw text
// changed, create a replacement collection, reload its chunks under a training
// bill, and delete the old collection — all inside one mongo session.
async function handler(req: NextApiRequest) {
  const { collectionId } = req.body as { collectionId: string };

  if (!collectionId) {
    return Promise.reject(CommonErrEnum.missingParams);
  }

  // Write permission required: sync replaces the collection.
  const { collection, tmbId } = await authDatasetCollection({
    req,
    authToken: true,
    collectionId,
    per: WritePermissionVal
  });

  // Only link collections with a stored rawLink can be synced here.
  if (collection.type !== DatasetCollectionTypeEnum.link || !collection.rawLink) {
    return Promise.reject(DatasetErrEnum.unLinkCollection);
  }

  const { title, rawText, isSameRawText } = await getCollectionAndRawText({
    collection
  });

  // Unchanged source text: nothing to do, report "sameRaw".
  if (isSameRawText) {
    return DatasetCollectionSyncResultEnum.sameRaw;
  }

  /* Not the same original text, create and reload */
  const vectorModelData = getVectorModel(collection.datasetId.vectorModel);
  const agentModelData = getLLMModel(collection.datasetId.agentModel);

  await mongoSessionRun(async (session) => {
    // create training bill
    const { billId } = await createTrainingUsage({
      teamId: collection.teamId,
      tmbId,
      appName: 'core.dataset.collection.Sync Collection',
      billSource: UsageSourceEnum.training,
      vectorModel: vectorModelData.name,
      agentModel: agentModelData.name,
      session
    });

    // create a collection and delete old
    // The new collection copies the old one's settings; the freshly fetched
    // page title wins over the old name when available.
    const newCol = await createOneCollection({
      teamId: collection.teamId,
      tmbId: collection.tmbId,
      parentId: collection.parentId,
      datasetId: collection.datasetId._id,
      name: title || collection.name,
      type: collection.type,
      trainingType: collection.trainingType,
      chunkSize: collection.chunkSize,
      chunkSplitter: collection.chunkSplitter,
      qaPrompt: collection.qaPrompt,
      fileId: collection.fileId,
      rawLink: collection.rawLink,
      metadata: collection.metadata,
      createTime: collection.createTime,
      session
    });

    // start load
    // datasetId is overridden with the populated dataset document so the
    // reload helper has model config available.
    await reloadCollectionChunks({
      collection: {
        ...newCol.toObject(),
        datasetId: collection.datasetId
      },
      tmbId,
      billId,
      rawText,
      session
    });

    // delete old collection
    await delCollectionAndRelatedSources({
      collections: [collection],
      session
    });
  });

  return DatasetCollectionSyncResultEnum.success;
}
export default NextAPI(handler);

View File

@@ -24,7 +24,8 @@ async function handler(
type = DatasetTypeEnum.dataset,
avatar,
vectorModel = global.vectorModels[0].model,
agentModel = getDatasetModel().model
agentModel = getDatasetModel().model,
apiServer
} = req.body;
// auth
@@ -54,7 +55,8 @@ async function handler(
vectorModel,
agentModel,
avatar,
type
type,
apiServer
});
return _id;

View File

@@ -1,6 +1,5 @@
/* push data to training queue */
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import type {
PushDatasetDataProps,
PushDatasetDataResponse
@@ -39,15 +38,13 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
insertLen: predictDataLimitLength(collection.trainingType, data)
});
jsonRes<PushDatasetDataResponse>(res, {
data: await pushDataListToTrainingQueue({
...body,
teamId,
tmbId,
datasetId: collection.datasetId._id,
agentModel: collection.datasetId.agentModel,
vectorModel: collection.datasetId.vectorModel
})
return pushDataListToTrainingQueue({
...body,
teamId,
tmbId,
datasetId: collection.datasetId._id,
agentModel: collection.datasetId.agentModel,
vectorModel: collection.datasetId.vectorModel
});
}

View File

@@ -30,6 +30,12 @@ async function handler(req: ApiRequestProps<Query>): Promise<DatasetItemType> {
return {
...dataset,
apiServer: dataset.apiServer
? {
baseUrl: dataset.apiServer.baseUrl,
authorization: ''
}
: undefined,
permission,
vectorModel: getVectorModel(dataset.vectorModel),
agentModel: getLLMModel(dataset.agentModel)

View File

@@ -1,19 +1,27 @@
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { rawText2Chunks, readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { OwnerPermissionVal } from '@fastgpt/global/support/permission/constant';
import {
OwnerPermissionVal,
WritePermissionVal
} from '@fastgpt/global/support/permission/constant';
import { authCollectionFile } from '@fastgpt/service/support/permission/auth/file';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
export type PostPreviewFilesChunksProps = {
datasetId: string;
type: DatasetSourceReadTypeEnum;
sourceId: string;
chunkSize: number;
overlapRatio: number;
customSplitChar?: string;
// Read params
selector?: string;
isQAImport?: boolean;
externalFileId?: string;
};
export type PreviewChunksResponse = {
q: string;
@@ -23,8 +31,17 @@ export type PreviewChunksResponse = {
async function handler(
req: ApiRequestProps<PostPreviewFilesChunksProps>
): Promise<PreviewChunksResponse> {
const { type, sourceId, chunkSize, customSplitChar, overlapRatio, selector, isQAImport } =
req.body;
const {
type,
sourceId,
chunkSize,
customSplitChar,
overlapRatio,
selector,
isQAImport,
datasetId,
externalFileId
} = req.body;
if (!sourceId) {
throw new Error('sourceId is empty');
@@ -33,25 +50,40 @@ async function handler(
throw new Error('chunkSize is too large, should be less than 30000');
}
const { teamId } = await (async () => {
const { teamId, apiServer } = await (async () => {
if (type === DatasetSourceReadTypeEnum.fileLocal) {
return authCollectionFile({
const res = await authCollectionFile({
req,
authToken: true,
authApiKey: true,
fileId: sourceId,
per: OwnerPermissionVal
});
return {
teamId: res.teamId
};
}
return authCert({ req, authApiKey: true, authToken: true });
const { dataset } = await authDataset({
req,
authApiKey: true,
authToken: true,
datasetId,
per: WritePermissionVal
});
return {
teamId: dataset.teamId,
apiServer: dataset.apiServer
};
})();
const rawText = await readDatasetSourceRawText({
teamId,
type,
sourceId: sourceId,
sourceId,
selector,
isQAImport
isQAImport,
apiServer,
externalFileId
});
return rawText2Chunks({

View File

@@ -27,32 +27,51 @@ export type GetDatasetListBody = {
async function handler(req: ApiRequestProps<GetDatasetListBody>) {
const { parentId, type, searchKey } = req.body;
// 凭证校验
const {
dataset: parentDataset,
teamId,
tmbId,
permission: myPer
} = await (async () => {
if (parentId) {
return await authDataset({
req,
authToken: true,
authApiKey: true,
per: ReadPermissionVal,
datasetId: parentId
// Auth user permission
const [{ tmbId, teamId, permission: teamPer }] = await Promise.all([
authUserPer({
req,
authToken: true,
authApiKey: true,
per: ReadPermissionVal
}),
...(parentId
? [
authDataset({
req,
authToken: true,
authApiKey: true,
per: ReadPermissionVal,
datasetId: parentId
})
]
: [])
]);
// Get team all app permissions
const [perList, myGroupMap] = await Promise.all([
MongoResourcePermission.find({
resourceType: PerResourceTypeEnum.dataset,
teamId,
resourceId: {
$exists: true
}
}).lean(),
getGroupsByTmbId({
tmbId,
teamId
}).then((item) => {
const map = new Map<string, 1>();
item.forEach((item) => {
map.set(String(item._id), 1);
});
}
return {
...(await authUserPer({
req,
authToken: true,
authApiKey: true,
per: ReadPermissionVal
})),
dataset: undefined
};
})();
return map;
})
]);
const myPerList = perList.filter(
(item) => String(item.tmbId) === String(tmbId) || myGroupMap.has(String(item.groupId))
);
const findDatasetQuery = (() => {
const searchMatch = searchKey
@@ -63,61 +82,43 @@ async function handler(req: ApiRequestProps<GetDatasetListBody>) {
]
}
: {};
// Filter apps by permission, if not owner, only get apps that I have permission to access
const appIdQuery = teamPer.isOwner
? {}
: { _id: { $in: myPerList.map((item) => item.resourceId) } };
if (searchKey) {
return {
...appIdQuery,
teamId,
...searchMatch
};
}
return {
...appIdQuery,
teamId,
...(type ? (Array.isArray(type) ? { type: { $in: type } } : { type }) : {}),
...parseParentIdInMongo(parentId)
};
})();
const myGroupIds = (
await getGroupsByTmbId({
tmbId,
teamId
const myDatasets = await MongoDataset.find(findDatasetQuery)
.sort({
updateTime: -1
})
).map((item) => String(item._id));
.lean();
const [myDatasets, perList] = await Promise.all([
MongoDataset.find(findDatasetQuery)
.sort({
updateTime: -1
})
.lean(),
MongoResourcePermission.find({
resourceType: PerResourceTypeEnum.dataset,
teamId,
resourceId: {
$exists: true
}
}).lean()
]);
const filterDatasets = myDatasets
const formatDatasets = myDatasets
.map((dataset) => {
const { Per, privateDataset } = (() => {
const myPerList = perList.filter(
(item) =>
String(item.tmbId) === String(tmbId) || myGroupIds.includes(String(item.groupId))
);
const getPer = (datasetId: string) => {
const tmbPer = myPerList.find(
(item) => String(item.resourceId) === datasetId && !!item.tmbId
)?.permission;
const groupPer = getGroupPer(
myPerList
.filter(
(item) =>
String(item.resourceId) === datasetId && myGroupIds.includes(String(item.groupId))
)
.filter((item) => String(item.resourceId) === datasetId && !!item.groupId)
.map((item) => item.permission)
);
@@ -126,14 +127,14 @@ async function handler(req: ApiRequestProps<GetDatasetListBody>) {
return {
Per: new DatasetPermission({
per: tmbPer ?? groupPer ?? DatasetDefaultPermissionVal,
isOwner: String(dataset.tmbId) === String(tmbId) || myPer.isOwner
isOwner: String(dataset.tmbId) === String(tmbId) || teamPer.isOwner
}),
privateDataset: dataset.type === 'folder' ? clbCount <= 1 : clbCount === 0
};
};
// inherit
if (dataset.inheritPermission && parentDataset && dataset.type !== DatasetTypeEnum.folder) {
return getPer(String(parentDataset._id));
if (dataset.inheritPermission && parentId && dataset.type !== DatasetTypeEnum.folder) {
return getPer(String(parentId));
} else {
return getPer(String(dataset._id));
}
@@ -148,7 +149,7 @@ async function handler(req: ApiRequestProps<GetDatasetListBody>) {
.filter((app) => app.permission.hasReadPer);
const data = await Promise.all(
filterDatasets.map<DatasetListItemType>((item) => ({
formatDatasets.map<DatasetListItemType>((item) => ({
_id: item._id,
avatar: item.avatar,
name: item.name,

View File

@@ -41,8 +41,18 @@ async function handler(
req: ApiRequestProps<DatasetUpdateBody, DatasetUpdateQuery>,
_res: ApiResponseType<any>
): Promise<DatasetUpdateResponse> {
const { id, parentId, name, avatar, intro, agentModel, websiteConfig, externalReadUrl, status } =
req.body;
const {
id,
parentId,
name,
avatar,
intro,
agentModel,
websiteConfig,
externalReadUrl,
apiServer,
status
} = req.body;
if (!id) {
return Promise.reject(CommonErrEnum.missingParams);
@@ -103,6 +113,10 @@ async function handler(
...(status && { status }),
...(intro !== undefined && { intro }),
...(externalReadUrl !== undefined && { externalReadUrl }),
...(!!apiServer?.baseUrl && { 'apiServer.baseUrl': apiServer.baseUrl }),
...(!!apiServer?.authorization && {
'apiServer.authorization': apiServer.authorization
}),
...(isMove && { inheritPermission: true })
},
{ session }
@@ -165,7 +179,8 @@ async function updateTraining({
{
$set: {
model: agentModel,
retryCount: 5
retryCount: 5,
lockTime: new Date()
}
}
);

View File

@@ -1,14 +1,13 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
import { connectToDatabase } from '@/service/mongo';
import { getVectorsByText } from '@fastgpt/service/core/ai/embedding';
import { updateApiKeyUsage } from '@fastgpt/service/support/openapi/tools';
import { getUsageSourceByAuthType } from '@fastgpt/global/support/wallet/usage/tools';
import { getVectorModel } from '@fastgpt/service/core/ai/model';
import { checkTeamAIPoints } from '@fastgpt/service/support/permission/teamLimit';
import { EmbeddingTypeEnm } from '@fastgpt/global/core/ai/constants';
import { NextAPI } from '@/service/middleware/entry';
type Props = {
input: string | string[];
@@ -18,65 +17,58 @@ type Props = {
type: `${EmbeddingTypeEnm}`;
};
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
let { input, model, billId, type } = req.body as Props;
await connectToDatabase();
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let { input, model, billId, type } = req.body as Props;
if (!Array.isArray(input) && typeof input !== 'string') {
throw new Error('input is nor array or string');
if (!Array.isArray(input) && typeof input !== 'string') {
throw new Error('input is nor array or string');
}
const query = Array.isArray(input) ? input[0] : input;
const { teamId, tmbId, apikey, authType } = await authCert({
req,
authToken: true,
authApiKey: true
});
await checkTeamAIPoints(teamId);
const { tokens, vectors } = await getVectorsByText({
input: query,
model: getVectorModel(model),
type
});
res.json({
object: 'list',
data: vectors.map((item, index) => ({
object: 'embedding',
index: index,
embedding: item
})),
model,
usage: {
prompt_tokens: tokens,
total_tokens: tokens
}
});
const query = Array.isArray(input) ? input[0] : input;
const { totalPoints } = pushGenerateVectorUsage({
teamId,
tmbId,
tokens,
model,
billId,
source: getUsageSourceByAuthType({ authType })
});
const { teamId, tmbId, apikey, authType } = await authCert({
req,
authToken: true,
authApiKey: true
});
await checkTeamAIPoints(teamId);
const { tokens, vectors } = await getVectorsByText({
input: query,
model: getVectorModel(model),
type
});
res.json({
object: 'list',
data: vectors.map((item, index) => ({
object: 'embedding',
index: index,
embedding: item
})),
model,
usage: {
prompt_tokens: tokens,
total_tokens: tokens
}
});
const { totalPoints } = pushGenerateVectorUsage({
teamId,
tmbId,
tokens,
model,
billId,
source: getUsageSourceByAuthType({ authType })
});
if (apikey) {
updateApiKeyUsage({
apikey,
totalPoints: totalPoints
});
}
} catch (err) {
console.log(err);
jsonRes(res, {
code: 500,
error: err
if (apikey) {
updateApiKeyUsage({
apikey,
totalPoints: totalPoints
});
}
}
export default NextAPI(handler);