Add image index and pdf parse (#3956)
* feat: think tag parse
* feat: parse think tag test
* feat: pdf parse ux
* feat: doc2x parse
* perf: rewrite training mode setting
* feat: image parse queue
* perf: image index
* feat: image parse process
* feat: add init sh
* fix: ts
projects/app/src/pages/api/admin/initv490.ts (new file, +65 lines)
@@ -0,0 +1,65 @@
+import { NextAPI } from '@/service/middleware/entry';
+import { authCert } from '@fastgpt/service/support/permission/auth/common';
+import { NextApiRequest, NextApiResponse } from 'next';
+import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
+import { DatasetCollectionDataProcessModeEnum } from '@fastgpt/global/core/dataset/constants';
+import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
+import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';
+
+// Change every collection with trainingType=auto to trainingType=chunk
+const updateCollections = async () => {
+  await MongoDatasetCollection.updateMany(
+    {
+      trainingType: DatasetCollectionDataProcessModeEnum.auto
+    },
+    {
+      $set: {
+        trainingType: DatasetCollectionDataProcessModeEnum.chunk,
+        autoIndexes: true
+      }
+    }
+  );
+};
+const updateData = async () => {
+  await MongoDatasetData.updateMany({ indexes: { $exists: true } }, [
+    {
+      $set: {
+        indexes: {
+          $map: {
+            input: '$indexes',
+            as: 'index',
+            in: {
+              $mergeObjects: [
+                '$$index',
+                {
+                  type: {
+                    $cond: {
+                      if: { $eq: ['$$index.defaultIndex', true] },
+                      then: DatasetDataIndexTypeEnum.default,
+                      else: DatasetDataIndexTypeEnum.custom
+                    }
+                  }
+                }
+              ]
+            }
+          }
+        }
+      }
+    }
+  ]);
+};
+
+async function handler(req: NextApiRequest, _res: NextApiResponse) {
+  await authCert({ req, authRoot: true });
+
+  console.log('Change trainingType of all collections to chunk');
+  await updateCollections();
+
+  console.log(
+    "Update the indexes of all data: entries with defaultIndex=true get type='default', the rest get type='custom'"
+  );
+  await updateData();
+  return { success: true };
+}
+
+export default NextAPI(handler);
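
The updateData migration above uses the aggregation-pipeline form of updateMany (the second argument is an array, supported since MongoDB 4.2), which lets $set rewrite each document's indexes array from its existing value: $map walks the array and $mergeObjects stamps a computed type field onto every index object. A minimal standalone sketch of the same pattern with the Node.js mongodb driver — the connection string, database, and collection names are placeholders, not taken from this PR:

import { MongoClient } from 'mongodb';

const client = new MongoClient('mongodb://localhost:27017'); // placeholder URI
await client.connect();
const datas = client.db('test').collection('datas'); // placeholder names

// Pipeline update: each index object gains a `type` derived from its defaultIndex flag.
await datas.updateMany({ indexes: { $exists: true } }, [
  {
    $set: {
      indexes: {
        $map: {
          input: '$indexes',
          as: 'index',
          in: {
            $mergeObjects: [
              '$$index',
              {
                type: {
                  $cond: {
                    if: { $eq: ['$$index.defaultIndex', true] },
                    then: 'default',
                    else: 'custom'
                  }
                }
              }
            ]
          }
        }
      }
    }
  }
]);
await client.close();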

@@ -1,78 +0,0 @@
-/*
-  Read db file content and response 3000 words
-*/
-import type { NextApiResponse } from 'next';
-import { authCollectionFile } from '@fastgpt/service/support/permission/auth/file';
-import { NextAPI } from '@/service/middleware/entry';
-import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
-import { readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
-import { ApiRequestProps } from '@fastgpt/service/type/next';
-import {
-  OwnerPermissionVal,
-  WritePermissionVal
-} from '@fastgpt/global/support/permission/constant';
-import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
-
-export type PreviewContextProps = {
-  datasetId: string;
-  type: DatasetSourceReadTypeEnum;
-  sourceId: string;
-  isQAImport?: boolean;
-  selector?: string;
-  externalFileId?: string;
-};
-
-async function handler(req: ApiRequestProps<PreviewContextProps>, res: NextApiResponse<any>) {
-  const { type, sourceId, isQAImport, selector, datasetId, externalFileId } = req.body;
-
-  if (!sourceId) {
-    throw new Error('fileId is empty');
-  }
-
-  const { teamId, apiServer, feishuServer, yuqueServer } = await (async () => {
-    if (type === DatasetSourceReadTypeEnum.fileLocal) {
-      const res = await authCollectionFile({
-        req,
-        authToken: true,
-        authApiKey: true,
-        fileId: sourceId,
-        per: OwnerPermissionVal
-      });
-      return {
-        teamId: res.teamId
-      };
-    }
-    const { dataset } = await authDataset({
-      req,
-      authApiKey: true,
-      authToken: true,
-      datasetId,
-      per: WritePermissionVal
-    });
-    return {
-      teamId: dataset.teamId,
-      apiServer: dataset.apiServer,
-      feishuServer: dataset.feishuServer,
-      yuqueServer: dataset.yuqueServer
-    };
-  })();
-
-  const rawText = await readDatasetSourceRawText({
-    teamId,
-    type,
-    sourceId,
-    isQAImport,
-    selector,
-    apiServer,
-    feishuServer,
-    yuqueServer,
-    externalFileId
-  });
-
-  return {
-    previewContent: rawText.slice(0, 3000),
-    totalLength: rawText.length
-  };
-}
-
-export default NextAPI(handler);

@@ -4,7 +4,8 @@ import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
 import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
 import {
   TrainingModeEnum,
-  DatasetCollectionTypeEnum
+  DatasetCollectionTypeEnum,
+  DatasetCollectionDataProcessModeEnum
 } from '@fastgpt/global/core/dataset/constants';

 import { NextAPI } from '@/service/middleware/entry';
@@ -15,15 +16,7 @@ import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
 import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';

 async function handler(req: NextApiRequest): CreateCollectionResponse {
-  const {
-    name,
-    apiFileId,
-    trainingType = TrainingModeEnum.chunk,
-    chunkSize = 512,
-    chunkSplitter,
-    qaPrompt,
-    ...body
-  } = req.body as ApiDatasetCreateDatasetCollectionParams;
+  const { name, apiFileId, ...body } = req.body as ApiDatasetCreateDatasetCollectionParams;

   const { teamId, tmbId, dataset } = await authDataset({
     req,
@@ -56,7 +49,8 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
     feishuServer,
     yuqueServer,
     apiFileId,
-    teamId
+    teamId,
+    tmbId
   });

   const { collectionId, insertResults } = await createCollectionAndInsertData({
@@ -69,10 +63,6 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
     tmbId,
     type: DatasetCollectionTypeEnum.apiFile,
     name: name,
-    trainingType,
-    chunkSize,
-    chunkSplitter,
-    qaPrompt,
     apiFileId,
     metadata: {
       relatedImgId: apiFileId
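
This hunk (and the matching ones below for file, link, text, and retraining endpoints) drops the per-endpoint trainingType/chunkSize/chunkSplitter/qaPrompt destructuring: those fields now ride along inside ...body and flow to createCollectionAndInsertData, which owns the defaults. A hedged, self-contained sketch of that refactor — the type and function names below are illustrative, not FastGPT's real signatures:

// Before: every create endpoint re-declared the chunking defaults, e.g.
// const { name, trainingType = TrainingModeEnum.chunk, chunkSize = 512, ...body } = req.body;

type CreateCollectionBody = {
  name: string;
  trainingType?: 'chunk' | 'qa' | 'auto';
  chunkSize?: number;
  chunkSplitter?: string;
  qaPrompt?: string;
};

// After: one shared creator owns the defaults…
const createCollectionSketch = ({
  trainingType = 'chunk',
  chunkSize = 512,
  ...rest
}: CreateCollectionBody) => ({ trainingType, chunkSize, ...rest });

// …so an endpoint just forwards its body instead of re-declaring them.
const handlerSketch = (body: CreateCollectionBody) => createCollectionSketch(body);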

@@ -4,6 +4,7 @@ import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
 import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
 import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
 import {
+  DatasetCollectionDataProcessModeEnum,
   DatasetCollectionTypeEnum,
   TrainingModeEnum
 } from '@fastgpt/global/core/dataset/constants';
@@ -15,7 +16,6 @@ import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';

 async function handler(req: NextApiRequest): CreateCollectionResponse {
   const { datasetId, parentId, fileId, ...body } = req.body as FileIdCreateDatasetCollectionParams;
-  const trainingType = TrainingModeEnum.chunk;
   const { teamId, tmbId, dataset } = await authDataset({
     req,
     authToken: true,
@@ -27,6 +27,7 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
   // 1. read file
   const { rawText, filename } = await readFileContentFromMongo({
     teamId,
+    tmbId,
     bucketName: BucketNameEnum.dataset,
     fileId,
     isQAImport: true
@@ -47,7 +48,7 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
       fileId,

       // special metadata
-      trainingType,
+      trainingType: DatasetCollectionDataProcessModeEnum.chunk,
       chunkSize: 0
     }
   });

@@ -2,12 +2,8 @@ import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
 import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
 import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
 import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
-import {
-  DatasetCollectionTypeEnum,
-  TrainingModeEnum
-} from '@fastgpt/global/core/dataset/constants';
+import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
 import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
 import { hashStr } from '@fastgpt/global/common/string/tools';
 import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
 import { NextAPI } from '@/service/middleware/entry';
 import { ApiRequestProps } from '@fastgpt/service/type/next';
@@ -17,14 +13,7 @@ import { CreateCollectionResponse } from '@/global/core/dataset/api';
 async function handler(
   req: ApiRequestProps<FileIdCreateDatasetCollectionParams>
 ): CreateCollectionResponse {
-  const {
-    fileId,
-    trainingType = TrainingModeEnum.chunk,
-    chunkSize = 512,
-    chunkSplitter,
-    qaPrompt,
-    ...body
-  } = req.body;
+  const { fileId, customPdfParse, ...body } = req.body;

   const { teamId, tmbId, dataset } = await authDataset({
     req,
@@ -37,8 +26,10 @@ async function handler(
   // 1. read file
   const { rawText, filename } = await readFileContentFromMongo({
     teamId,
+    tmbId,
     bucketName: BucketNameEnum.dataset,
-    fileId
+    fileId,
+    customPdfParse
   });

   const { collectionId, insertResults } = await createCollectionAndInsertData({
@@ -54,12 +45,7 @@ async function handler(
       metadata: {
         relatedImgId: fileId
       },
-
-      // special metadata
-      trainingType,
-      chunkSize,
-      chunkSplitter,
-      qaPrompt
+      customPdfParse
     },

     relatedId: fileId
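
With this change, creating a collection from an uploaded file takes an optional customPdfParse flag instead of chunking parameters. A hedged request sketch — the route path is inferred from this handler's role and is not shown in the diff, and the field values are placeholders:

// POST to the fileId create endpoint (path assumed, values are placeholders).
await fetch('/api/core/dataset/collection/create/fileId', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    datasetId: '<datasetId>',  // placeholder
    fileId: '<gridfs fileId>', // placeholder
    customPdfParse: true       // route the PDF through the custom parser (e.g. doc2x, per the commit message)
  })
});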

@@ -13,14 +13,7 @@ import { urlsFetch } from '@fastgpt/service/common/string/cheerio';
 import { hashStr } from '@fastgpt/global/common/string/tools';

 async function handler(req: NextApiRequest): CreateCollectionResponse {
-  const {
-    link,
-    trainingType = TrainingModeEnum.chunk,
-    chunkSize = 512,
-    chunkSplitter,
-    qaPrompt,
-    ...body
-  } = req.body as LinkCreateDatasetCollectionParams;
+  const { link, ...body } = req.body as LinkCreateDatasetCollectionParams;

   const { teamId, tmbId, dataset } = await authDataset({
     req,
@@ -53,12 +46,6 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
       relatedImgId: link,
       webPageSelector: body?.metadata?.webPageSelector
     },
-
-    trainingType,
-    chunkSize,
-    chunkSplitter,
-    qaPrompt,

     rawLink: link
   },

@@ -6,7 +6,7 @@ import { FileCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
 import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
 import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
 import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
-import { getNanoid, hashStr } from '@fastgpt/global/common/string/tools';
+import { getNanoid } from '@fastgpt/global/common/string/tools';
 import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
 import { readRawTextByLocalFile } from '@fastgpt/service/common/file/read/utils';
 import { NextAPI } from '@/service/middleware/entry';
@@ -48,8 +48,10 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>): CreateCollectionResponse {
   // 1. read file
   const { rawText } = await readRawTextByLocalFile({
     teamId,
+    tmbId,
     path: file.path,
     encoding: file.encoding,
+    customPdfParse: collectionData.customPdfParse,
     metadata: {
       ...fileMetadata,
       relatedId: relatedImgId
@@ -24,20 +24,14 @@ type RetrainingCollectionResponse = {
 async function handler(
   req: ApiRequestProps<reTrainingDatasetFileCollectionParams>
 ): Promise<RetrainingCollectionResponse> {
-  const {
-    collectionId,
-    trainingType = TrainingModeEnum.chunk,
-    chunkSize = 512,
-    chunkSplitter,
-    qaPrompt
-  } = req.body;
+  const { collectionId, customPdfParse, ...data } = req.body;

   if (!collectionId) {
     return Promise.reject(CommonErrEnum.missingParams);
   }

   // Credential check
-  const { collection } = await authDatasetCollection({
+  const { collection, teamId, tmbId } = await authDatasetCollection({
     req,
     authToken: true,
     authApiKey: true,
@@ -84,7 +78,9 @@ async function handler(
   })();

   const rawText = await readDatasetSourceRawText({
-    teamId: collection.teamId,
+    teamId,
+    tmbId,
+    customPdfParse,
     ...sourceReadType
   });

@@ -100,12 +96,15 @@ async function handler(
     dataset: collection.dataset,
     rawText,
     createCollectionParams: {
+      ...data,
       teamId: collection.teamId,
       tmbId: collection.tmbId,
       datasetId: collection.dataset._id,
       name: collection.name,
       type: collection.type,

+      customPdfParse,
+
       fileId: collection.fileId,
       rawLink: collection.rawLink,
       externalFileId: collection.externalFileId,
@@ -121,10 +120,6 @@ async function handler(
       parentId: collection.parentId,

       // special metadata
-      trainingType,
-      chunkSize,
-      chunkSplitter,
-      qaPrompt,
       metadata: collection.metadata
     }
   });
@@ -2,25 +2,13 @@ import type { NextApiRequest } from 'next';
 import type { TextCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
 import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
 import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
-import {
-  TrainingModeEnum,
-  DatasetCollectionTypeEnum
-} from '@fastgpt/global/core/dataset/constants';
-import { hashStr } from '@fastgpt/global/common/string/tools';
+import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
 import { NextAPI } from '@/service/middleware/entry';
 import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
 import { CreateCollectionResponse } from '@/global/core/dataset/api';

 async function handler(req: NextApiRequest): CreateCollectionResponse {
-  const {
-    name,
-    text,
-    trainingType = TrainingModeEnum.chunk,
-    chunkSize = 512,
-    chunkSplitter,
-    qaPrompt,
-    ...body
-  } = req.body as TextCreateDatasetCollectionParams;
+  const { name, text, ...body } = req.body as TextCreateDatasetCollectionParams;

   const { teamId, tmbId, dataset } = await authDataset({
     req,
@@ -39,11 +27,7 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
     tmbId,
     type: DatasetCollectionTypeEnum.virtual,

-    name,
-    trainingType,
-    chunkSize,
-    chunkSplitter,
-    qaPrompt
+    name
   }
 });
@@ -6,12 +6,12 @@ import {
   getLLMModel,
   getEmbeddingModel,
   getDatasetModel,
-  getDefaultEmbeddingModel
+  getDefaultEmbeddingModel,
+  getVlmModel
 } from '@fastgpt/service/core/ai/model';
 import { checkTeamDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
 import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
 import { NextAPI } from '@/service/middleware/entry';
-import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
 import type { ApiRequestProps } from '@fastgpt/service/type/next';
 import { parseParentIdInMongo } from '@fastgpt/global/common/parentFolder/utils';
 import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
@@ -32,8 +32,9 @@ async function handler(
     intro,
     type = DatasetTypeEnum.dataset,
     avatar,
-    vectorModel = getDefaultEmbeddingModel().model,
-    agentModel = getDatasetModel().model,
+    vectorModel = getDefaultEmbeddingModel()?.model,
+    agentModel = getDatasetModel()?.model,
+    vlmModel,
     apiServer,
     feishuServer,
     yuqueServer
@@ -63,8 +64,11 @@ async function handler(
   // check model valid
   const vectorModelStore = getEmbeddingModel(vectorModel);
   const agentModelStore = getLLMModel(agentModel);
-  if (!vectorModelStore || !agentModelStore) {
-    return Promise.reject(DatasetErrEnum.invalidVectorModelOrQAModel);
+  if (!vectorModelStore) {
+    return Promise.reject(`System not embedding model`);
   }
+  if (!agentModelStore) {
+    return Promise.reject(`System not llm model`);
+  }

   // check limit
@@ -81,6 +85,7 @@ async function handler(
     tmbId,
     vectorModel,
     agentModel,
+    vlmModel,
     avatar,
     type,
     apiServer,
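
Dataset creation now threads a vlmModel (vision-language model) through alongside the embedding and LLM choices; per the commit message it backs the new image index and PDF parse paths. A hedged request sketch — the route path and model ids are placeholders, not taken from this diff:

// POST to the dataset create endpoint (path and values assumed).
await fetch('/api/core/dataset/create', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    name: 'demo dataset',
    vectorModel: 'text-embedding-3-small', // placeholder model ids
    agentModel: 'gpt-4o-mini',
    vlmModel: 'gpt-4o' // used for image indexing / custom PDF parsing
  })
});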

@@ -7,9 +7,13 @@ import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
 import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
 import { NextAPI } from '@/service/middleware/entry';
 import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
+import { getTrainingModeByCollection } from '@fastgpt/service/core/dataset/collection/utils';

 async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
   const body = req.body as PushDatasetDataProps;
+  // Adapter 4.9.0
+  body.trainingType = body.trainingType || body.trainingMode;
+
   const { collectionId, data } = body;

   if (!collectionId || !Array.isArray(data)) {
@@ -32,7 +36,7 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
   // auth dataset limit
   await checkDatasetLimit({
     teamId,
-    insertLen: predictDataLimitLength(collection.trainingType, data)
+    insertLen: predictDataLimitLength(getTrainingModeByCollection(collection), data)
   });

   return pushDataListToTrainingQueue({
@@ -40,8 +44,9 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
     teamId,
     tmbId,
     datasetId: collection.datasetId,
-    vectorModel: collection.dataset.vectorModel,
-    agentModel: collection.dataset.agentModel
+    agentModel: collection.dataset.agentModel,
+    vectorModel: collection.dataset.vectorModel,
+    vlmModel: collection.dataset.vlmModel
   });
 }
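
getTrainingModeByCollection is imported from the service package, but its body is not part of this diff; the visible contract is that the effective training mode is now derived from the collection's stored settings (trainingType plus flags such as autoIndexes) rather than read directly off collection.trainingType. A purely hypothetical sketch of that derivation — the qa member and the imageIndex flag are guesses, not confirmed by this PR:

import {
  TrainingModeEnum,
  DatasetCollectionDataProcessModeEnum
} from '@fastgpt/global/core/dataset/constants';

type CollectionLike = {
  trainingType: DatasetCollectionDataProcessModeEnum;
  autoIndexes?: boolean;
  imageIndex?: boolean; // guessed flag, suggested by the "image index" feature
};

// Hypothetical shape only, not the real implementation:
const getTrainingModeSketch = (collection: CollectionLike): TrainingModeEnum => {
  if (collection.trainingType === DatasetCollectionDataProcessModeEnum.qa) {
    return TrainingModeEnum.qa;
  }
  if (collection.autoIndexes) {
    return TrainingModeEnum.auto; // assumption: auto-index collections use the auto queue
  }
  return TrainingModeEnum.chunk;
};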

@@ -1,4 +1,4 @@
-import { getLLMModel, getEmbeddingModel } from '@fastgpt/service/core/ai/model';
+import { getLLMModel, getEmbeddingModel, getVlmModel } from '@fastgpt/service/core/ai/model';
 import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
 import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
 import { NextAPI } from '@/service/middleware/entry';
@@ -51,7 +51,8 @@ async function handler(req: ApiRequestProps<Query>): Promise<DatasetItemType> {
       : undefined,
     permission,
     vectorModel: getEmbeddingModel(dataset.vectorModel),
-    agentModel: getLLMModel(dataset.agentModel)
+    agentModel: getLLMModel(dataset.agentModel),
+    vlmModel: getVlmModel(dataset.vlmModel)
   };
 }
@@ -17,6 +17,7 @@ export type PostPreviewFilesChunksProps = {
   chunkSize: number;
   overlapRatio: number;
   customSplitChar?: string;
+  customPdfParse?: boolean;

   // Read params
   selector?: string;
@@ -40,7 +41,8 @@ async function handler(
     selector,
     isQAImport,
     datasetId,
-    externalFileId
+    externalFileId,
+    customPdfParse = false
   } = req.body;

   if (!sourceId) {
@@ -50,7 +52,7 @@ async function handler(
     throw new Error('chunkSize is too large, should be less than 30000');
   }

-  const { teamId, apiServer, feishuServer, yuqueServer } = await (async () => {
+  const { teamId, tmbId, apiServer, feishuServer, yuqueServer } = await (async () => {
     if (type === DatasetSourceReadTypeEnum.fileLocal) {
       const res = await authCollectionFile({
         req,
@@ -60,10 +62,11 @@ async function handler(
         per: OwnerPermissionVal
       });
       return {
-        teamId: res.teamId
+        teamId: res.teamId,
+        tmbId: res.tmbId
       };
     }
-    const { dataset } = await authDataset({
+    const { dataset, teamId, tmbId } = await authDataset({
       req,
       authApiKey: true,
       authToken: true,
@@ -71,7 +74,8 @@ async function handler(
       per: WritePermissionVal
     });
     return {
-      teamId: dataset.teamId,
+      teamId,
+      tmbId,
       apiServer: dataset.apiServer,
       feishuServer: dataset.feishuServer,
       yuqueServer: dataset.yuqueServer
@@ -80,6 +84,7 @@ async function handler(

   const rawText = await readDatasetSourceRawText({
     teamId,
+    tmbId,
     type,
     sourceId,
     selector,
@@ -87,7 +92,8 @@ async function handler(
     apiServer,
     feishuServer,
     yuqueServer,
-    externalFileId
+    externalFileId,
+    customPdfParse
   });

   return rawText2Chunks({
@@ -96,6 +102,6 @@ async function handler(
     overlapRatio,
     customReg: customSplitChar ? [customSplitChar] : [],
     isQAImport: isQAImport
-  }).slice(0, 15);
+  }).slice(0, 10);
 }
 export default NextAPI(handler);
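
The chunk-preview endpoint accepts the same customPdfParse flag and now returns at most 10 chunks instead of 15. A hedged request-body sketch — the field names come from the PostPreviewFilesChunksProps type above, while the concrete values are placeholders:

// Sketch of a preview request body after this change (values assumed).
const previewBody = {
  type: 'fileLocal',       // a DatasetSourceReadTypeEnum value, placeholder
  sourceId: '<fileId>',    // placeholder
  datasetId: '<datasetId>',// placeholder
  chunkSize: 512,
  overlapRatio: 0.2,
  customPdfParse: true     // new flag; the response is now capped at 10 chunks
};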

@@ -6,7 +6,7 @@ import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
 import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
 import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
 import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
-import { getLLMModel, getEmbeddingModel } from '@fastgpt/service/core/ai/model';
+import { getLLMModel, getEmbeddingModel, getVlmModel } from '@fastgpt/service/core/ai/model';
 import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
 import { ApiRequestProps } from '@fastgpt/service/type/next';
 import { OwnerPermissionVal } from '@fastgpt/global/support/permission/constant';
@@ -50,7 +50,8 @@ async function handler(req: ApiRequestProps<rebuildEmbeddingBody>): Promise<Resp
     appName: '切换索引模型',
     billSource: UsageSourceEnum.training,
     vectorModel: getEmbeddingModel(dataset.vectorModel)?.name,
-    agentModel: getLLMModel(dataset.agentModel)?.name
+    agentModel: getLLMModel(dataset.agentModel)?.name,
+    vllmModel: getVlmModel(dataset.vlmModel)?.name
   });

   // update vector model and dataset.data rebuild field

@@ -56,6 +56,7 @@ async function handler(
     avatar,
     intro,
     agentModel,
+    vlmModel,
     websiteConfig,
     externalReadUrl,
     apiServer,
@@ -109,7 +110,7 @@ async function handler(
   updateTraining({
     teamId: dataset.teamId,
     datasetId: id,
-    agentModel: agentModel?.model
+    agentModel
   });

   const onUpdate = async (session: ClientSession) => {
@@ -119,7 +120,8 @@ async function handler(
       ...parseParentIdInMongo(parentId),
       ...(name && { name }),
       ...(avatar && { avatar }),
-      ...(agentModel && { agentModel: agentModel.model }),
+      ...(agentModel && { agentModel }),
+      ...(vlmModel && { vlmModel }),
       ...(websiteConfig && { websiteConfig }),
       ...(status && { status }),
       ...(intro !== undefined && { intro }),
@@ -212,7 +214,7 @@ const updateTraining = async ({
     $set: {
       model: agentModel,
       retryCount: 5,
-      lockTime: new Date()
+      lockTime: new Date('2000/1/1')
     }
   }
 );
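
Back-dating lockTime to 2000/1/1 (instead of now) is what makes the reset take effect immediately: training workers typically claim only tasks whose lock has expired, so a far-past lockTime marks the rows as instantly claimable. A sketch of that claiming pattern, using the MongoDatasetTraining model seen above — the five-minute window and the exact query shape are assumptions, not code from this PR:

// Assumed worker-side claim: pick a task whose lock expired, then re-lock it.
const expiredBefore = new Date(Date.now() - 5 * 60 * 1000); // assumed lock window
const task = await MongoDatasetTraining.findOneAndUpdate(
  { lockTime: { $lt: expiredBefore } },
  { $set: { lockTime: new Date() } },
  { new: true }
);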

@@ -1,7 +1,7 @@
-import type { NextApiRequest, NextApiResponse } from 'next';
+import type { NextApiRequest } from 'next';
 import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
 import { CreateTrainingUsageProps } from '@fastgpt/global/support/wallet/usage/api.d';
-import { getLLMModel, getEmbeddingModel } from '@fastgpt/service/core/ai/model';
+import { getLLMModel, getEmbeddingModel, getVlmModel } from '@fastgpt/service/core/ai/model';
 import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
 import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
 import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
@@ -24,7 +24,8 @@ async function handler(req: NextApiRequest) {
     appName: name,
     billSource: UsageSourceEnum.training,
     vectorModel: getEmbeddingModel(dataset.vectorModel).name,
-    agentModel: getLLMModel(dataset.agentModel).name
+    agentModel: getLLMModel(dataset.agentModel).name,
+    vllmModel: getVlmModel(dataset.vlmModel)?.name
   });

   return billId;