feat: model config required check;feat: dataset text model default setting (#3866)

* feat: model config required check

* feat: dataset text model default setting

* perf: collection list count

* fix: ts

* remove index count
Archer
2025-02-24 19:55:49 +08:00
committed by GitHub
parent 3bfe802c48
commit 255764400f
32 changed files with 356 additions and 192 deletions

View File

@@ -15,6 +15,8 @@ export type updateDefaultBody = {
[ModelTypeEnum.tts]?: string;
[ModelTypeEnum.stt]?: string;
[ModelTypeEnum.rerank]?: string;
datasetTextLLM?: string;
datasetImageLLM?: string;
};
export type updateDefaultResponse = {};
@@ -25,10 +27,21 @@ async function handler(
): Promise<updateDefaultResponse> {
await authSystemAdmin({ req });
const { llm, embedding, tts, stt, rerank } = req.body;
const { llm, embedding, tts, stt, rerank, datasetTextLLM, datasetImageLLM } = req.body;
await mongoSessionRun(async (session) => {
await MongoSystemModel.updateMany({}, { $unset: { 'metadata.isDefault': 1 } }, { session });
// Remove all default flags
await MongoSystemModel.updateMany(
{},
{
$unset: {
'metadata.isDefault': 1,
'metadata.isDefaultDatasetTextModel': 1,
'metadata.isDefaultDatasetImageModel': 1
}
},
{ session }
);
if (llm) {
await MongoSystemModel.updateOne(
@@ -37,6 +50,20 @@ async function handler(
{ session }
);
}
if (datasetTextLLM) {
await MongoSystemModel.updateOne(
{ model: datasetTextLLM },
{ $set: { 'metadata.isDefaultDatasetTextModel': true } },
{ session }
);
}
if (datasetImageLLM) {
await MongoSystemModel.updateOne(
{ model: datasetImageLLM },
{ $set: { 'metadata.isDefaultDatasetImageModel': true } },
{ session }
);
}
if (embedding) {
await MongoSystemModel.updateOne(
{ model: embedding },
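For context, a minimal sketch of how an admin client might send the two new body fields. The endpoint path, model ids and fetch usage are assumptions for illustration; the field names come from updateDefaultBody above.

// Hypothetical admin-side call; the path and model ids are illustrative only.
// datasetTextLLM / datasetImageLLM are the two fields added in this commit.
async function setDefaultModels() {
  const res = await fetch('/api/core/ai/model/updateDefault', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      llm: 'gpt-4o-mini',
      embedding: 'text-embedding-3-small',
      datasetTextLLM: 'gpt-4o-mini', // default model for dataset text processing
      datasetImageLLM: 'gpt-4o' // default model for dataset image parsing
    })
  });
  if (!res.ok) throw new Error(`updateDefault failed: ${res.status}`);
}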

View File

@@ -11,6 +11,7 @@ import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
import { getVectorCountByCollectionId } from '@fastgpt/service/common/vectorStore/controller';
async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType> {
const { id } = req.query as { id: string };
@@ -29,12 +30,16 @@ async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType>
});
// get file
const file = collection?.fileId
? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
: undefined;
const [file, indexAmount] = await Promise.all([
collection?.fileId
? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
: undefined,
getVectorCountByCollectionId(collection.teamId, collection.datasetId, collection._id)
]);
return {
...collection,
indexAmount: indexAmount ?? 0,
...getCollectionSourceData(collection),
tags: await collectionTagsToTagLabel({
datasetId: collection.datasetId,
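A small aside on the hunk above: Promise.all already awaits each entry, so the inner awaits in the ternary are not required. A behaviour-equivalent sketch, reusing the names from the diff (illustration only):

// Same parallel read without the redundant inner await; getFileById and
// getVectorCountByCollectionId keep the call shapes used in the diff.
const [file, indexAmount] = await Promise.all([
  collection?.fileId
    ? getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
    : Promise.resolve(undefined),
  getVectorCountByCollectionId(collection.teamId, collection.datasetId, collection._id)
]);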

View File

@@ -1,12 +1,10 @@
import type { NextApiRequest } from 'next';
import { DatasetTrainingCollectionName } from '@fastgpt/service/core/dataset/training/schema';
import { Types } from '@fastgpt/service/common/mongo';
import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';
import type { GetDatasetCollectionsProps } from '@/global/core/api/datasetReq';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { DatasetDataCollectionName } from '@fastgpt/service/core/dataset/data/schema';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
import { NextAPI } from '@/service/middleware/entry';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
@@ -14,6 +12,8 @@ import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
import { PaginationResponse } from '@fastgpt/web/common/fetch/type';
import { parsePaginationRequest } from '@fastgpt/service/common/api/pagination';
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
async function handler(
req: NextApiRequest
@@ -77,6 +77,8 @@ async function handler(
.sort({
updateTime: -1
})
.skip(offset)
.limit(pageSize)
.lean();
return {
@@ -88,6 +90,7 @@ async function handler(
tags: item.tags
}),
dataAmount: 0,
indexAmount: 0,
trainingAmount: 0,
permission
}))
@@ -96,75 +99,62 @@ async function handler(
};
}
const [collections, total]: [DatasetCollectionsListItemType[], number] = await Promise.all([
MongoDatasetCollection.aggregate([
{
$match: match
},
{
$sort: { updateTime: -1 }
},
{
$skip: offset
},
{
$limit: pageSize
},
// count training data
{
$lookup: {
from: DatasetTrainingCollectionName,
let: { id: '$_id', team_id: match.teamId, dataset_id: match.datasetId },
pipeline: [
{
$match: {
$expr: {
$and: [{ $eq: ['$teamId', '$$team_id'] }, { $eq: ['$collectionId', '$$id'] }]
}
}
},
{ $count: 'count' }
],
as: 'trainingCount'
}
},
// count collection total data
{
$lookup: {
from: DatasetDataCollectionName,
let: { id: '$_id', team_id: match.teamId, dataset_id: match.datasetId },
pipeline: [
{
$match: {
$expr: {
$and: [
{ $eq: ['$teamId', '$$team_id'] },
{ $eq: ['$datasetId', '$$dataset_id'] },
{ $eq: ['$collectionId', '$$id'] }
]
}
}
},
{ $count: 'count' }
],
as: 'dataCount'
}
},
{
$project: {
...selectField,
dataAmount: {
$ifNull: [{ $arrayElemAt: ['$dataCount.count', 0] }, 0]
},
trainingAmount: {
$ifNull: [{ $arrayElemAt: ['$trainingCount.count', 0] }, 0]
const [collections, total]: [DatasetCollectionSchemaType[], number] = await Promise.all([
MongoDatasetCollection.find(match, undefined, { ...readFromSecondary })
.select(selectField)
.sort({ updateTime: -1 })
.skip(offset)
.limit(pageSize)
.lean(),
MongoDatasetCollection.countDocuments(match, { ...readFromSecondary })
]);
const collectionIds = collections.map((item) => item._id);
// Compute data amount
const [trainingAmount, dataAmount]: [
{ _id: string; count: number }[],
{ _id: string; count: number }[]
] = await Promise.all([
MongoDatasetCollection.aggregate(
[
{
$match: {
teamId: match.teamId,
datasetId: match.datasetId,
collectionId: { $in: collectionIds }
}
},
{
$group: {
_id: '$collectionId',
count: { $sum: 1 }
}
}
],
{
...readFromSecondary
}
]),
MongoDatasetCollection.countDocuments(match, {
...readFromSecondary
})
),
MongoDatasetData.aggregate(
[
{
$match: {
teamId: match.teamId,
datasetId: match.datasetId,
collectionId: { $in: collectionIds }
}
},
{
$group: {
_id: '$collectionId',
count: { $sum: 1 }
}
}
],
{
...readFromSecondary
}
)
]);
const list = await Promise.all(
@@ -174,11 +164,14 @@ async function handler(
datasetId,
tags: item.tags
}),
trainingAmount:
trainingAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
dataAmount: dataAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
permission
}))
);
if (list.find((item) => item.trainingAmount > 0)) {
if (list.some((item) => item.trainingAmount > 0)) {
startTrainingQueue();
}
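To trace the new counting path: each aggregate returns rows shaped like { _id: collectionId, count }, which the map above joins back onto the page with Array.find. A hedged, behaviour-equivalent sketch of that join using a Map keyed by collection id, so each lookup is O(1) instead of a linear scan (variable names reuse the diff's, types are simplified):

// Sketch only: fold the per-collection count rows into Maps, then read them
// while building the response items.
type CountRow = { _id: string; count: number };

const toCountMap = (rows: CountRow[]) => {
  const map = new Map<string, number>();
  for (const row of rows) map.set(String(row._id), row.count);
  return map;
};

const trainingMap = toCountMap(trainingAmount);
const dataMap = toCountMap(dataAmount);

const listWithCounts = collections.map((item) => ({
  ...item,
  trainingAmount: trainingMap.get(String(item._id)) ?? 0,
  dataAmount: dataMap.get(String(item._id)) ?? 0
}));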

View File

@@ -94,6 +94,7 @@ async function handler(
...item,
dataAmount: 0,
trainingAmount: 0,
indexAmount: 0,
permission
}))
),
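Taken together, each collection item in the list responses now carries three counters; a rough shape sketch (field names from the diffs above, everything else in the list item type is elided):

// Illustrative subset only, not the full DatasetCollectionsListItemType.
type CollectionCounters = {
  dataAmount: number; // data chunks stored for the collection
  trainingAmount: number; // queued training rows for the collection
  indexAmount: number; // vector count: 0 in the list endpoints, computed via getVectorCountByCollectionId in the detail endpoint
};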