feat: model config required check;feat: dataset text model default setting (#3866)

* feat: model config required check

* feat: dataset text model default setting

* perf: collection list count

* fix: ts

* remove index count
Archer
2025-02-24 19:55:49 +08:00
committed by GitHub
parent 3bfe802c48
commit 255764400f
32 changed files with 356 additions and 192 deletions

View File

@@ -15,6 +15,8 @@ export type updateDefaultBody = {
[ModelTypeEnum.tts]?: string;
[ModelTypeEnum.stt]?: string;
[ModelTypeEnum.rerank]?: string;
datasetTextLLM?: string;
datasetImageLLM?: string;
};
export type updateDefaultResponse = {};
@@ -25,10 +27,21 @@ async function handler(
): Promise<updateDefaultResponse> {
await authSystemAdmin({ req });
const { llm, embedding, tts, stt, rerank } = req.body;
const { llm, embedding, tts, stt, rerank, datasetTextLLM, datasetImageLLM } = req.body;
await mongoSessionRun(async (session) => {
await MongoSystemModel.updateMany({}, { $unset: { 'metadata.isDefault': 1 } }, { session });
// Remove all default flags
await MongoSystemModel.updateMany(
{},
{
$unset: {
'metadata.isDefault': 1,
'metadata.isDefaultDatasetTextModel': 1,
'metadata.isDefaultDatasetImageModel': 1
}
},
{ session }
);
if (llm) {
await MongoSystemModel.updateOne(
@@ -37,6 +50,20 @@ async function handler(
{ session }
);
}
if (datasetTextLLM) {
await MongoSystemModel.updateOne(
{ model: datasetTextLLM },
{ $set: { 'metadata.isDefaultDatasetTextModel': true } },
{ session }
);
}
if (datasetImageLLM) {
await MongoSystemModel.updateOne(
{ model: datasetImageLLM },
{ $set: { 'metadata.isDefaultDatasetImageModel': true } },
{ session }
);
}
if (embedding) {
await MongoSystemModel.updateOne(
{ model: embedding },
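For context, a minimal sketch of how an admin client might send the two new body fields. The endpoint path, model ids and fetch usage are assumptions for illustration; the field names come from updateDefaultBody above.

// Hypothetical admin-side call; the path and model ids are illustrative only.
// datasetTextLLM / datasetImageLLM are the two fields added in this commit.
async function setDefaultModels() {
  const res = await fetch('/api/core/ai/model/updateDefault', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      llm: 'gpt-4o-mini',
      embedding: 'text-embedding-3-small',
      datasetTextLLM: 'gpt-4o-mini', // default model for dataset text processing
      datasetImageLLM: 'gpt-4o' // default model for dataset image parsing
    })
  });
  if (!res.ok) throw new Error(`updateDefault failed: ${res.status}`);
}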

View File

@@ -11,6 +11,7 @@ import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
import { getVectorCountByCollectionId } from '@fastgpt/service/common/vectorStore/controller';
async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType> {
const { id } = req.query as { id: string };
@@ -29,12 +30,16 @@ async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType>
});
// get file
const file = collection?.fileId
? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
: undefined;
const [file, indexAmount] = await Promise.all([
collection?.fileId
? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
: undefined,
getVectorCountByCollectionId(collection.teamId, collection.datasetId, collection._id)
]);
return {
...collection,
indexAmount: indexAmount ?? 0,
...getCollectionSourceData(collection),
tags: await collectionTagsToTagLabel({
datasetId: collection.datasetId,
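A small aside on the hunk above: Promise.all already awaits each entry, so the inner awaits in the ternary are not required. A behaviour-equivalent sketch, reusing the names from the diff (illustration only):

// Same parallel read without the redundant inner await; getFileById and
// getVectorCountByCollectionId keep the call shapes used in the diff.
const [file, indexAmount] = await Promise.all([
  collection?.fileId
    ? getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
    : Promise.resolve(undefined),
  getVectorCountByCollectionId(collection.teamId, collection.datasetId, collection._id)
]);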

View File

@@ -1,12 +1,10 @@
import type { NextApiRequest } from 'next';
import { DatasetTrainingCollectionName } from '@fastgpt/service/core/dataset/training/schema';
import { Types } from '@fastgpt/service/common/mongo';
import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';
import type { GetDatasetCollectionsProps } from '@/global/core/api/datasetReq';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { DatasetDataCollectionName } from '@fastgpt/service/core/dataset/data/schema';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
import { NextAPI } from '@/service/middleware/entry';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
@@ -14,6 +12,8 @@ import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
import { PaginationResponse } from '@fastgpt/web/common/fetch/type';
import { parsePaginationRequest } from '@fastgpt/service/common/api/pagination';
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
async function handler(
req: NextApiRequest
@@ -77,6 +77,8 @@ async function handler(
.sort({
updateTime: -1
})
.skip(offset)
.limit(pageSize)
.lean();
return {
@@ -88,6 +90,7 @@ async function handler(
tags: item.tags
}),
dataAmount: 0,
indexAmount: 0,
trainingAmount: 0,
permission
}))
@@ -96,75 +99,62 @@ async function handler(
};
}
const [collections, total]: [DatasetCollectionsListItemType[], number] = await Promise.all([
MongoDatasetCollection.aggregate([
{
$match: match
},
{
$sort: { updateTime: -1 }
},
{
$skip: offset
},
{
$limit: pageSize
},
// count training data
{
$lookup: {
from: DatasetTrainingCollectionName,
let: { id: '$_id', team_id: match.teamId, dataset_id: match.datasetId },
pipeline: [
{
$match: {
$expr: {
$and: [{ $eq: ['$teamId', '$$team_id'] }, { $eq: ['$collectionId', '$$id'] }]
}
}
},
{ $count: 'count' }
],
as: 'trainingCount'
}
},
// count collection total data
{
$lookup: {
from: DatasetDataCollectionName,
let: { id: '$_id', team_id: match.teamId, dataset_id: match.datasetId },
pipeline: [
{
$match: {
$expr: {
$and: [
{ $eq: ['$teamId', '$$team_id'] },
{ $eq: ['$datasetId', '$$dataset_id'] },
{ $eq: ['$collectionId', '$$id'] }
]
}
}
},
{ $count: 'count' }
],
as: 'dataCount'
}
},
{
$project: {
...selectField,
dataAmount: {
$ifNull: [{ $arrayElemAt: ['$dataCount.count', 0] }, 0]
},
trainingAmount: {
$ifNull: [{ $arrayElemAt: ['$trainingCount.count', 0] }, 0]
const [collections, total]: [DatasetCollectionSchemaType[], number] = await Promise.all([
MongoDatasetCollection.find(match, undefined, { ...readFromSecondary })
.select(selectField)
.sort({ updateTime: -1 })
.skip(offset)
.limit(pageSize)
.lean(),
MongoDatasetCollection.countDocuments(match, { ...readFromSecondary })
]);
const collectionIds = collections.map((item) => item._id);
// Compute data amount
const [trainingAmount, dataAmount]: [
{ _id: string; count: number }[],
{ _id: string; count: number }[]
] = await Promise.all([
MongoDatasetCollection.aggregate(
[
{
$match: {
teamId: match.teamId,
datasetId: match.datasetId,
collectionId: { $in: collectionIds }
}
},
{
$group: {
_id: '$collectionId',
count: { $sum: 1 }
}
}
],
{
...readFromSecondary
}
]),
MongoDatasetCollection.countDocuments(match, {
...readFromSecondary
})
),
MongoDatasetData.aggregate(
[
{
$match: {
teamId: match.teamId,
datasetId: match.datasetId,
collectionId: { $in: collectionIds }
}
},
{
$group: {
_id: '$collectionId',
count: { $sum: 1 }
}
}
],
{
...readFromSecondary
}
)
]);
const list = await Promise.all(
@@ -174,11 +164,14 @@ async function handler(
datasetId,
tags: item.tags
}),
trainingAmount:
trainingAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
dataAmount: dataAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
permission
}))
);
if (list.find((item) => item.trainingAmount > 0)) {
if (list.some((item) => item.trainingAmount > 0)) {
startTrainingQueue();
}
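To trace the new counting path: each aggregate returns rows shaped like { _id: collectionId, count }, which the map above joins back onto the page with Array.find. A hedged, behaviour-equivalent sketch of that join using a Map keyed by collection id, so each lookup is O(1) instead of a linear scan (variable names reuse the diff's, types are simplified):

// Sketch only: fold the per-collection count rows into Maps, then read them
// while building the response items.
type CountRow = { _id: string; count: number };

const toCountMap = (rows: CountRow[]) => {
  const map = new Map<string, number>();
  for (const row of rows) map.set(String(row._id), row.count);
  return map;
};

const trainingMap = toCountMap(trainingAmount);
const dataMap = toCountMap(dataAmount);

const listWithCounts = collections.map((item) => ({
  ...item,
  trainingAmount: trainingMap.get(String(item._id)) ?? 0,
  dataAmount: dataMap.get(String(item._id)) ?? 0
}));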

View File

@@ -94,6 +94,7 @@ async function handler(
...item,
dataAmount: 0,
trainingAmount: 0,
indexAmount: 0,
permission
}))
),
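Taken together, each collection item in the list responses now carries three counters; a rough shape sketch (field names from the diffs above, everything else in the list item type is elided):

// Illustrative subset only, not the full DatasetCollectionsListItemType.
type CollectionCounters = {
  dataAmount: number; // data chunks stored for the collection
  trainingAmount: number; // queued training rows for the collection
  indexAmount: number; // vector count: 0 in the list endpoints, computed via getVectorCountByCollectionId in the detail endpoint
};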