Compare commits
43 Commits
v4.9.8 ... gru/projec

SHA1: 1066ea62e3, 8ed35ffe7e, 0f866fc552, 05c7ba4483, fa80ce3a77, 830358aa72, 02b214b3ec,
a171c7b11c, 802de11363, b4ecfb0b79, 331b851a78, 50d235c42a, 9838593451, c25cd48e72,
874300a56a, 1dea2b71b4, a8673344b1, 9709ae7a4f, fae76e887a, 9af92d1eae, 6a6719e93d,
50481f4ca8, 88bd3aaa9e, dd3c251603, aa55f059d4, 89c9a02650, 0f3bfa280a, 593ebfd269,
f6dc2204f5, d44c338059, 1dac2b70ec, 9fef3e15fb, 2d2d0fffe9, c6e0b5a1e7, 932aa28a1f,
9c59bc2c17, e145f63554, 554b2ca8dc, 4e83840c14, a6c80684d1, a4db03a3b7, cba8f773fe,
bd93f28d6f
2  .vscode/settings.json  (vendored)

@@ -21,7 +21,7 @@
   "i18n-ally.namespace": true,
   "i18n-ally.pathMatcher": "{locale}/{namespaces}.json",
   "i18n-ally.extract.targetPickingStrategy": "most-similar-by-key",
-  "i18n-ally.translate.engines": ["google"],
+  "i18n-ally.translate.engines": ["deepl","google"],
   "[typescript]": {
     "editor.defaultFormatter": "esbenp.prettier-vscode"
   },
@@ -132,15 +132,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -150,8 +150,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:

@@ -109,15 +109,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -127,8 +127,8 @@ services:
      - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:

@@ -23,7 +23,7 @@ services:
     volumes:
       - ./pg/data:/var/lib/postgresql/data
     healthcheck:
-      test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy']
+      test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'postgres']
       interval: 5s
       timeout: 5s
       retries: 10
@@ -96,15 +96,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -114,8 +114,8 @@ services:
      - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:

@@ -72,15 +72,15 @@ services:

   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -90,8 +90,8 @@ services:
      - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:
BIN  docSite/assets/imgs/official_account_faq.png  (new file)
Binary file not shown. After: Size: 386 KiB
@@ -959,10 +959,16 @@ curl --location --request POST 'http://localhost:3000/api/core/chat/getHistories
{{< markdownify >}}

{{% alert icon=" " context="success" %}}
目前仅能获取到当前 API key 的创建者的对话。

- appId - 应用 Id
- offset - 偏移量,即从第几条数据开始取
- pageSize - 记录数量
- source - 对话源。source=api,表示获取通过 API 创建的对话(不会获取到页面上的对话记录)
- startCreateTime - 开始创建时间(可选)
- endCreateTime - 结束创建时间(可选)
- startUpdateTime - 开始更新时间(可选)
- endUpdateTime - 结束更新时间(可选)
{{% /alert %}}

{{< /markdownify >}}
@@ -645,7 +645,7 @@ data 为集合的 ID。
 {{< /tab >}}
 {{< /tabs >}}

-### 创建一个外部文件库集合(商业版)
+### 创建一个外部文件库集合(弃用)

 {{< tabs tabTotal="3" >}}
 {{< tab tabName="请求示例" >}}
50  docSite/content/zh-cn/docs/development/upgrading/4910.md  (new file)

@@ -0,0 +1,50 @@
---
title: 'V4.9.10'
description: 'FastGPT V4.9.10 更新说明'
icon: 'upgrade'
draft: false
toc: true
weight: 790
---

## 升级指南

重要提示:本次更新会重新构建全文索引,构建期间,全文检索结果会为空,4c16g 700 万组全文索引大致消耗 25 分钟。如需无缝升级,需自行做表同步工程。

### 1. 做好数据备份

### 2. 更新镜像 tag

- 更新 FastGPT 镜像 tag: v4.9.10-fix2
- 更新 FastGPT 商业版镜像 tag: v4.9.10-fix2
- mcp_server 无需更新
- Sandbox 无需更新
- AIProxy 无需更新

## 🚀 新增内容

1. 支持 PG 设置`systemEnv.hnswMaxScanTuples`参数,提高迭代搜索的数据总量。
2. 知识库预处理参数增加 “分块条件”,可控制某些情况下不进行分块处理。
3. 知识库预处理参数增加 “段落优先” 模式,可控制最大段落深度。原“长度优先”模式,不再内嵌段落优先逻辑。
4. 工作流调整为单向接入和接出,支持快速的添加下一步节点。
5. 开放飞书和语雀知识库到开源版。
6. gemini 和 claude 最新模型预设。

## ⚙️ 优化

1. LLM stream调用,默认超时调大。
2. 部分确认交互优化。
3. 纠正原先知识库的“表格数据集”名称,改成“备份导入”。同时支持知识库索引的导出和导入。
4. 工作流知识库引用上限,如果工作流中没有相关 AI 节点,则交互模式改成纯手动输入,并且上限为 1000万。
5. 语音输入,移动端判断逻辑,准确判断是否为手机,而不是小屏。
6. 优化上下文截取算法,至少保证留下一组 Human 信息。

## 🐛 修复

1. 全文检索多知识库时排序得分排序不正确。
2. 流响应捕获 finish_reason 可能不正确。
3. 工具调用模式,未保存思考输出。
4. 知识库 indexSize 参数未生效。
5. 工作流嵌套 2 层后,获取预览引用、上下文不正确。
6. xlsx 转成 Markdown 时候,前面会多出一个空格。
7. 读取 Markdown 文件时,Base64 图片未进行额外抓换保存。
25  docSite/content/zh-cn/docs/development/upgrading/4911.md  (new file)

@@ -0,0 +1,25 @@
---
title: 'V4.9.11(进行中)'
description: 'FastGPT V4.9.11 更新说明'
icon: 'upgrade'
draft: false
toc: true
weight: 789
---

## 🚀 新增内容

1. 工作流中增加节点搜索功能。
2. 工作流中,子流程版本控制,可选择“保持最新版本”,无需手动更新。

## ⚙️ 优化

1. 原文缓存改用 gridfs 存储,提高上限。

## 🐛 修复

1. 工作流中,管理员声明的全局系统工具,无法进行版本管理。
2. 工具调用节点前,有交互节点时,上下文异常。
3. 修复备份导入,小于 1000 字时,无法分块问题。
4. 自定义 PDF 解析,无法保存 base64 图片。
@@ -1,5 +1,5 @@
 ---
-title: 'V4.9.8(进行中)'
+title: 'V4.9.8'
 description: 'FastGPT V4.9.8 更新说明'
 icon: 'upgrade'
 draft: false
@@ -7,6 +7,17 @@ toc: true
 weight: 792
 ---

+## 升级指南
+
+### 1. 做好数据备份
+
+### 2. 更新镜像 tag
+
+- 更新 FastGPT 镜像 tag: v4.9.8
+- 更新 FastGPT 商业版镜像 tag: v4.9.8
+- mcp_server 无需更新
+- Sandbox 无需更新
+- AIProxy 无需更新

 ## 🚀 新增内容

43  docSite/content/zh-cn/docs/development/upgrading/499.md  (new file)

@@ -0,0 +1,43 @@
---
title: 'V4.9.9'
description: 'FastGPT V4.9.9 更新说明'
icon: 'upgrade'
draft: false
toc: true
weight: 791
---

## 升级指南

### 1. 做好数据备份

### 2. 商业版用户替换新 License

商业版用户可以联系 FastGPT 团队支持同学,获取 License 替换方案。替换后,可以直接升级系统,管理后台会提示输入新 License。

### 3. 更新镜像 tag

- 更新 FastGPT 镜像 tag: v4.9.9
- 更新 FastGPT 商业版镜像 tag: v4.9.9
- mcp_server 无需更新
- Sandbox 无需更新
- AIProxy 无需更新

## 🚀 新增内容

1. 切换 SessionId 来替代 JWT 实现登录鉴权,可控制最大登录客户端数量。
2. 新的商业版 License 管理模式。
3. 公众号调用,显示记录 chat 对话错误,方便排查。
4. API 知识库支持 BasePath 选择,需增加 API 接口,具体可见[API 知识库介绍](/docs/guide/knowledge_base/api_dataset/#4-获取文件详细信息用于获取文件信息)

## ⚙️ 优化

1. 优化工具调用,新工具的判断逻辑。
2. 调整 Cite 引用提示词。

## 🐛 修复

1. 无法正常获取应用历史保存/发布记录。
2. 成员创建 MCP 工具权限问题。
3. 来源引用展示,存在 ID 传递错误,导致提示无权操作该文件。
4. 回答标注前端数据报错。
@@ -185,3 +185,40 @@ curl --location --request GET '{{baseURL}}/v1/file/read?id=xx' \
{{< /tabs >}}


### 4. 获取文件详细信息(用于获取文件信息)

{{< tabs tabTotal="2" >}}
{{< tab tabName="请求示例" >}}
{{< markdownify >}}

id 为文件的 id。

```bash
curl --location --request GET '{{baseURL}}/v1/file/detail?id=xx' \
--header 'Authorization: Bearer {{authorization}}'
```

{{< /markdownify >}}
{{< /tab >}}

{{< tab tabName="响应示例" >}}
{{< markdownify >}}

```json
{
  "code": 200,
  "success": true,
  "message": "",
  "data": {
    "id": "docs",
    "parentId": "",
    "name": "docs"
  }
}
```

{{< /markdownify >}}
{{< /tab >}}
{{< /tabs >}}
@@ -28,7 +28,6 @@ FastGPT 商业版是基于 FastGPT 开源版的增强版本,增加了一些独
 | 应用发布安全配置 | ❌ | ✅ | ✅ |
 | 内容审核 | ❌ | ✅ | ✅ |
 | web站点同步 | ❌ | ✅ | ✅ |
-| 主流文档库接入(目前支持:语雀、飞书) | ❌ | ✅ | ✅ |
 | 增强训练模式 | ❌ | ✅ | ✅ |
 | 第三方应用快速接入(飞书、公众号) | ❌ | ✅ | ✅ |
 | 管理后台 | ❌ | ✅ | 不需要 |
@@ -132,7 +132,9 @@ weight: 506
 ### 公众号没响应

 检查应用对话日志,如果有对话日志,但是微信公众号无响应,则是白名单 IP未成功。
-添加白名单IP 后,通常需要等待几分钟微信更新。
+添加白名单IP 后,通常需要等待几分钟微信更新。可以在对话日志中,找点错误日志。
+
+

 ### 如何新开一个聊天记录
2  env.d.ts  (vendored)

@@ -4,7 +4,6 @@ declare global {
     LOG_DEPTH: string;
     DEFAULT_ROOT_PSW: string;
     DB_MAX_LINK: string;
-    TOKEN_KEY: string;
     FILE_TOKEN_KEY: string;
     ROOT_KEY: string;
     OPENAI_BASE_URL: string;
@@ -37,6 +36,7 @@ declare global {
     CONFIG_JSON_PATH?: string;
     PASSWORD_LOGIN_LOCK_SECONDS?: string;
     PASSWORD_EXPIRED_MONTH?: string;
+    MAX_LOGIN_SESSION?: string;
   }
  }
 }
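For context, a minimal sketch of how the new `MAX_LOGIN_SESSION` variable might be consumed; the helper name and the fallback of 10 are illustrative assumptions, not taken from this diff:

```typescript
// Hypothetical helper: parse MAX_LOGIN_SESSION with a fallback.
// The default of 10 is an assumption for illustration only.
export const getMaxLoginSessions = (): number => {
  const raw = process.env.MAX_LOGIN_SESSION;
  const parsed = raw ? Number(raw) : NaN;
  return Number.isFinite(parsed) && parsed > 0 ? parsed : 10;
};
```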
@@ -27,7 +27,7 @@ const datasetErr = [
   },
   {
     statusText: DatasetErrEnum.unExist,
-    message: 'core.dataset.error.unExistDataset'
+    message: i18nT('common:core.dataset.error.unExistDataset')
   },
   {
     statusText: DatasetErrEnum.unExistCollection,
@@ -2,13 +2,28 @@ import { type ErrType } from '../errorCode';
 import { i18nT } from '../../../../web/i18n/utils';
 /* dataset: 509000 */
 export enum SystemErrEnum {
-  communityVersionNumLimit = 'communityVersionNumLimit'
+  communityVersionNumLimit = 'communityVersionNumLimit',
+  licenseAppAmountLimit = 'licenseAppAmountLimit',
+  licenseDatasetAmountLimit = 'licenseDatasetAmountLimit',
+  licenseUserAmountLimit = 'licenseUserAmountLimit'
 }

 const systemErr = [
   {
     statusText: SystemErrEnum.communityVersionNumLimit,
     message: i18nT('common:code_error.system_error.community_version_num_limit')
   },
+  {
+    statusText: SystemErrEnum.licenseAppAmountLimit,
+    message: i18nT('common:code_error.system_error.license_app_amount_limit')
+  },
+  {
+    statusText: SystemErrEnum.licenseDatasetAmountLimit,
+    message: i18nT('common:code_error.system_error.license_dataset_amount_limit')
+  },
+  {
+    statusText: SystemErrEnum.licenseUserAmountLimit,
+    message: i18nT('common:code_error.system_error.license_user_amount_limit')
+  }
 ];
@@ -7,6 +7,10 @@ export const CUSTOM_SPLIT_SIGN = '-----CUSTOM_SPLIT_SIGN-----';
 type SplitProps = {
   text: string;
   chunkSize: number;
+
+  paragraphChunkDeep?: number; // Paragraph deep
+  paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
+
   maxSize?: number;
   overlapRatio?: number;
   customReg?: string[];
@@ -108,6 +112,8 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   let {
     text = '',
     chunkSize,
+    paragraphChunkDeep = 5,
+    paragraphChunkMinSize = 100,
     maxSize = defaultMaxChunkSize,
     overlapRatio = 0.15,
     customReg = []
@@ -123,7 +129,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   text = text.replace(/(```[\s\S]*?```|~~~[\s\S]*?~~~)/g, function (match) {
     return match.replace(/\n/g, codeBlockMarker);
   });
-  // 2. 表格处理 - 单独提取表格出来,进行表头合并
+  // 2. Markdown 表格处理 - 单独提取表格出来,进行表头合并
   const tableReg =
     /(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n?)*)(?:\n|$)/g;
   const tableDataList = text.match(tableReg);
@@ -143,25 +149,40 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   text = text.replace(/(\r?\n|\r){3,}/g, '\n\n\n');

   // The larger maxLen is, the next sentence is less likely to trigger splitting
-  const markdownIndex = 4;
-  const forbidOverlapIndex = 8;
+  const customRegLen = customReg.length;
+  const markdownIndex = paragraphChunkDeep - 1;
+  const forbidOverlapIndex = customRegLen + markdownIndex + 4;
+
+  const markdownHeaderRules = ((deep?: number): { reg: RegExp; maxLen: number }[] => {
+    if (!deep || deep === 0) return [];
+
+    const maxDeep = Math.min(deep, 8); // Maximum 8 levels
+    const rules: { reg: RegExp; maxLen: number }[] = [];
+
+    for (let i = 1; i <= maxDeep; i++) {
+      const hashSymbols = '#'.repeat(i);
+      rules.push({
+        reg: new RegExp(`^(${hashSymbols}\\s[^\\n]+\\n)`, 'gm'),
+        maxLen: chunkSize
+      });
+    }
+
+    return rules;
+  })(paragraphChunkDeep);

   const stepReges: { reg: RegExp | string; maxLen: number }[] = [
     ...customReg.map((text) => ({
       reg: text.replaceAll('\\n', '\n'),
       maxLen: chunkSize
     })),
-    { reg: /^(#\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(##\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(###\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(####\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(#####\s[^\n]+\n)/gm, maxLen: chunkSize },
+    ...markdownHeaderRules,

     { reg: /([\n](```[\s\S]*?```|~~~[\s\S]*?~~~))/g, maxLen: maxSize }, // code block
     // HTML Table tag 尽可能保障完整
     {
       reg: /(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n)*)/g,
-      maxLen: Math.min(chunkSize * 1.5, maxSize)
-    }, // Table 尽可能保证完整性
+      maxLen: chunkSize
+    }, // Markdown Table 尽可能保证完整性
     { reg: /(\n{2,})/g, maxLen: chunkSize },
     { reg: /([\n])/g, maxLen: chunkSize },
     // ------ There's no overlap on the top
@@ -172,12 +193,10 @@ const commonSplit = (props: SplitProps): SplitResponse => {
     { reg: /([,]|,\s)/g, maxLen: chunkSize }
   ];

-  const customRegLen = customReg.length;
   const checkIsCustomStep = (step: number) => step < customRegLen;
   const checkIsMarkdownSplit = (step: number) =>
     step >= customRegLen && step <= markdownIndex + customRegLen;

-  const checkForbidOverlap = (step: number) => step <= forbidOverlapIndex + customRegLen;
+  const checkForbidOverlap = (step: number) => step <= forbidOverlapIndex;

   // if use markdown title split, Separate record title
   const getSplitTexts = ({ text, step }: { text: string; step: number }) => {
@@ -301,6 +320,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   const splitTexts = getSplitTexts({ text, step });

   const chunks: string[] = [];
+
   for (let i = 0; i < splitTexts.length; i++) {
     const item = splitTexts[i];

@@ -443,7 +463,6 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   */
 export const splitText2Chunks = (props: SplitProps): SplitResponse => {
   let { text = '' } = props;
-  const start = Date.now();
   const splitWithCustomSign = text.split(CUSTOM_SPLIT_SIGN);

   const splitResult = splitWithCustomSign.map((item) => {
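The hunks above replace the five hard-coded `#` through `#####` heading rules with rules generated from `paragraphChunkDeep`. A standalone sketch of that generation, using only the logic shown in the diff:

```typescript
// Build one split rule per markdown heading level, up to paragraphChunkDeep
// levels (capped at 8), mirroring the markdownHeaderRules IIFE in the diff.
const buildMarkdownHeaderRules = (
  deep: number | undefined,
  chunkSize: number
): { reg: RegExp; maxLen: number }[] => {
  if (!deep || deep === 0) return [];

  const maxDeep = Math.min(deep, 8);
  const rules: { reg: RegExp; maxLen: number }[] = [];

  for (let i = 1; i <= maxDeep; i++) {
    const hashSymbols = '#'.repeat(i); // "#", "##", ... up to maxDeep
    rules.push({
      reg: new RegExp(`^(${hashSymbols}\\s[^\\n]+\\n)`, 'gm'),
      maxLen: chunkSize
    });
  }

  return rules;
};

// Example: paragraphChunkDeep = 5 reproduces the old "#" ... "#####" rules.
const rules = buildMarkdownHeaderRules(5, 512);
```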
25  packages/global/common/system/types/index.d.ts  (vendored)

@@ -70,6 +70,9 @@ export type FastGPTFeConfigsType = {
   show_publish_dingtalk?: boolean;
   show_publish_offiaccount?: boolean;

+  show_dataset_enhance?: boolean;
+  show_batch_eval?: boolean;
+
   concatMd?: string;
   docUrl?: string;
   openAPIDocUrl?: string;
@@ -127,9 +130,11 @@ export type SystemEnvType = {
   vectorMaxProcess: number;
   qaMaxProcess: number;
   vlmMaxProcess: number;
-  hnswEfSearch: number;
   tokenWorkers: number; // token count max worker
+
+  hnswEfSearch: number;
+  hnswMaxScanTuples: number;

   oneapiUrl?: string;
   chatApiKey?: string;
@@ -142,3 +147,21 @@ export type customPdfParseType = {
   doc2xKey?: string;
   price?: number;
 };
+
+export type LicenseDataType = {
+  startTime: string;
+  expiredTime: string;
+  company: string;
+  description?: string; // 描述
+  hosts?: string[]; // 管理端有效域名
+  maxUsers?: number; // 最大用户数,不填默认不上限
+  maxApps?: number; // 最大应用数,不填默认不上限
+  maxDatasets?: number; // 最大数据集数,不填默认不上限
+  functions: {
+    sso: boolean;
+    pay: boolean;
+    customTemplates: boolean;
+    datasetEnhance: boolean;
+    batchEval: boolean;
+  };
+};
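For illustration, a sketch of a `systemEnv` value that sets the new `hnswMaxScanTuples` field next to the other fields from the `SystemEnvType` hunk above; the numbers are placeholders, not recommended defaults:

```typescript
// Placeholder values for illustration only; only the field names come from
// the SystemEnvType hunk above.
const systemEnv = {
  vectorMaxProcess: 10,
  qaMaxProcess: 10,
  vlmMaxProcess: 10,
  tokenWorkers: 30,
  hnswEfSearch: 100,
  hnswMaxScanTuples: 100000 // new in this range: raises the PG iterative-scan cap
};
```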
@@ -2,6 +2,248 @@ import { type PromptTemplateItem } from '../type.d';
|
||||
import { i18nT } from '../../../../web/i18n/utils';
|
||||
import { getPromptByVersion } from './utils';
|
||||
|
||||
export const Prompt_userQuotePromptList: PromptTemplateItem[] = [
|
||||
{
|
||||
title: i18nT('app:template.standard_template'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.7']: `## 任务描述
|
||||
你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
|
||||
同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
|
||||
|
||||
## 追溯展示规则
|
||||
|
||||
- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
|
||||
- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
|
||||
- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
|
||||
- 不要把示例作为知识点。
|
||||
- 不要伪造 id,返回的 id 必须都存在 <Cites></Cites> 中!
|
||||
|
||||
## 通用规则
|
||||
|
||||
- 如果你不清楚答案,你需要澄清。
|
||||
- 避免提及你是从 <Cites></Cites> 获取的知识。
|
||||
- 保持答案与 <Cites></Cites> 中描述的一致。
|
||||
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
|
||||
- 使用与问题相同的语言回答。
|
||||
|
||||
<Cites>
|
||||
{{quote}}
|
||||
</Cites>
|
||||
|
||||
## 用户问题
|
||||
|
||||
{{question}}
|
||||
|
||||
## 回答
|
||||
`
|
||||
}
|
||||
},
|
||||
{
|
||||
title: i18nT('app:template.qa_template'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.7']: `## 任务描述
|
||||
作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
|
||||
|
||||
## 回答要求
|
||||
- 选择其中一个或多个问答对进行回答。
|
||||
- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
|
||||
- 如果没有相关的问答对,你需要澄清。
|
||||
- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
|
||||
- 使用与问题相同的语言回答。
|
||||
|
||||
<QA>
|
||||
{{quote}}
|
||||
</QA>
|
||||
|
||||
## 用户问题
|
||||
|
||||
{{question}}
|
||||
|
||||
## 回答
|
||||
`
|
||||
}
|
||||
},
|
||||
{
|
||||
title: i18nT('app:template.standard_strict'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.7']: `## 任务描述
|
||||
你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
|
||||
同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
|
||||
|
||||
## 追溯展示规则
|
||||
|
||||
- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
|
||||
- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
|
||||
- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
|
||||
- 不要把示例作为知识点。
|
||||
- 不要伪造 id,返回的 id 必须都存在 <Cites></Cites> 中!
|
||||
|
||||
## 通用规则
|
||||
|
||||
- 如果你不清楚答案,你需要澄清。
|
||||
- 避免提及你是从 <Cites></Cites> 获取的知识。
|
||||
- 保持答案与 <Cites></Cites> 中描述的一致。
|
||||
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
|
||||
- 使用与问题相同的语言回答。
|
||||
|
||||
## 严格要求
|
||||
|
||||
你只能使用 <Cites></Cites> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <Cites></Cites> 中的内容一致。
|
||||
|
||||
<Cites>
|
||||
{{quote}}
|
||||
</Cites>
|
||||
|
||||
## 用户问题
|
||||
|
||||
{{question}}
|
||||
|
||||
## 回答
|
||||
`
|
||||
}
|
||||
},
|
||||
{
|
||||
title: i18nT('app:template.hard_strict'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.7']: `## 任务描述
|
||||
作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
|
||||
|
||||
## 回答要求
|
||||
- 选择其中一个或多个问答对进行回答。
|
||||
- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
|
||||
- 如果没有相关的问答对,你需要澄清。
|
||||
- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
|
||||
- 使用与问题相同的语言回答。
|
||||
|
||||
## 严格要求
|
||||
|
||||
你只能使用 <QA></QA> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <QA></QA> 中的内容一致。
|
||||
|
||||
<QA>
|
||||
{{quote}}
|
||||
</QA>
|
||||
|
||||
## 用户问题
|
||||
|
||||
{{question}}
|
||||
|
||||
## 回答
|
||||
`
|
||||
}
|
||||
}
|
||||
];
|
||||
|
||||
export const Prompt_systemQuotePromptList: PromptTemplateItem[] = [
|
||||
{
|
||||
title: i18nT('app:template.standard_template'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.7']: `## 任务描述
|
||||
你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
|
||||
同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
|
||||
|
||||
## 追溯展示规则
|
||||
|
||||
- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
|
||||
- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
|
||||
- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
|
||||
- 不要把示例作为知识点。
|
||||
- 不要伪造 id,返回的 id 必须都存在 <Cites></Cites> 中!
|
||||
|
||||
## 通用规则
|
||||
|
||||
- 如果你不清楚答案,你需要澄清。
|
||||
- 避免提及你是从 <Cites></Cites> 获取的知识。
|
||||
- 保持答案与 <Cites></Cites> 中描述的一致。
|
||||
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
|
||||
- 使用与问题相同的语言回答。
|
||||
|
||||
<Cites>
|
||||
{{quote}}
|
||||
</Cites>`
|
||||
}
|
||||
},
|
||||
{
|
||||
title: i18nT('app:template.qa_template'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.8']: `## 任务描述
|
||||
作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
|
||||
|
||||
## 回答要求
|
||||
- 选择其中一个或多个问答对进行回答。
|
||||
- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
|
||||
- 如果没有相关的问答对,你需要澄清。
|
||||
- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
|
||||
- 使用与问题相同的语言回答。
|
||||
|
||||
<QA>
|
||||
{{quote}}
|
||||
</QA>`
|
||||
}
|
||||
},
|
||||
{
|
||||
title: i18nT('app:template.standard_strict'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.7']: `## 任务描述
|
||||
你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
|
||||
同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
|
||||
|
||||
## 追溯展示规则
|
||||
|
||||
- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
|
||||
- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
|
||||
- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
|
||||
- 不要把示例作为知识点。
|
||||
- 不要伪造 id,返回的 id 必须都存在 <Cites></Cites> 中!
|
||||
|
||||
## 通用规则
|
||||
|
||||
- 如果你不清楚答案,你需要澄清。
|
||||
- 避免提及你是从 <Cites></Cites> 获取的知识。
|
||||
- 保持答案与 <Cites></Cites> 中描述的一致。
|
||||
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
|
||||
- 使用与问题相同的语言回答。
|
||||
|
||||
## 严格要求
|
||||
|
||||
你只能使用 <Cites></Cites> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <Cites></Cites> 中的内容一致。
|
||||
|
||||
<Cites>
|
||||
{{quote}}
|
||||
</Cites>`
|
||||
}
|
||||
},
|
||||
{
|
||||
title: i18nT('app:template.hard_strict'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.7']: `## 任务描述
|
||||
作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
|
||||
|
||||
## 回答要求
|
||||
- 选择其中一个或多个问答对进行回答。
|
||||
- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
|
||||
- 如果没有相关的问答对,你需要澄清。
|
||||
- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
|
||||
- 使用与问题相同的语言回答。
|
||||
|
||||
## 严格要求
|
||||
|
||||
你只能使用 <QA></QA> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <QA></QA> 中的内容一致。
|
||||
|
||||
<QA>
|
||||
{{quote}}
|
||||
</QA>`
|
||||
}
|
||||
}
|
||||
];
|
||||
|
||||
export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
|
||||
{
|
||||
title: i18nT('app:template.standard_template'),
|
||||
@@ -10,11 +252,6 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
|
||||
['4.9.7']: `{
|
||||
"id": "{{id}}",
|
||||
"sourceName": "{{source}}",
|
||||
"content": "{{q}}\n{{a}}"
|
||||
}
|
||||
`,
|
||||
['4.9.2']: `{
|
||||
"sourceName": "{{source}}",
|
||||
"updateTime": "{{updateTime}}",
|
||||
"content": "{{q}}\n{{a}}"
|
||||
}
|
||||
@@ -25,7 +262,7 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
|
||||
title: i18nT('app:template.qa_template'),
|
||||
desc: i18nT('app:template.qa_template_des'),
|
||||
value: {
|
||||
['4.9.2']: `<Question>
|
||||
['4.9.7']: `<Question>
|
||||
{{q}}
|
||||
</Question>
|
||||
<Answer>
|
||||
@@ -40,11 +277,6 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
|
||||
['4.9.7']: `{
|
||||
"id": "{{id}}",
|
||||
"sourceName": "{{source}}",
|
||||
"content": "{{q}}\n{{a}}"
|
||||
}
|
||||
`,
|
||||
['4.9.2']: `{
|
||||
"sourceName": "{{source}}",
|
||||
"updateTime": "{{updateTime}}",
|
||||
"content": "{{q}}\n{{a}}"
|
||||
}
|
||||
@@ -55,7 +287,7 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
|
||||
title: i18nT('app:template.hard_strict'),
|
||||
desc: i18nT('app:template.hard_strict_des'),
|
||||
value: {
|
||||
['4.9.2']: `<Question>
|
||||
['4.9.7']: `<Question>
|
||||
{{q}}
|
||||
</Question>
|
||||
<Answer>
|
||||
@@ -64,263 +296,12 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
|
||||
}
|
||||
}
|
||||
];
|
||||
|
||||
export const getQuoteTemplate = (version?: string) => {
|
||||
const defaultTemplate = Prompt_QuoteTemplateList[0].value;
|
||||
|
||||
return getPromptByVersion(version, defaultTemplate);
|
||||
};
|
||||
|
||||
export const Prompt_userQuotePromptList: PromptTemplateItem[] = [
|
||||
{
|
||||
title: i18nT('app:template.standard_template'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.7']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
|
||||
|
||||
<Reference>
|
||||
{{quote}}
|
||||
</Reference>
|
||||
|
||||
回答要求:
|
||||
- 如果你不清楚答案,你需要澄清。
|
||||
- 避免提及你是从 <Reference></Reference> 获取的知识。
|
||||
- 保持答案与 <Reference></Reference> 中描述的一致。
|
||||
- 使用 Markdown 语法优化回答格式。
|
||||
- 使用与问题相同的语言回答。
|
||||
- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
|
||||
- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
|
||||
- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。`,
|
||||
['4.9.2']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
|
||||
|
||||
<Reference>
|
||||
{{quote}}
|
||||
</Reference>
|
||||
|
||||
回答要求:
|
||||
- 如果你不清楚答案,你需要澄清。
|
||||
- 避免提及你是从 <Reference></Reference> 获取的知识。
|
||||
- 保持答案与 <Reference></Reference> 中描述的一致。
|
||||
- 使用 Markdown 语法优化回答格式。
|
||||
- 使用与问题相同的语言回答。
|
||||
|
||||
问题:"""{{question}}"""`
|
||||
}
|
||||
},
|
||||
{
|
||||
title: i18nT('app:template.qa_template'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.2']: `使用 <QA></QA> 标记中的问答对进行回答。
|
||||
|
||||
<QA>
|
||||
{{quote}}
|
||||
</QA>
|
||||
|
||||
回答要求:
|
||||
- 选择其中一个或多个问答对进行回答。
|
||||
- 回答的内容应尽可能与 <答案></答案> 中的内容一致。
|
||||
- 如果没有相关的问答对,你需要澄清。
|
||||
- 避免提及你是从 QA 获取的知识,只需要回复答案。
|
||||
|
||||
问题:"""{{question}}"""`
|
||||
}
|
||||
},
|
||||
{
|
||||
title: i18nT('app:template.standard_strict'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.7']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
|
||||
|
||||
<Reference>
|
||||
{{quote}}
|
||||
</Reference>
|
||||
|
||||
思考流程:
|
||||
1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
|
||||
2. 如果有关,你按下面的要求回答。
|
||||
3. 如果无关,你直接拒绝回答本次问题。
|
||||
|
||||
回答要求:
|
||||
- 避免提及你是从 <Reference></Reference> 获取的知识。
|
||||
- 保持答案与 <Reference></Reference> 中描述的一致。
|
||||
- 使用 Markdown 语法优化回答格式。
|
||||
- 使用与问题相同的语言回答。
|
||||
- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
|
||||
- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
|
||||
- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。
|
||||
|
||||
问题:"""{{question}}"""`,
|
||||
['4.9.2']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
|
||||
|
||||
<Reference>
|
||||
{{quote}}
|
||||
</Reference>
|
||||
|
||||
思考流程:
|
||||
1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
|
||||
2. 如果有关,你按下面的要求回答。
|
||||
3. 如果无关,你直接拒绝回答本次问题。
|
||||
|
||||
回答要求:
|
||||
- 避免提及你是从 <Reference></Reference> 获取的知识。
|
||||
- 保持答案与 <Reference></Reference> 中描述的一致。
|
||||
- 使用 Markdown 语法优化回答格式。
|
||||
- 使用与问题相同的语言回答。
|
||||
|
||||
问题:"""{{question}}"""`
|
||||
}
|
||||
},
|
||||
{
|
||||
title: i18nT('app:template.hard_strict'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.2']: `忘记你已有的知识,仅使用 <QA></QA> 标记中的问答对进行回答。
|
||||
|
||||
<QA>
|
||||
{{quote}}
|
||||
</QA>
|
||||
|
||||
思考流程:
|
||||
1. 判断问题是否与 <QA></QA> 标记中的内容有关。
|
||||
2. 如果无关,你直接拒绝回答本次问题。
|
||||
3. 判断是否有相近或相同的问题。
|
||||
4. 如果有相同的问题,直接输出对应答案。
|
||||
5. 如果只有相近的问题,请把相近的问题和答案一起输出。
|
||||
|
||||
回答要求:
|
||||
- 如果没有相关的问答对,你需要澄清。
|
||||
- 回答的内容应尽可能与 <QA></QA> 标记中的内容一致。
|
||||
- 避免提及你是从 QA 获取的知识,只需要回复答案。
|
||||
- 使用 Markdown 语法优化回答格式。
|
||||
- 使用与问题相同的语言回答。
|
||||
|
||||
问题:"""{{question}}"""`
|
||||
}
|
||||
}
|
||||
];
|
||||
|
||||
export const Prompt_systemQuotePromptList: PromptTemplateItem[] = [
|
||||
{
|
||||
title: i18nT('app:template.standard_template'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.7']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
|
||||
|
||||
<Reference>
|
||||
{{quote}}
|
||||
</Reference>
|
||||
|
||||
回答要求:
|
||||
- 如果你不清楚答案,你需要澄清。
|
||||
- 避免提及你是从 <Reference></Reference> 获取的知识。
|
||||
- 保持答案与 <Reference></Reference> 中描述的一致。
|
||||
- 使用 Markdown 语法优化回答格式。
|
||||
- 使用与问题相同的语言回答。
|
||||
- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
|
||||
- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
|
||||
- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。`,
|
||||
['4.9.2']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
|
||||
|
||||
<Reference>
|
||||
{{quote}}
|
||||
</Reference>
|
||||
|
||||
回答要求:
|
||||
- 如果你不清楚答案,你需要澄清。
|
||||
- 避免提及你是从 <Reference></Reference> 获取的知识。
|
||||
- 保持答案与 <Reference></Reference> 中描述的一致。
|
||||
- 使用 Markdown 语法优化回答格式。
|
||||
- 使用与问题相同的语言回答。`
|
||||
}
|
||||
},
|
||||
{
|
||||
title: i18nT('app:template.qa_template'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.2']: `使用 <QA></QA> 标记中的问答对进行回答。
|
||||
|
||||
<QA>
|
||||
{{quote}}
|
||||
</QA>
|
||||
|
||||
回答要求:
|
||||
- 选择其中一个或多个问答对进行回答。
|
||||
- 回答的内容应尽可能与 <答案></答案> 中的内容一致。
|
||||
- 如果没有相关的问答对,你需要澄清。
|
||||
- 避免提及你是从 QA 获取的知识,只需要回复答案。`
|
||||
}
|
||||
},
|
||||
{
|
||||
title: i18nT('app:template.standard_strict'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.7']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
|
||||
|
||||
<Reference>
|
||||
{{quote}}
|
||||
</Reference>
|
||||
|
||||
思考流程:
|
||||
1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
|
||||
2. 如果有关,你按下面的要求回答。
|
||||
3. 如果无关,你直接拒绝回答本次问题。
|
||||
|
||||
回答要求:
|
||||
- 避免提及你是从 <Reference></Reference> 获取的知识。
|
||||
- 保持答案与 <Reference></Reference> 中描述的一致。
|
||||
- 使用 Markdown 语法优化回答格式。
|
||||
- 使用与问题相同的语言回答。
|
||||
- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
|
||||
- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
|
||||
- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。
|
||||
|
||||
问题:"""{{question}}"""`,
|
||||
['4.9.2']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
|
||||
|
||||
<Reference>
|
||||
{{quote}}
|
||||
</Reference>
|
||||
|
||||
思考流程:
|
||||
1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
|
||||
2. 如果有关,你按下面的要求回答。
|
||||
3. 如果无关,你直接拒绝回答本次问题。
|
||||
|
||||
回答要求:
|
||||
- 避免提及你是从 <Reference></Reference> 获取的知识。
|
||||
- 保持答案与 <Reference></Reference> 中描述的一致。
|
||||
- 使用 Markdown 语法优化回答格式。
|
||||
- 使用与问题相同的语言回答。`
|
||||
}
|
||||
},
|
||||
{
|
||||
title: i18nT('app:template.hard_strict'),
|
||||
desc: '',
|
||||
value: {
|
||||
['4.9.2']: `忘记你已有的知识,仅使用 <QA></QA> 标记中的问答对进行回答。
|
||||
|
||||
<QA>
|
||||
{{quote}}
|
||||
</QA>
|
||||
|
||||
思考流程:
|
||||
1. 判断问题是否与 <QA></QA> 标记中的内容有关。
|
||||
2. 如果无关,你直接拒绝回答本次问题。
|
||||
3. 判断是否有相近或相同的问题。
|
||||
4. 如果有相同的问题,直接输出对应答案。
|
||||
5. 如果只有相近的问题,请把相近的问题和答案一起输出。
|
||||
|
||||
回答要求:
|
||||
- 如果没有相关的问答对,你需要澄清。
|
||||
- 回答的内容应尽可能与 <QA></QA> 标记中的内容一致。
|
||||
- 避免提及你是从 QA 获取的知识,只需要回复答案。
|
||||
- 使用 Markdown 语法优化回答格式。
|
||||
- 使用与问题相同的语言回答。`
|
||||
}
|
||||
}
|
||||
];
|
||||
|
||||
export const getQuotePrompt = (version?: string, role: 'user' | 'system' = 'user') => {
|
||||
const quotePromptTemplates =
|
||||
role === 'user' ? Prompt_userQuotePromptList : Prompt_systemQuotePromptList;
|
||||
@@ -333,7 +314,7 @@ export const getQuotePrompt = (version?: string, role: 'user' | 'system' = 'user
|
||||
// Document quote prompt
|
||||
export const getDocumentQuotePrompt = (version?: string) => {
|
||||
const promptMap = {
|
||||
['4.9.2']: `将 <FilesContent></FilesContent> 中的内容作为本次对话的参考:
|
||||
['4.9.7']: `将 <FilesContent></FilesContent> 中的内容作为本次对话的参考:
|
||||
<FilesContent>
|
||||
{{quote}}
|
||||
</FilesContent>
|
||||
|
||||
@@ -1,14 +1,19 @@
|
||||
export const getDatasetSearchToolResponsePrompt = () => {
|
||||
return `## Role
|
||||
你是一个知识库回答助手,可以 "quotes" 中的内容作为本次对话的参考。为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记。
|
||||
你是一个知识库回答助手,可以 "cites" 中的内容作为本次对话的参考。为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
|
||||
|
||||
## Rules
|
||||
## 追溯展示规则
|
||||
|
||||
- 使用 **[id](CITE)** 格式来引用 "cites" 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
|
||||
- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
|
||||
- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
|
||||
- 不要把示例作为知识点。
|
||||
- 不要伪造 id,返回的 id 必须都存在 cites 中!
|
||||
|
||||
## 通用规则
|
||||
- 如果你不清楚答案,你需要澄清。
|
||||
- 避免提及你是从 "quotes" 获取的知识。
|
||||
- 保持答案与 "quotes" 中描述的一致。
|
||||
- 避免提及你是从 "cites" 获取的知识。
|
||||
- 保持答案与 "cites" 中描述的一致。
|
||||
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
|
||||
- 使用与问题相同的语言回答。
|
||||
- 使用 [id](CITE) 格式来引用 "quotes" 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
|
||||
- 在每段话结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
|
||||
- 每段话至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。`;
|
||||
- 使用与问题相同的语言回答。`;
|
||||
};
|
||||
|
||||
@@ -60,5 +60,3 @@ export enum AppTemplateTypeEnum {
   // special type
   contribute = 'contribute'
 }
-
-export const defaultDatasetMaxTokens = 16000;
@@ -5,7 +5,7 @@ import {
   FlowNodeTypeEnum
 } from '../../workflow/node/constant';
 import { nanoid } from 'nanoid';
-import { type ToolType } from '../type';
+import { type McpToolConfigType } from '../type';
 import { i18nT } from '../../../../web/i18n/utils';
 import { type RuntimeNodeItemType } from '../../workflow/runtime/type';
@@ -16,7 +16,7 @@ export const getMCPToolSetRuntimeNode = ({
   avatar
 }: {
   url: string;
-  toolList: ToolType[];
+  toolList: McpToolConfigType[];
   name?: string;
   avatar?: string;
 }): RuntimeNodeItemType => {
@@ -45,7 +45,7 @@ export const getMCPToolRuntimeNode = ({
   url,
   avatar = 'core/app/type/mcpToolsFill'
 }: {
-  tool: ToolType;
+  tool: McpToolConfigType;
   url: string;
   avatar?: string;
 }): RuntimeNodeItemType => {
@@ -65,7 +65,7 @@ export const getMCPToolRuntimeNode = ({
   ...Object.entries(tool.inputSchema?.properties || {}).map(([key, value]) => ({
     key,
     label: key,
-    valueType: value.type as WorkflowIOValueTypeEnum,
+    valueType: value.type as WorkflowIOValueTypeEnum, // TODO: 这里需要做一个映射
     description: value.description,
     toolDescription: value.description || key,
     required: tool.inputSchema?.required?.includes(key) || false,
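A usage sketch for the renamed `McpToolConfigType` shape and `getMCPToolRuntimeNode`; the tool definition, URL, and import path below are illustrative assumptions:

```typescript
import { getMCPToolRuntimeNode } from './utils'; // hypothetical import path

// Shaped like McpToolConfigType from the type definitions in this range.
const tool = {
  name: 'search_docs',            // illustrative tool name
  description: 'Search the docs', // illustrative description
  inputSchema: {
    type: 'object',
    properties: {
      query: { type: 'string', description: 'Search keywords' }
    },
    required: ['query']
  }
};

// url points at the MCP server exposing this tool (placeholder address).
const runtimeNode = getMCPToolRuntimeNode({ tool, url: 'http://localhost:3005' });
```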
20  packages/global/core/app/type.d.ts  (vendored)

@@ -16,16 +16,6 @@ import { FlowNodeInputTypeEnum } from '../../core/workflow/node/constant';
 import type { WorkflowTemplateBasicType } from '@fastgpt/global/core/workflow/type';
 import type { SourceMemberType } from '../../support/user/type';

-export type ToolType = {
-  name: string;
-  description: string;
-  inputSchema: {
-    type: string;
-    properties?: Record<string, { type: string; description?: string }>;
-    required?: string[];
-  };
-};
-
 export type AppSchema = {
   _id: string;
   parentId?: ParentIdType;
@@ -117,6 +107,16 @@ export type AppSimpleEditFormType = {
   chatConfig: AppChatConfigType;
 };

+export type McpToolConfigType = {
+  name: string;
+  description: string;
+  inputSchema: {
+    type: string;
+    properties?: Record<string, { type: string; description?: string }>;
+    required?: string[];
+  };
+};
+
 /* app chat config type */
 export type AppChatConfigType = {
   welcomeText?: string;
@@ -9,6 +9,9 @@ import { type WorkflowTemplateBasicType } from '../workflow/type';
 import { AppTypeEnum } from './constants';
 import { AppErrEnum } from '../../common/error/code/app';
 import { PluginErrEnum } from '../../common/error/code/plugin';
+import { i18nT } from '../../../web/i18n/utils';
+import appErrList from '../../common/error/code/app';
+import pluginErrList from '../../common/error/code/plugin';

 export const getDefaultAppForm = (): AppSimpleEditFormType => {
   return {
@@ -189,17 +192,10 @@ export const getAppType = (config?: WorkflowTemplateBasicType | AppSimpleEditFor
   return '';
 };

-export const checkAppUnExistError = (error?: string) => {
-  const unExistError: Array<string> = [
-    AppErrEnum.unAuthApp,
-    AppErrEnum.unExist,
-    PluginErrEnum.unAuth,
-    PluginErrEnum.unExist
-  ];
-
-  if (!!error && unExistError.includes(error)) {
-    return error;
-  } else {
-    return undefined;
-  }
+export const formatToolError = (error?: any) => {
+  if (!error || typeof error !== 'string') return;
+
+  const errorText = appErrList[error]?.message || pluginErrList[error]?.message;
+
+  return errorText || error;
 };
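A brief usage sketch of the new `formatToolError` helper, following only the branches shown above:

```typescript
// Behaviour follows the implementation in the hunk above.
formatToolError();                       // undefined (no error)
formatToolError({ code: 500 });          // undefined (not a string)
formatToolError(AppErrEnum.unExist);     // i18n message resolved from appErrList
formatToolError('some raw error text');  // 'some raw error text' (no mapping found)
```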
3  packages/global/core/chat/type.d.ts  (vendored)

@@ -26,6 +26,7 @@ export type ChatSchema = {
   teamId: string;
   tmbId: string;
   appId: string;
+  createTime: Date;
   updateTime: Date;
   title: string;
   customTitle: string;
@@ -112,6 +113,7 @@ export type ChatItemSchema = (UserChatItemType | SystemChatItemType | AIChatItem
   appId: string;
   time: Date;
   durationSeconds?: number;
+  errorMsg?: string;
 };

 export type AdminFbkType = {
@@ -143,6 +145,7 @@ export type ChatSiteItemType = (UserChatItemType | SystemChatItemType | AIChatIt
   responseData?: ChatHistoryItemResType[];
   time?: Date;
   durationSeconds?: number;
+  errorMsg?: string;
 } & ChatBoxInputType &
   ResponseTagItemType;
31
packages/global/core/dataset/api.d.ts
vendored
31
packages/global/core/dataset/api.d.ts
vendored
@@ -1,9 +1,11 @@
|
||||
import type { DatasetDataIndexItemType, DatasetSchemaType } from './type';
|
||||
import type { ChunkSettingsType, DatasetDataIndexItemType, DatasetSchemaType } from './type';
|
||||
import type {
|
||||
DatasetCollectionTypeEnum,
|
||||
DatasetCollectionDataProcessModeEnum,
|
||||
ChunkSettingModeEnum,
|
||||
DataChunkSplitModeEnum
|
||||
DataChunkSplitModeEnum,
|
||||
ChunkTriggerConfigTypeEnum,
|
||||
ParagraphChunkAIModeEnum
|
||||
} from './constants';
|
||||
import type { LLMModelItemType } from '../ai/model.d';
|
||||
import type { ParentIdType } from 'common/parentFolder/type';
|
||||
@@ -32,26 +34,16 @@ export type DatasetUpdateBody = {
|
||||
};
|
||||
|
||||
/* ================= collection ===================== */
|
||||
export type DatasetCollectionChunkMetadataType = {
|
||||
// Input + store params
|
||||
type DatasetCollectionStoreDataType = ChunkSettingsType & {
|
||||
parentId?: string;
|
||||
customPdfParse?: boolean;
|
||||
trainingType?: DatasetCollectionDataProcessModeEnum;
|
||||
imageIndex?: boolean;
|
||||
autoIndexes?: boolean;
|
||||
|
||||
chunkSettingMode?: ChunkSettingModeEnum;
|
||||
chunkSplitMode?: DataChunkSplitModeEnum;
|
||||
|
||||
chunkSize?: number;
|
||||
indexSize?: number;
|
||||
|
||||
chunkSplitter?: string;
|
||||
qaPrompt?: string;
|
||||
metadata?: Record<string, any>;
|
||||
|
||||
customPdfParse?: boolean;
|
||||
};
|
||||
|
||||
// create collection params
|
||||
export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
|
||||
export type CreateDatasetCollectionParams = DatasetCollectionStoreDataType & {
|
||||
datasetId: string;
|
||||
name: string;
|
||||
type: DatasetCollectionTypeEnum;
|
||||
@@ -72,7 +64,7 @@ export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType &
|
||||
nextSyncTime?: Date;
|
||||
};
|
||||
|
||||
export type ApiCreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
|
||||
export type ApiCreateDatasetCollectionParams = DatasetCollectionStoreDataType & {
|
||||
datasetId: string;
|
||||
tags?: string[];
|
||||
};
|
||||
@@ -90,7 +82,7 @@ export type ApiDatasetCreateDatasetCollectionParams = ApiCreateDatasetCollection
|
||||
export type FileIdCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
|
||||
fileId: string;
|
||||
};
|
||||
export type reTrainingDatasetFileCollectionParams = DatasetCollectionChunkMetadataType & {
|
||||
export type reTrainingDatasetFileCollectionParams = DatasetCollectionStoreDataType & {
|
||||
datasetId: string;
|
||||
collectionId: string;
|
||||
};
|
||||
@@ -147,6 +139,7 @@ export type PushDatasetDataProps = {
|
||||
collectionId: string;
|
||||
data: PushDatasetDataChunkProps[];
|
||||
trainingType?: DatasetCollectionDataProcessModeEnum;
|
||||
indexSize?: number;
|
||||
autoIndexes?: boolean;
|
||||
imageIndex?: boolean;
|
||||
prompt?: string;
|
||||
|
||||
@@ -120,6 +120,8 @@ export const DatasetCollectionSyncResultMap = {
|
||||
export enum DatasetCollectionDataProcessModeEnum {
|
||||
chunk = 'chunk',
|
||||
qa = 'qa',
|
||||
backup = 'backup',
|
||||
|
||||
auto = 'auto' // abandon
|
||||
}
|
||||
export const DatasetCollectionDataProcessModeMap = {
|
||||
@@ -131,21 +133,35 @@ export const DatasetCollectionDataProcessModeMap = {
|
||||
label: i18nT('common:core.dataset.training.QA mode'),
|
||||
tooltip: i18nT('common:core.dataset.import.QA Import Tip')
|
||||
},
|
||||
[DatasetCollectionDataProcessModeEnum.backup]: {
|
||||
label: i18nT('dataset:backup_mode'),
|
||||
tooltip: i18nT('dataset:backup_mode')
|
||||
},
|
||||
[DatasetCollectionDataProcessModeEnum.auto]: {
|
||||
label: i18nT('common:core.dataset.training.Auto mode'),
|
||||
tooltip: i18nT('common:core.dataset.training.Auto mode Tip')
|
||||
}
|
||||
};
|
||||
|
||||
export enum ChunkTriggerConfigTypeEnum {
|
||||
minSize = 'minSize',
|
||||
forceChunk = 'forceChunk',
|
||||
maxSize = 'maxSize'
|
||||
}
|
||||
export enum ChunkSettingModeEnum {
|
||||
auto = 'auto',
|
||||
custom = 'custom'
|
||||
}
|
||||
|
||||
export enum DataChunkSplitModeEnum {
|
||||
paragraph = 'paragraph',
|
||||
size = 'size',
|
||||
char = 'char'
|
||||
}
|
||||
export enum ParagraphChunkAIModeEnum {
|
||||
auto = 'auto',
|
||||
force = 'force'
|
||||
}
|
||||
|
||||
/* ------------ data -------------- */
|
||||
|
||||
@@ -154,7 +170,6 @@ export enum ImportDataSourceEnum {
|
||||
fileLocal = 'fileLocal',
|
||||
fileLink = 'fileLink',
|
||||
fileCustom = 'fileCustom',
|
||||
csvTable = 'csvTable',
|
||||
externalFile = 'externalFile',
|
||||
apiDataset = 'apiDataset',
|
||||
reTraining = 'reTraining'
|
||||
|
||||
@@ -32,7 +32,7 @@ export const DatasetDataIndexMap: Record<
     color: 'red'
   },
   [DatasetDataIndexTypeEnum.image]: {
-    label: i18nT('common:data_index_image'),
+    label: i18nT('dataset:data_index_image'),
     color: 'purple'
   }
 };
@@ -118,9 +118,8 @@ export const computeChunkSize = (params: {
     return getLLMMaxChunkSize(params.llmModel);
   }

-  return Math.min(params.chunkSize || chunkAutoChunkSize, getLLMMaxChunkSize(params.llmModel));
+  return Math.min(params.chunkSize ?? chunkAutoChunkSize, getLLMMaxChunkSize(params.llmModel));
 };

 export const computeChunkSplitter = (params: {
   chunkSettingMode?: ChunkSettingModeEnum;
   chunkSplitMode?: DataChunkSplitModeEnum;
@@ -129,8 +128,21 @@ export const computeChunkSplitter = (params: {
   if (params.chunkSettingMode === ChunkSettingModeEnum.auto) {
     return undefined;
   }
-  if (params.chunkSplitMode === DataChunkSplitModeEnum.size) {
+  if (params.chunkSplitMode !== DataChunkSplitModeEnum.char) {
     return undefined;
   }
   return params.chunkSplitter;
 };
+export const computeParagraphChunkDeep = (params: {
+  chunkSettingMode?: ChunkSettingModeEnum;
+  chunkSplitMode?: DataChunkSplitModeEnum;
+  paragraphChunkDeep?: number;
+}) => {
+  if (params.chunkSettingMode === ChunkSettingModeEnum.auto) {
+    return 5;
+  }
+  if (params.chunkSplitMode === DataChunkSplitModeEnum.paragraph) {
+    return params.paragraphChunkDeep;
+  }
+  return 0;
+};
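A sketch of how the two helpers above behave for common setting combinations, assuming the enums are imported from the same constants module; the results follow only the branches shown in the diff:

```typescript
// Auto mode: no custom splitter; paragraph depth defaults to 5.
computeChunkSplitter({ chunkSettingMode: ChunkSettingModeEnum.auto });      // undefined
computeParagraphChunkDeep({ chunkSettingMode: ChunkSettingModeEnum.auto }); // 5

// Custom + char split: the user-provided splitter string is returned as-is.
computeChunkSplitter({
  chunkSettingMode: ChunkSettingModeEnum.custom,
  chunkSplitMode: DataChunkSplitModeEnum.char,
  chunkSplitter: '\\n\\n'
}); // '\\n\\n'

// Custom + paragraph split: the configured depth is used; any other mode yields 0.
computeParagraphChunkDeep({
  chunkSettingMode: ChunkSettingModeEnum.custom,
  chunkSplitMode: DataChunkSplitModeEnum.paragraph,
  paragraphChunkDeep: 4
}); // 4
```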
48
packages/global/core/dataset/type.d.ts
vendored
48
packages/global/core/dataset/type.d.ts
vendored
@@ -8,26 +8,42 @@ import type {
|
||||
DatasetStatusEnum,
|
||||
DatasetTypeEnum,
|
||||
SearchScoreTypeEnum,
|
||||
TrainingModeEnum
|
||||
TrainingModeEnum,
|
||||
ChunkSettingModeEnum,
|
||||
ChunkTriggerConfigTypeEnum
|
||||
} from './constants';
|
||||
import type { DatasetPermission } from '../../support/permission/dataset/controller';
|
||||
import { Permission } from '../../support/permission/controller';
|
||||
import type { APIFileServer, FeishuServer, YuqueServer } from './apiDataset';
|
||||
import type { SourceMemberType } from 'support/user/type';
|
||||
import type { DatasetDataIndexTypeEnum } from './data/constants';
|
||||
import type { ChunkSettingModeEnum } from './constants';
|
||||
|
||||
export type ChunkSettingsType = {
|
||||
trainingType: DatasetCollectionDataProcessModeEnum;
|
||||
autoIndexes?: boolean;
|
||||
trainingType?: DatasetCollectionDataProcessModeEnum;
|
||||
|
||||
// Chunk trigger
|
||||
chunkTriggerType?: ChunkTriggerConfigTypeEnum;
|
||||
chunkTriggerMinSize?: number; // maxSize from agent model, not store
|
||||
|
||||
// Data enhance
|
||||
dataEnhanceCollectionName?: boolean; // Auto add collection name to data
|
||||
|
||||
// Index enhance
|
||||
imageIndex?: boolean;
|
||||
autoIndexes?: boolean;
|
||||
|
||||
chunkSettingMode?: ChunkSettingModeEnum;
|
||||
// Chunk setting
|
||||
chunkSettingMode?: ChunkSettingModeEnum; // 系统参数/自定义参数
|
||||
chunkSplitMode?: DataChunkSplitModeEnum;
|
||||
|
||||
chunkSize?: number;
|
||||
// Paragraph split
|
||||
paragraphChunkAIMode?: ParagraphChunkAIModeEnum;
|
||||
paragraphChunkDeep?: number; // Paragraph deep
|
||||
paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
|
||||
// Size split
|
||||
chunkSize?: number; // chunk/qa chunk size, Paragraph max chunk size.
|
||||
// Char split
|
||||
chunkSplitter?: string; // chunk/qa chunk splitter
|
||||
indexSize?: number;
|
||||
chunkSplitter?: string;
|
||||
|
||||
qaPrompt?: string;
|
||||
};
|
||||
|
||||
@@ -66,7 +82,7 @@ export type DatasetSchemaType = {
|
||||
defaultPermission?: number;
|
||||
};
|
||||
|
||||
export type DatasetCollectionSchemaType = {
|
||||
export type DatasetCollectionSchemaType = ChunkSettingsType & {
|
||||
_id: string;
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
@@ -101,18 +117,7 @@ export type DatasetCollectionSchemaType = {
|
||||
|
||||
// Parse settings
|
||||
customPdfParse?: boolean;
|
||||
// Chunk settings
|
||||
autoIndexes?: boolean;
|
||||
imageIndex?: boolean;
|
||||
trainingType: DatasetCollectionDataProcessModeEnum;
|
||||
|
||||
chunkSettingMode?: ChunkSettingModeEnum;
|
||||
chunkSplitMode?: DataChunkSplitModeEnum;
|
||||
|
||||
chunkSize?: number;
|
||||
indexSize?: number;
|
||||
chunkSplitter?: string;
|
||||
qaPrompt?: string;
|
||||
};
|
||||
|
||||
export type DatasetCollectionTagsSchemaType = {
|
||||
@@ -175,6 +180,7 @@ export type DatasetTrainingSchemaType = {
|
||||
q: string;
|
||||
a: string;
|
||||
chunkIndex: number;
|
||||
indexSize?: number;
|
||||
weight: number;
|
||||
indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
|
||||
retryCount: number;
|
||||
|
||||
@@ -40,5 +40,6 @@ export function getSourceNameIcon({
|
||||
export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {
|
||||
if (mode === TrainingModeEnum.qa) return data.length * 20;
|
||||
if (mode === TrainingModeEnum.auto) return data.length * 5;
|
||||
if (mode === TrainingModeEnum.image) return data.length * 2;
|
||||
return data.length;
|
||||
};
|
||||
|
||||
@@ -7,7 +7,7 @@ import type {
|
||||
} from '../../chat/type';
|
||||
import { NodeOutputItemType } from '../../chat/type';
|
||||
import type { FlowNodeInputItemType, FlowNodeOutputItemType } from '../type/io.d';
|
||||
import type { StoreNodeItemType } from '../type/node';
|
||||
import type { NodeToolConfigType, StoreNodeItemType } from '../type/node';
|
||||
import type { DispatchNodeResponseKeyEnum } from './constants';
|
||||
import type { StoreEdgeItemType } from '../type/edge';
|
||||
import type { NodeInputKeyEnum } from '../constants';
|
||||
@@ -102,6 +102,9 @@ export type RuntimeNodeItemType = {
|
||||
|
||||
pluginId?: string; // workflow id / plugin id
|
||||
version?: string;
|
||||
|
||||
// tool
|
||||
toolConfig?: NodeToolConfigType;
|
||||
};
|
||||
|
||||
export type RuntimeEdgeItemType = StoreEdgeItemType & {
|
||||
@@ -114,7 +117,7 @@ export type DispatchNodeResponseType = {
|
||||
runningTime?: number;
|
||||
query?: string;
|
||||
textOutput?: string;
|
||||
error?: Record<string, any>;
|
||||
error?: Record<string, any> | string;
|
||||
customInputs?: Record<string, any>;
|
||||
customOutputs?: Record<string, any>;
|
||||
nodeInputs?: Record<string, any>;
|
||||
|
||||
16
packages/global/core/workflow/type/node.d.ts
vendored
16
packages/global/core/workflow/type/node.d.ts
vendored
@@ -20,11 +20,17 @@ import { RuntimeNodeItemType } from '../runtime/type';
|
||||
import { PluginTypeEnum } from '../../plugin/constants';
|
||||
import { RuntimeEdgeItemType, StoreEdgeItemType } from './edge';
|
||||
import { NextApiResponse } from 'next';
|
||||
import { AppDetailType, AppSchema } from '../../app/type';
|
||||
import type { AppDetailType, AppSchema, McpToolConfigType } from '../../app/type';
|
||||
import type { ParentIdType } from 'common/parentFolder/type';
|
||||
import { AppTypeEnum } from 'core/app/constants';
|
||||
import { AppTypeEnum } from '../../app/constants';
|
||||
import type { WorkflowInteractiveResponseType } from '../template/system/interactive/type';
|
||||
|
||||
export type NodeToolConfigType = {
|
||||
mcpTool?: McpToolConfigType & {
|
||||
url: string;
|
||||
};
|
||||
};
|
||||
|
||||
export type FlowNodeCommonType = {
|
||||
parentNodeId?: string;
|
||||
flowNodeType: FlowNodeTypeEnum; // render node card
|
||||
@@ -46,12 +52,13 @@ export type FlowNodeCommonType = {
|
||||
// plugin data
|
||||
pluginId?: string;
|
||||
isFolder?: boolean;
|
||||
// pluginType?: AppTypeEnum;
|
||||
pluginData?: PluginDataType;
|
||||
|
||||
// tool data
|
||||
toolData?: NodeToolConfigType;
|
||||
};
|
||||
|
||||
export type PluginDataType = {
|
||||
version?: string;
|
||||
diagram?: string;
|
||||
userGuide?: string;
|
||||
courseUrl?: string;
|
||||
@@ -118,6 +125,7 @@ export type FlowNodeItemType = FlowNodeTemplateType & {
|
||||
nodeId: string;
|
||||
parentNodeId?: string;
|
||||
isError?: boolean;
|
||||
searchedText?: string;
|
||||
debugResult?: {
|
||||
status: 'running' | 'success' | 'skipped' | 'failed';
|
||||
message?: string;
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "4816",
|
||||
"name": "钉钉 webhook",
|
||||
"avatar": "plugins/dingding",
|
||||
"intro": "向钉钉机器人发起 webhook 请求。",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "Menghuan1918",
|
||||
"version": "488",
|
||||
"name": "PDF识别",
|
||||
"avatar": "plugins/doc2x",
|
||||
"intro": "将PDF文件发送至Doc2X进行解析,返回结构化的LaTeX公式的文本(markdown),支持传入String类型的URL或者流程输出中的文件链接变量",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "Menghuan1918",
|
||||
"version": "488",
|
||||
"name": "Doc2X服务",
|
||||
"avatar": "plugins/doc2x",
|
||||
"intro": "将传入的图片或PDF文件发送至Doc2X进行解析,返回带LaTeX公式的markdown格式的文本。",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "4816",
|
||||
"name": "企业微信 webhook",
|
||||
"avatar": "plugins/qiwei",
|
||||
"intro": "向企业微信机器人发起 webhook 请求。只能内部群使用。",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "4811",
|
||||
"name": "Bing搜索",
|
||||
"avatar": "core/workflow/template/bing",
|
||||
"intro": "在Bing中搜索。",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "silencezhang",
|
||||
"version": "4811",
|
||||
"name": "数据库连接",
|
||||
"avatar": "core/workflow/template/datasource",
|
||||
"intro": "可连接常用数据库,并执行sql",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "collin",
|
||||
"version": "4817",
|
||||
"name": "流程等待",
|
||||
"avatar": "core/workflow/template/sleep",
|
||||
"intro": "让工作流等待指定时间后运行",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "silencezhang",
|
||||
"version": "4817",
|
||||
"name": "基础图表",
|
||||
"avatar": "core/workflow/template/baseChart",
|
||||
"intro": "根据数据生成图表,可根据chartType生成柱状图,折线图,饼图",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "silencezhang",
|
||||
"version": "486",
|
||||
"name": "BI图表功能",
|
||||
"avatar": "core/workflow/template/BI",
|
||||
"intro": "BI图表功能,可以生成一些常用的图表,如饼图,柱状图,折线图等",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "486",
|
||||
"name": "DuckDuckGo 网络搜索",
|
||||
"avatar": "core/workflow/template/duckduckgo",
|
||||
"intro": "使用 DuckDuckGo 进行网络搜索",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "486",
|
||||
"name": "DuckDuckGo 图片搜索",
|
||||
"avatar": "core/workflow/template/duckduckgo",
|
||||
"intro": "使用 DuckDuckGo 进行图片搜索",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "486",
|
||||
"name": "DuckDuckGo 新闻检索",
|
||||
"avatar": "core/workflow/template/duckduckgo",
|
||||
"intro": "使用 DuckDuckGo 进行新闻检索",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "486",
|
||||
"name": "DuckDuckGo 视频搜索",
|
||||
"avatar": "core/workflow/template/duckduckgo",
|
||||
"intro": "使用 DuckDuckGo 进行视频搜索",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "486",
|
||||
"name": "DuckDuckGo服务",
|
||||
"avatar": "core/workflow/template/duckduckgo",
|
||||
"intro": "DuckDuckGo 服务,包含网络搜索、图片搜索、新闻搜索等。",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "488",
|
||||
"name": "飞书 webhook",
|
||||
"avatar": "core/app/templates/plugin-feishu",
|
||||
"intro": "向飞书机器人发起 webhook 请求。",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "486",
|
||||
"name": "网页内容抓取",
|
||||
"avatar": "core/workflow/template/fetchUrl",
|
||||
"intro": "可获取一个网页链接内容,并以 Markdown 格式输出,仅支持获取静态网站。",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "481",
|
||||
"templateType": "tools",
|
||||
"name": "获取当前时间",
|
||||
"avatar": "core/workflow/template/getTime",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "4811",
|
||||
"name": "Google搜索",
|
||||
"avatar": "core/workflow/template/google",
|
||||
"intro": "在google中搜索。",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "486",
|
||||
"name": "数学公式执行",
|
||||
"avatar": "core/workflow/template/mathCall",
|
||||
"intro": "用于执行数学表达式的工具,通过 js 的 expr-eval 库运行表达式并返回结果。",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "4816",
|
||||
"name": "Search XNG 搜索",
|
||||
"avatar": "core/workflow/template/searxng",
|
||||
"intro": "使用 Search XNG 服务进行搜索。",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "cloudpense",
|
||||
"version": "1.0.0",
|
||||
"name": "Email 邮件发送",
|
||||
"avatar": "plugins/email",
|
||||
"intro": "通过SMTP协议发送电子邮件(nodemailer)",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "489",
|
||||
"name": "文本加工",
|
||||
"avatar": "/imgs/workflow/textEditor.svg",
|
||||
"intro": "可对固定或传入的文本进行加工后输出,非字符串类型数据最终会转成字符串类型。",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"author": "",
|
||||
"version": "4811",
|
||||
"name": "Wiki搜索",
|
||||
"avatar": "core/workflow/template/wiki",
|
||||
"intro": "在Wiki中查询释义。",
|
||||
|
||||
18
packages/service/common/api/type.d.ts
vendored
@@ -6,12 +6,6 @@ import type {
|
||||
} from '../../core/dataset/search/controller';
|
||||
import type { AuthOpenApiLimitProps } from '../../support/openapi/auth';
|
||||
import type { CreateUsageProps, ConcatUsageProps } from '@fastgpt/global/support/wallet/usage/api';
|
||||
import type {
|
||||
GetProApiDatasetFileContentParams,
|
||||
GetProApiDatasetFileDetailParams,
|
||||
GetProApiDatasetFileListParams,
|
||||
GetProApiDatasetFilePreviewUrlParams
|
||||
} from '../../core/dataset/apiDataset/proApi';
|
||||
|
||||
declare global {
|
||||
var textCensorHandler: (params: { text: string }) => Promise<{ code: number; message?: string }>;
|
||||
@@ -19,16 +13,4 @@ declare global {
|
||||
var authOpenApiHandler: (data: AuthOpenApiLimitProps) => Promise<any>;
|
||||
var createUsageHandler: (data: CreateUsageProps) => any;
|
||||
var concatUsageHandler: (data: ConcatUsageProps) => any;
|
||||
|
||||
// API dataset
|
||||
var getProApiDatasetFileList: (data: GetProApiDatasetFileListParams) => Promise<APIFileItem[]>;
|
||||
var getProApiDatasetFileContent: (
|
||||
data: GetProApiDatasetFileContentParams
|
||||
) => Promise<ApiFileReadContentResponse>;
|
||||
var getProApiDatasetFilePreviewUrl: (
|
||||
data: GetProApiDatasetFilePreviewUrlParams
|
||||
) => Promise<string>;
|
||||
var getProApiDatasetFileDetail: (
|
||||
data: GetProApiDatasetFileDetailParams
|
||||
) => Promise<ApiDatasetDetailResponse>;
|
||||
}
|
||||
|
||||
178
packages/service/common/buffer/rawText/controller.ts
Normal file
@@ -0,0 +1,178 @@
|
||||
import { retryFn } from '@fastgpt/global/common/system/utils';
|
||||
import { connectionMongo } from '../../mongo';
|
||||
import { MongoRawTextBufferSchema, bucketName } from './schema';
|
||||
import { addLog } from '../../system/log';
|
||||
import { setCron } from '../../system/cron';
|
||||
import { checkTimerLock } from '../../system/timerLock/utils';
|
||||
import { TimerIdEnum } from '../../system/timerLock/constants';
|
||||
|
||||
const getGridBucket = () => {
|
||||
return new connectionMongo.mongo.GridFSBucket(connectionMongo.connection.db!, {
|
||||
bucketName: bucketName
|
||||
});
|
||||
};
|
||||
|
||||
export const addRawTextBuffer = async ({
|
||||
sourceId,
|
||||
sourceName,
|
||||
text,
|
||||
expiredTime
|
||||
}: {
|
||||
sourceId: string;
|
||||
sourceName: string;
|
||||
text: string;
|
||||
expiredTime: Date;
|
||||
}) => {
|
||||
const gridBucket = getGridBucket();
|
||||
const metadata = {
|
||||
sourceId,
|
||||
sourceName,
|
||||
expiredTime
|
||||
};
|
||||
|
||||
const buffer = Buffer.from(text);
|
||||
|
||||
  const fileSize = buffer.length;
  // Chunk size: as large as possible, but no more than 14MB and no less than 128KB
  const chunkSizeBytes = (() => {
    // Ideal chunk size: file size / target chunk count (10), and each chunk must stay under 14MB
    const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);

    // Ensure the chunk size is at least 128KB
    const minChunkSize = 128 * 1024; // 128KB

    // Take the larger of the ideal and minimum chunk sizes
    let chunkSize = Math.max(idealChunkSize, minChunkSize);

    // Round up to the nearest multiple of 64KB to keep chunk boundaries tidy
    chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);

    return chunkSize;
  })();
|
||||
const uploadStream = gridBucket.openUploadStream(sourceId, {
|
||||
metadata,
|
||||
chunkSizeBytes
|
||||
});
|
||||
|
||||
return retryFn(async () => {
|
||||
return new Promise((resolve, reject) => {
|
||||
uploadStream.end(buffer);
|
||||
uploadStream.on('finish', () => {
|
||||
resolve(uploadStream.id);
|
||||
});
|
||||
uploadStream.on('error', (error) => {
|
||||
addLog.error('addRawTextBuffer error', error);
|
||||
resolve('');
|
||||
});
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
export const getRawTextBuffer = async (sourceId: string) => {
|
||||
const gridBucket = getGridBucket();
|
||||
|
||||
return retryFn(async () => {
|
||||
const bufferData = await MongoRawTextBufferSchema.findOne(
|
||||
{
|
||||
'metadata.sourceId': sourceId
|
||||
},
|
||||
'_id metadata'
|
||||
).lean();
|
||||
if (!bufferData) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Read file content
|
||||
const downloadStream = gridBucket.openDownloadStream(bufferData._id);
|
||||
const chunks: Buffer[] = [];
|
||||
|
||||
return new Promise<{
|
||||
text: string;
|
||||
sourceName: string;
|
||||
} | null>((resolve, reject) => {
|
||||
downloadStream.on('data', (chunk) => {
|
||||
chunks.push(chunk);
|
||||
});
|
||||
|
||||
downloadStream.on('end', () => {
|
||||
const buffer = Buffer.concat(chunks);
|
||||
const text = buffer.toString('utf8');
|
||||
resolve({
|
||||
text,
|
||||
sourceName: bufferData.metadata?.sourceName || ''
|
||||
});
|
||||
});
|
||||
|
||||
downloadStream.on('error', (error) => {
|
||||
addLog.error('getRawTextBuffer error', error);
|
||||
resolve(null);
|
||||
});
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
export const deleteRawTextBuffer = async (sourceId: string): Promise<boolean> => {
|
||||
const gridBucket = getGridBucket();
|
||||
|
||||
return retryFn(async () => {
|
||||
const buffer = await MongoRawTextBufferSchema.findOne({ 'metadata.sourceId': sourceId });
|
||||
if (!buffer) {
|
||||
return false;
|
||||
}
|
||||
|
||||
await gridBucket.delete(buffer._id);
|
||||
return true;
|
||||
});
|
||||
};
|
||||
|
||||
export const updateRawTextBufferExpiredTime = async ({
|
||||
sourceId,
|
||||
expiredTime
|
||||
}: {
|
||||
sourceId: string;
|
||||
expiredTime: Date;
|
||||
}) => {
|
||||
return retryFn(async () => {
|
||||
return MongoRawTextBufferSchema.updateOne(
|
||||
{ 'metadata.sourceId': sourceId },
|
||||
{ $set: { 'metadata.expiredTime': expiredTime } }
|
||||
);
|
||||
});
|
||||
};
|
||||
|
||||
export const clearExpiredRawTextBufferCron = async () => {
|
||||
const clearExpiredRawTextBuffer = async () => {
|
||||
addLog.debug('Clear expired raw text buffer start');
|
||||
const gridBucket = getGridBucket();
|
||||
|
||||
return retryFn(async () => {
|
||||
const data = await MongoRawTextBufferSchema.find(
|
||||
{
|
||||
'metadata.expiredTime': { $lt: new Date() }
|
||||
},
|
||||
'_id'
|
||||
).lean();
|
||||
|
||||
for (const item of data) {
|
||||
await gridBucket.delete(item._id);
|
||||
}
|
||||
addLog.debug('Clear expired raw text buffer end');
|
||||
});
|
||||
};
|
||||
|
||||
setCron('*/10 * * * *', async () => {
|
||||
if (
|
||||
await checkTimerLock({
|
||||
timerId: TimerIdEnum.clearExpiredRawTextBuffer,
|
||||
lockMinuted: 9
|
||||
})
|
||||
) {
|
||||
try {
|
||||
await clearExpiredRawTextBuffer();
|
||||
} catch (error) {
|
||||
addLog.error('clearExpiredRawTextBufferCron error', error);
|
||||
}
|
||||
}
|
||||
});
|
||||
};
|
||||
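The chunk-size heuristic used by `addRawTextBuffer` above (and repeated in `createFileFromText` in the GridFS utils further down) targets roughly ten chunks per file while clamping each chunk between 128KB and 14MB, presumably to keep every GridFS chunk document comfortably under MongoDB's 16MB BSON limit. A standalone re-derivation of the same math, with hand-computed results:

```ts
// Re-implementation of the heuristic for intuition only; not exported anywhere.
const chunkSizeFor = (fileSize: number) => {
  const ideal = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024); // aim for ~10 chunks, cap at 14MB
  const chunk = Math.max(ideal, 128 * 1024); // never below 128KB
  return Math.ceil(chunk / (64 * 1024)) * (64 * 1024); // round up to a 64KB multiple
};

chunkSizeFor(1 * 1024 * 1024);   // 131072   -> 128KB: the floor wins for small buffers
chunkSizeFor(50 * 1024 * 1024);  // 5242880  -> 5MB:   fileSize / 10
chunkSizeFor(200 * 1024 * 1024); // 14680064 -> 14MB:  capped at the ceiling
```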
@@ -1,33 +1,22 @@
|
||||
import { getMongoModel, Schema } from '../../mongo';
|
||||
import { type RawTextBufferSchemaType } from './type';
|
||||
import { getMongoModel, type Types, Schema } from '../../mongo';
|
||||
|
||||
export const collectionName = 'buffer_rawtexts';
|
||||
export const bucketName = 'buffer_rawtext';
|
||||
|
||||
const RawTextBufferSchema = new Schema({
|
||||
sourceId: {
|
||||
type: String,
|
||||
required: true
|
||||
},
|
||||
rawText: {
|
||||
type: String,
|
||||
default: ''
|
||||
},
|
||||
createTime: {
|
||||
type: Date,
|
||||
default: () => new Date()
|
||||
},
|
||||
metadata: Object
|
||||
metadata: {
|
||||
sourceId: { type: String, required: true },
|
||||
sourceName: { type: String, required: true },
|
||||
expiredTime: { type: Date, required: true }
|
||||
}
|
||||
});
|
||||
RawTextBufferSchema.index({ 'metadata.sourceId': 'hashed' });
|
||||
RawTextBufferSchema.index({ 'metadata.expiredTime': -1 });
|
||||
|
||||
try {
|
||||
RawTextBufferSchema.index({ sourceId: 1 });
|
||||
// 20 minutes
|
||||
RawTextBufferSchema.index({ createTime: 1 }, { expireAfterSeconds: 20 * 60 });
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
|
||||
export const MongoRawTextBuffer = getMongoModel<RawTextBufferSchemaType>(
|
||||
collectionName,
|
||||
RawTextBufferSchema
|
||||
);
|
||||
export const MongoRawTextBufferSchema = getMongoModel<{
|
||||
_id: Types.ObjectId;
|
||||
metadata: {
|
||||
sourceId: string;
|
||||
sourceName: string;
|
||||
expiredTime: Date;
|
||||
};
|
||||
}>(`${bucketName}.files`, RawTextBufferSchema);
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
export type RawTextBufferSchemaType = {
|
||||
sourceId: string;
|
||||
rawText: string;
|
||||
createTime: Date;
|
||||
metadata?: {
|
||||
filename: string;
|
||||
};
|
||||
};
|
||||
@@ -6,13 +6,13 @@ import { type DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
|
||||
import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema';
|
||||
import { detectFileEncoding, detectFileEncodingByPath } from '@fastgpt/global/common/file/tools';
|
||||
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
|
||||
import { MongoRawTextBuffer } from '../../buffer/rawText/schema';
|
||||
import { readRawContentByFileBuffer } from '../read/utils';
|
||||
import { gridFsStream2Buffer, stream2Encoding } from './utils';
|
||||
import { addLog } from '../../system/log';
|
||||
import { readFromSecondary } from '../../mongo/utils';
|
||||
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
|
||||
import { Readable } from 'stream';
|
||||
import { addRawTextBuffer, getRawTextBuffer } from '../../buffer/rawText/controller';
|
||||
import { addMinutes } from 'date-fns';
|
||||
|
||||
export function getGFSCollection(bucket: `${BucketNameEnum}`) {
|
||||
MongoDatasetFileSchema;
|
||||
@@ -210,28 +210,26 @@ export const readFileContentFromMongo = async ({
|
||||
tmbId,
|
||||
bucketName,
|
||||
fileId,
|
||||
isQAImport = false,
|
||||
customPdfParse = false
|
||||
customPdfParse = false,
|
||||
getFormatText
|
||||
}: {
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
bucketName: `${BucketNameEnum}`;
|
||||
fileId: string;
|
||||
isQAImport?: boolean;
|
||||
customPdfParse?: boolean;
|
||||
getFormatText?: boolean; // 数据类型都尽可能转化成 markdown 格式
|
||||
}): Promise<{
|
||||
rawText: string;
|
||||
filename: string;
|
||||
}> => {
|
||||
const bufferId = `${fileId}-${customPdfParse}`;
|
||||
const bufferId = `${String(fileId)}-${customPdfParse}`;
|
||||
// read buffer
|
||||
const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: bufferId }, undefined, {
|
||||
...readFromSecondary
|
||||
}).lean();
|
||||
const fileBuffer = await getRawTextBuffer(bufferId);
|
||||
if (fileBuffer) {
|
||||
return {
|
||||
rawText: fileBuffer.rawText,
|
||||
filename: fileBuffer.metadata?.filename || ''
|
||||
rawText: fileBuffer.text,
|
||||
filename: fileBuffer?.sourceName
|
||||
};
|
||||
}
|
||||
|
||||
@@ -254,8 +252,8 @@ export const readFileContentFromMongo = async ({
|
||||
// Get raw text
|
||||
const { rawText } = await readRawContentByFileBuffer({
|
||||
customPdfParse,
|
||||
getFormatText,
|
||||
extension,
|
||||
isQAImport,
|
||||
teamId,
|
||||
tmbId,
|
||||
buffer: fileBuffers,
|
||||
@@ -265,16 +263,13 @@ export const readFileContentFromMongo = async ({
    }
  });

  // < 14M
  if (fileBuffers.length < 14 * 1024 * 1024 && rawText.trim()) {
    MongoRawTextBuffer.create({
      sourceId: bufferId,
      rawText,
      metadata: {
        filename: file.filename
      }
    });
  }
  // Add buffer
  addRawTextBuffer({
    sourceId: bufferId,
    sourceName: file.filename,
    text: rawText,
    expiredTime: addMinutes(new Date(), 20)
  });

  return {
    rawText,
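Taken together with the earlier hunks in this file, the parsed text is now cached through the GridFS-backed buffer helpers instead of a size-capped `MongoRawTextBuffer.create` call, and the cache key is `${fileId}-${customPdfParse}` so a file parsed with and without the custom PDF parser never shares an entry. A condensed sketch of the read-through-cache flow inside `readFileContentFromMongo` (`parseFile` is an assumed stand-in for the real `readRawContentByFileBuffer` call):

```ts
// Sketch only: illustrates the caching flow, not the full function.
const bufferId = `${String(fileId)}-${customPdfParse}`;

const cached = await getRawTextBuffer(bufferId);
if (cached) {
  return { rawText: cached.text, filename: cached.sourceName };
}

const { rawText } = await parseFile(); // assumption: stands in for readRawContentByFileBuffer
addRawTextBuffer({
  sourceId: bufferId,
  sourceName: file.filename,
  text: rawText,
  expiredTime: addMinutes(new Date(), 20) // the */10 cron in controller.ts purges expired entries
});
```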
@@ -1,16 +1,16 @@
|
||||
import { Schema, getMongoModel } from '../../mongo';
|
||||
|
||||
const DatasetFileSchema = new Schema({});
|
||||
const ChatFileSchema = new Schema({});
|
||||
const DatasetFileSchema = new Schema({
|
||||
metadata: Object
|
||||
});
|
||||
const ChatFileSchema = new Schema({
|
||||
metadata: Object
|
||||
});
|
||||
|
||||
try {
|
||||
DatasetFileSchema.index({ uploadDate: -1 });
|
||||
DatasetFileSchema.index({ uploadDate: -1 });
|
||||
|
||||
ChatFileSchema.index({ uploadDate: -1 });
|
||||
ChatFileSchema.index({ 'metadata.chatId': 1 });
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
ChatFileSchema.index({ uploadDate: -1 });
|
||||
ChatFileSchema.index({ 'metadata.chatId': 1 });
|
||||
|
||||
export const MongoDatasetFileSchema = getMongoModel('dataset.files', DatasetFileSchema);
|
||||
export const MongoChatFileSchema = getMongoModel('chat.files', ChatFileSchema);
|
||||
|
||||
@@ -1,5 +1,57 @@
|
||||
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
|
||||
import { PassThrough } from 'stream';
|
||||
import { getGridBucket } from './controller';
|
||||
import { type BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { retryFn } from '@fastgpt/global/common/system/utils';
|
||||
|
||||
export const createFileFromText = async ({
|
||||
bucket,
|
||||
filename,
|
||||
text,
|
||||
metadata
|
||||
}: {
|
||||
bucket: `${BucketNameEnum}`;
|
||||
filename: string;
|
||||
text: string;
|
||||
metadata: Record<string, any>;
|
||||
}) => {
|
||||
const gridBucket = getGridBucket(bucket);
|
||||
|
||||
const buffer = Buffer.from(text);
|
||||
|
||||
const fileSize = buffer.length;
|
||||
// 单块大小:尽可能大,但不超过 14MB,不小于128KB
|
||||
const chunkSizeBytes = (() => {
|
||||
// 计算理想块大小:文件大小 ÷ 目标块数(10)。 并且每个块需要小于 14MB
|
||||
const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);
|
||||
|
||||
// 确保块大小至少为128KB
|
||||
const minChunkSize = 128 * 1024; // 128KB
|
||||
|
||||
// 取理想块大小和最小块大小中的较大值
|
||||
let chunkSize = Math.max(idealChunkSize, minChunkSize);
|
||||
|
||||
// 将块大小向上取整到最接近的64KB的倍数,使其更整齐
|
||||
chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
|
||||
|
||||
return chunkSize;
|
||||
})();
|
||||
|
||||
const uploadStream = gridBucket.openUploadStream(filename, {
|
||||
metadata,
|
||||
chunkSizeBytes
|
||||
});
|
||||
|
||||
return retryFn(async () => {
|
||||
return new Promise<{ fileId: string }>((resolve, reject) => {
|
||||
uploadStream.end(buffer);
|
||||
uploadStream.on('finish', () => {
|
||||
resolve({ fileId: String(uploadStream.id) });
|
||||
});
|
||||
uploadStream.on('error', reject);
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
|
||||
return new Promise<Buffer>((resolve, reject) => {
|
||||
|
||||
@@ -16,6 +16,7 @@ export type readRawTextByLocalFileParams = {
|
||||
path: string;
|
||||
encoding: string;
|
||||
customPdfParse?: boolean;
|
||||
getFormatText?: boolean;
|
||||
metadata?: Record<string, any>;
|
||||
};
|
||||
export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams) => {
|
||||
@@ -27,8 +28,8 @@ export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParam
|
||||
|
||||
return readRawContentByFileBuffer({
|
||||
extension,
|
||||
isQAImport: false,
|
||||
customPdfParse: params.customPdfParse,
|
||||
getFormatText: params.getFormatText,
|
||||
teamId: params.teamId,
|
||||
tmbId: params.tmbId,
|
||||
encoding: params.encoding,
|
||||
@@ -46,7 +47,7 @@ export const readRawContentByFileBuffer = async ({
|
||||
encoding,
|
||||
metadata,
|
||||
customPdfParse = false,
|
||||
isQAImport = false
|
||||
getFormatText = true
|
||||
}: {
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
@@ -57,8 +58,10 @@ export const readRawContentByFileBuffer = async ({
|
||||
metadata?: Record<string, any>;
|
||||
|
||||
customPdfParse?: boolean;
|
||||
isQAImport: boolean;
|
||||
}): Promise<ReadFileResponse> => {
|
||||
getFormatText?: boolean;
|
||||
}): Promise<{
|
||||
rawText: string;
|
||||
}> => {
|
||||
const systemParse = () =>
|
||||
runWorker<ReadFileResponse>(WorkerNameEnum.readFile, {
|
||||
extension,
|
||||
@@ -107,7 +110,7 @@ export const readRawContentByFileBuffer = async ({
|
||||
|
||||
return {
|
||||
rawText: text,
|
||||
formatText: rawText,
|
||||
formatText: text,
|
||||
imageList
|
||||
};
|
||||
};
|
||||
@@ -149,7 +152,7 @@ export const readRawContentByFileBuffer = async ({
|
||||
return await systemParse();
|
||||
})();
|
||||
|
||||
addLog.debug(`Parse file success, time: ${Date.now() - start}ms. Uploading file image.`);
|
||||
addLog.debug(`Parse file success, time: ${Date.now() - start}ms. `);
|
||||
|
||||
// markdown data format
|
||||
if (imageList) {
|
||||
@@ -176,16 +179,7 @@ export const readRawContentByFileBuffer = async ({
|
||||
});
|
||||
}
|
||||
|
||||
if (['csv', 'xlsx'].includes(extension)) {
|
||||
// qa data
|
||||
if (isQAImport) {
|
||||
rawText = rawText || '';
|
||||
} else {
|
||||
rawText = formatText || rawText;
|
||||
}
|
||||
}
|
||||
addLog.debug(`Upload file success, time: ${Date.now() - start}ms`);
|
||||
|
||||
addLog.debug(`Upload file image success, time: ${Date.now() - start}ms`);
|
||||
|
||||
return { rawText, formatText, imageList };
|
||||
return { rawText: getFormatText ? formatText || rawText : rawText };
|
||||
};
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import { getGlobalRedisCacheConnection } from './index';
|
||||
import { getGlobalRedisConnection } from './index';
|
||||
import { addLog } from '../system/log';
|
||||
import { retryFn } from '@fastgpt/global/common/system/utils';
|
||||
|
||||
const redisPrefix = 'cache:';
|
||||
const getCacheKey = (key: string) => `${redisPrefix}${key}`;
|
||||
|
||||
export enum CacheKeyEnum {
|
||||
team_vector_count = 'team_vector_count'
|
||||
}
|
||||
@@ -13,12 +16,12 @@ export const setRedisCache = async (
|
||||
) => {
|
||||
return await retryFn(async () => {
|
||||
try {
|
||||
const redis = getGlobalRedisCacheConnection();
|
||||
const redis = getGlobalRedisConnection();
|
||||
|
||||
if (expireSeconds) {
|
||||
await redis.set(key, data, 'EX', expireSeconds);
|
||||
await redis.set(getCacheKey(key), data, 'EX', expireSeconds);
|
||||
} else {
|
||||
await redis.set(key, data);
|
||||
await redis.set(getCacheKey(key), data);
|
||||
}
|
||||
} catch (error) {
|
||||
addLog.error('Set cache error:', error);
|
||||
@@ -28,11 +31,11 @@ export const setRedisCache = async (
|
||||
};
|
||||
|
||||
export const getRedisCache = async (key: string) => {
|
||||
const redis = getGlobalRedisCacheConnection();
|
||||
return await retryFn(() => redis.get(key));
|
||||
const redis = getGlobalRedisConnection();
|
||||
return await retryFn(() => redis.get(getCacheKey(key)));
|
||||
};
|
||||
|
||||
export const delRedisCache = async (key: string) => {
|
||||
const redis = getGlobalRedisCacheConnection();
|
||||
await retryFn(() => redis.del(key));
|
||||
const redis = getGlobalRedisConnection();
|
||||
await retryFn(() => redis.del(getCacheKey(key)));
|
||||
};
|
||||
|
||||
@@ -27,17 +27,26 @@ export const newWorkerRedisConnection = () => {
|
||||
return redis;
|
||||
};
|
||||
|
||||
export const getGlobalRedisCacheConnection = () => {
|
||||
if (global.redisCache) return global.redisCache;
|
||||
export const FASTGPT_REDIS_PREFIX = 'fastgpt:';
|
||||
export const getGlobalRedisConnection = () => {
|
||||
if (global.redisClient) return global.redisClient;
|
||||
|
||||
global.redisCache = new Redis(REDIS_URL, { keyPrefix: 'fastgpt:cache:' });
|
||||
global.redisClient = new Redis(REDIS_URL, { keyPrefix: FASTGPT_REDIS_PREFIX });
|
||||
|
||||
global.redisCache.on('connect', () => {
|
||||
global.redisClient.on('connect', () => {
|
||||
addLog.info('Redis connected');
|
||||
});
|
||||
global.redisCache.on('error', (error) => {
|
||||
global.redisClient.on('error', (error) => {
|
||||
addLog.error('Redis connection error', error);
|
||||
});
|
||||
|
||||
return global.redisCache;
|
||||
return global.redisClient;
|
||||
};
|
||||
|
||||
export const getAllKeysByPrefix = async (key: string) => {
  const redis = getGlobalRedisConnection();
  const keys = (await redis.keys(`${FASTGPT_REDIS_PREFIX}${key}:*`)).map((key) =>
    key.replace(FASTGPT_REDIS_PREFIX, '')
  );
  return keys;
};

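Two prefixes are now in play: the shared ioredis connection adds `fastgpt:` to every key argument, and the cache helpers add `cache:` on top, so a cache entry is stored under `fastgpt:cache:<key>`. One caveat, based on my reading of ioredis rather than anything stated in the diff: `keyPrefix` is applied to key arguments of commands like SET/GET/DEL, but not to the pattern passed to KEYS, which would explain why `getAllKeysByPrefix` prepends `FASTGPT_REDIS_PREFIX` manually and strips it from the results. A sketch, with the cache key shape assumed for illustration:

```ts
// Where a cached value actually lands in Redis (the key shape below is an assumption).
await setRedisCache(`${CacheKeyEnum.team_vector_count}:${teamId}`, '42', 60);
// stored as: "fastgpt:" (ioredis keyPrefix) + "cache:" (helper prefix) + "team_vector_count:<teamId>"

// KEYS is not auto-prefixed, so the helper adds and then strips the prefix itself:
const keys = await getAllKeysByPrefix(`cache:${CacheKeyEnum.team_vector_count}`);
// e.g. ["cache:team_vector_count:<teamId>"]
```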
2
packages/service/common/redis/type.d.ts
vendored
@@ -1,5 +1,5 @@
|
||||
import type Redis from 'ioredis';
|
||||
|
||||
declare global {
|
||||
var redisCache: Redis | null;
|
||||
var redisClient: Redis | null;
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ let jieba: Jieba | undefined;
|
||||
})();
|
||||
|
||||
const stopWords = new Set([
|
||||
'\n',
|
||||
'--',
|
||||
'?',
|
||||
'“',
|
||||
@@ -1519,8 +1520,7 @@ const stopWords = new Set([
|
||||
]);
|
||||
|
||||
export async function jiebaSplit({ text }: { text: string }) {
|
||||
text = text.replace(/[#*`_~>[\](){}|]/g, '').replace(/\S*https?\S*/gi, '');
|
||||
|
||||
text = text.replace(/[#*`_~>[\](){}|]|\S*https?\S*/g, '').trim();
|
||||
const tokens = (await jieba!.cutAsync(text, true)) as string[];
|
||||
|
||||
return (
|
||||
|
||||
@@ -2,26 +2,44 @@ import { SystemConfigsTypeEnum } from '@fastgpt/global/common/system/config/cons
|
||||
import { MongoSystemConfigs } from './schema';
|
||||
import { type FastGPTConfigFileType } from '@fastgpt/global/common/system/types';
|
||||
import { FastGPTProUrl } from '../constants';
|
||||
import { type LicenseDataType } from '@fastgpt/global/common/system/types';
|
||||
|
||||
export const getFastGPTConfigFromDB = async () => {
|
||||
export const getFastGPTConfigFromDB = async (): Promise<{
|
||||
fastgptConfig: FastGPTConfigFileType;
|
||||
licenseData?: LicenseDataType;
|
||||
}> => {
|
||||
if (!FastGPTProUrl) {
|
||||
return {
|
||||
config: {} as FastGPTConfigFileType
|
||||
fastgptConfig: {} as FastGPTConfigFileType
|
||||
};
|
||||
}
|
||||
|
||||
const res = await MongoSystemConfigs.findOne({
|
||||
type: SystemConfigsTypeEnum.fastgpt
|
||||
}).sort({
|
||||
createTime: -1
|
||||
});
|
||||
const [fastgptConfig, licenseConfig] = await Promise.all([
|
||||
MongoSystemConfigs.findOne({
|
||||
type: SystemConfigsTypeEnum.fastgpt
|
||||
}).sort({
|
||||
createTime: -1
|
||||
}),
|
||||
MongoSystemConfigs.findOne({
|
||||
type: SystemConfigsTypeEnum.license
|
||||
}).sort({
|
||||
createTime: -1
|
||||
})
|
||||
]);
|
||||
|
||||
const config = res?.value || {};
|
||||
const config = fastgptConfig?.value || {};
|
||||
const licenseData = licenseConfig?.value?.data as LicenseDataType | undefined;
|
||||
|
||||
const fastgptConfigTime = fastgptConfig?.createTime.getTime().toString();
|
||||
const licenseConfigTime = licenseConfig?.createTime.getTime().toString();
|
||||
// 利用配置文件的创建时间(更新时间)来做缓存,如果前端命中缓存,则不需要再返回配置文件
|
||||
global.systemInitBufferId = res ? res.createTime.getTime().toString() : undefined;
|
||||
global.systemInitBufferId = fastgptConfigTime
|
||||
? `${fastgptConfigTime}-${licenseConfigTime}`
|
||||
: undefined;
|
||||
|
||||
return {
|
||||
config: config as FastGPTConfigFileType
|
||||
fastgptConfig: config as FastGPTConfigFileType,
|
||||
licenseData
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
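The hunk above folds the license document into the same lookup and, per the inline comment, keeps using the config documents' `createTime` as the frontend cache key: `systemInitBufferId` is now `"<fastgpt createTime>-<license createTime>"`, so updating either the FastGPT config or the license should invalidate the cached init payload rather than only the former.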
@@ -57,14 +57,19 @@ export const addLog = {
|
||||
|
||||
level === LogLevelEnum.error && console.error(obj);
|
||||
|
||||
// store
|
||||
// store log
|
||||
if (level >= STORE_LOG_LEVEL && connectionMongo.connection.readyState === 1) {
|
||||
// store log
|
||||
getMongoLog().create({
|
||||
text: msg,
|
||||
level,
|
||||
metadata: obj
|
||||
});
|
||||
(async () => {
|
||||
try {
|
||||
await getMongoLog().create({
|
||||
text: msg,
|
||||
level,
|
||||
metadata: obj
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('store log error', error);
|
||||
}
|
||||
})();
|
||||
}
|
||||
},
|
||||
debug(msg: string, obj?: Record<string, any>) {
|
||||
|
||||
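The log-store change above is worth spelling out: previously `getMongoLog().create(...)` ran without `await` or a rejection handler, so a transient Mongo failure could surface as an unhandled promise rejection. Wrapping the write in an async IIFE with try/catch keeps it fire-and-forget for the caller while containing failures to a `console.error`, at the cost of the stored log being best-effort.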
@@ -5,7 +5,8 @@ export enum TimerIdEnum {
|
||||
clearExpiredSubPlan = 'clearExpiredSubPlan',
|
||||
updateStandardPlan = 'updateStandardPlan',
|
||||
scheduleTriggerApp = 'scheduleTriggerApp',
|
||||
notification = 'notification'
|
||||
notification = 'notification',
|
||||
clearExpiredRawTextBuffer = 'clearExpiredRawTextBuffer'
|
||||
}
|
||||
|
||||
export enum LockNotificationEnum {
|
||||
|
||||
@@ -188,6 +188,7 @@ export class PgVectorCtrl {
    const results: any = await PgClient.query(
      `BEGIN;
        SET LOCAL hnsw.ef_search = ${global.systemEnv?.hnswEfSearch || 100};
        SET LOCAL hnsw.max_scan_tuples = ${global.systemEnv?.hnswMaxScanTuples || 100000};
        SET LOCAL hnsw.iterative_scan = relaxed_order;
        WITH relaxed_results AS MATERIALIZED (
          select id, collection_id, vector <#> '[${vector}]' AS score
@@ -199,7 +200,7 @@ export class PgVectorCtrl {
        ) SELECT id, collection_id, score FROM relaxed_results ORDER BY score;
      COMMIT;`
    );
    const rows = results?.[3]?.rows as PgSearchRawType[];
    const rows = results?.[results.length - 2]?.rows as PgSearchRawType[];

    if (!Array.isArray(rows)) {
      return {
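The index change matters because the batched query runs several statements and the Postgres client returns one result object per statement here, so the hard-coded `results[3]` broke the moment the extra `SET LOCAL hnsw.iterative_scan` line was added. Indexing from the end keeps pointing at the SELECT:

```ts
// Statement order in the batched query (new version):
// [0] BEGIN
// [1] SET LOCAL hnsw.ef_search
// [2] SET LOCAL hnsw.max_scan_tuples
// [3] SET LOCAL hnsw.iterative_scan   <- newly added; shifted the SELECT off index 3
// [4] WITH ... SELECT ...             <- the rows we actually want
// [5] COMMIT
// results.length - 2 is always the statement just before COMMIT, i.e. the SELECT.
const rows = results?.[results.length - 2]?.rows as PgSearchRawType[];
```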
@@ -78,7 +78,7 @@ export const createChatCompletion = async ({
|
||||
}
|
||||
body.model = modelConstantsData.model;
|
||||
|
||||
const formatTimeout = timeout ? timeout : body.stream ? 60000 : 600000;
|
||||
const formatTimeout = timeout ? timeout : 600000;
|
||||
const ai = getAIApi({
|
||||
userKey,
|
||||
timeout: formatTimeout
|
||||
|
||||
@@ -1,6 +1,54 @@
|
||||
{
|
||||
"provider": "Claude",
|
||||
"list": [
|
||||
{
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"name": "claude-sonnet-4-20250514",
|
||||
"maxContext": 200000,
|
||||
"maxResponse": 8000,
|
||||
"quoteMaxToken": 100000,
|
||||
"maxTemperature": 1,
|
||||
"showTopP": true,
|
||||
"showStopSign": true,
|
||||
"vision": true,
|
||||
"toolChoice": true,
|
||||
"functionCall": false,
|
||||
"defaultSystemChatPrompt": "",
|
||||
"datasetProcess": true,
|
||||
"usedInClassify": true,
|
||||
"customCQPrompt": "",
|
||||
"usedInExtractFields": true,
|
||||
"usedInQueryExtension": true,
|
||||
"customExtractPrompt": "",
|
||||
"usedInToolCall": true,
|
||||
"defaultConfig": {},
|
||||
"fieldMap": {},
|
||||
"type": "llm"
|
||||
},
|
||||
{
|
||||
"model": "claude-opus-4-20250514",
|
||||
"name": "claude-opus-4-20250514",
|
||||
"maxContext": 200000,
|
||||
"maxResponse": 4096,
|
||||
"quoteMaxToken": 100000,
|
||||
"maxTemperature": 1,
|
||||
"showTopP": true,
|
||||
"showStopSign": true,
|
||||
"vision": true,
|
||||
"toolChoice": true,
|
||||
"functionCall": false,
|
||||
"defaultSystemChatPrompt": "",
|
||||
"datasetProcess": true,
|
||||
"usedInClassify": true,
|
||||
"customCQPrompt": "",
|
||||
"usedInExtractFields": true,
|
||||
"usedInQueryExtension": true,
|
||||
"customExtractPrompt": "",
|
||||
"usedInToolCall": true,
|
||||
"defaultConfig": {},
|
||||
"fieldMap": {},
|
||||
"type": "llm"
|
||||
},
|
||||
{
|
||||
"model": "claude-3-7-sonnet-20250219",
|
||||
"name": "claude-3-7-sonnet-20250219",
|
||||
|
||||
@@ -25,6 +25,30 @@
|
||||
"showTopP": true,
|
||||
"showStopSign": true
|
||||
},
|
||||
{
|
||||
"model": "gemini-2.5-flash-preview-04-17",
|
||||
"name": "gemini-2.5-flash-preview-04-17",
|
||||
"maxContext": 1000000,
|
||||
"maxResponse": 8000,
|
||||
"quoteMaxToken": 60000,
|
||||
"maxTemperature": 1,
|
||||
"vision": true,
|
||||
"toolChoice": true,
|
||||
"functionCall": false,
|
||||
"defaultSystemChatPrompt": "",
|
||||
"datasetProcess": true,
|
||||
"usedInClassify": true,
|
||||
"customCQPrompt": "",
|
||||
"usedInExtractFields": true,
|
||||
"usedInQueryExtension": true,
|
||||
"customExtractPrompt": "",
|
||||
"usedInToolCall": true,
|
||||
"defaultConfig": {},
|
||||
"fieldMap": {},
|
||||
"type": "llm",
|
||||
"showTopP": true,
|
||||
"showStopSign": true
|
||||
},
|
||||
{
|
||||
"model": "gemini-2.0-flash",
|
||||
"name": "gemini-2.0-flash",
|
||||
|
||||
@@ -18,15 +18,17 @@ import json5 from 'json5';
 */
export const computedMaxToken = ({
  maxToken,
  model
  model,
  min
}: {
  maxToken?: number;
  model: LLMModelItemType;
  min?: number;
}) => {
  if (maxToken === undefined) return;

  maxToken = Math.min(maxToken, model.maxResponse);
  return maxToken;
  return Math.max(maxToken, min || 0);
};
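With the new `min` parameter the helper clamps the requested value to the model ceiling and then raises it to an optional floor. Behaviour sketch, using a stand-in model whose `maxResponse` is 4096:

```ts
const model = { maxResponse: 4096 } as LLMModelItemType; // illustrative stand-in, not a real config entry

computedMaxToken({ maxToken: 8000, model });           // 4096      (clamped to model.maxResponse)
computedMaxToken({ maxToken: 100, model, min: 2000 }); // 2000      (raised to the requested floor)
computedMaxToken({ model });                           // undefined (no maxToken requested)
```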
|
||||
// FastGPT temperature range: [0,10], ai temperature:[0,2],{0,1]……
|
||||
@@ -135,12 +137,14 @@ export const llmStreamResponseToAnswerText = async (
|
||||
|
||||
// Tool calls
|
||||
if (responseChoice?.tool_calls?.length) {
|
||||
responseChoice.tool_calls.forEach((toolCall) => {
|
||||
const index = toolCall.index;
|
||||
responseChoice.tool_calls.forEach((toolCall, i) => {
|
||||
const index = toolCall.index ?? i;
|
||||
|
||||
if (toolCall.id || callingTool) {
|
||||
// 有 id,代表新 call 工具
|
||||
if (toolCall.id) {
|
||||
// Call new tool
|
||||
const hasNewTool = toolCall?.function?.name || callingTool;
|
||||
if (hasNewTool) {
|
||||
// 有 function name,代表新 call 工具
|
||||
if (toolCall?.function?.name) {
|
||||
callingTool = {
|
||||
name: toolCall.function?.name || '',
|
||||
arguments: toolCall.function?.arguments || ''
|
||||
@@ -176,7 +180,7 @@ export const llmStreamResponseToAnswerText = async (
|
||||
}
|
||||
}
|
||||
return {
|
||||
text: parseReasoningContent(answer)[1],
|
||||
text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
|
||||
usage,
|
||||
toolCalls
|
||||
};
|
||||
@@ -190,8 +194,9 @@ export const llmUnStreamResponseToAnswerText = async (
|
||||
}> => {
|
||||
const answer = response.choices?.[0]?.message?.content || '';
|
||||
const toolCalls = response.choices?.[0]?.message?.tool_calls;
|
||||
|
||||
return {
|
||||
text: answer,
|
||||
text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
|
||||
usage: response.usage,
|
||||
toolCalls
|
||||
};
|
||||
@@ -221,7 +226,9 @@ export const parseReasoningContent = (text: string): [string, string] => {
};

export const removeDatasetCiteText = (text: string, retainDatasetCite: boolean) => {
  return retainDatasetCite ? text : text.replace(/\[([a-f0-9]{24})\](?:\([^\)]*\)?)?/g, '');
  return retainDatasetCite
    ? text.replace(/\[id\]\(CITE\)/g, '')
    : text.replace(/\[([a-f0-9]{24})\](?:\([^\)]*\)?)?/g, '').replace(/\[id\]\(CITE\)/g, '');
};
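The helper now also strips the literal `[id](CITE)` placeholder even when real dataset citations are kept. Behaviour sketch (the 24-character hex id below is made up):

```ts
const answer = 'Deep learning[6839f1f2a3b4c5d6e7f8a9b0](CITE) is a subfield[id](CITE) of ML.';

removeDatasetCiteText(answer, false);
// 'Deep learning is a subfield of ML.'  (hex citation and placeholder both removed)
removeDatasetCiteText(answer, true);
// 'Deep learning[6839f1f2a3b4c5d6e7f8a9b0](CITE) is a subfield of ML.'  (only the placeholder removed)
```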
|
||||
// Parse llm stream part
|
||||
@@ -236,6 +243,12 @@ export const parseLLMStreamResponse = () => {
|
||||
let citeBuffer = '';
|
||||
const maxCiteBufferLength = 32; // [Object](CITE)总长度为32
|
||||
|
||||
// Buffer
|
||||
let buffer_finishReason: CompletionFinishReason = null;
|
||||
let buffer_usage: CompletionUsage = getLLMDefaultUsage();
|
||||
let buffer_reasoningContent = '';
|
||||
let buffer_content = '';
|
||||
|
||||
/*
|
||||
parseThinkTag - 只控制是否主动解析 <think></think>,如果接口已经解析了,则不再解析。
|
||||
retainDatasetCite -
|
||||
@@ -253,6 +266,7 @@ export const parseLLMStreamResponse = () => {
|
||||
};
|
||||
finish_reason?: CompletionFinishReason;
|
||||
}[];
|
||||
usage?: CompletionUsage;
|
||||
};
|
||||
parseThinkTag?: boolean;
|
||||
retainDatasetCite?: boolean;
|
||||
@@ -262,72 +276,71 @@ export const parseLLMStreamResponse = () => {
|
||||
responseContent: string;
|
||||
finishReason: CompletionFinishReason;
|
||||
} => {
|
||||
const finishReason = part.choices?.[0]?.finish_reason || null;
|
||||
const content = part.choices?.[0]?.delta?.content || '';
|
||||
// @ts-ignore
|
||||
const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
|
||||
const isStreamEnd = !!finishReason;
|
||||
const data = (() => {
|
||||
buffer_usage = part.usage || buffer_usage;
|
||||
|
||||
// Parse think
|
||||
const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } = (() => {
|
||||
if (reasoningContent || !parseThinkTag) {
|
||||
isInThinkTag = false;
|
||||
return { reasoningContent, content };
|
||||
}
|
||||
const finishReason = part.choices?.[0]?.finish_reason || null;
|
||||
buffer_finishReason = finishReason || buffer_finishReason;
|
||||
|
||||
if (!content) {
|
||||
return {
|
||||
reasoningContent: '',
|
||||
content: ''
|
||||
};
|
||||
}
|
||||
const content = part.choices?.[0]?.delta?.content || '';
|
||||
// @ts-ignore
|
||||
const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
|
||||
const isStreamEnd = !!buffer_finishReason;
|
||||
|
||||
// 如果不在 think 标签中,或者有 reasoningContent(接口已解析),则返回 reasoningContent 和 content
|
||||
if (isInThinkTag === false) {
|
||||
return {
|
||||
reasoningContent: '',
|
||||
content
|
||||
};
|
||||
}
|
||||
// Parse think
|
||||
const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } =
|
||||
(() => {
|
||||
if (reasoningContent || !parseThinkTag) {
|
||||
isInThinkTag = false;
|
||||
return { reasoningContent, content };
|
||||
}
|
||||
|
||||
// 检测是否为 think 标签开头的数据
|
||||
if (isInThinkTag === undefined) {
|
||||
// Parse content think and answer
|
||||
startTagBuffer += content;
|
||||
// 太少内容时候,暂时不解析
|
||||
if (startTagBuffer.length < thinkStartChars.length) {
|
||||
if (isStreamEnd) {
|
||||
const tmpContent = startTagBuffer;
|
||||
startTagBuffer = '';
|
||||
// 如果不在 think 标签中,或者有 reasoningContent(接口已解析),则返回 reasoningContent 和 content
|
||||
if (isInThinkTag === false) {
|
||||
return {
|
||||
reasoningContent: '',
|
||||
content: tmpContent
|
||||
content
|
||||
};
|
||||
}
|
||||
return {
|
||||
reasoningContent: '',
|
||||
content: ''
|
||||
};
|
||||
}
|
||||
|
||||
if (startTagBuffer.startsWith(thinkStartChars)) {
|
||||
isInThinkTag = true;
|
||||
return {
|
||||
reasoningContent: startTagBuffer.slice(thinkStartChars.length),
|
||||
content: ''
|
||||
};
|
||||
}
|
||||
// 检测是否为 think 标签开头的数据
|
||||
if (isInThinkTag === undefined) {
|
||||
// Parse content think and answer
|
||||
startTagBuffer += content;
|
||||
// 太少内容时候,暂时不解析
|
||||
if (startTagBuffer.length < thinkStartChars.length) {
|
||||
if (isStreamEnd) {
|
||||
const tmpContent = startTagBuffer;
|
||||
startTagBuffer = '';
|
||||
return {
|
||||
reasoningContent: '',
|
||||
content: tmpContent
|
||||
};
|
||||
}
|
||||
return {
|
||||
reasoningContent: '',
|
||||
content: ''
|
||||
};
|
||||
}
|
||||
|
||||
// 如果未命中 think 标签,则认为不在 think 标签中,返回 buffer 内容作为 content
|
||||
isInThinkTag = false;
|
||||
return {
|
||||
reasoningContent: '',
|
||||
content: startTagBuffer
|
||||
};
|
||||
}
|
||||
if (startTagBuffer.startsWith(thinkStartChars)) {
|
||||
isInThinkTag = true;
|
||||
return {
|
||||
reasoningContent: startTagBuffer.slice(thinkStartChars.length),
|
||||
content: ''
|
||||
};
|
||||
}
|
||||
|
||||
// 确认是 think 标签内容,开始返回 think 内容,并实时检测 </think>
|
||||
/*
|
||||
// 如果未命中 think 标签,则认为不在 think 标签中,返回 buffer 内容作为 content
|
||||
isInThinkTag = false;
|
||||
return {
|
||||
reasoningContent: '',
|
||||
content: startTagBuffer
|
||||
};
|
||||
}
|
||||
|
||||
// 确认是 think 标签内容,开始返回 think 内容,并实时检测 </think>
|
||||
/*
|
||||
检测 </think> 方案。
|
||||
存储所有疑似 </think> 的内容,直到检测到完整的 </think> 标签或超出 </think> 长度。
|
||||
content 返回值包含以下几种情况:
|
||||
@@ -338,124 +351,145 @@ export const parseLLMStreamResponse = () => {
|
||||
</think>abc - 完全命中尾标签
|
||||
k>abc - 命中一部分尾标签
|
||||
*/
|
||||
// endTagBuffer 专门用来记录疑似尾标签的内容
|
||||
if (endTagBuffer) {
|
||||
endTagBuffer += content;
|
||||
if (endTagBuffer.includes(thinkEndChars)) {
|
||||
isInThinkTag = false;
|
||||
const answer = endTagBuffer.slice(thinkEndChars.length);
|
||||
return {
|
||||
reasoningContent: '',
|
||||
content: answer
|
||||
};
|
||||
} else if (endTagBuffer.length >= thinkEndChars.length) {
|
||||
// 缓存内容超出尾标签长度,且仍未命中 </think>,则认为本次猜测 </think> 失败,仍处于 think 阶段。
|
||||
const tmp = endTagBuffer;
|
||||
endTagBuffer = '';
|
||||
return {
|
||||
reasoningContent: tmp,
|
||||
content: ''
|
||||
};
|
||||
}
|
||||
return {
|
||||
reasoningContent: '',
|
||||
content: ''
|
||||
};
|
||||
} else if (content.includes(thinkEndChars)) {
|
||||
// 返回内容,完整命中</think>,直接结束
|
||||
isInThinkTag = false;
|
||||
const [think, answer] = content.split(thinkEndChars);
|
||||
return {
|
||||
reasoningContent: think,
|
||||
content: answer
|
||||
};
|
||||
} else {
|
||||
// 无 buffer,且未命中 </think>,开始疑似 </think> 检测。
|
||||
for (let i = 1; i < thinkEndChars.length; i++) {
|
||||
const partialEndTag = thinkEndChars.slice(0, i);
|
||||
// 命中一部分尾标签
|
||||
if (content.endsWith(partialEndTag)) {
|
||||
const think = content.slice(0, -partialEndTag.length);
|
||||
endTagBuffer += partialEndTag;
|
||||
// endTagBuffer 专门用来记录疑似尾标签的内容
|
||||
if (endTagBuffer) {
|
||||
endTagBuffer += content;
|
||||
if (endTagBuffer.includes(thinkEndChars)) {
|
||||
isInThinkTag = false;
|
||||
const answer = endTagBuffer.slice(thinkEndChars.length);
|
||||
return {
|
||||
reasoningContent: '',
|
||||
content: answer
|
||||
};
|
||||
} else if (endTagBuffer.length >= thinkEndChars.length) {
|
||||
// 缓存内容超出尾标签长度,且仍未命中 </think>,则认为本次猜测 </think> 失败,仍处于 think 阶段。
|
||||
const tmp = endTagBuffer;
|
||||
endTagBuffer = '';
|
||||
return {
|
||||
reasoningContent: tmp,
|
||||
content: ''
|
||||
};
|
||||
}
|
||||
return {
|
||||
reasoningContent: think,
|
||||
reasoningContent: '',
|
||||
content: ''
|
||||
};
|
||||
} else if (content.includes(thinkEndChars)) {
|
||||
// 返回内容,完整命中</think>,直接结束
|
||||
isInThinkTag = false;
|
||||
const [think, answer] = content.split(thinkEndChars);
|
||||
return {
|
||||
reasoningContent: think,
|
||||
content: answer
|
||||
};
|
||||
} else {
|
||||
// 无 buffer,且未命中 </think>,开始疑似 </think> 检测。
|
||||
for (let i = 1; i < thinkEndChars.length; i++) {
|
||||
const partialEndTag = thinkEndChars.slice(0, i);
|
||||
// 命中一部分尾标签
|
||||
if (content.endsWith(partialEndTag)) {
|
||||
const think = content.slice(0, -partialEndTag.length);
|
||||
endTagBuffer += partialEndTag;
|
||||
return {
|
||||
reasoningContent: think,
|
||||
content: ''
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 完全未命中尾标签,还是 think 阶段。
|
||||
return {
|
||||
reasoningContent: content,
|
||||
content: ''
|
||||
};
|
||||
})();
|
||||
|
||||
// Parse datset cite
|
||||
if (retainDatasetCite) {
|
||||
return {
|
||||
reasoningContent: parsedThinkReasoningContent,
|
||||
content: parsedThinkContent,
|
||||
responseContent: parsedThinkContent,
|
||||
finishReason: buffer_finishReason
|
||||
};
|
||||
}
|
||||
|
||||
// 完全未命中尾标签,还是 think 阶段。
|
||||
return {
|
||||
reasoningContent: content,
|
||||
content: ''
|
||||
};
|
||||
})();
|
||||
// 缓存包含 [ 的字符串,直到超出 maxCiteBufferLength 再一次性返回
|
||||
const parseCite = (text: string) => {
|
||||
// 结束时,返回所有剩余内容
|
||||
if (isStreamEnd) {
|
||||
const content = citeBuffer + text;
|
||||
return {
|
||||
content: removeDatasetCiteText(content, false)
|
||||
};
|
||||
}
|
||||
|
||||
// 新内容包含 [,初始化缓冲数据
|
||||
if (text.includes('[')) {
|
||||
const index = text.indexOf('[');
|
||||
const beforeContent = citeBuffer + text.slice(0, index);
|
||||
citeBuffer = text.slice(index);
|
||||
|
||||
// beforeContent 可能是:普通字符串,带 [ 的字符串
|
||||
return {
|
||||
content: removeDatasetCiteText(beforeContent, false)
|
||||
};
|
||||
}
|
||||
// 处于 Cite 缓冲区,判断是否满足条件
|
||||
else if (citeBuffer) {
|
||||
citeBuffer += text;
|
||||
|
||||
// 检查缓冲区长度是否达到完整Quote长度或已经流结束
|
||||
if (citeBuffer.length >= maxCiteBufferLength) {
|
||||
const content = removeDatasetCiteText(citeBuffer, false);
|
||||
citeBuffer = '';
|
||||
|
||||
return {
|
||||
content
|
||||
};
|
||||
} else {
|
||||
// 暂时不返回内容
|
||||
return { content: '' };
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
content: text
|
||||
};
|
||||
};
|
||||
const { content: pasedCiteContent } = parseCite(parsedThinkContent);
|
||||
|
||||
// Parse datset cite
|
||||
if (retainDatasetCite) {
|
||||
return {
|
||||
reasoningContent: parsedThinkReasoningContent,
|
||||
content: parsedThinkContent,
|
||||
responseContent: parsedThinkContent,
|
||||
finishReason
|
||||
responseContent: pasedCiteContent,
|
||||
finishReason: buffer_finishReason
|
||||
};
|
||||
}
|
||||
})();
|
||||
|
||||
// 缓存包含 [ 的字符串,直到超出 maxCiteBufferLength 再一次性返回
|
||||
const parseCite = (text: string) => {
|
||||
// 结束时,返回所有剩余内容
|
||||
if (isStreamEnd) {
|
||||
const content = citeBuffer + text;
|
||||
return {
|
||||
content: removeDatasetCiteText(content, false)
|
||||
};
|
||||
}
|
||||
buffer_reasoningContent += data.reasoningContent;
|
||||
buffer_content += data.content;
|
||||
|
||||
// 新内容包含 [,初始化缓冲数据
|
||||
if (text.includes('[')) {
|
||||
const index = text.indexOf('[');
|
||||
const beforeContent = citeBuffer + text.slice(0, index);
|
||||
citeBuffer = text.slice(index);
|
||||
|
||||
// beforeContent 可能是:普通字符串,带 [ 的字符串
|
||||
return {
|
||||
content: removeDatasetCiteText(beforeContent, false)
|
||||
};
|
||||
}
|
||||
// 处于 Cite 缓冲区,判断是否满足条件
|
||||
else if (citeBuffer) {
|
||||
citeBuffer += text;
|
||||
|
||||
// 检查缓冲区长度是否达到完整Quote长度或已经流结束
|
||||
if (citeBuffer.length >= maxCiteBufferLength) {
|
||||
const content = removeDatasetCiteText(citeBuffer, false);
|
||||
citeBuffer = '';
|
||||
|
||||
return {
|
||||
content
|
||||
};
|
||||
} else {
|
||||
// 暂时不返回内容
|
||||
return { content: '' };
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
content: text
|
||||
};
|
||||
};
|
||||
const { content: pasedCiteContent } = parseCite(parsedThinkContent);
|
||||
return data;
|
||||
};
|
||||
|
||||
const getResponseData = () => {
|
||||
return {
|
||||
reasoningContent: parsedThinkReasoningContent,
|
||||
content: parsedThinkContent,
|
||||
responseContent: pasedCiteContent,
|
||||
finishReason
|
||||
finish_reason: buffer_finishReason,
|
||||
usage: buffer_usage,
|
||||
reasoningContent: buffer_reasoningContent,
|
||||
content: buffer_content
|
||||
};
|
||||
};
|
||||
|
||||
const updateFinishReason = (finishReason: CompletionFinishReason) => {
|
||||
buffer_finishReason = finishReason;
|
||||
};
|
||||
|
||||
return {
|
||||
parsePart
|
||||
parsePart,
|
||||
getResponseData,
|
||||
updateFinishReason
|
||||
};
|
||||
};
|
||||
|
||||
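Beyond untangling the think-tag and citation buffering, the refactor gives `parseLLMStreamResponse` two new helpers: `getResponseData()` returns the accumulated usage, finish reason, reasoning and content buffers after the stream ends, and `updateFinishReason()` lets the caller override the finish reason (for example when the client aborts). A consumption sketch; `stream` and `send` are assumed stand-ins and the exact parameter names outside this hunk are assumptions:

```ts
const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

for await (const part of stream) {
  const { responseContent, finishReason } = parsePart({
    part,
    parseThinkTag: true,
    retainDatasetCite: false
  });
  if (responseContent) send(responseContent); // stream only the cite-scrubbed text to the client
  if (finishReason) break;
}

// After the loop, the internal buffers replace manual concatenation in the caller:
const { content, reasoningContent, usage, finish_reason } = getResponseData();
```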
@@ -11,40 +11,6 @@ export const beforeUpdateAppFormat = <T extends AppSchema['modules'] | undefined
|
||||
nodes: T;
|
||||
isPlugin: boolean;
|
||||
}) => {
|
||||
if (nodes) {
|
||||
// Check dataset maxTokens
|
||||
if (isPlugin) {
|
||||
let maxTokens = 16000;
|
||||
|
||||
nodes.forEach((item) => {
|
||||
if (
|
||||
item.flowNodeType === FlowNodeTypeEnum.chatNode ||
|
||||
item.flowNodeType === FlowNodeTypeEnum.tools
|
||||
) {
|
||||
const model =
|
||||
item.inputs.find((item) => item.key === NodeInputKeyEnum.aiModel)?.value || '';
|
||||
const chatModel = getLLMModel(model);
|
||||
const quoteMaxToken = chatModel.quoteMaxToken || 16000;
|
||||
|
||||
maxTokens = Math.max(maxTokens, quoteMaxToken);
|
||||
}
|
||||
});
|
||||
|
||||
nodes.forEach((item) => {
|
||||
if (item.flowNodeType === FlowNodeTypeEnum.datasetSearchNode) {
|
||||
item.inputs.forEach((input) => {
|
||||
if (input.key === NodeInputKeyEnum.datasetMaxTokens) {
|
||||
const val = input.value as number;
|
||||
if (val > maxTokens) {
|
||||
input.value = maxTokens;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
nodes
|
||||
};
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
|
||||
import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
|
||||
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
|
||||
import { type ToolType } from '@fastgpt/global/core/app/type';
|
||||
import { type McpToolConfigType } from '@fastgpt/global/core/app/type';
|
||||
import { addLog } from '../../common/system/log';
|
||||
import { retryFn } from '@fastgpt/global/common/system/utils';
|
||||
|
||||
@@ -41,7 +41,7 @@ export class MCPClient {
|
||||
* Get available tools list
|
||||
* @returns List of tools
|
||||
*/
|
||||
public async getTools(): Promise<ToolType[]> {
|
||||
public async getTools(): Promise<McpToolConfigType[]> {
|
||||
try {
|
||||
const client = await this.getConnection();
|
||||
const response = await client.listTools();
|
||||
|
||||
@@ -30,8 +30,7 @@ import { Types } from 'mongoose';
  community: community-id
  commercial: commercial-id
*/

export async function splitCombinePluginId(id: string) {
export function splitCombineToolId(id: string) {
  const splitRes = id.split('-');
  if (splitRes.length === 1) {
    // app id
@@ -42,7 +41,7 @@ export async function splitCombinePluginId(id: string) {
  }

  const [source, pluginId] = id.split('-') as [PluginSourceEnum, string];
  if (!source || !pluginId) return Promise.reject('pluginId not found');
  if (!source || !pluginId) throw new Error('pluginId not found');

  return { source, pluginId: id };
}
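The rename also makes the helper synchronous: a malformed id now throws `'pluginId not found'` immediately instead of returning a rejected promise, so callers no longer need to `await` it. Behaviour sketch (the plugin id below is hypothetical, and the enum values are shown as their string forms):

```ts
splitCombineToolId('6839f1f2a3b4c5d6e7f8a9b0');
// { source: 'personal', pluginId: '6839f1f2a3b4c5d6e7f8a9b0' }  -> a plain app id, no prefix

splitCombineToolId('community-dalle3');
// { source: 'community', pluginId: 'community-dalle3' }         -> the combined id is kept intact

splitCombineToolId('community-');
// throws Error('pluginId not found')                            -> the part after the dash is empty
```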
@@ -54,7 +53,7 @@ const getSystemPluginTemplateById = async (
|
||||
versionId?: string
|
||||
): Promise<ChildAppType> => {
|
||||
const item = getSystemPluginTemplates().find((plugin) => plugin.id === pluginId);
|
||||
if (!item) return Promise.reject(PluginErrEnum.unAuth);
|
||||
if (!item) return Promise.reject(PluginErrEnum.unExist);
|
||||
|
||||
const plugin = cloneDeep(item);
|
||||
|
||||
@@ -64,10 +63,10 @@ const getSystemPluginTemplateById = async (
|
||||
{ pluginId: plugin.id, 'customConfig.associatedPluginId': plugin.associatedPluginId },
|
||||
'associatedPluginId'
|
||||
).lean();
|
||||
if (!systemPlugin) return Promise.reject(PluginErrEnum.unAuth);
|
||||
if (!systemPlugin) return Promise.reject(PluginErrEnum.unExist);
|
||||
|
||||
const app = await MongoApp.findById(plugin.associatedPluginId).lean();
|
||||
if (!app) return Promise.reject(PluginErrEnum.unAuth);
|
||||
if (!app) return Promise.reject(PluginErrEnum.unExist);
|
||||
|
||||
const version = versionId
|
||||
? await getAppVersionById({
|
||||
@@ -77,6 +76,12 @@ const getSystemPluginTemplateById = async (
|
||||
})
|
||||
: await getAppLatestVersion(plugin.associatedPluginId, app);
|
||||
if (!version.versionId) return Promise.reject('App version not found');
|
||||
const isLatest = version.versionId
|
||||
? await checkIsLatestVersion({
|
||||
appId: plugin.associatedPluginId,
|
||||
versionId: version.versionId
|
||||
})
|
||||
: true;
|
||||
|
||||
return {
|
||||
...plugin,
|
||||
@@ -85,12 +90,19 @@ const getSystemPluginTemplateById = async (
|
||||
edges: version.edges,
|
||||
chatConfig: version.chatConfig
|
||||
},
|
||||
version: versionId || String(version.versionId),
|
||||
version: versionId ? version?.versionId : '',
|
||||
versionLabel: version?.versionName,
|
||||
isLatestVersion: isLatest,
|
||||
teamId: String(app.teamId),
|
||||
tmbId: String(app.tmbId)
|
||||
};
|
||||
}
|
||||
return plugin;
|
||||
|
||||
return {
|
||||
...plugin,
|
||||
version: undefined,
|
||||
isLatestVersion: true
|
||||
};
|
||||
};
|
||||
|
||||
/* Format plugin to workflow preview node data */
|
||||
@@ -102,11 +114,11 @@ export async function getChildAppPreviewNode({
|
||||
versionId?: string;
|
||||
}): Promise<FlowNodeTemplateType> {
|
||||
const app: ChildAppType = await (async () => {
|
||||
const { source, pluginId } = await splitCombinePluginId(appId);
|
||||
const { source, pluginId } = splitCombineToolId(appId);
|
||||
|
||||
if (source === PluginSourceEnum.personal) {
|
||||
const item = await MongoApp.findById(appId).lean();
|
||||
if (!item) return Promise.reject('plugin not found');
|
||||
if (!item) return Promise.reject(PluginErrEnum.unExist);
|
||||
|
||||
const version = await getAppVersionById({ appId, versionId, app: item });
|
||||
|
||||
@@ -132,8 +144,8 @@ export async function getChildAppPreviewNode({
|
||||
},
|
||||
templateType: FlowNodeTemplateTypeEnum.teamApp,
|
||||
|
||||
version: version.versionId,
|
||||
versionLabel: version?.versionName || '',
|
||||
version: versionId ? version?.versionId : '',
|
||||
versionLabel: version?.versionName,
|
||||
isLatestVersion: isLatest,
|
||||
|
||||
originCost: 0,
|
||||
@@ -142,7 +154,7 @@ export async function getChildAppPreviewNode({
|
||||
pluginOrder: 0
|
||||
};
|
||||
} else {
|
||||
return getSystemPluginTemplateById(pluginId);
|
||||
return getSystemPluginTemplateById(pluginId, versionId);
|
||||
}
|
||||
})();
|
||||
|
||||
@@ -216,12 +228,12 @@ export async function getChildAppRuntimeById(
|
||||
id: string,
|
||||
versionId?: string
|
||||
): Promise<PluginRuntimeType> {
|
||||
const app: ChildAppType = await (async () => {
|
||||
const { source, pluginId } = await splitCombinePluginId(id);
|
||||
const app = await (async () => {
|
||||
const { source, pluginId } = splitCombineToolId(id);
|
||||
|
||||
if (source === PluginSourceEnum.personal) {
|
||||
const item = await MongoApp.findById(id).lean();
|
||||
if (!item) return Promise.reject('plugin not found');
if (!item) return Promise.reject(PluginErrEnum.unExist);

const version = await getAppVersionById({
appId: id,
@@ -244,8 +256,6 @@ export async function getChildAppRuntimeById(
},
templateType: FlowNodeTemplateTypeEnum.teamApp,

// Not used
version: item?.pluginData?.nodeVersion,
originCost: 0,
currentCost: 0,
hasTokenFee: false,

@@ -1,6 +1,6 @@
import { type ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
import { type PluginRuntimeType } from '@fastgpt/global/core/plugin/type';
import { splitCombinePluginId } from './controller';
import { splitCombineToolId } from './controller';
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';

/*
@@ -20,7 +20,7 @@ export const computedPluginUsage = async ({
childrenUsage: ChatNodeUsageType[];
error?: boolean;
}) => {
const { source } = await splitCombinePluginId(plugin.id);
const { source } = splitCombineToolId(plugin.id);
const childrenUsages = childrenUsage.reduce((sum, item) => sum + (item.totalPoints || 0), 0);

if (source !== PluginSourceEnum.personal) {

@@ -119,6 +119,7 @@ const AppSchema = new Schema({
defaultPermission: Number
});

AppSchema.index({ type: 1 });
AppSchema.index({ teamId: 1, updateTime: -1 });
AppSchema.index({ teamId: 1, type: 1 });
AppSchema.index(

@@ -1,14 +1,13 @@
import { MongoDataset } from '../dataset/schema';
import { getEmbeddingModel } from '../ai/model';
import {
AppNodeFlowNodeTypeMap,
FlowNodeTypeEnum
} from '@fastgpt/global/core/workflow/node/constant';
import { FlowNodeTypeEnum } from '@fastgpt/global/core/workflow/node/constant';
import { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
import type { StoreNodeItemType } from '@fastgpt/global/core/workflow/type/node';
import { MongoAppVersion } from './version/schema';
import { checkIsLatestVersion } from './version/controller';
import { Types } from '../../common/mongo';
import { getChildAppPreviewNode, splitCombineToolId } from './plugin/controller';
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
import { authAppByTmbId } from '../../support/permission/app/auth';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { getErrText } from '@fastgpt/global/common/error/utils';

export async function listAppDatasetDataByTeamIdAndDatasetIds({
teamId,
@@ -33,52 +32,58 @@ export async function listAppDatasetDataByTeamIdAndDatasetIds({
export async function rewriteAppWorkflowToDetail({
nodes,
teamId,
isRoot
isRoot,
ownerTmbId
}: {
nodes: StoreNodeItemType[];
teamId: string;
isRoot: boolean;
ownerTmbId: string;
}) {
const datasetIdSet = new Set<string>();

// Add node(App Type) versionlabel and latest sign
const appNodes = nodes.filter((node) => AppNodeFlowNodeTypeMap[node.flowNodeType]);
const versionIds = appNodes
.filter((node) => node.version && Types.ObjectId.isValid(node.version))
.map((node) => node.version);
if (versionIds.length > 0) {
const versionDataList = await MongoAppVersion.find(
{
_id: { $in: versionIds }
},
'_id versionName appId time'
).lean();
/* Add node(App Type) versionlabel and latest sign ==== */
await Promise.all(
nodes.map(async (node) => {
if (!node.pluginId) return;
const { source } = splitCombineToolId(node.pluginId);

const versionMap: Record<string, any> = {};
try {
const [preview] = await Promise.all([
getChildAppPreviewNode({
appId: node.pluginId,
versionId: node.version
}),
...(source === PluginSourceEnum.personal
? [
authAppByTmbId({
tmbId: ownerTmbId,
appId: node.pluginId,
per: ReadPermissionVal
})
]
: [])
]);

const isLatestChecks = await Promise.all(
versionDataList.map(async (version) => {
const isLatest = await checkIsLatestVersion({
appId: version.appId,
versionId: version._id
});

return { versionId: String(version._id), isLatest };
})
);
const isLatestMap = new Map(isLatestChecks.map((item) => [item.versionId, item.isLatest]));
versionDataList.forEach((version) => {
versionMap[String(version._id)] = version;
});
appNodes.forEach((node) => {
if (!node.version) return;
const versionData = versionMap[String(node.version)];
if (versionData) {
node.versionLabel = versionData.versionName;
node.isLatestVersion = isLatestMap.get(String(node.version)) || false;
node.pluginData = {
diagram: preview.diagram,
userGuide: preview.userGuide,
courseUrl: preview.courseUrl,
name: preview.name,
avatar: preview.avatar
};
node.versionLabel = preview.versionLabel;
node.isLatestVersion = preview.isLatestVersion;
node.version = preview.version;
} catch (error) {
node.pluginData = {
error: getErrText(error)
};
}
});
}
})
);

/* Add node(App Type) versionlabel and latest sign ==== */

// Get all dataset ids from nodes
nodes.forEach((node) => {

@@ -68,6 +68,9 @@ export const checkIsLatestVersion = async ({
appId: string;
versionId: string;
}) => {
if (!Types.ObjectId.isValid(versionId)) {
return false;
}
const version = await MongoAppVersion.findOne(
{
appId,

@@ -61,6 +61,7 @@ const ChatItemSchema = new Schema({
type: Array,
default: []
},
errorMsg: String,
userGoodFeedback: {
type: String
},

@@ -34,6 +34,10 @@ const ChatSchema = new Schema({
ref: AppCollectionName,
required: true
},
createTime: {
type: Date,
default: () => new Date()
},
updateTime: {
type: Date,
default: () => new Date()

@@ -32,6 +32,7 @@ type Props = {
content: [UserChatItemType & { dataId?: string }, AIChatItemType & { dataId?: string }];
metadata?: Record<string, any>;
durationSeconds: number; //s
errorMsg?: string;
};

export async function saveChat({
@@ -50,6 +51,7 @@ export async function saveChat({
outLinkUid,
content,
durationSeconds,
errorMsg,
metadata = {}
}: Props) {
if (!chatId || chatId === 'NO_RECORD_HISTORIES') return;
@@ -104,7 +106,8 @@ export async function saveChat({
return {
...item,
[DispatchNodeResponseKeyEnum.nodeResponse]: nodeResponse,
durationSeconds
durationSeconds,
errorMsg
};
}
return item;

@@ -65,8 +65,8 @@ export const filterGPTMessageByMaxContext = async ({
if (lastMessage.role === ChatCompletionRequestMessageRoleEnum.User) {
const tokens = await countGptMessagesTokens([lastMessage, ...tmpChats]);
maxContext -= tokens;
// If this round's total tokens exceed the limit, drop the data
if (maxContext < 0) {
// If this round's total tokens exceed the limit, drop the data, but keep at least one group
if (maxContext < 0 && chats.length > 0) {
break;
}

@@ -2,7 +2,9 @@ import type {
APIFileListResponse,
ApiFileReadContentResponse,
APIFileReadResponse,
APIFileServer
ApiDatasetDetailResponse,
APIFileServer,
APIFileItem
} from '@fastgpt/global/core/dataset/apiDataset';
import axios, { type Method } from 'axios';
import { addLog } from '../../../common/system/log';
@@ -89,7 +91,7 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
`/v1/file/list`,
{
searchKey,
parentId
parentId: parentId || apiServer.basePath
},
'POST'
);
@@ -144,7 +146,8 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
tmbId,
url: previewUrl,
relatedId: apiFileId,
customPdfParse
customPdfParse,
getFormatText: true
});
return {
title,
@@ -164,9 +167,34 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
return url;
};

const getFileDetail = async ({
apiFileId
}: {
apiFileId: string;
}): Promise<ApiDatasetDetailResponse> => {
const fileData = await request<ApiDatasetDetailResponse>(
`/v1/file/detail`,
{
id: apiFileId
},
'GET'
);

if (fileData) {
return {
id: fileData.id,
name: fileData.name,
parentId: fileData.parentId === null ? '' : fileData.parentId
};
}

return Promise.reject('File not found');
};

return {
getFileContent,
listFiles,
getFilePreviewUrl
getFilePreviewUrl,
getFileDetail
};
};
27  packages/service/core/dataset/apiDataset/index.ts  Normal file
@@ -0,0 +1,27 @@
import type {
APIFileServer,
YuqueServer,
FeishuServer
} from '@fastgpt/global/core/dataset/apiDataset';
import { useApiDatasetRequest } from './api';
import { useYuqueDatasetRequest } from '../yuqueDataset/api';
import { useFeishuDatasetRequest } from '../feishuDataset/api';

export const getApiDatasetRequest = async (data: {
apiServer?: APIFileServer;
yuqueServer?: YuqueServer;
feishuServer?: FeishuServer;
}) => {
const { apiServer, yuqueServer, feishuServer } = data;

if (apiServer) {
return useApiDatasetRequest({ apiServer });
}
if (yuqueServer) {
return useYuqueDatasetRequest({ yuqueServer });
}
if (feishuServer) {
return useFeishuDatasetRequest({ feishuServer });
}
return Promise.reject('Can not find api dataset server');
};
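Note (not part of the diff): a minimal usage sketch of the new getApiDatasetRequest dispatcher above, assuming the caller already holds a valid APIFileServer config; the helper name is hypothetical, and the returned client exposes the listFiles/getFileContent surface used elsewhere in this changeset.

import type { APIFileServer } from '@fastgpt/global/core/dataset/apiDataset';
import { getApiDatasetRequest } from './apiDataset';

// Hypothetical helper: list the root folder of a generic API dataset source.
async function listRootFiles(apiServer: APIFileServer) {
  // The dispatcher returns the matching client (generic API, Yuque or Feishu).
  const client = await getApiDatasetRequest({ apiServer });
  return client.listFiles({ parentId: null });
}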
@@ -1,30 +0,0 @@
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import { type FeishuServer, type YuqueServer } from '@fastgpt/global/core/dataset/apiDataset';

export enum ProApiDatasetOperationTypeEnum {
LIST = 'list',
READ = 'read',
CONTENT = 'content',
DETAIL = 'detail'
}

export type ProApiDatasetCommonParams = {
feishuServer?: FeishuServer;
yuqueServer?: YuqueServer;
};

export type GetProApiDatasetFileListParams = ProApiDatasetCommonParams & {
parentId?: ParentIdType;
};

export type GetProApiDatasetFileContentParams = ProApiDatasetCommonParams & {
apiFileId: string;
};

export type GetProApiDatasetFilePreviewUrlParams = ProApiDatasetCommonParams & {
apiFileId: string;
};

export type GetProApiDatasetFileDetailParams = ProApiDatasetCommonParams & {
apiFileId: string;
};
@@ -34,15 +34,17 @@ import { getTrainingModeByCollection } from './utils';
import {
computeChunkSize,
computeChunkSplitter,
computeParagraphChunkDeep,
getLLMMaxChunkSize
} from '@fastgpt/global/core/dataset/training/utils';
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';

export const createCollectionAndInsertData = async ({
dataset,
rawText,
relatedId,
createCollectionParams,
isQAImport = false,
backupParse = false,
billId,
session
}: {
@@ -50,8 +52,8 @@ export const createCollectionAndInsertData = async ({
rawText: string;
relatedId?: string;
createCollectionParams: CreateOneCollectionParams;
backupParse?: boolean;

isQAImport?: boolean;
billId?: string;
session?: ClientSession;
}) => {
@@ -73,15 +75,33 @@ export const createCollectionAndInsertData = async ({
llmModel: getLLMModel(dataset.agentModel)
});
const chunkSplitter = computeChunkSplitter(createCollectionParams);
const paragraphChunkDeep = computeParagraphChunkDeep(createCollectionParams);

if (
trainingType === DatasetCollectionDataProcessModeEnum.qa ||
trainingType === DatasetCollectionDataProcessModeEnum.backup
) {
delete createCollectionParams.chunkTriggerType;
delete createCollectionParams.chunkTriggerMinSize;
delete createCollectionParams.dataEnhanceCollectionName;
delete createCollectionParams.imageIndex;
delete createCollectionParams.autoIndexes;
delete createCollectionParams.indexSize;
delete createCollectionParams.qaPrompt;
}

// 1. split chunks
const chunks = rawText2Chunks({
rawText,
chunkTriggerType: createCollectionParams.chunkTriggerType,
chunkTriggerMinSize: createCollectionParams.chunkTriggerMinSize,
chunkSize,
paragraphChunkDeep,
paragraphChunkMinSize: createCollectionParams.paragraphChunkMinSize,
maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)),
overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : [],
isQAImport
backupParse
});

// 2. auth limit
@@ -102,6 +122,7 @@ export const createCollectionAndInsertData = async ({
const { _id: collectionId } = await createOneCollection({
...createCollectionParams,
trainingType,
paragraphChunkDeep,
chunkSize,
chunkSplitter,

@@ -157,6 +178,10 @@ export const createCollectionAndInsertData = async ({
billId: traingBillId,
data: chunks.map((item, index) => ({
...item,
indexes: item.indexes?.map((text) => ({
type: DatasetDataIndexTypeEnum.custom,
text
})),
chunkIndex: index
})),
session
@@ -198,46 +223,19 @@ export type CreateOneCollectionParams = CreateDatasetCollectionParams & {
tmbId: string;
session?: ClientSession;
};
export async function createOneCollection({
teamId,
tmbId,
name,
parentId,
datasetId,
type,
export async function createOneCollection({ session, ...props }: CreateOneCollectionParams) {
const {
teamId,
parentId,
datasetId,
tags,

createTime,
updateTime,

hashRawText,
rawTextLength,
metadata = {},
tags,

nextSyncTime,

fileId,
rawLink,
externalFileId,
externalFileUrl,
apiFileId,

// Parse settings
customPdfParse,
imageIndex,
autoIndexes,

// Chunk settings
trainingType,
chunkSettingMode,
chunkSplitMode,
chunkSize,
indexSize,
chunkSplitter,
qaPrompt,

session
}: CreateOneCollectionParams) {
fileId,
rawLink,
externalFileId,
externalFileUrl,
apiFileId
} = props;
// Create collection tags
const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session });

@@ -245,41 +243,18 @@ export async function createOneCollection({
const [collection] = await MongoDatasetCollection.create(
[
{
...props,
teamId,
tmbId,
parentId: parentId || null,
datasetId,
name,
type,

rawTextLength,
hashRawText,
tags: collectionTags,
metadata,

createTime,
updateTime,
nextSyncTime,

...(fileId ? { fileId } : {}),
...(rawLink ? { rawLink } : {}),
...(externalFileId ? { externalFileId } : {}),
...(externalFileUrl ? { externalFileUrl } : {}),
...(apiFileId ? { apiFileId } : {}),

// Parse settings
customPdfParse,
imageIndex,
autoIndexes,

// Chunk settings
trainingType,
chunkSettingMode,
chunkSplitMode,
chunkSize,
indexSize,
chunkSplitter,
qaPrompt
...(apiFileId ? { apiFileId } : {})
}
],
{ session, ordered: true }

@@ -34,9 +34,9 @@ const DatasetDataTextSchema = new Schema({

try {
DatasetDataTextSchema.index(
{ teamId: 1, datasetId: 1, fullTextToken: 'text' },
{ teamId: 1, fullTextToken: 'text' },
{
name: 'teamId_1_datasetId_1_fullTextToken_text',
name: 'teamId_1_fullTextToken_text',
default_language: 'none'
}
);
208  packages/service/core/dataset/feishuDataset/api.ts  Normal file
@@ -0,0 +1,208 @@
import type {
APIFileItem,
ApiFileReadContentResponse,
ApiDatasetDetailResponse,
FeishuServer
} from '@fastgpt/global/core/dataset/apiDataset';
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import axios, { type Method } from 'axios';
import { addLog } from '../../../common/system/log';

type ResponseDataType = {
success: boolean;
message: string;
data: any;
};

type FeishuFileListResponse = {
files: {
token: string;
parent_token: string;
name: string;
type: string;
modified_time: number;
created_time: number;
url: string;
owner_id: string;
}[];
has_more: boolean;
next_page_token: string;
};

const feishuBaseUrl = process.env.FEISHU_BASE_URL || 'https://open.feishu.cn';

export const useFeishuDatasetRequest = ({ feishuServer }: { feishuServer: FeishuServer }) => {
const instance = axios.create({
baseURL: feishuBaseUrl,
timeout: 60000
});

// Add a request interceptor
instance.interceptors.request.use(async (config) => {
if (!config.headers.Authorization) {
const { data } = await axios.post<{ tenant_access_token: string }>(
`${feishuBaseUrl}/open-apis/auth/v3/tenant_access_token/internal`,
{
app_id: feishuServer.appId,
app_secret: feishuServer.appSecret
}
);

config.headers['Authorization'] = `Bearer ${data.tenant_access_token}`;
config.headers['Content-Type'] = 'application/json; charset=utf-8';
}
return config;
});

/**
 * Check the response data
 */
const checkRes = (data: ResponseDataType) => {
if (data === undefined) {
addLog.info('yuque dataset data is empty');
return Promise.reject('服务器异常');
}
return data.data;
};
const responseError = (err: any) => {
console.log('error->', '请求错误', err);

if (!err) {
return Promise.reject({ message: '未知错误' });
}
if (typeof err === 'string') {
return Promise.reject({ message: err });
}
if (typeof err.message === 'string') {
return Promise.reject({ message: err.message });
}
if (typeof err.data === 'string') {
return Promise.reject({ message: err.data });
}
if (err?.response?.data) {
return Promise.reject(err?.response?.data);
}
return Promise.reject(err);
};

const request = <T>(url: string, data: any, method: Method): Promise<T> => {
/* Remove empty fields */
for (const key in data) {
if (data[key] === undefined) {
delete data[key];
}
}

return instance
.request({
url,
method,
data: ['POST', 'PUT'].includes(method) ? data : undefined,
params: !['POST', 'PUT'].includes(method) ? data : undefined
})
.then((res) => checkRes(res.data))
.catch((err) => responseError(err));
};

const listFiles = async ({ parentId }: { parentId?: ParentIdType }): Promise<APIFileItem[]> => {
const fetchFiles = async (pageToken?: string): Promise<FeishuFileListResponse['files']> => {
const data = await request<FeishuFileListResponse>(
`/open-apis/drive/v1/files`,
{
folder_token: parentId || feishuServer.folderToken,
page_size: 200,
page_token: pageToken
},
'GET'
);

if (data.has_more) {
const nextFiles = await fetchFiles(data.next_page_token);
return [...data.files, ...nextFiles];
}

return data.files;
};

const allFiles = await fetchFiles();

return allFiles
.filter((file) => ['folder', 'docx'].includes(file.type))
.map((file) => ({
id: file.token,
parentId: file.parent_token,
name: file.name,
type: file.type === 'folder' ? ('folder' as const) : ('file' as const),
hasChild: file.type === 'folder',
updateTime: new Date(file.modified_time * 1000),
createTime: new Date(file.created_time * 1000)
}));
};

const getFileContent = async ({
apiFileId
}: {
apiFileId: string;
}): Promise<ApiFileReadContentResponse> => {
const [{ content }, { document }] = await Promise.all([
request<{ content: string }>(
`/open-apis/docx/v1/documents/${apiFileId}/raw_content`,
{},
'GET'
),
request<{ document: { title: string } }>(
`/open-apis/docx/v1/documents/${apiFileId}`,
{},
'GET'
)
]);

return {
title: document?.title,
rawText: content
};
};

const getFilePreviewUrl = async ({ apiFileId }: { apiFileId: string }): Promise<string> => {
const { metas } = await request<{ metas: { url: string }[] }>(
`/open-apis/drive/v1/metas/batch_query`,
{
request_docs: [
{
doc_token: apiFileId,
doc_type: 'docx'
}
],
with_url: true
},
'POST'
);

return metas[0].url;
};

const getFileDetail = async ({
apiFileId
}: {
apiFileId: string;
}): Promise<ApiDatasetDetailResponse> => {
const { document } = await request<{ document: { title: string } }>(
`/open-apis/docx/v1/documents/${apiFileId}`,
{},
'GET'
);

return {
name: document?.title,
parentId: null,
id: apiFileId
};
};

return {
getFileContent,
listFiles,
getFilePreviewUrl,
getFileDetail
};
};
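Note (not part of the diff): a short sketch of how the new Feishu client above might be exercised, assuming the app credentials carry the drive/docx scopes and FEISHU_BASE_URL is reachable; the helper function is hypothetical.

import type { FeishuServer } from '@fastgpt/global/core/dataset/apiDataset';
import { useFeishuDatasetRequest } from './feishuDataset/api';

// Hypothetical check: read the first docx found under the configured folder.
async function readFirstDoc(feishuServer: FeishuServer) {
  const client = useFeishuDatasetRequest({ feishuServer });
  const files = await client.listFiles({ parentId: null }); // null falls back to feishuServer.folderToken
  const doc = files.find((f) => f.type === 'file');
  if (!doc) return;
  const { title, rawText } = await client.getFileContent({ apiFileId: doc.id });
  return { title, length: rawText.length };
}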
@@ -1,8 +1,10 @@
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import {
ChunkTriggerConfigTypeEnum,
DatasetSourceReadTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { readFileContentFromMongo } from '../../common/file/gridfs/controller';
import { urlsFetch } from '../../common/string/cheerio';
import { parseCsvTable2Chunks } from './training/utils';
import { type TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import axios from 'axios';
import { readRawContentByFileBuffer } from '../../common/file/read/utils';
@@ -12,19 +14,22 @@ import {
type FeishuServer,
type YuqueServer
} from '@fastgpt/global/core/dataset/apiDataset';
import { useApiDatasetRequest } from './apiDataset/api';
import { getApiDatasetRequest } from './apiDataset';
import Papa from 'papaparse';

export const readFileRawTextByUrl = async ({
teamId,
tmbId,
url,
customPdfParse,
getFormatText,
relatedId
}: {
teamId: string;
tmbId: string;
url: string;
customPdfParse?: boolean;
getFormatText?: boolean;
relatedId: string; // externalFileId / apiFileId
}) => {
const response = await axios({
@@ -38,7 +43,7 @@ export const readFileRawTextByUrl = async ({

const { rawText } = await readRawContentByFileBuffer({
customPdfParse,
isQAImport: false,
getFormatText,
extension,
teamId,
tmbId,
@@ -62,21 +67,21 @@ export const readDatasetSourceRawText = async ({
tmbId,
type,
sourceId,
isQAImport,
selector,
externalFileId,
apiServer,
feishuServer,
yuqueServer,
customPdfParse
customPdfParse,
getFormatText
}: {
teamId: string;
tmbId: string;
type: DatasetSourceReadTypeEnum;
sourceId: string;
customPdfParse?: boolean;
getFormatText?: boolean;

isQAImport?: boolean; // csv data
selector?: string; // link selector
externalFileId?: string; // external file dataset
apiServer?: APIFileServer; // api dataset
@@ -92,8 +97,8 @@ export const readDatasetSourceRawText = async ({
tmbId,
bucketName: BucketNameEnum.dataset,
fileId: sourceId,
isQAImport,
customPdfParse
customPdfParse,
getFormatText
});
return {
title: filename,
@@ -161,38 +166,82 @@ export const readApiServerFileContent = async ({
title?: string;
rawText: string;
}> => {
if (apiServer) {
return useApiDatasetRequest({ apiServer }).getFileContent({
teamId,
tmbId,
apiFileId,
customPdfParse
});
}

if (feishuServer || yuqueServer) {
return global.getProApiDatasetFileContent({
feishuServer,
return (
await getApiDatasetRequest({
apiServer,
yuqueServer,
apiFileId
});
}

return Promise.reject('No apiServer or feishuServer or yuqueServer');
feishuServer
})
).getFileContent({
teamId,
tmbId,
apiFileId,
customPdfParse
});
};

export const rawText2Chunks = ({
rawText,
isQAImport,
chunkTriggerType = ChunkTriggerConfigTypeEnum.minSize,
chunkTriggerMinSize = 1000,
backupParse,
chunkSize = 512,
...splitProps
}: {
rawText: string;
isQAImport?: boolean;
} & TextSplitProps) => {
if (isQAImport) {
const { chunks } = parseCsvTable2Chunks(rawText);
return chunks;

chunkTriggerType?: ChunkTriggerConfigTypeEnum;
chunkTriggerMinSize?: number; // maxSize from agent model, not store

backupParse?: boolean;
tableParse?: boolean;
} & TextSplitProps): {
q: string;
a: string;
indexes?: string[];
}[] => {
const parseDatasetBackup2Chunks = (rawText: string) => {
const csvArr = Papa.parse(rawText).data as string[][];
console.log(rawText, csvArr);

const chunks = csvArr
.slice(1)
.map((item) => ({
q: item[0] || '',
a: item[1] || '',
indexes: item.slice(2)
}))
.filter((item) => item.q || item.a);

return {
chunks
};
};

if (backupParse) {
return parseDatasetBackup2Chunks(rawText).chunks;
}

// Chunk condition
// 1. Max-size trigger: only split when the text exceeds the max size (default: 0.7 * the model's max size)
if (chunkTriggerType === ChunkTriggerConfigTypeEnum.maxSize) {
const textLength = rawText.trim().length;
const maxSize = splitProps.maxSize ? splitProps.maxSize * 0.7 : 16000;
if (textLength < maxSize) {
return [
{
q: rawText,
a: ''
}
];
}
}
// 2. Min-size trigger: only split when the text exceeds the (manually configured) minimum size
if (chunkTriggerType !== ChunkTriggerConfigTypeEnum.forceChunk) {
const textLength = rawText.trim().length;
if (textLength < chunkTriggerMinSize) {
return [{ q: rawText, a: '' }];
}
}

const { chunks } = splitText2Chunks({
@@ -203,6 +252,7 @@ export const rawText2Chunks = ({

return chunks.map((item) => ({
q: item,
a: ''
a: '',
indexes: []
}));
};
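Note (not part of the diff): an illustrative call of the reworked rawText2Chunks above; the import path is assumed, and with the default minSize trigger a text below chunkTriggerMinSize is returned as a single { q, a } pair instead of being split.

import { ChunkTriggerConfigTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { rawText2Chunks } from './read'; // path assumed

const chunks = rawText2Chunks({
  rawText: 'A short note that stays below the trigger size.',
  chunkTriggerType: ChunkTriggerConfigTypeEnum.minSize, // default
  chunkTriggerMinSize: 1000,
  chunkSize: 512,
  maxSize: 16000
});
// Expected result here: [{ q: rawText, a: '' }] — no splitting for short input.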
@@ -1,10 +1,12 @@
import { getMongoModel, Schema } from '../../common/mongo';
import {
ChunkSettingModeEnum,
ChunkTriggerConfigTypeEnum,
DataChunkSplitModeEnum,
DatasetCollectionDataProcessModeEnum,
DatasetTypeEnum,
DatasetTypeMap
DatasetTypeMap,
ParagraphChunkAIModeEnum
} from '@fastgpt/global/core/dataset/constants';
import {
TeamCollectionName,
@@ -15,12 +17,22 @@ import type { DatasetSchemaType } from '@fastgpt/global/core/dataset/type.d';
export const DatasetCollectionName = 'datasets';

export const ChunkSettings = {
imageIndex: Boolean,
autoIndexes: Boolean,
trainingType: {
type: String,
enum: Object.values(DatasetCollectionDataProcessModeEnum)
},

chunkTriggerType: {
type: String,
enum: Object.values(ChunkTriggerConfigTypeEnum)
},
chunkTriggerMinSize: Number,

dataEnhanceCollectionName: Boolean,

imageIndex: Boolean,
autoIndexes: Boolean,

chunkSettingMode: {
type: String,
enum: Object.values(ChunkSettingModeEnum)
@@ -29,6 +41,12 @@ export const ChunkSettings = {
type: String,
enum: Object.values(DataChunkSplitModeEnum)
},
paragraphChunkAIMode: {
type: String,
enum: Object.values(ParagraphChunkAIModeEnum)
},
paragraphChunkDeep: Number,
paragraphChunkMinSize: Number,
chunkSize: Number,
chunkSplitter: String,

@@ -115,14 +133,13 @@ const DatasetSchema = new Schema({

// abandoned
autoSync: Boolean,
externalReadUrl: {
type: String
},
externalReadUrl: String,
defaultPermission: Number
});

try {
DatasetSchema.index({ teamId: 1 });
DatasetSchema.index({ type: 1 });
} catch (error) {
console.log(error);
}

@@ -27,6 +27,7 @@ import { type ChatItemType } from '@fastgpt/global/core/chat/type';
import type { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
import { datasetSearchQueryExtension } from './utils';
import type { RerankModelItemType } from '@fastgpt/global/core/ai/model.d';
import { addLog } from '../../../common/system/log';

export type SearchDatasetDataProps = {
histories: ChatItemType[];
@@ -474,7 +475,7 @@ export async function searchDatasetData(
).lean()
]);

const set = new Map<string, number>();
const set = new Set<string>();
const formatResult = results
.map((item, index) => {
const collection = collections.find((col) => String(col._id) === String(item.collectionId));
@@ -507,7 +508,7 @@ export async function searchDatasetData(
.filter((item) => {
if (!item) return false;
if (set.has(item.id)) return false;
set.set(item.id, 1);
set.add(item.id);
return true;
})
.map((item, index) => {
@@ -544,113 +545,125 @@ export async function searchDatasetData(
};
}

const searchResults = (
await Promise.all(
datasetIds.map(async (id) => {
return MongoDatasetDataText.aggregate(
[
{
$match: {
teamId: new Types.ObjectId(teamId),
datasetId: new Types.ObjectId(id),
$text: { $search: await jiebaSplit({ text: query }) },
...(filterCollectionIdList
? {
collectionId: {
$in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
}
}
: {}),
...(forbidCollectionIdList && forbidCollectionIdList.length > 0
? {
collectionId: {
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
}
}
: {})
}
},
{
$sort: {
score: { $meta: 'textScore' }
}
},
{
$limit: limit
},
{
$project: {
_id: 1,
collectionId: 1,
dataId: 1,
score: { $meta: 'textScore' }
}
}
],
{
...readFromSecondary
try {
const searchResults = (await MongoDatasetDataText.aggregate(
[
{
$match: {
teamId: new Types.ObjectId(teamId),
$text: { $search: await jiebaSplit({ text: query }) },
datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) },
...(filterCollectionIdList
? {
collectionId: {
$in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
}
}
: {}),
...(forbidCollectionIdList && forbidCollectionIdList.length > 0
? {
collectionId: {
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
}
}
: {})
}
},
{
$sort: {
score: { $meta: 'textScore' }
}
},
{
$limit: limit
},
{
$project: {
_id: 1,
collectionId: 1,
dataId: 1,
score: { $meta: 'textScore' }
}
);
})
)
).flat() as (DatasetDataTextSchemaType & { score: number })[];

// Get data and collections
const [dataList, collections] = await Promise.all([
MongoDatasetData.find(
{
_id: { $in: searchResults.map((item) => item.dataId) }
},
'_id datasetId collectionId updateTime q a chunkIndex indexes',
{ ...readFromSecondary }
).lean(),
MongoDatasetCollection.find(
{
_id: { $in: searchResults.map((item) => item.collectionId) }
},
'_id name fileId rawLink apiFileId externalFileId externalFileUrl',
{ ...readFromSecondary }
).lean()
]);

return {
fullTextRecallResults: searchResults
.map((item, index) => {
const collection = collections.find(
(col) => String(col._id) === String(item.collectionId)
);
if (!collection) {
console.log('Collection is not found', item);
return;
}
const data = dataList.find((data) => String(data._id) === String(item.dataId));
if (!data) {
console.log('Data is not found', item);
return;
}
],
{
...readFromSecondary
}
)) as (DatasetDataTextSchemaType & { score: number })[];

return {
id: String(data._id),
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
updateTime: data.updateTime,
q: data.q,
a: data.a,
chunkIndex: data.chunkIndex,
indexes: data.indexes,
...getCollectionSourceData(collection),
score: [
{
type: SearchScoreTypeEnum.fullText,
value: item.score || 0,
index
}
]
};
})
.filter(Boolean) as SearchDataResponseItemType[],
tokenLen: 0
};
// Get data and collections
const [dataList, collections] = await Promise.all([
MongoDatasetData.find(
{
_id: { $in: searchResults.map((item) => item.dataId) }
},
'_id datasetId collectionId updateTime q a chunkIndex indexes',
{ ...readFromSecondary }
).lean(),
MongoDatasetCollection.find(
{
_id: { $in: searchResults.map((item) => item.collectionId) }
},
'_id name fileId rawLink apiFileId externalFileId externalFileUrl',
{ ...readFromSecondary }
).lean()
]);

return {
fullTextRecallResults: searchResults
.map((item, index) => {
const collection = collections.find(
(col) => String(col._id) === String(item.collectionId)
);
if (!collection) {
console.log('Collection is not found', item);
return;
}
const data = dataList.find((data) => String(data._id) === String(item.dataId));
if (!data) {
console.log('Data is not found', item);
return;
}

return {
id: String(data._id),
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
updateTime: data.updateTime,
q: data.q,
a: data.a,
chunkIndex: data.chunkIndex,
indexes: data.indexes,
...getCollectionSourceData(collection),
score: [
{
type: SearchScoreTypeEnum.fullText,
value: item.score || 0,
index
}
]
};
})
.filter((item) => {
if (!item) return false;
return true;
})
.map((item, index) => {
if (!item) return;
return {
...item,
score: item.score.map((item) => ({ ...item, index }))
};
}) as SearchDataResponseItemType[],
tokenLen: 0
};
} catch (error) {
addLog.error('Full text search error', error);
return {
fullTextRecallResults: [],
tokenLen: 0
};
}
};
const multiQueryRecall = async ({
embeddingLimit,

@@ -1,6 +1,5 @@
export enum ImportDataSourceEnum {
fileLocal = 'fileLocal',
fileLink = 'fileLink',
fileCustom = 'fileCustom',
tableLocal = 'tableLocal'
fileCustom = 'fileCustom'
}

@@ -1,16 +0,0 @@
import Papa from 'papaparse';

export const parseCsvTable2Chunks = (rawText: string) => {
const csvArr = Papa.parse(rawText).data as string[][];

const chunks = csvArr
.map((item) => ({
q: item[0] || '',
a: item[1] || ''
}))
.filter((item) => item.q || item.a);

return {
chunks
};
};
Some files were not shown because too many files have changed in this diff.