Compare commits: v4.9.8-alp...test-html (46 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | d7a722a609 |  |
|  | 0f866fc552 |  |
|  | 05c7ba4483 |  |
|  | fa80ce3a77 |  |
|  | 830358aa72 |  |
|  | 02b214b3ec |  |
|  | a171c7b11c |  |
|  | 802de11363 |  |
|  | b4ecfb0b79 |  |
|  | 331b851a78 |  |
|  | 50d235c42a |  |
|  | 9838593451 |  |
|  | c25cd48e72 |  |
|  | 874300a56a |  |
|  | 1dea2b71b4 |  |
|  | a8673344b1 |  |
|  | 9709ae7a4f |  |
|  | fae76e887a |  |
|  | 9af92d1eae |  |
|  | 6a6719e93d |  |
|  | 50481f4ca8 |  |
|  | 88bd3aaa9e |  |
|  | dd3c251603 |  |
|  | aa55f059d4 |  |
|  | 89c9a02650 |  |
|  | 0f3bfa280a |  |
|  | 593ebfd269 |  |
|  | f6dc2204f5 |  |
|  | d44c338059 |  |
|  | 1dac2b70ec |  |
|  | 9fef3e15fb |  |
|  | 2d2d0fffe9 |  |
|  | c6e0b5a1e7 |  |
|  | 932aa28a1f |  |
|  | 9c59bc2c17 |  |
|  | e145f63554 |  |
|  | 554b2ca8dc |  |
|  | 4e83840c14 |  |
|  | a6c80684d1 |  |
|  | a4db03a3b7 |  |
|  | cba8f773fe |  |
|  | bd93f28d6f |  |
|  | 2063cb6314 |  |
|  | 12acaf491c |  |
|  | 3688842cc7 |  |
|  | 398d131bac |  |
.vscode/settings.json (vendored, 2 changes)

```diff
@@ -21,7 +21,7 @@
   "i18n-ally.namespace": true,
   "i18n-ally.pathMatcher": "{locale}/{namespaces}.json",
   "i18n-ally.extract.targetPickingStrategy": "most-similar-by-key",
-  "i18n-ally.translate.engines": ["google"],
+  "i18n-ally.translate.engines": ["deepl","google"],
   "[typescript]": {
     "editor.defaultFormatter": "esbenp.prettier-vscode"
   },
```
```diff
@@ -132,15 +132,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -150,8 +150,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:
```
```diff
@@ -109,15 +109,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -127,8 +127,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:
```
```diff
@@ -23,7 +23,7 @@ services:
     volumes:
       - ./pg/data:/var/lib/postgresql/data
     healthcheck:
-      test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy']
+      test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'postgres']
       interval: 5s
       timeout: 5s
       retries: 10
@@ -96,15 +96,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -114,8 +114,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:
```
```diff
@@ -72,15 +72,15 @@ services:

   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -90,8 +90,8 @@ services:
      - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:
```
docSite/assets/imgs/official_account_faq.png (new binary file, 386 KiB; binary file not shown)
```diff
@@ -959,10 +959,16 @@ curl --location --request POST 'http://localhost:3000/api/core/chat/getHistories
 {{< markdownify >}}

 {{% alert icon=" " context="success" %}}
+目前仅能获取到当前 API key 的创建者的对话。
+
 - appId - 应用 Id
 - offset - 偏移量,即从第几条数据开始取
 - pageSize - 记录数量
 - source - 对话源。source=api,表示获取通过 API 创建的对话(不会获取到页面上的对话记录)
+- startCreateTime - 开始创建时间(可选)
+- endCreateTime - 结束创建时间(可选)
+- startUpdateTime - 开始更新时间(可选)
+- endUpdateTime - 结束更新时间(可选)
 {{% /alert %}}

 {{< /markdownify >}}
```
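The four new optional time parameters make it possible to page through only a window of conversations. A minimal request-body sketch in TypeScript; the diff documents the parameter names but not their format, so the ISO 8601 strings below are an assumption:

```typescript
// Hypothetical getHistories body; the appId value and date format are assumptions.
const body = {
  appId: 'your-app-id',
  offset: 0,
  pageSize: 20,
  source: 'api',
  startUpdateTime: '2025-06-01T00:00:00.000Z', // only chats updated on/after this
  endUpdateTime: '2025-06-30T23:59:59.999Z' // ...and on/before this
};
```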
docSite/content/zh-cn/docs/development/upgrading/4910.md (new file, 50 lines)

```diff
@@ -0,0 +1,50 @@
+---
+title: 'V4.9.10'
+description: 'FastGPT V4.9.10 更新说明'
+icon: 'upgrade'
+draft: false
+toc: true
+weight: 790
+---
+
+## 升级指南
+
+重要提示:本次更新会重新构建全文索引,构建期间,全文检索结果会为空,4c16g 700 万组全文索引大致消耗 25 分钟。如需无缝升级,需自行做表同步工程。
+
+### 1. 做好数据备份
+
+### 2. 更新镜像 tag
+
+- 更新 FastGPT 镜像 tag: v4.9.10-fix2
+- 更新 FastGPT 商业版镜像 tag: v4.9.10-fix2
+- mcp_server 无需更新
+- Sandbox 无需更新
+- AIProxy 无需更新
+
+## 🚀 新增内容
+
+1. 支持 PG 设置`systemEnv.hnswMaxScanTuples`参数,提高迭代搜索的数据总量。
+2. 知识库预处理参数增加 “分块条件”,可控制某些情况下不进行分块处理。
+3. 知识库预处理参数增加 “段落优先” 模式,可控制最大段落深度。原“长度优先”模式,不再内嵌段落优先逻辑。
+4. 工作流调整为单向接入和接出,支持快速的添加下一步节点。
+5. 开放飞书和语雀知识库到开源版。
+6. gemini 和 claude 最新模型预设。
+
+## ⚙️ 优化
+
+1. LLM stream调用,默认超时调大。
+2. 部分确认交互优化。
+3. 纠正原先知识库的“表格数据集”名称,改成“备份导入”。同时支持知识库索引的导出和导入。
+4. 工作流知识库引用上限,如果工作流中没有相关 AI 节点,则交互模式改成纯手动输入,并且上限为 1000万。
+5. 语音输入,移动端判断逻辑,准确判断是否为手机,而不是小屏。
+6. 优化上下文截取算法,至少保证留下一组 Human 信息。
+
+## 🐛 修复
+
+1. 全文检索多知识库时排序得分排序不正确。
+2. 流响应捕获 finish_reason 可能不正确。
+3. 工具调用模式,未保存思考输出。
+4. 知识库 indexSize 参数未生效。
+5. 工作流嵌套 2 层后,获取预览引用、上下文不正确。
+6. xlsx 转成 Markdown 时候,前面会多出一个空格。
+7. 读取 Markdown 文件时,Base64 图片未进行额外抓换保存。
```
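Item 1 of the new-features list introduces `systemEnv.hnswMaxScanTuples`. A sketch of where it would sit, expressed against the `SystemEnvType` shape added later in this compare; the concrete values are placeholders, not release defaults:

```typescript
// Placeholder values; only the field names come from this compare.
const systemEnv: Partial<SystemEnvType> = {
  hnswEfSearch: 100, // existing pg vector search breadth knob
  hnswMaxScanTuples: 100000 // new: cap on tuples scanned by pg iterative search
};
```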
docSite/content/zh-cn/docs/development/upgrading/4911.md (new file, 25 lines)

```diff
@@ -0,0 +1,25 @@
+---
+title: 'V4.9.11(进行中)'
+description: 'FastGPT V4.9.11 更新说明'
+icon: 'upgrade'
+draft: false
+toc: true
+weight: 789
+---
+
+
+## 🚀 新增内容
+
+1. 工作流中增加节点搜索功能。
+2. 工作流中,子流程版本控制,可选择“保持最新版本”,无需手动更新。
+
+## ⚙️ 优化
+
+1. 原文缓存改用 gridfs 存储,提高上限。
+
+## 🐛 修复
+
+1. 工作流中,管理员声明的全局系统工具,无法进行版本管理。
+2. 工具调用节点前,有交互节点时,上下文异常。
+3. 修复备份导入,小于 1000 字时,无法分块问题。
+4. 自定义 PDF 解析,无法保存 base64 图片。
```
```diff
@@ -1,5 +1,5 @@
 ---
-title: 'V4.9.8(进行中)'
+title: 'V4.9.8'
 description: 'FastGPT V4.9.8 更新说明'
 icon: 'upgrade'
 draft: false
@@ -7,6 +7,17 @@ toc: true
 weight: 792
 ---
+
+## 升级指南
+
+### 1. 做好数据备份
+
+### 2. 更新镜像 tag
+
+- 更新 FastGPT 镜像 tag: v4.9.8
+- 更新 FastGPT 商业版镜像 tag: v4.9.8
+- mcp_server 无需更新
+- Sandbox 无需更新
+- AIProxy 无需更新
+
 ## 🚀 新增内容

```
docSite/content/zh-cn/docs/development/upgrading/499.md (new file, 43 lines)

```diff
@@ -0,0 +1,43 @@
+---
+title: 'V4.9.9'
+description: 'FastGPT V4.9.9 更新说明'
+icon: 'upgrade'
+draft: false
+toc: true
+weight: 791
+---
+
+## 升级指南
+
+### 1. 做好数据备份
+
+### 2. 商业版用户替换新 License
+
+商业版用户可以联系 FastGPT 团队支持同学,获取 License 替换方案。替换后,可以直接升级系统,管理后台会提示输入新 License。
+
+### 3. 更新镜像 tag
+
+- 更新 FastGPT 镜像 tag: v4.9.9
+- 更新 FastGPT 商业版镜像 tag: v4.9.9
+- mcp_server 无需更新
+- Sandbox 无需更新
+- AIProxy 无需更新
+
+## 🚀 新增内容
+
+1. 切换 SessionId 来替代 JWT 实现登录鉴权,可控制最大登录客户端数量。
+2. 新的商业版 License 管理模式。
+3. 公众号调用,显示记录 chat 对话错误,方便排查。
+4. API 知识库支持 BasePath 选择,需增加 API 接口,具体可见[API 知识库介绍](/docs/guide/knowledge_base/api_dataset/#4-获取文件详细信息用于获取文件信息)
+
+## ⚙️ 优化
+
+1. 优化工具调用,新工具的判断逻辑。
+2. 调整 Cite 引用提示词。
+
+## 🐛 修复
+
+1. 无法正常获取应用历史保存/发布记录。
+2. 成员创建 MCP 工具权限问题。
+3. 来源引用展示,存在 ID 传递错误,导致提示无权操作该文件。
+4. 回答标注前端数据报错。
```
````diff
@@ -43,7 +43,7 @@ type ResponseType = {
 // 文件列表中,单项的文件类型
 type FileListItem = {
   id: string;
-  parentId: string | null;
+  parentId: string //也可能为 null 或者 undefined 类型;
   name: string;
   type: 'file' | 'folder';
   updateTime: Date;
@@ -59,7 +59,7 @@ type FileListItem = {
 {{< markdownify >}}

 {{% alert icon=" " context="success" %}}
-- parentId - 父级 id,可选,或者 null。
+- parentId - 父级 id,可选,或者 null | undefined。
 - searchKey - 检索词,可选
 {{% /alert %}}

@@ -68,7 +68,7 @@ curl --location --request POST '{{baseURL}}/v1/file/list' \
 --header 'Authorization: Bearer {{authorization}}' \
 --header 'Content-Type: application/json' \
 --data-raw '{
-    "parentId": null,
+    "parentId": "",
     "searchKey": ""
 }'
 ```
````
````diff
@@ -185,3 +185,40 @@ curl --location --request GET '{{baseURL}}/v1/file/read?id=xx' \
 {{< /tabs >}}
+
+
+### 4. 获取文件详细信息(用于获取文件信息)
+
+{{< tabs tabTotal="2" >}}
+{{< tab tabName="请求示例" >}}
+{{< markdownify >}}
+
+id 为文件的 id。
+
+```bash
+curl --location --request GET '{{baseURL}}/v1/file/detail?id=xx' \
+--header 'Authorization: Bearer {{authorization}}'
+```
+
+{{< /markdownify >}}
+{{< /tab >}}
+
+{{< tab tabName="响应示例" >}}
+{{< markdownify >}}
+
+```json
+{
+  "code": 200,
+  "success": true,
+  "message": "",
+  "data": {
+    "id": "docs",
+    "parentId": "",
+    "name": "docs"
+  }
+}
+```
+
+{{< /markdownify >}}
+{{< /tab >}}
+{{< /tabs >}}
````
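For reference, the same detail call expressed in TypeScript; `baseURL` and `authorization` are the same placeholders the curl example uses:

```typescript
// Sketch of the new /v1/file/detail endpoint; mirrors the curl request above.
async function getFileDetail(baseURL: string, authorization: string, id: string) {
  const res = await fetch(`${baseURL}/v1/file/detail?id=${encodeURIComponent(id)}`, {
    headers: { Authorization: `Bearer ${authorization}` }
  });
  return (await res.json()).data; // e.g. { id: 'docs', parentId: '', name: 'docs' }
}
```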
```diff
@@ -28,7 +28,6 @@ FastGPT 商业版是基于 FastGPT 开源版的增强版本,增加了一些独
 | 应用发布安全配置 | ❌ | ✅ | ✅ |
 | 内容审核 | ❌ | ✅ | ✅ |
 | web站点同步 | ❌ | ✅ | ✅ |
-| 主流文档库接入(目前支持:语雀、飞书) | ❌ | ✅ | ✅ |
 | 增强训练模式 | ❌ | ✅ | ✅ |
 | 第三方应用快速接入(飞书、公众号) | ❌ | ✅ | ✅ |
 | 管理后台 | ❌ | ✅ | 不需要 |
```
```diff
@@ -132,7 +132,9 @@ weight: 506
 ### 公众号没响应

 检查应用对话日志,如果有对话日志,但是微信公众号无响应,则是白名单 IP未成功。
-添加白名单IP 后,通常需要等待几分钟微信更新。
+添加白名单IP 后,通常需要等待几分钟微信更新。可以在对话日志中,找点错误日志。
+
+

 ### 如何新开一个聊天记录
```
env.d.ts (vendored, 2 changes)

```diff
@@ -4,7 +4,6 @@ declare global {
       LOG_DEPTH: string;
       DEFAULT_ROOT_PSW: string;
       DB_MAX_LINK: string;
-      TOKEN_KEY: string;
       FILE_TOKEN_KEY: string;
       ROOT_KEY: string;
       OPENAI_BASE_URL: string;
@@ -37,6 +36,7 @@ declare global {
       CONFIG_JSON_PATH?: string;
       PASSWORD_LOGIN_LOCK_SECONDS?: string;
       PASSWORD_EXPIRED_MONTH?: string;
+      MAX_LOGIN_SESSION?: string;
     }
   }
 }
```
```diff
@@ -27,7 +27,7 @@ const datasetErr = [
   },
   {
     statusText: DatasetErrEnum.unExist,
-    message: 'core.dataset.error.unExistDataset'
+    message: i18nT('common:core.dataset.error.unExistDataset')
   },
   {
     statusText: DatasetErrEnum.unExistCollection,
```
```diff
@@ -2,13 +2,28 @@ import { type ErrType } from '../errorCode';
 import { i18nT } from '../../../../web/i18n/utils';
 /* dataset: 509000 */
 export enum SystemErrEnum {
-  communityVersionNumLimit = 'communityVersionNumLimit'
+  communityVersionNumLimit = 'communityVersionNumLimit',
+  licenseAppAmountLimit = 'licenseAppAmountLimit',
+  licenseDatasetAmountLimit = 'licenseDatasetAmountLimit',
+  licenseUserAmountLimit = 'licenseUserAmountLimit'
 }

 const systemErr = [
   {
     statusText: SystemErrEnum.communityVersionNumLimit,
     message: i18nT('common:code_error.system_error.community_version_num_limit')
+  },
+  {
+    statusText: SystemErrEnum.licenseAppAmountLimit,
+    message: i18nT('common:code_error.system_error.license_app_amount_limit')
+  },
+  {
+    statusText: SystemErrEnum.licenseDatasetAmountLimit,
+    message: i18nT('common:code_error.system_error.license_dataset_amount_limit')
+  },
+  {
+    statusText: SystemErrEnum.licenseUserAmountLimit,
+    message: i18nT('common:code_error.system_error.license_user_amount_limit')
   }
 ];

```
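A sketch of how one of the new license-limit codes would resolve to its message, mirroring the lookup shape of the `systemErr` array above; the `findMessage` helper is hypothetical, not part of the diff:

```typescript
// Hypothetical helper; systemErr is the array defined in the diff above.
const findMessage = (statusText: SystemErrEnum) =>
  systemErr.find((item) => item.statusText === statusText)?.message;

findMessage(SystemErrEnum.licenseUserAmountLimit);
// -> i18nT('common:code_error.system_error.license_user_amount_limit')
```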
```diff
@@ -5,7 +5,7 @@ export const checkPasswordRule = (password: string) => {
     /[A-Z]/, // Contains uppercase letters
     /[!@#$%^&*()_+=-]/ // Contains special characters
   ];
-  const validChars = /^[\dA-Za-z!@#$%^&*()_+=-]{6,100}$/;
+  const validChars = /^[\dA-Za-z!@#$%^&*()_+=-]{8,100}$/;

   // Check length and valid characters
   if (!validChars.test(password)) return false;
```
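The only functional change here is the length floor moving from 6 to 8 characters. A quick sketch of the effect, using just the changed regex:

```typescript
// Only the length bound changed; the allowed character set is the same as before.
const validChars = /^[\dA-Za-z!@#$%^&*()_+=-]{8,100}$/;

console.log(validChars.test('abc123')); // false: 6 chars passed the old rule, fails now
console.log(validChars.test('abc12345')); // true: 8+ chars still pass this gate
```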
````diff
@@ -7,6 +7,10 @@ export const CUSTOM_SPLIT_SIGN = '-----CUSTOM_SPLIT_SIGN-----';
 type SplitProps = {
   text: string;
   chunkSize: number;
+
+  paragraphChunkDeep?: number; // Paragraph deep
+  paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
+
   maxSize?: number;
   overlapRatio?: number;
   customReg?: string[];
@@ -108,6 +112,8 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   let {
     text = '',
     chunkSize,
+    paragraphChunkDeep = 5,
+    paragraphChunkMinSize = 100,
     maxSize = defaultMaxChunkSize,
     overlapRatio = 0.15,
     customReg = []
@@ -123,7 +129,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   text = text.replace(/(```[\s\S]*?```|~~~[\s\S]*?~~~)/g, function (match) {
     return match.replace(/\n/g, codeBlockMarker);
   });
-  // 2. 表格处理 - 单独提取表格出来,进行表头合并
+  // 2. Markdown 表格处理 - 单独提取表格出来,进行表头合并
   const tableReg =
     /(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n?)*)(?:\n|$)/g;
   const tableDataList = text.match(tableReg);
@@ -143,25 +149,40 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   text = text.replace(/(\r?\n|\r){3,}/g, '\n\n\n');

   // The larger maxLen is, the next sentence is less likely to trigger splitting
-  const markdownIndex = 4;
-  const forbidOverlapIndex = 8;
+  const customRegLen = customReg.length;
+  const markdownIndex = paragraphChunkDeep - 1;
+  const forbidOverlapIndex = customRegLen + markdownIndex + 4;
+
+  const markdownHeaderRules = ((deep?: number): { reg: RegExp; maxLen: number }[] => {
+    if (!deep || deep === 0) return [];
+
+    const maxDeep = Math.min(deep, 8); // Maximum 8 levels
+    const rules: { reg: RegExp; maxLen: number }[] = [];
+
+    for (let i = 1; i <= maxDeep; i++) {
+      const hashSymbols = '#'.repeat(i);
+      rules.push({
+        reg: new RegExp(`^(${hashSymbols}\\s[^\\n]+\\n)`, 'gm'),
+        maxLen: chunkSize
+      });
+    }
+
+    return rules;
+  })(paragraphChunkDeep);

   const stepReges: { reg: RegExp | string; maxLen: number }[] = [
     ...customReg.map((text) => ({
       reg: text.replaceAll('\\n', '\n'),
       maxLen: chunkSize
     })),
-    { reg: /^(#\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(##\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(###\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(####\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(#####\s[^\n]+\n)/gm, maxLen: chunkSize },
+    ...markdownHeaderRules,
     { reg: /([\n](```[\s\S]*?```|~~~[\s\S]*?~~~))/g, maxLen: maxSize }, // code block
+    // HTML Table tag 尽可能保障完整
     {
       reg: /(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n)*)/g,
-      maxLen: Math.min(chunkSize * 1.5, maxSize)
-    }, // Table 尽可能保证完整性
+      maxLen: chunkSize
+    }, // Markdown Table 尽可能保证完整性
     { reg: /(\n{2,})/g, maxLen: chunkSize },
     { reg: /([\n])/g, maxLen: chunkSize },
     // ------ There's no overlap on the top
@@ -172,12 +193,10 @@ const commonSplit = (props: SplitProps): SplitResponse => {
     { reg: /([,]|,\s)/g, maxLen: chunkSize }
   ];

-  const customRegLen = customReg.length;
   const checkIsCustomStep = (step: number) => step < customRegLen;
   const checkIsMarkdownSplit = (step: number) =>
     step >= customRegLen && step <= markdownIndex + customRegLen;
-  const checkForbidOverlap = (step: number) => step <= forbidOverlapIndex + customRegLen;
+  const checkForbidOverlap = (step: number) => step <= forbidOverlapIndex;

   // if use markdown title split, Separate record title
   const getSplitTexts = ({ text, step }: { text: string; step: number }) => {
@@ -301,6 +320,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   const splitTexts = getSplitTexts({ text, step });

   const chunks: string[] = [];
+
   for (let i = 0; i < splitTexts.length; i++) {
     const item = splitTexts[i];

@@ -443,7 +463,6 @@ const commonSplit = (props: SplitProps): SplitResponse => {
 */
 export const splitText2Chunks = (props: SplitProps): SplitResponse => {
   let { text = '' } = props;
-  const start = Date.now();
   const splitWithCustomSign = text.split(CUSTOM_SPLIT_SIGN);

   const splitResult = splitWithCustomSign.map((item) => {
````
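The hard-coded `#` through `#####` rules are replaced by a generator driven by `paragraphChunkDeep`. A standalone sketch of what that generator yields for a depth of 3, with the logic copied from the diff and wrapped so it runs on its own:

```typescript
// Standalone rewrite of the header-rule generator from the diff above.
const chunkSize = 512; // placeholder; supplied via SplitProps in the real code

const markdownHeaderRules = ((deep?: number): { reg: RegExp; maxLen: number }[] => {
  if (!deep || deep === 0) return [];
  const maxDeep = Math.min(deep, 8); // capped at 8 heading levels
  const rules: { reg: RegExp; maxLen: number }[] = [];
  for (let i = 1; i <= maxDeep; i++) {
    const hashSymbols = '#'.repeat(i);
    rules.push({ reg: new RegExp(`^(${hashSymbols}\\s[^\\n]+\\n)`, 'gm'), maxLen: chunkSize });
  }
  return rules;
})(3);

console.log(markdownHeaderRules.map((r) => r.reg.source));
// [ '^(#\\s[^\\n]+\\n)', '^(##\\s[^\\n]+\\n)', '^(###\\s[^\\n]+\\n)' ]
```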
packages/global/common/system/types/index.d.ts (vendored, 25 changes)

```diff
@@ -70,6 +70,9 @@ export type FastGPTFeConfigsType = {
   show_publish_dingtalk?: boolean;
   show_publish_offiaccount?: boolean;

+  show_dataset_enhance?: boolean;
+  show_batch_eval?: boolean;
+
   concatMd?: string;
   docUrl?: string;
   openAPIDocUrl?: string;
@@ -127,9 +130,11 @@ export type SystemEnvType = {
   vectorMaxProcess: number;
   qaMaxProcess: number;
   vlmMaxProcess: number;
-  hnswEfSearch: number;
   tokenWorkers: number; // token count max worker
+
+  hnswEfSearch: number;
+  hnswMaxScanTuples: number;

   oneapiUrl?: string;
   chatApiKey?: string;
@@ -142,3 +147,21 @@ export type customPdfParseType = {
   doc2xKey?: string;
   price?: number;
 };
+
+export type LicenseDataType = {
+  startTime: string;
+  expiredTime: string;
+  company: string;
+  description?: string; // 描述
+  hosts?: string[]; // 管理端有效域名
+  maxUsers?: number; // 最大用户数,不填默认不上限
+  maxApps?: number; // 最大应用数,不填默认不上限
+  maxDatasets?: number; // 最大数据集数,不填默认不上限
+  functions: {
+    sso: boolean;
+    pay: boolean;
+    customTemplates: boolean;
+    datasetEnhance: boolean;
+    batchEval: boolean;
+  };
+};
```
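For orientation, a hypothetical object satisfying the new `LicenseDataType`; every value below is made up for illustration, assuming the type is imported from this file:

```typescript
const license: LicenseDataType = {
  startTime: '2025-01-01',
  expiredTime: '2026-01-01',
  company: 'Example Co.',
  maxUsers: 50, // omit to leave the user count uncapped
  functions: {
    sso: true,
    pay: false,
    customTemplates: true,
    datasetEnhance: true,
    batchEval: false
  }
};
```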
````diff
@@ -2,6 +2,248 @@ import { type PromptTemplateItem } from '../type.d';
 import { i18nT } from '../../../../web/i18n/utils';
 import { getPromptByVersion } from './utils';

+export const Prompt_userQuotePromptList: PromptTemplateItem[] = [
+  {
+    title: i18nT('app:template.standard_template'),
+    desc: '',
+    value: {
+      ['4.9.7']: `## 任务描述
+你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
+同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
+
+## 追溯展示规则
+
+- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
+- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
+- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
+- 不要把示例作为知识点。
+- 不要伪造 id,返回的 id 必须都存在 <Cites></Cites> 中!
+
+## 通用规则
+
+- 如果你不清楚答案,你需要澄清。
+- 避免提及你是从 <Cites></Cites> 获取的知识。
+- 保持答案与 <Cites></Cites> 中描述的一致。
+- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
+- 使用与问题相同的语言回答。
+
+<Cites>
+{{quote}}
+</Cites>
+
+## 用户问题
+
+{{question}}
+
+## 回答
+`
+    }
+  },
+  {
+    title: i18nT('app:template.qa_template'),
+    desc: '',
+    value: {
+      ['4.9.7']: `## 任务描述
+作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
+
+## 回答要求
+- 选择其中一个或多个问答对进行回答。
+- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
+- 如果没有相关的问答对,你需要澄清。
+- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
+- 使用与问题相同的语言回答。
+
+<QA>
+{{quote}}
+</QA>
+
+## 用户问题
+
+{{question}}
+
+## 回答
+`
+    }
+  },
+  {
+    title: i18nT('app:template.standard_strict'),
+    desc: '',
+    value: {
+      ['4.9.7']: `## 任务描述
+你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
+同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
+
+## 追溯展示规则
+
+- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
+- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
+- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
+- 不要把示例作为知识点。
+- 不要伪造 id,返回的 id 必须都存在 <Cites></Cites> 中!
+
+## 通用规则
+
+- 如果你不清楚答案,你需要澄清。
+- 避免提及你是从 <Cites></Cites> 获取的知识。
+- 保持答案与 <Cites></Cites> 中描述的一致。
+- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
+- 使用与问题相同的语言回答。
+
+## 严格要求
+
+你只能使用 <Cites></Cites> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <Cites></Cites> 中的内容一致。
+
+<Cites>
+{{quote}}
+</Cites>
+
+## 用户问题
+
+{{question}}
+
+## 回答
+`
+    }
+  },
+  {
+    title: i18nT('app:template.hard_strict'),
+    desc: '',
+    value: {
+      ['4.9.7']: `## 任务描述
+作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
+
+## 回答要求
+- 选择其中一个或多个问答对进行回答。
+- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
+- 如果没有相关的问答对,你需要澄清。
+- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
+- 使用与问题相同的语言回答。
+
+## 严格要求
+
+你只能使用 <QA></QA> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <QA></QA> 中的内容一致。
+
+<QA>
+{{quote}}
+</QA>
+
+## 用户问题
+
+{{question}}
+
+## 回答
+`
+    }
+  }
+];
+
+export const Prompt_systemQuotePromptList: PromptTemplateItem[] = [
+  {
+    title: i18nT('app:template.standard_template'),
+    desc: '',
+    value: {
+      ['4.9.7']: `## 任务描述
+你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
+同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
+
+## 追溯展示规则
+
+- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
+- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
+- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
+- 不要把示例作为知识点。
+- 不要伪造 id,返回的 id 必须都存在 <Cites></Cites> 中!
+
+## 通用规则
+
+- 如果你不清楚答案,你需要澄清。
+- 避免提及你是从 <Cites></Cites> 获取的知识。
+- 保持答案与 <Cites></Cites> 中描述的一致。
+- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
+- 使用与问题相同的语言回答。
+
+<Cites>
+{{quote}}
+</Cites>`
+    }
+  },
+  {
+    title: i18nT('app:template.qa_template'),
+    desc: '',
+    value: {
+      ['4.9.8']: `## 任务描述
+作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
+
+## 回答要求
+- 选择其中一个或多个问答对进行回答。
+- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
+- 如果没有相关的问答对,你需要澄清。
+- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
+- 使用与问题相同的语言回答。
+
+<QA>
+{{quote}}
+</QA>`
+    }
+  },
+  {
+    title: i18nT('app:template.standard_strict'),
+    desc: '',
+    value: {
+      ['4.9.7']: `## 任务描述
+你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
+同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
+
+## 追溯展示规则
+
+- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
+- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
+- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
+- 不要把示例作为知识点。
+- 不要伪造 id,返回的 id 必须都存在 <Cites></Cites> 中!
+
+## 通用规则
+
+- 如果你不清楚答案,你需要澄清。
+- 避免提及你是从 <Cites></Cites> 获取的知识。
+- 保持答案与 <Cites></Cites> 中描述的一致。
+- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
+- 使用与问题相同的语言回答。
+
+## 严格要求
+
+你只能使用 <Cites></Cites> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <Cites></Cites> 中的内容一致。
+
+<Cites>
+{{quote}}
+</Cites>`
+    }
+  },
+  {
+    title: i18nT('app:template.hard_strict'),
+    desc: '',
+    value: {
+      ['4.9.7']: `## 任务描述
+作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
+
+## 回答要求
+- 选择其中一个或多个问答对进行回答。
+- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
+- 如果没有相关的问答对,你需要澄清。
+- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
+- 使用与问题相同的语言回答。
+
+## 严格要求
+
+你只能使用 <QA></QA> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <QA></QA> 中的内容一致。
+
+<QA>
+{{quote}}
+</QA>`
+    }
+  }
+];
+
 export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
   {
     title: i18nT('app:template.standard_template'),
@@ -10,11 +252,6 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
     ['4.9.7']: `{
   "id": "{{id}}",
   "sourceName": "{{source}}",
-  "content": "{{q}}\n{{a}}"
-}
-`,
-    ['4.9.2']: `{
-  "sourceName": "{{source}}",
   "updateTime": "{{updateTime}}",
   "content": "{{q}}\n{{a}}"
 }
@@ -25,7 +262,7 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
     title: i18nT('app:template.qa_template'),
     desc: i18nT('app:template.qa_template_des'),
     value: {
-      ['4.9.2']: `<Question>
+      ['4.9.7']: `<Question>
 {{q}}
 </Question>
 <Answer>
@@ -40,11 +277,6 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
     ['4.9.7']: `{
   "id": "{{id}}",
   "sourceName": "{{source}}",
-  "content": "{{q}}\n{{a}}"
-}
-`,
-    ['4.9.2']: `{
-  "sourceName": "{{source}}",
   "updateTime": "{{updateTime}}",
   "content": "{{q}}\n{{a}}"
 }
@@ -55,7 +287,7 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
     title: i18nT('app:template.hard_strict'),
     desc: i18nT('app:template.hard_strict_des'),
     value: {
-      ['4.9.2']: `<Question>
+      ['4.9.7']: `<Question>
 {{q}}
 </Question>
 <Answer>
@@ -64,263 +296,12 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
     }
   }
 ];

 export const getQuoteTemplate = (version?: string) => {
   const defaultTemplate = Prompt_QuoteTemplateList[0].value;

   return getPromptByVersion(version, defaultTemplate);
 };

-export const Prompt_userQuotePromptList: PromptTemplateItem[] = [
-  {
-    title: i18nT('app:template.standard_template'),
-    desc: '',
-    value: {
-      ['4.9.7']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-回答要求:
-- 如果你不清楚答案,你需要澄清。
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。
-- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
-- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
-- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。`,
-      ['4.9.2']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-回答要求:
-- 如果你不清楚答案,你需要澄清。
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。
-
-问题:"""{{question}}"""`
-    }
-  },
-  {
-    title: i18nT('app:template.qa_template'),
-    desc: '',
-    value: {
-      ['4.9.2']: `使用 <QA></QA> 标记中的问答对进行回答。
-
-<QA>
-{{quote}}
-</QA>
-
-回答要求:
-- 选择其中一个或多个问答对进行回答。
-- 回答的内容应尽可能与 <答案></答案> 中的内容一致。
-- 如果没有相关的问答对,你需要澄清。
-- 避免提及你是从 QA 获取的知识,只需要回复答案。
-
-问题:"""{{question}}"""`
-    }
-  },
-  {
-    title: i18nT('app:template.standard_strict'),
-    desc: '',
-    value: {
-      ['4.9.7']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-思考流程:
-1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
-2. 如果有关,你按下面的要求回答。
-3. 如果无关,你直接拒绝回答本次问题。
-
-回答要求:
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。
-- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
-- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
-- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。
-
-问题:"""{{question}}"""`,
-      ['4.9.2']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-思考流程:
-1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
-2. 如果有关,你按下面的要求回答。
-3. 如果无关,你直接拒绝回答本次问题。
-
-回答要求:
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。
-
-问题:"""{{question}}"""`
-    }
-  },
-  {
-    title: i18nT('app:template.hard_strict'),
-    desc: '',
-    value: {
-      ['4.9.2']: `忘记你已有的知识,仅使用 <QA></QA> 标记中的问答对进行回答。
-
-<QA>
-{{quote}}
-</QA>
-
-思考流程:
-1. 判断问题是否与 <QA></QA> 标记中的内容有关。
-2. 如果无关,你直接拒绝回答本次问题。
-3. 判断是否有相近或相同的问题。
-4. 如果有相同的问题,直接输出对应答案。
-5. 如果只有相近的问题,请把相近的问题和答案一起输出。
-
-回答要求:
-- 如果没有相关的问答对,你需要澄清。
-- 回答的内容应尽可能与 <QA></QA> 标记中的内容一致。
-- 避免提及你是从 QA 获取的知识,只需要回复答案。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。
-
-问题:"""{{question}}"""`
-    }
-  }
-];
-
-export const Prompt_systemQuotePromptList: PromptTemplateItem[] = [
-  {
-    title: i18nT('app:template.standard_template'),
-    desc: '',
-    value: {
-      ['4.9.7']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-回答要求:
-- 如果你不清楚答案,你需要澄清。
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。
-- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
-- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
-- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。`,
-      ['4.9.2']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-回答要求:
-- 如果你不清楚答案,你需要澄清。
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。`
-    }
-  },
-  {
-    title: i18nT('app:template.qa_template'),
-    desc: '',
-    value: {
-      ['4.9.2']: `使用 <QA></QA> 标记中的问答对进行回答。
-
-<QA>
-{{quote}}
-</QA>
-
-回答要求:
-- 选择其中一个或多个问答对进行回答。
-- 回答的内容应尽可能与 <答案></答案> 中的内容一致。
-- 如果没有相关的问答对,你需要澄清。
-- 避免提及你是从 QA 获取的知识,只需要回复答案。`
-    }
-  },
-  {
-    title: i18nT('app:template.standard_strict'),
-    desc: '',
-    value: {
-      ['4.9.7']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-思考流程:
-1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
-2. 如果有关,你按下面的要求回答。
-3. 如果无关,你直接拒绝回答本次问题。
-
-回答要求:
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。
-- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
-- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
-- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。
-
-问题:"""{{question}}"""`,
-      ['4.9.2']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-思考流程:
-1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
-2. 如果有关,你按下面的要求回答。
-3. 如果无关,你直接拒绝回答本次问题。
-
-回答要求:
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。`
-    }
-  },
-  {
-    title: i18nT('app:template.hard_strict'),
-    desc: '',
-    value: {
-      ['4.9.2']: `忘记你已有的知识,仅使用 <QA></QA> 标记中的问答对进行回答。
-
-<QA>
-{{quote}}
-</QA>
-
-思考流程:
-1. 判断问题是否与 <QA></QA> 标记中的内容有关。
-2. 如果无关,你直接拒绝回答本次问题。
-3. 判断是否有相近或相同的问题。
-4. 如果有相同的问题,直接输出对应答案。
-5. 如果只有相近的问题,请把相近的问题和答案一起输出。
-
-回答要求:
-- 如果没有相关的问答对,你需要澄清。
-- 回答的内容应尽可能与 <QA></QA> 标记中的内容一致。
-- 避免提及你是从 QA 获取的知识,只需要回复答案。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。`
-    }
-  }
-];
-
 export const getQuotePrompt = (version?: string, role: 'user' | 'system' = 'user') => {
   const quotePromptTemplates =
     role === 'user' ? Prompt_userQuotePromptList : Prompt_systemQuotePromptList;
@@ -333,7 +314,7 @@ export const getQuotePrompt = (version?: string, role: 'user' | 'system' = 'user
 // Document quote prompt
 export const getDocumentQuotePrompt = (version?: string) => {
   const promptMap = {
-    ['4.9.2']: `将 <FilesContent></FilesContent> 中的内容作为本次对话的参考:
+    ['4.9.7']: `将 <FilesContent></FilesContent> 中的内容作为本次对话的参考:
 <FilesContent>
 {{quote}}
 </FilesContent>
````
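A quick sketch of how these lists are consumed, based on the `getQuotePrompt` signature visible in the context lines; the usage itself is inferred, not shown in the diff:

```typescript
// Selects the cite-style standard template for the user role at version 4.9.7.
const prompt = getQuotePrompt('4.9.7', 'user');
// The returned template still contains its {{quote}} and {{question}} placeholders.
```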
```diff
@@ -1,14 +1,19 @@
 export const getDatasetSearchToolResponsePrompt = () => {
   return `## Role
-你是一个知识库回答助手,可以 "quotes" 中的内容作为本次对话的参考。为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记。
+你是一个知识库回答助手,可以 "cites" 中的内容作为本次对话的参考。为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。

-## Rules
+## 追溯展示规则
+
+- 使用 **[id](CITE)** 格式来引用 "cites" 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
+- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
+- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
+- 不要把示例作为知识点。
+- 不要伪造 id,返回的 id 必须都存在 cites 中!
+
+## 通用规则
 - 如果你不清楚答案,你需要澄清。
-- 避免提及你是从 "quotes" 获取的知识。
-- 保持答案与 "quotes" 中描述的一致。
+- 避免提及你是从 "cites" 获取的知识。
+- 保持答案与 "cites" 中描述的一致。
 - 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
-- 使用与问题相同的语言回答。
-- 使用 [id](CITE) 格式来引用 "quotes" 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
-- 在每段话结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
-- 每段话至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。`;
+- 使用与问题相同的语言回答。`;
 };
```
```diff
@@ -60,5 +60,3 @@ export enum AppTemplateTypeEnum {
   // special type
   contribute = 'contribute'
 }
-
-export const defaultDatasetMaxTokens = 16000;
```
```diff
@@ -5,7 +5,7 @@ import {
   FlowNodeTypeEnum
 } from '../../workflow/node/constant';
 import { nanoid } from 'nanoid';
-import { type ToolType } from '../type';
+import { type McpToolConfigType } from '../type';
 import { i18nT } from '../../../../web/i18n/utils';
 import { type RuntimeNodeItemType } from '../../workflow/runtime/type';

@@ -16,7 +16,7 @@ export const getMCPToolSetRuntimeNode = ({
   avatar
 }: {
   url: string;
-  toolList: ToolType[];
+  toolList: McpToolConfigType[];
   name?: string;
   avatar?: string;
 }): RuntimeNodeItemType => {
@@ -45,7 +45,7 @@ export const getMCPToolRuntimeNode = ({
   url,
   avatar = 'core/app/type/mcpToolsFill'
 }: {
-  tool: ToolType;
+  tool: McpToolConfigType;
   url: string;
   avatar?: string;
 }): RuntimeNodeItemType => {
@@ -65,7 +65,7 @@ export const getMCPToolRuntimeNode = ({
   ...Object.entries(tool.inputSchema?.properties || {}).map(([key, value]) => ({
     key,
     label: key,
-    valueType: value.type as WorkflowIOValueTypeEnum,
+    valueType: value.type as WorkflowIOValueTypeEnum, // TODO: 这里需要做一个映射
     description: value.description,
     toolDescription: value.description || key,
     required: tool.inputSchema?.required?.includes(key) || false,
```
packages/global/core/app/type.d.ts (vendored, 20 changes)

```diff
@@ -16,16 +16,6 @@ import { FlowNodeInputTypeEnum } from '../../core/workflow/node/constant';
 import type { WorkflowTemplateBasicType } from '@fastgpt/global/core/workflow/type';
 import type { SourceMemberType } from '../../support/user/type';

-export type ToolType = {
-  name: string;
-  description: string;
-  inputSchema: {
-    type: string;
-    properties?: Record<string, { type: string; description?: string }>;
-    required?: string[];
-  };
-};
-
 export type AppSchema = {
   _id: string;
   parentId?: ParentIdType;
@@ -117,6 +107,16 @@ export type AppSimpleEditFormType = {
   chatConfig: AppChatConfigType;
 };

+export type McpToolConfigType = {
+  name: string;
+  description: string;
+  inputSchema: {
+    type: string;
+    properties?: Record<string, { type: string; description?: string }>;
+    required?: string[];
+  };
+};
+
 /* app chat config type */
 export type AppChatConfigType = {
   welcomeText?: string;
```
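The type keeps the exact shape of the old `ToolType`; only its name and position changed. A hypothetical value satisfying it, assuming the type is imported from this file; the tool name and schema below are made up for illustration:

```typescript
// Illustrative MCP tool config matching the relocated McpToolConfigType.
const weatherTool: McpToolConfigType = {
  name: 'get_weather',
  description: 'Look up the current weather for a city',
  inputSchema: {
    type: 'object',
    properties: {
      city: { type: 'string', description: 'City name' }
    },
    required: ['city']
  }
};
```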
```diff
@@ -9,6 +9,9 @@ import { type WorkflowTemplateBasicType } from '../workflow/type';
 import { AppTypeEnum } from './constants';
 import { AppErrEnum } from '../../common/error/code/app';
 import { PluginErrEnum } from '../../common/error/code/plugin';
+import { i18nT } from '../../../web/i18n/utils';
+import appErrList from '../../common/error/code/app';
+import pluginErrList from '../../common/error/code/plugin';

 export const getDefaultAppForm = (): AppSimpleEditFormType => {
   return {
@@ -189,17 +192,10 @@ export const getAppType = (config?: WorkflowTemplateBasicType | AppSimpleEditFor
   return '';
 };

-export const checkAppUnExistError = (error?: string) => {
-  const unExistError: Array<string> = [
-    AppErrEnum.unAuthApp,
-    AppErrEnum.unExist,
-    PluginErrEnum.unAuth,
-    PluginErrEnum.unExist
-  ];
-
-  if (!!error && unExistError.includes(error)) {
-    return error;
-  } else {
-    return undefined;
-  }
+export const formatToolError = (error?: any) => {
+  if (!error || typeof error !== 'string') return;
+
+  const errorText = appErrList[error]?.message || pluginErrList[error]?.message;
+
+  return errorText || error;
 };
```
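The new `formatToolError` inverts the old helper's contract: instead of passing through only the known "not exist" codes, it maps any known app or plugin error code to its message and falls back to the raw string. A usage sketch with illustrative inputs:

```typescript
// A known app/plugin error code resolves to its registered message...
formatToolError(AppErrEnum.unExist);
// ...an unknown string falls through unchanged...
formatToolError('some unexpected error'); // -> 'some unexpected error'
// ...and a missing or non-string error yields undefined.
formatToolError(undefined); // -> undefined
```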
packages/global/core/chat/type.d.ts (vendored, 3 changes)

```diff
@@ -26,6 +26,7 @@ export type ChatSchema = {
   teamId: string;
   tmbId: string;
   appId: string;
+  createTime: Date;
   updateTime: Date;
   title: string;
   customTitle: string;
@@ -112,6 +113,7 @@ export type ChatItemSchema = (UserChatItemType | SystemChatItemType | AIChatItem
   appId: string;
   time: Date;
   durationSeconds?: number;
+  errorMsg?: string;
 };

 export type AdminFbkType = {
@@ -143,6 +145,7 @@ export type ChatSiteItemType = (UserChatItemType | SystemChatItemType | AIChatIt
   responseData?: ChatHistoryItemResType[];
   time?: Date;
   durationSeconds?: number;
+  errorMsg?: string;
 } & ChatBoxInputType &
   ResponseTagItemType;

```
packages/global/core/dataset/api.d.ts (31 changes, vendored)
@@ -1,9 +1,11 @@
-import type { DatasetDataIndexItemType, DatasetSchemaType } from './type';
+import type { ChunkSettingsType, DatasetDataIndexItemType, DatasetSchemaType } from './type';
 import type {
   DatasetCollectionTypeEnum,
   DatasetCollectionDataProcessModeEnum,
   ChunkSettingModeEnum,
-  DataChunkSplitModeEnum
+  DataChunkSplitModeEnum,
+  ChunkTriggerConfigTypeEnum,
+  ParagraphChunkAIModeEnum
 } from './constants';
 import type { LLMModelItemType } from '../ai/model.d';
 import type { ParentIdType } from 'common/parentFolder/type';
@@ -32,26 +34,16 @@ export type DatasetUpdateBody = {
 };

 /* ================= collection ===================== */
-export type DatasetCollectionChunkMetadataType = {
+// Input + store params
+type DatasetCollectionStoreDataType = ChunkSettingsType & {
   parentId?: string;
-  customPdfParse?: boolean;
-  trainingType?: DatasetCollectionDataProcessModeEnum;
-  imageIndex?: boolean;
-  autoIndexes?: boolean;
-
-  chunkSettingMode?: ChunkSettingModeEnum;
-  chunkSplitMode?: DataChunkSplitModeEnum;
-
-  chunkSize?: number;
-  indexSize?: number;
-
-  chunkSplitter?: string;
-  qaPrompt?: string;
   metadata?: Record<string, any>;
+
+  customPdfParse?: boolean;
 };

 // create collection params
-export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
+export type CreateDatasetCollectionParams = DatasetCollectionStoreDataType & {
   datasetId: string;
   name: string;
   type: DatasetCollectionTypeEnum;
@@ -72,7 +64,7 @@ export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType &
   nextSyncTime?: Date;
 };

-export type ApiCreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
+export type ApiCreateDatasetCollectionParams = DatasetCollectionStoreDataType & {
   datasetId: string;
   tags?: string[];
 };
@@ -90,7 +82,7 @@ export type ApiDatasetCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams
 export type FileIdCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
   fileId: string;
 };
-export type reTrainingDatasetFileCollectionParams = DatasetCollectionChunkMetadataType & {
+export type reTrainingDatasetFileCollectionParams = DatasetCollectionStoreDataType & {
   datasetId: string;
   collectionId: string;
 };
@@ -147,6 +139,7 @@ export type PushDatasetDataProps = {
   collectionId: string;
   data: PushDatasetDataChunkProps[];
   trainingType?: DatasetCollectionDataProcessModeEnum;
+  indexSize?: number;
   autoIndexes?: boolean;
   imageIndex?: boolean;
   prompt?: string;
@@ -120,6 +120,8 @@ export const DatasetCollectionSyncResultMap = {
 export enum DatasetCollectionDataProcessModeEnum {
   chunk = 'chunk',
   qa = 'qa',
+  backup = 'backup',
+
   auto = 'auto' // abandon
 }
 export const DatasetCollectionDataProcessModeMap = {
@@ -131,21 +133,35 @@ export const DatasetCollectionDataProcessModeMap = {
     label: i18nT('common:core.dataset.training.QA mode'),
     tooltip: i18nT('common:core.dataset.import.QA Import Tip')
   },
+  [DatasetCollectionDataProcessModeEnum.backup]: {
+    label: i18nT('dataset:backup_mode'),
+    tooltip: i18nT('dataset:backup_mode')
+  },
   [DatasetCollectionDataProcessModeEnum.auto]: {
     label: i18nT('common:core.dataset.training.Auto mode'),
     tooltip: i18nT('common:core.dataset.training.Auto mode Tip')
   }
 };

+export enum ChunkTriggerConfigTypeEnum {
+  minSize = 'minSize',
+  forceChunk = 'forceChunk',
+  maxSize = 'maxSize'
+}
 export enum ChunkSettingModeEnum {
   auto = 'auto',
   custom = 'custom'
 }

 export enum DataChunkSplitModeEnum {
+  paragraph = 'paragraph',
   size = 'size',
   char = 'char'
 }
+export enum ParagraphChunkAIModeEnum {
+  auto = 'auto',
+  force = 'force'
+}

 /* ------------ data -------------- */

@@ -154,7 +170,6 @@ export enum ImportDataSourceEnum {
   fileLocal = 'fileLocal',
   fileLink = 'fileLink',
   fileCustom = 'fileCustom',
-  csvTable = 'csvTable',
   externalFile = 'externalFile',
   apiDataset = 'apiDataset',
   reTraining = 'reTraining'
@@ -32,7 +32,7 @@ export const DatasetDataIndexMap: Record<
     color: 'red'
   },
   [DatasetDataIndexTypeEnum.image]: {
-    label: i18nT('common:data_index_image'),
+    label: i18nT('dataset:data_index_image'),
     color: 'purple'
   }
 };
@@ -118,9 +118,8 @@ export const computeChunkSize = (params: {
     return getLLMMaxChunkSize(params.llmModel);
   }

-  return Math.min(params.chunkSize || chunkAutoChunkSize, getLLMMaxChunkSize(params.llmModel));
+  return Math.min(params.chunkSize ?? chunkAutoChunkSize, getLLMMaxChunkSize(params.llmModel));
 };

 export const computeChunkSplitter = (params: {
   chunkSettingMode?: ChunkSettingModeEnum;
   chunkSplitMode?: DataChunkSplitModeEnum;
@@ -129,8 +128,21 @@ export const computeChunkSplitter = (params: {
   if (params.chunkSettingMode === ChunkSettingModeEnum.auto) {
     return undefined;
   }
-  if (params.chunkSplitMode === DataChunkSplitModeEnum.size) {
+  if (params.chunkSplitMode !== DataChunkSplitModeEnum.char) {
     return undefined;
   }
   return params.chunkSplitter;
 };
+export const computeParagraphChunkDeep = (params: {
+  chunkSettingMode?: ChunkSettingModeEnum;
+  chunkSplitMode?: DataChunkSplitModeEnum;
+  paragraphChunkDeep?: number;
+}) => {
+  if (params.chunkSettingMode === ChunkSettingModeEnum.auto) {
+    return 5;
+  }
+  if (params.chunkSplitMode === DataChunkSplitModeEnum.paragraph) {
+    return params.paragraphChunkDeep;
+  }
+  return 0;
+};
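Together the three helpers resolve the effective chunking parameters: `auto` mode pins defaults (auto chunk size, no custom splitter, paragraph depth 5), while `custom` mode honors the stored settings. Note also that `params.chunkSize || chunkAutoChunkSize` became `??`, so an explicit `0` now passes through instead of being coerced to the default. A hypothetical combined resolver mirroring the logic above (enum string values assumed from the constants file):

    const resolveChunkParams = (p: {
      chunkSettingMode?: 'auto' | 'custom';
      chunkSplitMode?: 'paragraph' | 'size' | 'char';
      paragraphChunkDeep?: number;
      chunkSplitter?: string;
    }) => ({
      // A custom splitter only applies to char-based splitting.
      splitter:
        p.chunkSettingMode === 'auto' || p.chunkSplitMode !== 'char' ? undefined : p.chunkSplitter,
      // Paragraph depth: fixed 5 in auto mode, the stored depth in paragraph mode,
      // and 0 (disabled) for size/char splitting.
      paragraphDeep:
        p.chunkSettingMode === 'auto'
          ? 5
          : p.chunkSplitMode === 'paragraph'
            ? p.paragraphChunkDeep
            : 0
    });

    // resolveChunkParams({ chunkSettingMode: 'custom', chunkSplitMode: 'char', chunkSplitter: '\n---' })
    // => { splitter: '\n---', paragraphDeep: 0 }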
packages/global/core/dataset/type.d.ts (48 changes, vendored)
@@ -8,26 +8,42 @@ import type {
   DatasetStatusEnum,
   DatasetTypeEnum,
   SearchScoreTypeEnum,
-  TrainingModeEnum
+  TrainingModeEnum,
+  ChunkSettingModeEnum,
+  ChunkTriggerConfigTypeEnum
 } from './constants';
 import type { DatasetPermission } from '../../support/permission/dataset/controller';
-import { Permission } from '../../support/permission/controller';
 import type { APIFileServer, FeishuServer, YuqueServer } from './apiDataset';
 import type { SourceMemberType } from 'support/user/type';
 import type { DatasetDataIndexTypeEnum } from './data/constants';
-import type { ChunkSettingModeEnum } from './constants';

 export type ChunkSettingsType = {
-  trainingType: DatasetCollectionDataProcessModeEnum;
-  autoIndexes?: boolean;
+  trainingType?: DatasetCollectionDataProcessModeEnum;
+
+  // Chunk trigger
+  chunkTriggerType?: ChunkTriggerConfigTypeEnum;
+  chunkTriggerMinSize?: number; // maxSize from agent model, not store
+
+  // Data enhance
+  dataEnhanceCollectionName?: boolean; // Auto add collection name to data
+
+  // Index enhance
   imageIndex?: boolean;
+  autoIndexes?: boolean;

-  chunkSettingMode?: ChunkSettingModeEnum;
+  // Chunk setting
+  chunkSettingMode?: ChunkSettingModeEnum; // system params / custom params
   chunkSplitMode?: DataChunkSplitModeEnum;
-
-  chunkSize?: number;
+  // Paragraph split
+  paragraphChunkAIMode?: ParagraphChunkAIModeEnum;
+  paragraphChunkDeep?: number; // Paragraph deep
+  paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
+  // Size split
+  chunkSize?: number; // chunk/qa chunk size, Paragraph max chunk size.
+  // Char split
+  chunkSplitter?: string; // chunk/qa chunk splitter
   indexSize?: number;
-  chunkSplitter?: string;
   qaPrompt?: string;
 };
@@ -66,7 +82,7 @@ export type DatasetSchemaType = {
   defaultPermission?: number;
 };

-export type DatasetCollectionSchemaType = {
+export type DatasetCollectionSchemaType = ChunkSettingsType & {
   _id: string;
   teamId: string;
   tmbId: string;
@@ -101,18 +117,7 @@ export type DatasetCollectionSchemaType = {

   // Parse settings
   customPdfParse?: boolean;
-  // Chunk settings
-  autoIndexes?: boolean;
-  imageIndex?: boolean;
   trainingType: DatasetCollectionDataProcessModeEnum;
-
-  chunkSettingMode?: ChunkSettingModeEnum;
-  chunkSplitMode?: DataChunkSplitModeEnum;
-
-  chunkSize?: number;
-  indexSize?: number;
-  chunkSplitter?: string;
-  qaPrompt?: string;
 };

 export type DatasetCollectionTagsSchemaType = {
@@ -175,6 +180,7 @@ export type DatasetTrainingSchemaType = {
   q: string;
   a: string;
   chunkIndex: number;
+  indexSize?: number;
   weight: number;
   indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
   retryCount: number;
@@ -40,5 +40,6 @@ export function getSourceNameIcon({
 export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {
   if (mode === TrainingModeEnum.qa) return data.length * 20;
   if (mode === TrainingModeEnum.auto) return data.length * 5;
+  if (mode === TrainingModeEnum.image) return data.length * 2;
   return data.length;
 };
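The new `image` branch budgets two training items per record, between plain chunking (×1) and `auto` (×5). A quick illustration of the multipliers:

    // Illustrative mirror of predictDataLimitLength; enum string values assumed.
    const predict = (mode: 'chunk' | 'qa' | 'auto' | 'image', data: unknown[]) => {
      if (mode === 'qa') return data.length * 20;
      if (mode === 'auto') return data.length * 5;
      if (mode === 'image') return data.length * 2; // new in this change
      return data.length;
    };

    predict('image', new Array(10)); // => 20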
@@ -7,7 +7,7 @@ import type {
 } from '../../chat/type';
 import { NodeOutputItemType } from '../../chat/type';
 import type { FlowNodeInputItemType, FlowNodeOutputItemType } from '../type/io.d';
-import type { StoreNodeItemType } from '../type/node';
+import type { NodeToolConfigType, StoreNodeItemType } from '../type/node';
 import type { DispatchNodeResponseKeyEnum } from './constants';
 import type { StoreEdgeItemType } from '../type/edge';
 import type { NodeInputKeyEnum } from '../constants';
@@ -102,6 +102,9 @@ export type RuntimeNodeItemType = {

   pluginId?: string; // workflow id / plugin id
   version?: string;
+
+  // tool
+  toolConfig?: NodeToolConfigType;
 };

 export type RuntimeEdgeItemType = StoreEdgeItemType & {
@@ -114,7 +117,7 @@ export type DispatchNodeResponseType = {
   runningTime?: number;
   query?: string;
   textOutput?: string;
-  error?: Record<string, any>;
+  error?: Record<string, any> | string;
   customInputs?: Record<string, any>;
   customOutputs?: Record<string, any>;
   nodeInputs?: Record<string, any>;
packages/global/core/workflow/type/node.d.ts (16 changes, vendored)
@@ -20,11 +20,17 @@ import { RuntimeNodeItemType } from '../runtime/type';
 import { PluginTypeEnum } from '../../plugin/constants';
 import { RuntimeEdgeItemType, StoreEdgeItemType } from './edge';
 import { NextApiResponse } from 'next';
-import { AppDetailType, AppSchema } from '../../app/type';
+import type { AppDetailType, AppSchema, McpToolConfigType } from '../../app/type';
 import type { ParentIdType } from 'common/parentFolder/type';
-import { AppTypeEnum } from 'core/app/constants';
+import { AppTypeEnum } from '../../app/constants';
 import type { WorkflowInteractiveResponseType } from '../template/system/interactive/type';

+export type NodeToolConfigType = {
+  mcpTool?: McpToolConfigType & {
+    url: string;
+  };
+};
+
 export type FlowNodeCommonType = {
   parentNodeId?: string;
   flowNodeType: FlowNodeTypeEnum; // render node card
@@ -46,12 +52,13 @@ export type FlowNodeCommonType = {
   // plugin data
   pluginId?: string;
   isFolder?: boolean;
-  // pluginType?: AppTypeEnum;
   pluginData?: PluginDataType;
+
+  // tool data
+  toolData?: NodeToolConfigType;
 };

 export type PluginDataType = {
-  version?: string;
   diagram?: string;
   userGuide?: string;
   courseUrl?: string;
@@ -118,6 +125,7 @@ export type FlowNodeItemType = FlowNodeTemplateType & {
   nodeId: string;
   parentNodeId?: string;
   isError?: boolean;
+  searchedText?: string;
   debugResult?: {
     status: 'running' | 'success' | 'skipped' | 'failed';
     message?: string;
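With `toolData` on stored nodes and `toolConfig` on runtime nodes, an MCP tool node can now carry its endpoint next to the tool schema. A hypothetical `NodeToolConfigType` value (all field values invented for illustration):

    const toolData /* : NodeToolConfigType */ = {
      mcpTool: {
        // McpToolConfigType fields
        name: 'search-docs',
        description: 'Search the documentation index',
        inputSchema: {
          type: 'object',
          properties: { query: { type: 'string', description: 'search terms' } },
          required: ['query']
        },
        // endpoint added by the intersection type above
        url: 'https://example.com/mcp'
      }
    };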
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4816",
   "name": "钉钉 webhook",
   "avatar": "plugins/dingding",
   "intro": "向钉钉机器人发起 webhook 请求。",
@@ -1,6 +1,5 @@
 {
   "author": "Menghuan1918",
-  "version": "488",
   "name": "PDF识别",
   "avatar": "plugins/doc2x",
   "intro": "将PDF文件发送至Doc2X进行解析,返回结构化的LaTeX公式的文本(markdown),支持传入String类型的URL或者流程输出中的文件链接变量",
@@ -1,6 +1,5 @@
 {
   "author": "Menghuan1918",
-  "version": "488",
   "name": "Doc2X服务",
   "avatar": "plugins/doc2x",
   "intro": "将传入的图片或PDF文件发送至Doc2X进行解析,返回带LaTeX公式的markdown格式的文本。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4816",
   "name": "企业微信 webhook",
   "avatar": "plugins/qiwei",
   "intro": "向企业微信机器人发起 webhook 请求。只能内部群使用。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4811",
   "name": "Bing搜索",
   "avatar": "core/workflow/template/bing",
   "intro": "在Bing中搜索。",
@@ -1,6 +1,5 @@
 {
   "author": "silencezhang",
-  "version": "4811",
   "name": "数据库连接",
   "avatar": "core/workflow/template/datasource",
   "intro": "可连接常用数据库,并执行sql",
@@ -1,6 +1,5 @@
 {
   "author": "collin",
-  "version": "4817",
   "name": "流程等待",
   "avatar": "core/workflow/template/sleep",
   "intro": "让工作流等待指定时间后运行",
@@ -1,6 +1,5 @@
 {
   "author": "silencezhang",
-  "version": "4817",
   "name": "基础图表",
   "avatar": "core/workflow/template/baseChart",
   "intro": "根据数据生成图表,可根据chartType生成柱状图,折线图,饼图",
@@ -1,6 +1,5 @@
 {
   "author": "silencezhang",
-  "version": "486",
   "name": "BI图表功能",
   "avatar": "core/workflow/template/BI",
   "intro": "BI图表功能,可以生成一些常用的图表,如饼图,柱状图,折线图等",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 网络搜索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行网络搜索",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 图片搜索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行图片搜索",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 新闻检索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行新闻检索",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 视频搜索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行视频搜索",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo服务",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "DuckDuckGo 服务,包含网络搜索、图片搜索、新闻搜索等。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "488",
   "name": "飞书 webhook",
   "avatar": "core/app/templates/plugin-feishu",
   "intro": "向飞书机器人发起 webhook 请求。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "网页内容抓取",
   "avatar": "core/workflow/template/fetchUrl",
   "intro": "可获取一个网页链接内容,并以 Markdown 格式输出,仅支持获取静态网站。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "481",
   "templateType": "tools",
   "name": "获取当前时间",
   "avatar": "core/workflow/template/getTime",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4811",
   "name": "Google搜索",
   "avatar": "core/workflow/template/google",
   "intro": "在google中搜索。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "数学公式执行",
   "avatar": "core/workflow/template/mathCall",
   "intro": "用于执行数学表达式的工具,通过 js 的 expr-eval 库运行表达式并返回结果。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4816",
   "name": "Search XNG 搜索",
   "avatar": "core/workflow/template/searxng",
   "intro": "使用 Search XNG 服务进行搜索。",
@@ -1,6 +1,5 @@
 {
   "author": "cloudpense",
-  "version": "1.0.0",
   "name": "Email 邮件发送",
   "avatar": "plugins/email",
   "intro": "通过SMTP协议发送电子邮件(nodemailer)",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "489",
   "name": "文本加工",
   "avatar": "/imgs/workflow/textEditor.svg",
   "intro": "可对固定或传入的文本进行加工后输出,非字符串类型数据最终会转成字符串类型。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4811",
   "name": "Wiki搜索",
   "avatar": "core/workflow/template/wiki",
   "intro": "在Wiki中查询释义。",
packages/service/common/api/type.d.ts (18 changes, vendored)
@@ -6,12 +6,6 @@ import type {
 } from '../../core/dataset/search/controller';
 import type { AuthOpenApiLimitProps } from '../../support/openapi/auth';
 import type { CreateUsageProps, ConcatUsageProps } from '@fastgpt/global/support/wallet/usage/api';
-import type {
-  GetProApiDatasetFileContentParams,
-  GetProApiDatasetFileDetailParams,
-  GetProApiDatasetFileListParams,
-  GetProApiDatasetFilePreviewUrlParams
-} from '../../core/dataset/apiDataset/proApi';

 declare global {
   var textCensorHandler: (params: { text: string }) => Promise<{ code: number; message?: string }>;
@@ -19,16 +13,4 @@ declare global {
   var authOpenApiHandler: (data: AuthOpenApiLimitProps) => Promise<any>;
   var createUsageHandler: (data: CreateUsageProps) => any;
   var concatUsageHandler: (data: ConcatUsageProps) => any;
-
-  // API dataset
-  var getProApiDatasetFileList: (data: GetProApiDatasetFileListParams) => Promise<APIFileItem[]>;
-  var getProApiDatasetFileContent: (
-    data: GetProApiDatasetFileContentParams
-  ) => Promise<ApiFileReadContentResponse>;
-  var getProApiDatasetFilePreviewUrl: (
-    data: GetProApiDatasetFilePreviewUrlParams
-  ) => Promise<string>;
-  var getProApiDatasetFileDetail: (
-    data: GetProApiDatasetFileDetailParams
-  ) => Promise<ApiDatasetDetailResponse>;
 }
packages/service/common/buffer/rawText/controller.ts (178 lines, new file)
@@ -0,0 +1,178 @@
+import { retryFn } from '@fastgpt/global/common/system/utils';
+import { connectionMongo } from '../../mongo';
+import { MongoRawTextBufferSchema, bucketName } from './schema';
+import { addLog } from '../../system/log';
+import { setCron } from '../../system/cron';
+import { checkTimerLock } from '../../system/timerLock/utils';
+import { TimerIdEnum } from '../../system/timerLock/constants';
+
+const getGridBucket = () => {
+  return new connectionMongo.mongo.GridFSBucket(connectionMongo.connection.db!, {
+    bucketName: bucketName
+  });
+};
+
+export const addRawTextBuffer = async ({
+  sourceId,
+  sourceName,
+  text,
+  expiredTime
+}: {
+  sourceId: string;
+  sourceName: string;
+  text: string;
+  expiredTime: Date;
+}) => {
+  const gridBucket = getGridBucket();
+  const metadata = {
+    sourceId,
+    sourceName,
+    expiredTime
+  };
+
+  const buffer = Buffer.from(text);
+
+  const fileSize = buffer.length;
+  // Chunk size: as large as possible, but at most 14MB and at least 128KB
+  const chunkSizeBytes = (() => {
+    // Ideal chunk size: file size / target chunk count (10), with each chunk under 14MB
+    const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);
+
+    // Ensure the chunk size is at least 128KB
+    const minChunkSize = 128 * 1024; // 128KB
+
+    // Take the larger of the ideal and minimum chunk sizes
+    let chunkSize = Math.max(idealChunkSize, minChunkSize);
+
+    // Round the chunk size up to the nearest multiple of 64KB to keep it tidy
+    chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
+
+    return chunkSize;
+  })();
+
+  const uploadStream = gridBucket.openUploadStream(sourceId, {
+    metadata,
+    chunkSizeBytes
+  });
+
+  return retryFn(async () => {
+    return new Promise((resolve, reject) => {
+      uploadStream.end(buffer);
+      uploadStream.on('finish', () => {
+        resolve(uploadStream.id);
+      });
+      uploadStream.on('error', (error) => {
+        addLog.error('addRawTextBuffer error', error);
+        resolve('');
+      });
+    });
+  });
+};
+
+export const getRawTextBuffer = async (sourceId: string) => {
+  const gridBucket = getGridBucket();
+
+  return retryFn(async () => {
+    const bufferData = await MongoRawTextBufferSchema.findOne(
+      {
+        'metadata.sourceId': sourceId
+      },
+      '_id metadata'
+    ).lean();
+    if (!bufferData) {
+      return null;
+    }
+
+    // Read file content
+    const downloadStream = gridBucket.openDownloadStream(bufferData._id);
+    const chunks: Buffer[] = [];
+
+    return new Promise<{
+      text: string;
+      sourceName: string;
+    } | null>((resolve, reject) => {
+      downloadStream.on('data', (chunk) => {
+        chunks.push(chunk);
+      });
+
+      downloadStream.on('end', () => {
+        const buffer = Buffer.concat(chunks);
+        const text = buffer.toString('utf8');
+        resolve({
+          text,
+          sourceName: bufferData.metadata?.sourceName || ''
+        });
+      });
+
+      downloadStream.on('error', (error) => {
+        addLog.error('getRawTextBuffer error', error);
+        resolve(null);
+      });
+    });
+  });
+};
+
+export const deleteRawTextBuffer = async (sourceId: string): Promise<boolean> => {
+  const gridBucket = getGridBucket();
+
+  return retryFn(async () => {
+    const buffer = await MongoRawTextBufferSchema.findOne({ 'metadata.sourceId': sourceId });
+    if (!buffer) {
+      return false;
+    }
+
+    await gridBucket.delete(buffer._id);
+    return true;
+  });
+};
+
+export const updateRawTextBufferExpiredTime = async ({
+  sourceId,
+  expiredTime
+}: {
+  sourceId: string;
+  expiredTime: Date;
+}) => {
+  return retryFn(async () => {
+    return MongoRawTextBufferSchema.updateOne(
+      { 'metadata.sourceId': sourceId },
+      { $set: { 'metadata.expiredTime': expiredTime } }
+    );
+  });
+};
+
+export const clearExpiredRawTextBufferCron = async () => {
+  const clearExpiredRawTextBuffer = async () => {
+    addLog.debug('Clear expired raw text buffer start');
+    const gridBucket = getGridBucket();
+
+    return retryFn(async () => {
+      const data = await MongoRawTextBufferSchema.find(
+        {
+          'metadata.expiredTime': { $lt: new Date() }
+        },
+        '_id'
+      ).lean();
+
+      for (const item of data) {
+        await gridBucket.delete(item._id);
+      }
+      addLog.debug('Clear expired raw text buffer end');
+    });
+  };
+
+  setCron('*/10 * * * *', async () => {
+    if (
+      await checkTimerLock({
+        timerId: TimerIdEnum.clearExpiredRawTextBuffer,
+        lockMinuted: 9
+      })
+    ) {
+      try {
+        await clearExpiredRawTextBuffer();
+      } catch (error) {
+        addLog.error('clearExpiredRawTextBufferCron error', error);
+      }
+    }
+  });
+};
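The new controller moves the raw-text cache from a single Mongo document (bounded by the 16MB BSON limit) into a GridFS bucket, with expiry handled by the cron above instead of a TTL index. A minimal usage sketch, assuming the exports of this new module (file name and expiry value illustrative):

    import { addMinutes } from 'date-fns';
    import { addRawTextBuffer, getRawTextBuffer } from './controller';

    const cacheParsedFile = async (sourceId: string, text: string) => {
      // Cache the parsed text for 20 minutes, matching the policy used later in this change.
      await addRawTextBuffer({
        sourceId,
        sourceName: 'report.pdf', // illustrative
        text,
        expiredTime: addMinutes(new Date(), 20)
      });

      const hit = await getRawTextBuffer(sourceId);
      return hit?.text;
    };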
@@ -1,33 +1,22 @@
-import { getMongoModel, Schema } from '../../mongo';
-import { type RawTextBufferSchemaType } from './type';
+import { getMongoModel, type Types, Schema } from '../../mongo';

-export const collectionName = 'buffer_rawtexts';
+export const bucketName = 'buffer_rawtext';

 const RawTextBufferSchema = new Schema({
-  sourceId: {
-    type: String,
-    required: true
-  },
-  rawText: {
-    type: String,
-    default: ''
-  },
-  createTime: {
-    type: Date,
-    default: () => new Date()
-  },
-  metadata: Object
+  metadata: {
+    sourceId: { type: String, required: true },
+    sourceName: { type: String, required: true },
+    expiredTime: { type: Date, required: true }
+  }
 });
+RawTextBufferSchema.index({ 'metadata.sourceId': 'hashed' });
+RawTextBufferSchema.index({ 'metadata.expiredTime': -1 });

-try {
-  RawTextBufferSchema.index({ sourceId: 1 });
-  // 20 minutes
-  RawTextBufferSchema.index({ createTime: 1 }, { expireAfterSeconds: 20 * 60 });
-} catch (error) {
-  console.log(error);
-}
-
-export const MongoRawTextBuffer = getMongoModel<RawTextBufferSchemaType>(
-  collectionName,
-  RawTextBufferSchema
-);
+export const MongoRawTextBufferSchema = getMongoModel<{
+  _id: Types.ObjectId;
+  metadata: {
+    sourceId: string;
+    sourceName: string;
+    expiredTime: Date;
+  };
+}>(`${bucketName}.files`, RawTextBufferSchema);
@@ -1,8 +0,0 @@
-export type RawTextBufferSchemaType = {
-  sourceId: string;
-  rawText: string;
-  createTime: Date;
-  metadata?: {
-    filename: string;
-  };
-};
@@ -6,13 +6,13 @@ import { type DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
 import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema';
 import { detectFileEncoding, detectFileEncodingByPath } from '@fastgpt/global/common/file/tools';
 import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
-import { MongoRawTextBuffer } from '../../buffer/rawText/schema';
 import { readRawContentByFileBuffer } from '../read/utils';
 import { gridFsStream2Buffer, stream2Encoding } from './utils';
 import { addLog } from '../../system/log';
-import { readFromSecondary } from '../../mongo/utils';
 import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
 import { Readable } from 'stream';
+import { addRawTextBuffer, getRawTextBuffer } from '../../buffer/rawText/controller';
+import { addMinutes } from 'date-fns';

 export function getGFSCollection(bucket: `${BucketNameEnum}`) {
   MongoDatasetFileSchema;
@@ -210,28 +210,26 @@ export const readFileContentFromMongo = async ({
   teamId,
   tmbId,
   bucketName,
   fileId,
-  isQAImport = false,
-  customPdfParse = false
+  customPdfParse = false,
+  getFormatText
 }: {
   teamId: string;
   tmbId: string;
   bucketName: `${BucketNameEnum}`;
   fileId: string;
-  isQAImport?: boolean;
   customPdfParse?: boolean;
+  getFormatText?: boolean; // convert the data to markdown format where possible
 }): Promise<{
   rawText: string;
   filename: string;
 }> => {
-  const bufferId = `${fileId}-${customPdfParse}`;
+  const bufferId = `${String(fileId)}-${customPdfParse}`;
   // read buffer
-  const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: bufferId }, undefined, {
-    ...readFromSecondary
-  }).lean();
+  const fileBuffer = await getRawTextBuffer(bufferId);
   if (fileBuffer) {
     return {
-      rawText: fileBuffer.rawText,
-      filename: fileBuffer.metadata?.filename || ''
+      rawText: fileBuffer.text,
+      filename: fileBuffer?.sourceName
     };
   }

@@ -254,8 +252,8 @@ export const readFileContentFromMongo = async ({
   // Get raw text
   const { rawText } = await readRawContentByFileBuffer({
     customPdfParse,
+    getFormatText,
     extension,
-    isQAImport,
     teamId,
     tmbId,
     buffer: fileBuffers,
@@ -265,16 +263,13 @@ export const readFileContentFromMongo = async ({
     }
   });

-  // < 14M
-  if (fileBuffers.length < 14 * 1024 * 1024 && rawText.trim()) {
-    MongoRawTextBuffer.create({
-      sourceId: bufferId,
-      rawText,
-      metadata: {
-        filename: file.filename
-      }
-    });
-  }
+  // Add buffer
+  addRawTextBuffer({
+    sourceId: bufferId,
+    sourceName: file.filename,
+    text: rawText,
+    expiredTime: addMinutes(new Date(), 20)
+  });

   return {
     rawText,
@@ -1,16 +1,16 @@
 import { Schema, getMongoModel } from '../../mongo';

-const DatasetFileSchema = new Schema({});
-const ChatFileSchema = new Schema({});
+const DatasetFileSchema = new Schema({
+  metadata: Object
+});
+const ChatFileSchema = new Schema({
+  metadata: Object
+});

-try {
-  DatasetFileSchema.index({ uploadDate: -1 });
-
-  ChatFileSchema.index({ uploadDate: -1 });
-  ChatFileSchema.index({ 'metadata.chatId': 1 });
-} catch (error) {
-  console.log(error);
-}
+DatasetFileSchema.index({ uploadDate: -1 });
+
+ChatFileSchema.index({ uploadDate: -1 });
+ChatFileSchema.index({ 'metadata.chatId': 1 });

 export const MongoDatasetFileSchema = getMongoModel('dataset.files', DatasetFileSchema);
 export const MongoChatFileSchema = getMongoModel('chat.files', ChatFileSchema);
@@ -1,5 +1,57 @@
 import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
 import { PassThrough } from 'stream';
+import { getGridBucket } from './controller';
+import { type BucketNameEnum } from '@fastgpt/global/common/file/constants';
+import { retryFn } from '@fastgpt/global/common/system/utils';
+
+export const createFileFromText = async ({
+  bucket,
+  filename,
+  text,
+  metadata
+}: {
+  bucket: `${BucketNameEnum}`;
+  filename: string;
+  text: string;
+  metadata: Record<string, any>;
+}) => {
+  const gridBucket = getGridBucket(bucket);
+
+  const buffer = Buffer.from(text);
+
+  const fileSize = buffer.length;
+  // Chunk size: as large as possible, but at most 14MB and at least 128KB
+  const chunkSizeBytes = (() => {
+    // Ideal chunk size: file size / target chunk count (10), with each chunk under 14MB
+    const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);
+
+    // Ensure the chunk size is at least 128KB
+    const minChunkSize = 128 * 1024; // 128KB
+
+    // Take the larger of the ideal and minimum chunk sizes
+    let chunkSize = Math.max(idealChunkSize, minChunkSize);
+
+    // Round the chunk size up to the nearest multiple of 64KB to keep it tidy
+    chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
+
+    return chunkSize;
+  })();
+
+  const uploadStream = gridBucket.openUploadStream(filename, {
+    metadata,
+    chunkSizeBytes
+  });
+
+  return retryFn(async () => {
+    return new Promise<{ fileId: string }>((resolve, reject) => {
+      uploadStream.end(buffer);
+      uploadStream.on('finish', () => {
+        resolve({ fileId: String(uploadStream.id) });
+      });
+      uploadStream.on('error', reject);
+    });
+  });
+};
+
 export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
   return new Promise<Buffer>((resolve, reject) => {
@@ -16,6 +16,7 @@ export type readRawTextByLocalFileParams = {
   path: string;
   encoding: string;
   customPdfParse?: boolean;
+  getFormatText?: boolean;
   metadata?: Record<string, any>;
 };
 export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams) => {
@@ -27,8 +28,8 @@ export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams

   return readRawContentByFileBuffer({
     extension,
-    isQAImport: false,
     customPdfParse: params.customPdfParse,
+    getFormatText: params.getFormatText,
     teamId: params.teamId,
     tmbId: params.tmbId,
     encoding: params.encoding,
@@ -46,7 +47,7 @@ export const readRawContentByFileBuffer = async ({
   encoding,
   metadata,
   customPdfParse = false,
-  isQAImport = false
+  getFormatText = true
 }: {
   teamId: string;
   tmbId: string;
@@ -57,8 +58,10 @@ export const readRawContentByFileBuffer = async ({
   metadata?: Record<string, any>;

   customPdfParse?: boolean;
-  isQAImport: boolean;
-}): Promise<ReadFileResponse> => {
+  getFormatText?: boolean;
+}): Promise<{
+  rawText: string;
+}> => {
   const systemParse = () =>
     runWorker<ReadFileResponse>(WorkerNameEnum.readFile, {
       extension,
@@ -107,7 +110,7 @@ export const readRawContentByFileBuffer = async ({

     return {
       rawText: text,
-      formatText: rawText,
+      formatText: text,
       imageList
     };
   };
@@ -149,7 +152,7 @@ export const readRawContentByFileBuffer = async ({
     return await systemParse();
   })();

-  addLog.debug(`Parse file success, time: ${Date.now() - start}ms. Uploading file image.`);
+  addLog.debug(`Parse file success, time: ${Date.now() - start}ms. `);

   // markdown data format
   if (imageList) {
@@ -176,16 +179,7 @@ export const readRawContentByFileBuffer = async ({
     });
   }

-  if (['csv', 'xlsx'].includes(extension)) {
-    // qa data
-    if (isQAImport) {
-      rawText = rawText || '';
-    } else {
-      rawText = formatText || rawText;
-    }
-  }
-
-  addLog.debug(`Upload file image success, time: ${Date.now() - start}ms`);
+  addLog.debug(`Upload file success, time: ${Date.now() - start}ms`);

-  return { rawText, formatText, imageList };
+  return { rawText: getFormatText ? formatText || rawText : rawText };
 };
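The `isQAImport` special-casing for csv/xlsx files is gone; callers now opt in or out of markdown-formatted output with `getFormatText`, which defaults to `true`. A simplified mirror of the final return (sample values invented):

    const pickRawText = (getFormatText: boolean, rawText: string, formatText?: string) =>
      getFormatText ? formatText || rawText : rawText;

    pickRawText(true, 'a,b,c', '| a | b | c |');  // '| a | b | c |' (markdown table)
    pickRawText(false, 'a,b,c', '| a | b | c |'); // 'a,b,c' (raw text kept)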
@@ -1,7 +1,10 @@
-import { getGlobalRedisCacheConnection } from './index';
+import { getGlobalRedisConnection } from './index';
 import { addLog } from '../system/log';
 import { retryFn } from '@fastgpt/global/common/system/utils';

+const redisPrefix = 'cache:';
+const getCacheKey = (key: string) => `${redisPrefix}${key}`;
+
 export enum CacheKeyEnum {
   team_vector_count = 'team_vector_count'
 }
@@ -13,12 +16,12 @@ export const setRedisCache = async (
 ) => {
   return await retryFn(async () => {
     try {
-      const redis = getGlobalRedisCacheConnection();
+      const redis = getGlobalRedisConnection();

       if (expireSeconds) {
-        await redis.set(key, data, 'EX', expireSeconds);
+        await redis.set(getCacheKey(key), data, 'EX', expireSeconds);
       } else {
-        await redis.set(key, data);
+        await redis.set(getCacheKey(key), data);
       }
     } catch (error) {
       addLog.error('Set cache error:', error);
@@ -28,11 +31,11 @@ export const setRedisCache = async (
 };

 export const getRedisCache = async (key: string) => {
-  const redis = getGlobalRedisCacheConnection();
-  return await retryFn(() => redis.get(key));
+  const redis = getGlobalRedisConnection();
+  return await retryFn(() => redis.get(getCacheKey(key)));
 };

 export const delRedisCache = async (key: string) => {
-  const redis = getGlobalRedisCacheConnection();
-  await retryFn(() => redis.del(key));
+  const redis = getGlobalRedisConnection();
+  await retryFn(() => redis.del(getCacheKey(key)));
 };
@@ -27,17 +27,26 @@ export const newWorkerRedisConnection = () => {
   return redis;
 };

-export const getGlobalRedisCacheConnection = () => {
-  if (global.redisCache) return global.redisCache;
+export const FASTGPT_REDIS_PREFIX = 'fastgpt:';
+export const getGlobalRedisConnection = () => {
+  if (global.redisClient) return global.redisClient;

-  global.redisCache = new Redis(REDIS_URL, { keyPrefix: 'fastgpt:cache:' });
+  global.redisClient = new Redis(REDIS_URL, { keyPrefix: FASTGPT_REDIS_PREFIX });

-  global.redisCache.on('connect', () => {
+  global.redisClient.on('connect', () => {
     addLog.info('Redis connected');
   });
-  global.redisCache.on('error', (error) => {
+  global.redisClient.on('error', (error) => {
     addLog.error('Redis connection error', error);
   });

-  return global.redisCache;
+  return global.redisClient;
+};
+
+export const getAllKeysByPrefix = async (key: string) => {
+  const redis = getGlobalRedisConnection();
+  const keys = (await redis.keys(`${FASTGPT_REDIS_PREFIX}${key}:*`)).map((key) =>
+    key.replace(FASTGPT_REDIS_PREFIX, '')
+  );
+  return keys;
 };
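After the rename, every key goes through one shared connection carrying the `fastgpt:` ioredis `keyPrefix`, and cache entries layer their own `cache:` prefix on top; `getAllKeysByPrefix` scans with the full prefix but strips the connection part before returning. A small sketch of the layering (key names illustrative):

    const FASTGPT_REDIS_PREFIX = 'fastgpt:';
    const getCacheKey = (key: string) => `cache:${key}`;

    // Key actually stored in Redis for a cache entry:
    FASTGPT_REDIS_PREFIX + getCacheKey('team_vector_count:team1');
    // => 'fastgpt:cache:team_vector_count:team1'

    // What getAllKeysByPrefix('cache:team_vector_count') would return after
    // stripping the connection prefix:
    ['fastgpt:cache:team_vector_count:team1'].map((k) => k.replace(FASTGPT_REDIS_PREFIX, ''));
    // => ['cache:team_vector_count:team1']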
packages/service/common/redis/type.d.ts (2 changes, vendored)
@@ -1,5 +1,5 @@
 import type Redis from 'ioredis';

 declare global {
-  var redisCache: Redis | null;
+  var redisClient: Redis | null;
 }
@@ -10,6 +10,7 @@ let jieba: Jieba | undefined;
 })();

 const stopWords = new Set([
+  '\n',
   '--',
   '?',
   '“',
@@ -1519,8 +1520,7 @@ const stopWords = new Set([
 ]);

 export async function jiebaSplit({ text }: { text: string }) {
-  text = text.replace(/[#*`_~>[\](){}|]/g, '').replace(/\S*https?\S*/gi, '');
+  text = text.replace(/[#*`_~>[\](){}|]|\S*https?\S*/g, '').trim();

   const tokens = (await jieba!.cutAsync(text, true)) as string[];

   return (
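The two chained `replace` calls collapse into one alternation and the result is trimmed; note the old URL pattern carried the `i` flag, so the combined `g`-only regex now only strips lowercase `http(s)` tokens. A quick check of the new pattern (sample input invented):

    const clean = (text: string) =>
      text.replace(/[#*`_~>[\](){}|]|\S*https?\S*/g, '').trim();

    clean('# Title see https://example.com *now* ');
    // => 'Title see  now'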
@@ -2,26 +2,44 @@ import { SystemConfigsTypeEnum } from '@fastgpt/global/common/system/config/constants';
 import { MongoSystemConfigs } from './schema';
 import { type FastGPTConfigFileType } from '@fastgpt/global/common/system/types';
 import { FastGPTProUrl } from '../constants';
+import { type LicenseDataType } from '@fastgpt/global/common/system/types';

-export const getFastGPTConfigFromDB = async () => {
+export const getFastGPTConfigFromDB = async (): Promise<{
+  fastgptConfig: FastGPTConfigFileType;
+  licenseData?: LicenseDataType;
+}> => {
   if (!FastGPTProUrl) {
     return {
-      config: {} as FastGPTConfigFileType
+      fastgptConfig: {} as FastGPTConfigFileType
     };
   }

-  const res = await MongoSystemConfigs.findOne({
-    type: SystemConfigsTypeEnum.fastgpt
-  }).sort({
-    createTime: -1
-  });
+  const [fastgptConfig, licenseConfig] = await Promise.all([
+    MongoSystemConfigs.findOne({
+      type: SystemConfigsTypeEnum.fastgpt
+    }).sort({
+      createTime: -1
+    }),
+    MongoSystemConfigs.findOne({
+      type: SystemConfigsTypeEnum.license
+    }).sort({
+      createTime: -1
+    })
+  ]);

-  const config = res?.value || {};
+  const config = fastgptConfig?.value || {};
+  const licenseData = licenseConfig?.value?.data as LicenseDataType | undefined;

+  const fastgptConfigTime = fastgptConfig?.createTime.getTime().toString();
+  const licenseConfigTime = licenseConfig?.createTime.getTime().toString();
   // Use the config file's create time (i.e. its update time) as a cache id; if the front end hits the cache, the config file does not need to be returned again
-  global.systemInitBufferId = res ? res.createTime.getTime().toString() : undefined;
+  global.systemInitBufferId = fastgptConfigTime
+    ? `${fastgptConfigTime}-${licenseConfigTime}`
+    : undefined;

   return {
-    config: config as FastGPTConfigFileType
+    fastgptConfig: config as FastGPTConfigFileType,
+    licenseData
   };
 };
@@ -57,14 +57,19 @@ export const addLog = {
|
|||||||
|
|
||||||
level === LogLevelEnum.error && console.error(obj);
|
level === LogLevelEnum.error && console.error(obj);
|
||||||
|
|
||||||
// store
|
|
||||||
if (level >= STORE_LOG_LEVEL && connectionMongo.connection.readyState === 1) {
|
|
||||||
// store log
|
// store log
|
||||||
getMongoLog().create({
|
if (level >= STORE_LOG_LEVEL && connectionMongo.connection.readyState === 1) {
|
||||||
|
(async () => {
|
||||||
|
try {
|
||||||
|
await getMongoLog().create({
|
||||||
text: msg,
|
text: msg,
|
||||||
level,
|
level,
|
||||||
metadata: obj
|
metadata: obj
|
||||||
});
|
});
|
||||||
|
} catch (error) {
|
||||||
|
console.error('store log error', error);
|
||||||
|
}
|
||||||
|
})();
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
debug(msg: string, obj?: Record<string, any>) {
|
debug(msg: string, obj?: Record<string, any>) {
|
||||||
|
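Note: the Mongo write is now wrapped in a fire-and-forget async IIFE so a storage failure can't take down the logging call path. The same pattern in isolation (names illustrative):

const fireAndForget = (task: () => Promise<void>) => {
  void (async () => {
    try {
      await task();
    } catch (error) {
      console.error('background task error', error);
    }
  })();
};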
@@ -5,7 +5,8 @@ export enum TimerIdEnum {
   clearExpiredSubPlan = 'clearExpiredSubPlan',
   updateStandardPlan = 'updateStandardPlan',
   scheduleTriggerApp = 'scheduleTriggerApp',
-  notification = 'notification'
+  notification = 'notification',
+  clearExpiredRawTextBuffer = 'clearExpiredRawTextBuffer'
 }

 export enum LockNotificationEnum {
@@ -188,6 +188,7 @@ export class PgVectorCtrl {
     const results: any = await PgClient.query(
       `BEGIN;
         SET LOCAL hnsw.ef_search = ${global.systemEnv?.hnswEfSearch || 100};
+        SET LOCAL hnsw.max_scan_tuples = ${global.systemEnv?.hnswMaxScanTuples || 100000};
         SET LOCAL hnsw.iterative_scan = relaxed_order;
         WITH relaxed_results AS MATERIALIZED (
           select id, collection_id, vector <#> '[${vector}]' AS score
@@ -199,7 +200,7 @@ export class PgVectorCtrl {
         ) SELECT id, collection_id, score FROM relaxed_results ORDER BY score;
       COMMIT;`
     );
-    const rows = results?.[3]?.rows as PgSearchRawType[];
+    const rows = results?.[results.length - 2]?.rows as PgSearchRawType[];

     if (!Array.isArray(rows)) {
       return {
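Note: node-postgres returns one result object per statement when the query text contains multiple statements, so the hardcoded results[3] broke as soon as a third SET was added. Indexing from the end keeps the SELECT stable (assuming COMMIT stays the final statement):

// BEGIN, SET, SET, SET, SELECT, COMMIT -> results.length === 6
const selectRows = results[results.length - 2]?.rows; // always the SELECT before COMMIT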
@@ -78,7 +78,7 @@ export const createChatCompletion = async ({
   }
   body.model = modelConstantsData.model;

-  const formatTimeout = timeout ? timeout : body.stream ? 60000 : 600000;
+  const formatTimeout = timeout ? timeout : 600000;
   const ai = getAIApi({
     userKey,
     timeout: formatTimeout
@@ -1,6 +1,54 @@
 {
   "provider": "Claude",
   "list": [
+    {
+      "model": "claude-sonnet-4-20250514",
+      "name": "claude-sonnet-4-20250514",
+      "maxContext": 200000,
+      "maxResponse": 8000,
+      "quoteMaxToken": 100000,
+      "maxTemperature": 1,
+      "showTopP": true,
+      "showStopSign": true,
+      "vision": true,
+      "toolChoice": true,
+      "functionCall": false,
+      "defaultSystemChatPrompt": "",
+      "datasetProcess": true,
+      "usedInClassify": true,
+      "customCQPrompt": "",
+      "usedInExtractFields": true,
+      "usedInQueryExtension": true,
+      "customExtractPrompt": "",
+      "usedInToolCall": true,
+      "defaultConfig": {},
+      "fieldMap": {},
+      "type": "llm"
+    },
+    {
+      "model": "claude-opus-4-20250514",
+      "name": "claude-opus-4-20250514",
+      "maxContext": 200000,
+      "maxResponse": 4096,
+      "quoteMaxToken": 100000,
+      "maxTemperature": 1,
+      "showTopP": true,
+      "showStopSign": true,
+      "vision": true,
+      "toolChoice": true,
+      "functionCall": false,
+      "defaultSystemChatPrompt": "",
+      "datasetProcess": true,
+      "usedInClassify": true,
+      "customCQPrompt": "",
+      "usedInExtractFields": true,
+      "usedInQueryExtension": true,
+      "customExtractPrompt": "",
+      "usedInToolCall": true,
+      "defaultConfig": {},
+      "fieldMap": {},
+      "type": "llm"
+    },
     {
       "model": "claude-3-7-sonnet-20250219",
       "name": "claude-3-7-sonnet-20250219",
@@ -25,6 +25,30 @@
     "showTopP": true,
     "showStopSign": true
   },
+  {
+    "model": "gemini-2.5-flash-preview-04-17",
+    "name": "gemini-2.5-flash-preview-04-17",
+    "maxContext": 1000000,
+    "maxResponse": 8000,
+    "quoteMaxToken": 60000,
+    "maxTemperature": 1,
+    "vision": true,
+    "toolChoice": true,
+    "functionCall": false,
+    "defaultSystemChatPrompt": "",
+    "datasetProcess": true,
+    "usedInClassify": true,
+    "customCQPrompt": "",
+    "usedInExtractFields": true,
+    "usedInQueryExtension": true,
+    "customExtractPrompt": "",
+    "usedInToolCall": true,
+    "defaultConfig": {},
+    "fieldMap": {},
+    "type": "llm",
+    "showTopP": true,
+    "showStopSign": true
+  },
   {
     "model": "gemini-2.0-flash",
     "name": "gemini-2.0-flash",
@@ -18,15 +18,17 @@ import json5 from 'json5';
  */
 export const computedMaxToken = ({
   maxToken,
-  model
+  model,
+  min
 }: {
   maxToken?: number;
   model: LLMModelItemType;
+  min?: number;
 }) => {
   if (maxToken === undefined) return;

   maxToken = Math.min(maxToken, model.maxResponse);
-  return maxToken;
+  return Math.max(maxToken, min || 0);
 };

 // FastGPT temperature range: [0,10], ai temperature:[0,2],{0,1]……
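Note: the new `min` parameter is a floor applied after the maxResponse cap, so it wins even when it exceeds the model's cap. A reduced standalone sketch:

const computedMaxTokenSketch = ({
  maxToken,
  model,
  min
}: {
  maxToken?: number;
  model: { maxResponse: number };
  min?: number;
}) => {
  if (maxToken === undefined) return;
  // Cap to the model's max response first, then apply the floor.
  return Math.max(Math.min(maxToken, model.maxResponse), min || 0);
};

computedMaxTokenSketch({ maxToken: 100, model: { maxResponse: 4096 }, min: 2000 }); // 2000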
@@ -135,12 +137,14 @@ export const llmStreamResponseToAnswerText = async (

   // Tool calls
   if (responseChoice?.tool_calls?.length) {
-    responseChoice.tool_calls.forEach((toolCall) => {
-      const index = toolCall.index;
+    responseChoice.tool_calls.forEach((toolCall, i) => {
+      const index = toolCall.index ?? i;

-      if (toolCall.id || callingTool) {
-        // An id means a new tool call
-        if (toolCall.id) {
+      // Call new tool
+      const hasNewTool = toolCall?.function?.name || callingTool;
+      if (hasNewTool) {
+        // A function name means a new tool call
+        if (toolCall?.function?.name) {
           callingTool = {
             name: toolCall.function?.name || '',
             arguments: toolCall.function?.arguments || ''
@@ -176,7 +180,7 @@ export const llmStreamResponseToAnswerText = async (
     }
   }
   return {
-    text: parseReasoningContent(answer)[1],
+    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
     usage,
     toolCalls
   };
@@ -190,8 +194,9 @@ export const llmUnStreamResponseToAnswerText = async (
 }> => {
   const answer = response.choices?.[0]?.message?.content || '';
   const toolCalls = response.choices?.[0]?.message?.tool_calls;
+
   return {
-    text: answer,
+    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
     usage: response.usage,
     toolCalls
   };
@@ -221,7 +226,9 @@ export const parseReasoningContent = (text: string): [string, string] => {
 };

 export const removeDatasetCiteText = (text: string, retainDatasetCite: boolean) => {
-  return retainDatasetCite ? text : text.replace(/\[([a-f0-9]{24})\](?:\([^\)]*\)?)?/g, '');
+  return retainDatasetCite
+    ? text.replace(/\[id\]\(CITE\)/g, '')
+    : text.replace(/\[([a-f0-9]{24})\](?:\([^\)]*\)?)?/g, '').replace(/\[id\]\(CITE\)/g, '');
 };

 // Parse llm stream part
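Note: both branches now strip the literal [id](CITE) placeholder; only the `retainDatasetCite === false` branch also strips real 24-hex cite markers. Illustrative inputs (the id below is a made-up 24-hex string):

const id = '0123456789abcdef01234567';
removeDatasetCiteText(`a [${id}](q) b [id](CITE)`, true);  // 'a [0123456789abcdef01234567](q) b '
removeDatasetCiteText(`a [${id}](q) b [id](CITE)`, false); // 'a  b '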
@@ -236,6 +243,12 @@ export const parseLLMStreamResponse = () => {
   let citeBuffer = '';
   const maxCiteBufferLength = 32; // total length of [Object](CITE) is 32

+  // Buffer
+  let buffer_finishReason: CompletionFinishReason = null;
+  let buffer_usage: CompletionUsage = getLLMDefaultUsage();
+  let buffer_reasoningContent = '';
+  let buffer_content = '';
+
   /*
     parseThinkTag - only controls whether <think></think> is actively parsed; if the API has already parsed it, it is not parsed again.
     retainDatasetCite -
@@ -253,6 +266,7 @@ export const parseLLMStreamResponse = () => {
       };
       finish_reason?: CompletionFinishReason;
     }[];
+    usage?: CompletionUsage;
   };
   parseThinkTag?: boolean;
   retainDatasetCite?: boolean;
@@ -262,26 +276,25 @@ export const parseLLMStreamResponse = () => {
     responseContent: string;
     finishReason: CompletionFinishReason;
   } => {
+    const data = (() => {
+      buffer_usage = part.usage || buffer_usage;
+
       const finishReason = part.choices?.[0]?.finish_reason || null;
+      buffer_finishReason = finishReason || buffer_finishReason;
+
       const content = part.choices?.[0]?.delta?.content || '';
       // @ts-ignore
       const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
-      const isStreamEnd = !!finishReason;
+      const isStreamEnd = !!buffer_finishReason;

       // Parse think
-      const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } = (() => {
+      const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } =
+        (() => {
           if (reasoningContent || !parseThinkTag) {
             isInThinkTag = false;
             return { reasoningContent, content };
           }

-          if (!content) {
-            return {
-              reasoningContent: '',
-              content: ''
-            };
-          }
-
           // If not inside a think tag, or reasoningContent exists (already parsed by the API), return reasoningContent and content
           if (isInThinkTag === false) {
             return {
@@ -398,7 +411,7 @@ export const parseLLMStreamResponse = () => {
         reasoningContent: parsedThinkReasoningContent,
         content: parsedThinkContent,
         responseContent: parsedThinkContent,
-        finishReason
+        finishReason: buffer_finishReason
       };
     }

@@ -451,11 +464,32 @@ export const parseLLMStreamResponse = () => {
       reasoningContent: parsedThinkReasoningContent,
       content: parsedThinkContent,
       responseContent: pasedCiteContent,
-      finishReason
+      finishReason: buffer_finishReason
+      };
+    })();
+
+    buffer_reasoningContent += data.reasoningContent;
+    buffer_content += data.content;
+
+    return data;
+  };
+
+  const getResponseData = () => {
+    return {
+      finish_reason: buffer_finishReason,
+      usage: buffer_usage,
+      reasoningContent: buffer_reasoningContent,
+      content: buffer_content
     };
   };

+  const updateFinishReason = (finishReason: CompletionFinishReason) => {
+    buffer_finishReason = finishReason;
+  };
+
   return {
-    parsePart
+    parsePart,
+    getResponseData,
+    updateFinishReason
   };
 };
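Note: a hedged consumer sketch of the new accumulator API; the parsePart argument shape is assumed from the props type above:

const parser = parseLLMStreamResponse();
for await (const part of stream) {
  const { responseContent } = parser.parsePart({ part, retainDatasetCite: false });
  // forward responseContent to the client as it arrives
}
// After the stream ends, read the buffered totals in one place:
const { finish_reason, usage, reasoningContent, content } = parser.getResponseData();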
@@ -11,40 +11,6 @@ export const beforeUpdateAppFormat = <T extends AppSchema['modules'] | undefined
   nodes: T;
   isPlugin: boolean;
 }) => {
-  if (nodes) {
-    // Check dataset maxTokens
-    if (isPlugin) {
-      let maxTokens = 16000;
-
-      nodes.forEach((item) => {
-        if (
-          item.flowNodeType === FlowNodeTypeEnum.chatNode ||
-          item.flowNodeType === FlowNodeTypeEnum.tools
-        ) {
-          const model =
-            item.inputs.find((item) => item.key === NodeInputKeyEnum.aiModel)?.value || '';
-          const chatModel = getLLMModel(model);
-          const quoteMaxToken = chatModel.quoteMaxToken || 16000;
-
-          maxTokens = Math.max(maxTokens, quoteMaxToken);
-        }
-      });
-
-      nodes.forEach((item) => {
-        if (item.flowNodeType === FlowNodeTypeEnum.datasetSearchNode) {
-          item.inputs.forEach((input) => {
-            if (input.key === NodeInputKeyEnum.datasetMaxTokens) {
-              const val = input.value as number;
-              if (val > maxTokens) {
-                input.value = maxTokens;
-              }
-            }
-          });
-        }
-      });
-    }
-  }
-
   return {
     nodes
   };
@@ -1,7 +1,7 @@
 import { Client } from '@modelcontextprotocol/sdk/client/index.js';
 import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
 import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
-import { type ToolType } from '@fastgpt/global/core/app/type';
+import { type McpToolConfigType } from '@fastgpt/global/core/app/type';
 import { addLog } from '../../common/system/log';
 import { retryFn } from '@fastgpt/global/common/system/utils';

@@ -41,7 +41,7 @@ export class MCPClient {
    * Get available tools list
    * @returns List of tools
    */
-  public async getTools(): Promise<ToolType[]> {
+  public async getTools(): Promise<McpToolConfigType[]> {
     try {
       const client = await this.getConnection();
       const response = await client.listTools();
@@ -22,8 +22,7 @@ import {
 import { type PluginRuntimeType } from '@fastgpt/global/core/plugin/type';
 import { MongoSystemPlugin } from './systemPluginSchema';
 import { PluginErrEnum } from '@fastgpt/global/common/error/code/plugin';
-import { MongoAppVersion } from '../version/schema';
-import { i18nT } from '../../../../web/i18n/utils';
+import { Types } from 'mongoose';

 /*
   plugin id rule:
@@ -31,8 +30,7 @@ import { i18nT } from '../../../../web/i18n/utils';
   community: community-id
   commercial: commercial-id
 */
-export async function splitCombinePluginId(id: string) {
+export function splitCombineToolId(id: string) {
   const splitRes = id.split('-');
   if (splitRes.length === 1) {
     // app id
@@ -43,7 +41,7 @@ export async function splitCombinePluginId(id: string) {
   }

   const [source, pluginId] = id.split('-') as [PluginSourceEnum, string];
-  if (!source || !pluginId) return Promise.reject('pluginId not found');
+  if (!source || !pluginId) throw new Error('pluginId not found');

   return { source, pluginId: id };
 }
@@ -55,7 +53,7 @@ const getSystemPluginTemplateById = async (
   versionId?: string
 ): Promise<ChildAppType> => {
   const item = getSystemPluginTemplates().find((plugin) => plugin.id === pluginId);
-  if (!item) return Promise.reject(PluginErrEnum.unAuth);
+  if (!item) return Promise.reject(PluginErrEnum.unExist);

   const plugin = cloneDeep(item);

@@ -65,10 +63,10 @@ const getSystemPluginTemplateById = async (
       { pluginId: plugin.id, 'customConfig.associatedPluginId': plugin.associatedPluginId },
       'associatedPluginId'
     ).lean();
-    if (!systemPlugin) return Promise.reject(PluginErrEnum.unAuth);
+    if (!systemPlugin) return Promise.reject(PluginErrEnum.unExist);

     const app = await MongoApp.findById(plugin.associatedPluginId).lean();
-    if (!app) return Promise.reject(PluginErrEnum.unAuth);
+    if (!app) return Promise.reject(PluginErrEnum.unExist);

     const version = versionId
       ? await getAppVersionById({
@@ -78,6 +76,12 @@ const getSystemPluginTemplateById = async (
         })
       : await getAppLatestVersion(plugin.associatedPluginId, app);
     if (!version.versionId) return Promise.reject('App version not found');
+    const isLatest = version.versionId
+      ? await checkIsLatestVersion({
+          appId: plugin.associatedPluginId,
+          versionId: version.versionId
+        })
+      : true;

     return {
       ...plugin,
@@ -86,12 +90,19 @@ const getSystemPluginTemplateById = async (
         edges: version.edges,
         chatConfig: version.chatConfig
       },
-      version: versionId || String(version.versionId),
+      version: versionId ? version?.versionId : '',
+      versionLabel: version?.versionName,
+      isLatestVersion: isLatest,
       teamId: String(app.teamId),
       tmbId: String(app.tmbId)
     };
   }
-  return plugin;
+
+  return {
+    ...plugin,
+    version: undefined,
+    isLatestVersion: true
+  };
 };

 /* Format plugin to workflow preview node data */
@@ -103,25 +114,19 @@ export async function getChildAppPreviewNode({
   versionId?: string;
 }): Promise<FlowNodeTemplateType> {
   const app: ChildAppType = await (async () => {
-    const { source, pluginId } = await splitCombinePluginId(appId);
+    const { source, pluginId } = splitCombineToolId(appId);

     if (source === PluginSourceEnum.personal) {
       const item = await MongoApp.findById(appId).lean();
-      if (!item) return Promise.reject('plugin not found');
+      if (!item) return Promise.reject(PluginErrEnum.unExist);

       const version = await getAppVersionById({ appId, versionId, app: item });

-      if (!version.versionId) return Promise.reject(i18nT('common:app_not_version'));
-
-      const versionData = await MongoAppVersion.findById(
-        version.versionId,
-        '_id versionName appId time'
-      ).lean();
-
-      const isLatest = versionData
+      const isLatest =
+        version.versionId && Types.ObjectId.isValid(version.versionId)
           ? await checkIsLatestVersion({
               appId,
-              versionId: versionData._id
+              versionId: version.versionId
             })
           : true;

@@ -139,8 +144,8 @@ export async function getChildAppPreviewNode({
       },
       templateType: FlowNodeTemplateTypeEnum.teamApp,

-      version: version.versionId,
-      versionLabel: versionData?.versionName || '',
+      version: versionId ? version?.versionId : '',
+      versionLabel: version?.versionName,
       isLatestVersion: isLatest,

       originCost: 0,
@@ -149,7 +154,7 @@ export async function getChildAppPreviewNode({
       pluginOrder: 0
     };
   } else {
-    return getSystemPluginTemplateById(pluginId);
+    return getSystemPluginTemplateById(pluginId, versionId);
   }
   })();

@@ -223,12 +228,12 @@ export async function getChildAppRuntimeById(
   id: string,
   versionId?: string
 ): Promise<PluginRuntimeType> {
-  const app: ChildAppType = await (async () => {
-    const { source, pluginId } = await splitCombinePluginId(id);
+  const app = await (async () => {
+    const { source, pluginId } = splitCombineToolId(id);

     if (source === PluginSourceEnum.personal) {
       const item = await MongoApp.findById(id).lean();
-      if (!item) return Promise.reject('plugin not found');
+      if (!item) return Promise.reject(PluginErrEnum.unExist);

       const version = await getAppVersionById({
         appId: id,
@@ -251,8 +256,6 @@ export async function getChildAppRuntimeById(
       },
       templateType: FlowNodeTemplateTypeEnum.teamApp,

-      // unused
-      version: item?.pluginData?.nodeVersion,
       originCost: 0,
       currentCost: 0,
       hasTokenFee: false,
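Note: the rename from splitCombinePluginId also drops the needless async. For a combined id the sync form returns the source prefix plus the full id (the tool name below is hypothetical):

const { source, pluginId } = splitCombineToolId('community-searchTool');
// source === 'community', pluginId === 'community-searchTool'
// Malformed ids now throw synchronously instead of returning a rejected promise.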
@@ -1,6 +1,6 @@
 import { type ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
 import { type PluginRuntimeType } from '@fastgpt/global/core/plugin/type';
-import { splitCombinePluginId } from './controller';
+import { splitCombineToolId } from './controller';
 import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';

 /*
@@ -20,7 +20,7 @@ export const computedPluginUsage = async ({
   childrenUsage: ChatNodeUsageType[];
   error?: boolean;
 }) => {
-  const { source } = await splitCombinePluginId(plugin.id);
+  const { source } = splitCombineToolId(plugin.id);
   const childrenUsages = childrenUsage.reduce((sum, item) => sum + (item.totalPoints || 0), 0);

   if (source !== PluginSourceEnum.personal) {
@@ -119,6 +119,7 @@ const AppSchema = new Schema({
   defaultPermission: Number
 });

+AppSchema.index({ type: 1 });
 AppSchema.index({ teamId: 1, updateTime: -1 });
 AppSchema.index({ teamId: 1, type: 1 });
 AppSchema.index(
@@ -1,14 +1,13 @@
 import { MongoDataset } from '../dataset/schema';
 import { getEmbeddingModel } from '../ai/model';
-import {
-  AppNodeFlowNodeTypeMap,
-  FlowNodeTypeEnum
-} from '@fastgpt/global/core/workflow/node/constant';
+import { FlowNodeTypeEnum } from '@fastgpt/global/core/workflow/node/constant';
 import { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
 import type { StoreNodeItemType } from '@fastgpt/global/core/workflow/type/node';
-import { MongoAppVersion } from './version/schema';
-import { checkIsLatestVersion } from './version/controller';
-import { Types } from '../../common/mongo';
+import { getChildAppPreviewNode, splitCombineToolId } from './plugin/controller';
+import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
+import { authAppByTmbId } from '../../support/permission/app/auth';
+import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
+import { getErrText } from '@fastgpt/global/common/error/utils';

 export async function listAppDatasetDataByTeamIdAndDatasetIds({
   teamId,
@@ -33,52 +32,58 @@ export async function listAppDatasetDataByTeamIdAndDatasetIds({
 export async function rewriteAppWorkflowToDetail({
   nodes,
   teamId,
-  isRoot
+  isRoot,
+  ownerTmbId
 }: {
   nodes: StoreNodeItemType[];
   teamId: string;
   isRoot: boolean;
+  ownerTmbId: string;
 }) {
   const datasetIdSet = new Set<string>();

-  // Add node(App Type) versionlabel and latest sign
-  const appNodes = nodes.filter((node) => AppNodeFlowNodeTypeMap[node.flowNodeType]);
-  const versionIds = appNodes
-    .filter((node) => node.version && Types.ObjectId.isValid(node.version))
-    .map((node) => node.version);
-  if (versionIds.length > 0) {
-    const versionDataList = await MongoAppVersion.find(
-      {
-        _id: { $in: versionIds }
-      },
-      '_id versionName appId time'
-    ).lean();
-
-    const versionMap: Record<string, any> = {};
-    const isLatestChecks = await Promise.all(
-      versionDataList.map(async (version) => {
-        const isLatest = await checkIsLatestVersion({
-          appId: version.appId,
-          versionId: version._id
-        });
-        return { versionId: String(version._id), isLatest };
-      })
-    );
-    const isLatestMap = new Map(isLatestChecks.map((item) => [item.versionId, item.isLatest]));
-    versionDataList.forEach((version) => {
-      versionMap[String(version._id)] = version;
-    });
-    appNodes.forEach((node) => {
-      if (!node.version) return;
-      const versionData = versionMap[String(node.version)];
-      if (versionData) {
-        node.versionLabel = versionData.versionName;
-        node.isLatestVersion = isLatestMap.get(String(node.version)) || false;
-      }
-    });
-  }
+  /* Add node(App Type) versionlabel and latest sign ==== */
+  await Promise.all(
+    nodes.map(async (node) => {
+      if (!node.pluginId) return;
+      const { source } = splitCombineToolId(node.pluginId);
+
+      try {
+        const [preview] = await Promise.all([
+          getChildAppPreviewNode({
+            appId: node.pluginId,
+            versionId: node.version
+          }),
+          ...(source === PluginSourceEnum.personal
+            ? [
+                authAppByTmbId({
+                  tmbId: ownerTmbId,
+                  appId: node.pluginId,
+                  per: ReadPermissionVal
+                })
+              ]
+            : [])
+        ]);
+
+        node.pluginData = {
+          diagram: preview.diagram,
+          userGuide: preview.userGuide,
+          courseUrl: preview.courseUrl,
+          name: preview.name,
+          avatar: preview.avatar
+        };
+        node.versionLabel = preview.versionLabel;
+        node.isLatestVersion = preview.isLatestVersion;
+        node.version = preview.version;
+      } catch (error) {
+        node.pluginData = {
+          error: getErrText(error)
+        };
+      }
+    })
+  );
+  /* Add node(App Type) versionlabel and latest sign ==== */

   // Get all dataset ids from nodes
   nodes.forEach((node) => {
@@ -15,6 +15,7 @@ export const getAppLatestVersion = async (appId: string, app?: AppSchema) => {
   if (version) {
     return {
       versionId: version._id,
+      versionName: version.versionName,
       nodes: version.nodes,
       edges: version.edges,
       chatConfig: version.chatConfig || app?.chatConfig || {}
@@ -22,6 +23,7 @@ export const getAppLatestVersion = async (appId: string, app?: AppSchema) => {
   }
   return {
     versionId: app?.pluginData?.nodeVersion,
+    versionName: app?.name,
     nodes: app?.modules || [],
     edges: app?.edges || [],
     chatConfig: app?.chatConfig || {}
@@ -47,6 +49,7 @@ export const getAppVersionById = async ({
   if (version) {
     return {
       versionId: version._id,
+      versionName: version.versionName,
       nodes: version.nodes,
       edges: version.edges,
       chatConfig: version.chatConfig || app?.chatConfig || {}
@@ -65,6 +68,9 @@ export const checkIsLatestVersion = async ({
   appId: string;
   versionId: string;
 }) => {
+  if (!Types.ObjectId.isValid(versionId)) {
+    return false;
+  }
   const version = await MongoAppVersion.findOne(
     {
       appId,
@@ -61,6 +61,7 @@ const ChatItemSchema = new Schema({
     type: Array,
     default: []
   },
+  errorMsg: String,
   userGoodFeedback: {
     type: String
   },
@@ -34,6 +34,10 @@ const ChatSchema = new Schema({
     ref: AppCollectionName,
     required: true
   },
+  createTime: {
+    type: Date,
+    default: () => new Date()
+  },
   updateTime: {
     type: Date,
     default: () => new Date()
@@ -32,6 +32,7 @@ type Props = {
   content: [UserChatItemType & { dataId?: string }, AIChatItemType & { dataId?: string }];
   metadata?: Record<string, any>;
   durationSeconds: number; //s
+  errorMsg?: string;
 };

 export async function saveChat({
@@ -50,6 +51,7 @@ export async function saveChat({
   outLinkUid,
   content,
   durationSeconds,
+  errorMsg,
   metadata = {}
 }: Props) {
   if (!chatId || chatId === 'NO_RECORD_HISTORIES') return;
@@ -104,7 +106,8 @@ export async function saveChat({
       return {
         ...item,
         [DispatchNodeResponseKeyEnum.nodeResponse]: nodeResponse,
-        durationSeconds
+        durationSeconds,
+        errorMsg
       };
     }
     return item;
@@ -65,8 +65,8 @@ export const filterGPTMessageByMaxContext = async ({
|
|||||||
if (lastMessage.role === ChatCompletionRequestMessageRoleEnum.User) {
|
if (lastMessage.role === ChatCompletionRequestMessageRoleEnum.User) {
|
||||||
const tokens = await countGptMessagesTokens([lastMessage, ...tmpChats]);
|
const tokens = await countGptMessagesTokens([lastMessage, ...tmpChats]);
|
||||||
maxContext -= tokens;
|
maxContext -= tokens;
|
||||||
// 该轮信息整体 tokens 超出范围,这段数据不要了
|
// 该轮信息整体 tokens 超出范围,这段数据不要了。但是至少保证一组。
|
||||||
if (maxContext < 0) {
|
if (maxContext < 0 && chats.length > 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
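Note: the added `chats.length > 0` guard means the budget check can no longer discard every round: on the first iteration nothing has been kept yet, so the newest round survives even if it alone exceeds maxContext (assuming `chats` is the accumulator of kept rounds, per the comment). The guard pattern in isolation:

if (remainingTokens < 0 && kept.length > 0) {
  break; // over budget AND we already kept at least one round
}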
@@ -2,7 +2,9 @@ import type {
   APIFileListResponse,
   ApiFileReadContentResponse,
   APIFileReadResponse,
-  APIFileServer
+  ApiDatasetDetailResponse,
+  APIFileServer,
+  APIFileItem
 } from '@fastgpt/global/core/dataset/apiDataset';
 import axios, { type Method } from 'axios';
 import { addLog } from '../../../common/system/log';
@@ -89,7 +91,7 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
       `/v1/file/list`,
       {
         searchKey,
-        parentId
+        parentId: parentId || apiServer.basePath
       },
       'POST'
     );
@@ -144,7 +146,8 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
       tmbId,
       url: previewUrl,
       relatedId: apiFileId,
-      customPdfParse
+      customPdfParse,
+      getFormatText: true
     });
     return {
       title,
@@ -164,9 +167,34 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
     return url;
   };

+  const getFileDetail = async ({
+    apiFileId
+  }: {
+    apiFileId: string;
+  }): Promise<ApiDatasetDetailResponse> => {
+    const fileData = await request<ApiDatasetDetailResponse>(
+      `/v1/file/detail`,
+      {
+        id: apiFileId
+      },
+      'GET'
+    );
+
+    if (fileData) {
+      return {
+        id: fileData.id,
+        name: fileData.name,
+        parentId: fileData.parentId === null ? '' : fileData.parentId
+      };
+    }
+
+    return Promise.reject('File not found');
+  };
+
   return {
     getFileContent,
     listFiles,
-    getFilePreviewUrl
+    getFilePreviewUrl,
+    getFileDetail
   };
 };
packages/service/core/dataset/apiDataset/index.ts (27 lines, Normal file)
@@ -0,0 +1,27 @@
+import type {
+  APIFileServer,
+  YuqueServer,
+  FeishuServer
+} from '@fastgpt/global/core/dataset/apiDataset';
+import { useApiDatasetRequest } from './api';
+import { useYuqueDatasetRequest } from '../yuqueDataset/api';
+import { useFeishuDatasetRequest } from '../feishuDataset/api';
+
+export const getApiDatasetRequest = async (data: {
+  apiServer?: APIFileServer;
+  yuqueServer?: YuqueServer;
+  feishuServer?: FeishuServer;
+}) => {
+  const { apiServer, yuqueServer, feishuServer } = data;
+
+  if (apiServer) {
+    return useApiDatasetRequest({ apiServer });
+  }
+  if (yuqueServer) {
+    return useYuqueDatasetRequest({ yuqueServer });
+  }
+  if (feishuServer) {
+    return useFeishuDatasetRequest({ feishuServer });
+  }
+  return Promise.reject('Can not find api dataset server');
+};
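Note: a hedged usage sketch of the new dispatcher; exactly one server config is expected, and a missing one rejects:

const api = await getApiDatasetRequest({ feishuServer });
const files = await api.listFiles({});
const detail = await api.getFileDetail({ apiFileId: files[0].id });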
@@ -1,30 +0,0 @@
-import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
-import { type FeishuServer, type YuqueServer } from '@fastgpt/global/core/dataset/apiDataset';
-
-export enum ProApiDatasetOperationTypeEnum {
-  LIST = 'list',
-  READ = 'read',
-  CONTENT = 'content',
-  DETAIL = 'detail'
-}
-
-export type ProApiDatasetCommonParams = {
-  feishuServer?: FeishuServer;
-  yuqueServer?: YuqueServer;
-};
-
-export type GetProApiDatasetFileListParams = ProApiDatasetCommonParams & {
-  parentId?: ParentIdType;
-};
-
-export type GetProApiDatasetFileContentParams = ProApiDatasetCommonParams & {
-  apiFileId: string;
-};
-
-export type GetProApiDatasetFilePreviewUrlParams = ProApiDatasetCommonParams & {
-  apiFileId: string;
-};
-
-export type GetProApiDatasetFileDetailParams = ProApiDatasetCommonParams & {
-  apiFileId: string;
-};
@@ -34,15 +34,17 @@ import { getTrainingModeByCollection } from './utils';
 import {
   computeChunkSize,
   computeChunkSplitter,
+  computeParagraphChunkDeep,
   getLLMMaxChunkSize
 } from '@fastgpt/global/core/dataset/training/utils';
+import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';

 export const createCollectionAndInsertData = async ({
   dataset,
   rawText,
   relatedId,
   createCollectionParams,
-  isQAImport = false,
+  backupParse = false,
   billId,
   session
 }: {
@@ -50,8 +52,8 @@ export const createCollectionAndInsertData = async ({
   rawText: string;
   relatedId?: string;
   createCollectionParams: CreateOneCollectionParams;
+  backupParse?: boolean;

-  isQAImport?: boolean;
   billId?: string;
   session?: ClientSession;
 }) => {
@@ -73,15 +75,33 @@ export const createCollectionAndInsertData = async ({
     llmModel: getLLMModel(dataset.agentModel)
   });
   const chunkSplitter = computeChunkSplitter(createCollectionParams);
+  const paragraphChunkDeep = computeParagraphChunkDeep(createCollectionParams);
+
+  if (
+    trainingType === DatasetCollectionDataProcessModeEnum.qa ||
+    trainingType === DatasetCollectionDataProcessModeEnum.backup
+  ) {
+    delete createCollectionParams.chunkTriggerType;
+    delete createCollectionParams.chunkTriggerMinSize;
+    delete createCollectionParams.dataEnhanceCollectionName;
+    delete createCollectionParams.imageIndex;
+    delete createCollectionParams.autoIndexes;
+    delete createCollectionParams.indexSize;
+    delete createCollectionParams.qaPrompt;
+  }

   // 1. split chunks
   const chunks = rawText2Chunks({
     rawText,
+    chunkTriggerType: createCollectionParams.chunkTriggerType,
+    chunkTriggerMinSize: createCollectionParams.chunkTriggerMinSize,
     chunkSize,
+    paragraphChunkDeep,
+    paragraphChunkMinSize: createCollectionParams.paragraphChunkMinSize,
     maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)),
     overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0,
     customReg: chunkSplitter ? [chunkSplitter] : [],
-    isQAImport
+    backupParse
   });

   // 2. auth limit
@@ -102,6 +122,7 @@ export const createCollectionAndInsertData = async ({
   const { _id: collectionId } = await createOneCollection({
     ...createCollectionParams,
     trainingType,
+    paragraphChunkDeep,
     chunkSize,
     chunkSplitter,

@@ -157,6 +178,10 @@ export const createCollectionAndInsertData = async ({
       billId: traingBillId,
       data: chunks.map((item, index) => ({
         ...item,
+        indexes: item.indexes?.map((text) => ({
+          type: DatasetDataIndexTypeEnum.custom,
+          text
+        })),
         chunkIndex: index
       })),
       session
@@ -198,46 +223,19 @@ export type CreateOneCollectionParams = CreateDatasetCollectionParams & {
   tmbId: string;
   session?: ClientSession;
 };
-export async function createOneCollection({
-  teamId,
-  tmbId,
-  name,
-  parentId,
-  datasetId,
-  type,
-
-  createTime,
-  updateTime,
-
-  hashRawText,
-  rawTextLength,
-  metadata = {},
-  tags,
-
-  nextSyncTime,
-
-  fileId,
-  rawLink,
-  externalFileId,
-  externalFileUrl,
-  apiFileId,
-
-  // Parse settings
-  customPdfParse,
-  imageIndex,
-  autoIndexes,
-
-  // Chunk settings
-  trainingType,
-  chunkSettingMode,
-  chunkSplitMode,
-  chunkSize,
-  indexSize,
-  chunkSplitter,
-  qaPrompt,
-
-  session
-}: CreateOneCollectionParams) {
+export async function createOneCollection({ session, ...props }: CreateOneCollectionParams) {
+  const {
+    teamId,
+    parentId,
+    datasetId,
+    tags,
+    fileId,
+    rawLink,
+    externalFileId,
+    externalFileUrl,
+    apiFileId
+  } = props;
   // Create collection tags
   const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session });

@@ -245,41 +243,18 @@ export async function createOneCollection({
   const [collection] = await MongoDatasetCollection.create(
     [
       {
+        ...props,
         teamId,
-        tmbId,
         parentId: parentId || null,
         datasetId,
-        name,
-        type,
-
-        rawTextLength,
-        hashRawText,
         tags: collectionTags,
-        metadata,
-
-        createTime,
-        updateTime,
-        nextSyncTime,
-
         ...(fileId ? { fileId } : {}),
         ...(rawLink ? { rawLink } : {}),
         ...(externalFileId ? { externalFileId } : {}),
         ...(externalFileUrl ? { externalFileUrl } : {}),
-        ...(apiFileId ? { apiFileId } : {}),
-
-        // Parse settings
-        customPdfParse,
-        imageIndex,
-        autoIndexes,
-
-        // Chunk settings
-        trainingType,
-        chunkSettingMode,
-        chunkSplitMode,
-        chunkSize,
-        indexSize,
-        chunkSplitter,
-        qaPrompt
+        ...(apiFileId ? { apiFileId } : {})
       }
     ],
     { session, ordered: true }
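Note: the createOneCollection refactor replaces the long destructured parameter list with a rest-spread pass-through; only fields that need normalization are pulled out, and everything else flows into the Mongo document untouched. The shape of the resulting document, in sketch form:

const doc = {
  ...props,                          // all caller-provided collection fields, as-is
  parentId: props.parentId || null,  // normalize a missing parent to null
  tags: collectionTags               // raw tags resolved via createOrGetCollectionTags
};

A likely motivation for this design: new collection fields (such as paragraphChunkDeep above) no longer require a matching edit to the parameter list and the create call.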
@@ -34,9 +34,9 @@ const DatasetDataTextSchema = new Schema({

 try {
   DatasetDataTextSchema.index(
-    { teamId: 1, datasetId: 1, fullTextToken: 'text' },
+    { teamId: 1, fullTextToken: 'text' },
     {
-      name: 'teamId_1_datasetId_1_fullTextToken_text',
+      name: 'teamId_1_fullTextToken_text',
       default_language: 'none'
     }
   );
packages/service/core/dataset/feishuDataset/api.ts (208 lines, Normal file)
@@ -0,0 +1,208 @@
+import type {
+  APIFileItem,
+  ApiFileReadContentResponse,
+  ApiDatasetDetailResponse,
+  FeishuServer
+} from '@fastgpt/global/core/dataset/apiDataset';
+import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
+import axios, { type Method } from 'axios';
+import { addLog } from '../../../common/system/log';
+
+type ResponseDataType = {
+  success: boolean;
+  message: string;
+  data: any;
+};
+
+type FeishuFileListResponse = {
+  files: {
+    token: string;
+    parent_token: string;
+    name: string;
+    type: string;
+    modified_time: number;
+    created_time: number;
+    url: string;
+    owner_id: string;
+  }[];
+  has_more: boolean;
+  next_page_token: string;
+};
+
+const feishuBaseUrl = process.env.FEISHU_BASE_URL || 'https://open.feishu.cn';
+
+export const useFeishuDatasetRequest = ({ feishuServer }: { feishuServer: FeishuServer }) => {
+  const instance = axios.create({
+    baseURL: feishuBaseUrl,
+    timeout: 60000
+  });
+
+  // Add a request interceptor
+  instance.interceptors.request.use(async (config) => {
+    if (!config.headers.Authorization) {
+      const { data } = await axios.post<{ tenant_access_token: string }>(
+        `${feishuBaseUrl}/open-apis/auth/v3/tenant_access_token/internal`,
+        {
+          app_id: feishuServer.appId,
+          app_secret: feishuServer.appSecret
+        }
+      );
+
+      config.headers['Authorization'] = `Bearer ${data.tenant_access_token}`;
+      config.headers['Content-Type'] = 'application/json; charset=utf-8';
+    }
+    return config;
+  });
+
+  /**
+   * Check response data
+   */
+  const checkRes = (data: ResponseDataType) => {
+    if (data === undefined) {
+      addLog.info('yuque dataset data is empty');
+      return Promise.reject('服务器异常');
+    }
+    return data.data;
+  };
+  const responseError = (err: any) => {
+    console.log('error->', '请求错误', err);
+
+    if (!err) {
+      return Promise.reject({ message: '未知错误' });
+    }
+    if (typeof err === 'string') {
+      return Promise.reject({ message: err });
+    }
+    if (typeof err.message === 'string') {
+      return Promise.reject({ message: err.message });
+    }
+    if (typeof err.data === 'string') {
+      return Promise.reject({ message: err.data });
+    }
+    if (err?.response?.data) {
+      return Promise.reject(err?.response?.data);
+    }
+    return Promise.reject(err);
+  };
+
+  const request = <T>(url: string, data: any, method: Method): Promise<T> => {
+    /* Strip undefined values */
+    for (const key in data) {
+      if (data[key] === undefined) {
+        delete data[key];
+      }
+    }
+
+    return instance
+      .request({
+        url,
+        method,
+        data: ['POST', 'PUT'].includes(method) ? data : undefined,
+        params: !['POST', 'PUT'].includes(method) ? data : undefined
+      })
+      .then((res) => checkRes(res.data))
+      .catch((err) => responseError(err));
+  };
+
+  const listFiles = async ({ parentId }: { parentId?: ParentIdType }): Promise<APIFileItem[]> => {
+    const fetchFiles = async (pageToken?: string): Promise<FeishuFileListResponse['files']> => {
+      const data = await request<FeishuFileListResponse>(
+        `/open-apis/drive/v1/files`,
+        {
+          folder_token: parentId || feishuServer.folderToken,
+          page_size: 200,
+          page_token: pageToken
+        },
+        'GET'
+      );
+
+      if (data.has_more) {
+        const nextFiles = await fetchFiles(data.next_page_token);
+        return [...data.files, ...nextFiles];
+      }
+
+      return data.files;
+    };
+
+    const allFiles = await fetchFiles();
+
+    return allFiles
+      .filter((file) => ['folder', 'docx'].includes(file.type))
+      .map((file) => ({
+        id: file.token,
+        parentId: file.parent_token,
+        name: file.name,
+        type: file.type === 'folder' ? ('folder' as const) : ('file' as const),
+        hasChild: file.type === 'folder',
+        updateTime: new Date(file.modified_time * 1000),
+        createTime: new Date(file.created_time * 1000)
+      }));
+  };
+
+  const getFileContent = async ({
+    apiFileId
+  }: {
+    apiFileId: string;
+  }): Promise<ApiFileReadContentResponse> => {
+    const [{ content }, { document }] = await Promise.all([
+      request<{ content: string }>(
+        `/open-apis/docx/v1/documents/${apiFileId}/raw_content`,
+        {},
+        'GET'
+      ),
+      request<{ document: { title: string } }>(
+        `/open-apis/docx/v1/documents/${apiFileId}`,
+        {},
+        'GET'
+      )
+    ]);
+
+    return {
+      title: document?.title,
+      rawText: content
+    };
+  };
+
+  const getFilePreviewUrl = async ({ apiFileId }: { apiFileId: string }): Promise<string> => {
+    const { metas } = await request<{ metas: { url: string }[] }>(
+      `/open-apis/drive/v1/metas/batch_query`,
+      {
+        request_docs: [
+          {
+            doc_token: apiFileId,
+            doc_type: 'docx'
+          }
+        ],
+        with_url: true
+      },
+      'POST'
+    );
+
+    return metas[0].url;
+  };
+
+  const getFileDetail = async ({
+    apiFileId
+  }: {
+    apiFileId: string;
+  }): Promise<ApiDatasetDetailResponse> => {
+    const { document } = await request<{ document: { title: string } }>(
+      `/open-apis/docx/v1/documents/${apiFileId}`,
+      {},
+      'GET'
+    );
+
+    return {
+      name: document?.title,
+      parentId: null,
+      id: apiFileId
+    };
+  };
+
+  return {
+    getFileContent,
+    listFiles,
+    getFilePreviewUrl,
+    getFileDetail
+  };
+};
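Note: a hedged usage sketch of the new Feishu client; credential values below are placeholders:

const feishu = useFeishuDatasetRequest({
  feishuServer: { appId: 'cli_xxx', appSecret: '***', folderToken: 'fldtoken' }
});
const files = await feishu.listFiles({});
const { title, rawText } = await feishu.getFileContent({ apiFileId: files[0].id });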
@@ -1,8 +1,10 @@
 import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
-import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
+import {
+  ChunkTriggerConfigTypeEnum,
+  DatasetSourceReadTypeEnum
+} from '@fastgpt/global/core/dataset/constants';
 import { readFileContentFromMongo } from '../../common/file/gridfs/controller';
 import { urlsFetch } from '../../common/string/cheerio';
-import { parseCsvTable2Chunks } from './training/utils';
 import { type TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
 import axios from 'axios';
 import { readRawContentByFileBuffer } from '../../common/file/read/utils';
@@ -12,19 +14,22 @@ import {
   type FeishuServer,
   type YuqueServer
 } from '@fastgpt/global/core/dataset/apiDataset';
-import { useApiDatasetRequest } from './apiDataset/api';
+import { getApiDatasetRequest } from './apiDataset';
+import Papa from 'papaparse';
 
 export const readFileRawTextByUrl = async ({
   teamId,
   tmbId,
   url,
   customPdfParse,
+  getFormatText,
   relatedId
 }: {
   teamId: string;
   tmbId: string;
   url: string;
   customPdfParse?: boolean;
+  getFormatText?: boolean;
   relatedId: string; // externalFileId / apiFileId
 }) => {
   const response = await axios({
@@ -38,7 +43,7 @@ export const readFileRawTextByUrl = async ({
 
   const { rawText } = await readRawContentByFileBuffer({
     customPdfParse,
-    isQAImport: false,
+    getFormatText,
     extension,
     teamId,
     tmbId,
@@ -62,21 +67,21 @@ export const readDatasetSourceRawText = async ({
   tmbId,
   type,
   sourceId,
-  isQAImport,
   selector,
   externalFileId,
   apiServer,
   feishuServer,
   yuqueServer,
-  customPdfParse
+  customPdfParse,
+  getFormatText
 }: {
   teamId: string;
   tmbId: string;
   type: DatasetSourceReadTypeEnum;
   sourceId: string;
   customPdfParse?: boolean;
+  getFormatText?: boolean;
 
-  isQAImport?: boolean; // csv data
   selector?: string; // link selector
   externalFileId?: string; // external file dataset
   apiServer?: APIFileServer; // api dataset
@@ -92,8 +97,8 @@ export const readDatasetSourceRawText = async ({
       tmbId,
       bucketName: BucketNameEnum.dataset,
       fileId: sourceId,
-      isQAImport,
-      customPdfParse
+      customPdfParse,
+      getFormatText
     });
     return {
       title: filename,
@@ -161,38 +166,82 @@ export const readApiServerFileContent = async ({
   title?: string;
   rawText: string;
 }> => {
-  if (apiServer) {
-    return useApiDatasetRequest({ apiServer }).getFileContent({
-      teamId,
-      tmbId,
-      apiFileId,
-      customPdfParse
-    });
-  }
-
-  if (feishuServer || yuqueServer) {
-    return global.getProApiDatasetFileContent({
-      feishuServer,
-      yuqueServer,
-      apiFileId
-    });
-  }
-
-  return Promise.reject('No apiServer or feishuServer or yuqueServer');
+  return (
+    await getApiDatasetRequest({
+      apiServer,
+      yuqueServer,
+      feishuServer
+    })
+  ).getFileContent({
+    teamId,
+    tmbId,
+    apiFileId,
+    customPdfParse
+  });
 };
 
 export const rawText2Chunks = ({
   rawText,
-  isQAImport,
+  chunkTriggerType = ChunkTriggerConfigTypeEnum.minSize,
+  chunkTriggerMinSize = 1000,
+  backupParse,
   chunkSize = 512,
   ...splitProps
 }: {
   rawText: string;
-  isQAImport?: boolean;
-} & TextSplitProps) => {
-  if (isQAImport) {
-    const { chunks } = parseCsvTable2Chunks(rawText);
-    return chunks;
+  chunkTriggerType?: ChunkTriggerConfigTypeEnum;
+  chunkTriggerMinSize?: number; // maxSize from agent model, not store
+  backupParse?: boolean;
+  tableParse?: boolean;
+} & TextSplitProps): {
+  q: string;
+  a: string;
+  indexes?: string[];
+}[] => {
+  const parseDatasetBackup2Chunks = (rawText: string) => {
+    const csvArr = Papa.parse(rawText).data as string[][];
+    console.log(rawText, csvArr);
+
+    const chunks = csvArr
+      .slice(1)
+      .map((item) => ({
+        q: item[0] || '',
+        a: item[1] || '',
+        indexes: item.slice(2)
+      }))
+      .filter((item) => item.q || item.a);
+
+    return {
+      chunks
+    };
+  };
+
+  if (backupParse) {
+    return parseDatasetBackup2Chunks(rawText).chunks;
+  }
+
+  // Chunk condition
+  // 1. Max-size trigger: only split once the text exceeds the maximum (default: 0.7 * the model's max size)
+  if (chunkTriggerType === ChunkTriggerConfigTypeEnum.maxSize) {
+    const textLength = rawText.trim().length;
+    const maxSize = splitProps.maxSize ? splitProps.maxSize * 0.7 : 16000;
+    if (textLength < maxSize) {
+      return [
+        {
+          q: rawText,
+          a: ''
+        }
+      ];
+    }
+  }
+  // 2. Min-size trigger: only split once the text exceeds the manually configured minimum
+  if (chunkTriggerType !== ChunkTriggerConfigTypeEnum.forceChunk) {
+    const textLength = rawText.trim().length;
+    if (textLength < chunkTriggerMinSize) {
+      return [{ q: rawText, a: '' }];
+    }
   }
 
   const { chunks } = splitText2Chunks({
@@ -203,6 +252,7 @@ export const rawText2Chunks = ({
 
   return chunks.map((item) => ({
     q: item,
-    a: ''
+    a: '',
+    indexes: []
   }));
 };
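The new trigger logic decides whether `rawText2Chunks` splits at all before `splitText2Chunks` ever runs: `forceChunk` always splits, `maxSize` keeps the whole text as one chunk until it exceeds roughly 0.7x the model's maximum, and `minSize` keeps it whole below a manual threshold. A standalone sketch of that decision, with enum values assumed to mirror `ChunkTriggerConfigTypeEnum`:

// Sketch only; returns true when the text should stay a single chunk.
enum ChunkTriggerType {
  minSize = 'minSize',
  maxSize = 'maxSize',
  forceChunk = 'forceChunk'
}

const keepWhole = (
  text: string,
  trigger: ChunkTriggerType,
  opts: { minSize?: number; maxSize?: number } = {}
): boolean => {
  const len = text.trim().length;
  if (trigger === ChunkTriggerType.forceChunk) return false; // always split
  if (trigger === ChunkTriggerType.maxSize) {
    // Only pushed through the splitter once the text exceeds ~0.7x the model max.
    if (len < (opts.maxSize ? opts.maxSize * 0.7 : 16000)) return true;
  }
  return len < (opts.minSize ?? 1000); // minSize mode: short texts stay whole
};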

@@ -1,10 +1,12 @@
 import { getMongoModel, Schema } from '../../common/mongo';
 import {
   ChunkSettingModeEnum,
+  ChunkTriggerConfigTypeEnum,
   DataChunkSplitModeEnum,
   DatasetCollectionDataProcessModeEnum,
   DatasetTypeEnum,
-  DatasetTypeMap
+  DatasetTypeMap,
+  ParagraphChunkAIModeEnum
 } from '@fastgpt/global/core/dataset/constants';
 import {
   TeamCollectionName,
@@ -15,12 +17,22 @@ import type { DatasetSchemaType } from '@fastgpt/global/core/dataset/type.d';
 export const DatasetCollectionName = 'datasets';
 
 export const ChunkSettings = {
-  imageIndex: Boolean,
-  autoIndexes: Boolean,
   trainingType: {
     type: String,
     enum: Object.values(DatasetCollectionDataProcessModeEnum)
   },
+
+  chunkTriggerType: {
+    type: String,
+    enum: Object.values(ChunkTriggerConfigTypeEnum)
+  },
+  chunkTriggerMinSize: Number,
+
+  dataEnhanceCollectionName: Boolean,
+
+  imageIndex: Boolean,
+  autoIndexes: Boolean,
+
   chunkSettingMode: {
     type: String,
     enum: Object.values(ChunkSettingModeEnum)
@@ -29,6 +41,12 @@ export const ChunkSettings = {
     type: String,
     enum: Object.values(DataChunkSplitModeEnum)
   },
+  paragraphChunkAIMode: {
+    type: String,
+    enum: Object.values(ParagraphChunkAIModeEnum)
+  },
+  paragraphChunkDeep: Number,
+  paragraphChunkMinSize: Number,
   chunkSize: Number,
   chunkSplitter: String,
 
@@ -115,14 +133,13 @@ const DatasetSchema = new Schema({
 
   // abandoned
   autoSync: Boolean,
-  externalReadUrl: {
-    type: String
-  },
+  externalReadUrl: String,
   defaultPermission: Number
 });
 
 try {
   DatasetSchema.index({ teamId: 1 });
+  DatasetSchema.index({ type: 1 });
 } catch (error) {
   console.log(error);
 }

@@ -27,6 +27,7 @@ import { type ChatItemType } from '@fastgpt/global/core/chat/type';
 import type { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
 import { datasetSearchQueryExtension } from './utils';
 import type { RerankModelItemType } from '@fastgpt/global/core/ai/model.d';
+import { addLog } from '../../../common/system/log';
 
 export type SearchDatasetDataProps = {
   histories: ChatItemType[];
@@ -474,7 +475,7 @@ export async function searchDatasetData(
     ).lean()
   ]);
 
-  const set = new Map<string, number>();
+  const set = new Set<string>();
   const formatResult = results
     .map((item, index) => {
       const collection = collections.find((col) => String(col._id) === String(item.collectionId));
@@ -507,7 +508,7 @@ export async function searchDatasetData(
     .filter((item) => {
       if (!item) return false;
       if (set.has(item.id)) return false;
-      set.set(item.id, 1);
+      set.add(item.id);
       return true;
     })
     .map((item, index) => {
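The `Map<string, number>` here was only ever used for membership tests, so `Set` states the dedup intent directly. The same filter, extracted as a generic sketch:

// Generic id-based dedup, the shape used in the filter above.
const dedupById = <T extends { id: string }>(items: T[]): T[] => {
  const seen = new Set<string>();
  return items.filter((item) => {
    if (seen.has(item.id)) return false;
    seen.add(item.id);
    return true;
  });
};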
@@ -544,16 +545,14 @@ export async function searchDatasetData(
     };
   }
 
-  const searchResults = (
-    await Promise.all(
-      datasetIds.map(async (id) => {
-        return MongoDatasetDataText.aggregate(
+  try {
+    const searchResults = (await MongoDatasetDataText.aggregate(
       [
         {
           $match: {
             teamId: new Types.ObjectId(teamId),
-            datasetId: new Types.ObjectId(id),
             $text: { $search: await jiebaSplit({ text: query }) },
+            datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) },
             ...(filterCollectionIdList
               ? {
                   collectionId: {
@@ -590,10 +589,7 @@ export async function searchDatasetData(
       {
         ...readFromSecondary
       }
-    );
-      })
-    )
-  ).flat() as (DatasetDataTextSchemaType & { score: number })[];
+    )) as (DatasetDataTextSchemaType & { score: number })[];
 
     // Get data and collections
     const [dataList, collections] = await Promise.all([
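These two hunks collapse the per-dataset `Promise.all` fan-out (one aggregate per `datasetId`, results flattened afterwards) into a single aggregate whose `$match` uses `$in`, so the full-text index is queried once. A sketch of the consolidated match stage, following the field names in the diff:

import { Types } from 'mongoose';

// One $match covering all datasets replaces N parallel pipelines.
const buildMatchStage = (teamId: string, datasetIds: string[], searchText: string) => ({
  $match: {
    teamId: new Types.ObjectId(teamId),
    $text: { $search: searchText },
    datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) }
  }
});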
@@ -648,9 +644,26 @@ export async function searchDatasetData(
           ]
         };
       })
-      .filter(Boolean) as SearchDataResponseItemType[],
+      .filter((item) => {
+        if (!item) return false;
+        return true;
+      })
+      .map((item, index) => {
+        if (!item) return;
+        return {
+          ...item,
+          score: item.score.map((item) => ({ ...item, index }))
+        };
+      }) as SearchDataResponseItemType[],
     tokenLen: 0
   };
+  } catch (error) {
+    addLog.error('Full text search error', error);
+    return {
+      fullTextRecallResults: [],
+      tokenLen: 0
+    };
+  }
 };
 const multiQueryRecall = async ({
   embeddingLimit,
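With the new `try`/`catch`, a failed full-text recall now logs via `addLog` and degrades to empty results instead of rejecting the whole hybrid search. The generic shape of that fallback, as a sketch:

// A recall failure yields an empty list rather than propagating the error.
const safeRecall = async <T>(
  recall: () => Promise<T[]>,
  onError: (error: unknown) => void
): Promise<T[]> => {
  try {
    return await recall();
  } catch (error) {
    onError(error);
    return [];
  }
};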

@@ -1,6 +1,5 @@
 export enum ImportDataSourceEnum {
   fileLocal = 'fileLocal',
   fileLink = 'fileLink',
-  fileCustom = 'fileCustom',
-  tableLocal = 'tableLocal'
+  fileCustom = 'fileCustom'
 }

@@ -1,16 +0,0 @@
-import Papa from 'papaparse';
-
-export const parseCsvTable2Chunks = (rawText: string) => {
-  const csvArr = Papa.parse(rawText).data as string[][];
-
-  const chunks = csvArr
-    .map((item) => ({
-      q: item[0] || '',
-      a: item[1] || ''
-    }))
-    .filter((item) => item.q || item.a);
-
-  return {
-    chunks
-  };
-};
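The deleted helper's role moves into `parseDatasetBackup2Chunks` inside `rawText2Chunks` (see the earlier hunk). Two behavioral differences are visible in the diff: the new parser drops the header row via `.slice(1)`, and it keeps any extra CSV columns as per-chunk `indexes`. The replacement logic, restated as a self-contained sketch:

import Papa from 'papaparse';

// Backup-CSV parsing as now done inside rawText2Chunks.
const parseBackupCsv = (rawText: string) =>
  (Papa.parse(rawText).data as string[][])
    .slice(1) // skip header row (the old helper kept it)
    .map((row) => ({ q: row[0] || '', a: row[1] || '', indexes: row.slice(2) }))
    .filter((row) => row.q || row.a);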
Some files were not shown because too many files have changed in this diff.