Compare commits

...

42 Commits

Author SHA1 Message Date
dreamer6680
d7a722a609 add csp and more function for markdown (#4921)
* support html

* html

* add csp

* remove unuse function

---------

Co-authored-by: dreamer6680 <146868355@qq.com>
2025-05-29 17:57:37 +08:00
Archer
0f866fc552 feat: text collecion auto save for a txt file (#4924) 2025-05-29 17:57:27 +08:00
Archer
05c7ba4483 feat: Workflow node search (#4920)
* add node find (#4902)

* add node find

* plugin header

* fix

* fix

* remove

* type

* add searched status

* optimize

* perf: search nodes

---------

Co-authored-by: heheer <heheer@sealos.io>
2025-05-29 14:29:28 +08:00
heheer
fa80ce3a77 fix child app external variables (#4919) 2025-05-29 13:37:59 +08:00
Archer
830358aa72 remove invalid code (#4915) 2025-05-28 22:11:40 +08:00
Archer
02b214b3ec feat: remove buffer;fix: custom pdf parse (#4914)
* fix: doc

* fix: remove buffer

* fix: pdf parse
2025-05-28 21:48:10 +08:00
Archer
a171c7b11c perf: buffer;fix: back up split (#4913)
* perf: buffer

* fix: back up split

* fix: app limit

* doc
2025-05-28 18:18:25 +08:00
heheer
802de11363 fix runtool empty message (#4911)
* fix runtool empty message

* del unused code

* fix
2025-05-28 17:48:30 +08:00
Archer
b4ecfb0b79 Feat: Node latest version (#4905)
* node versions add keep the latest option (#4899)

* node versions add keep the latest option

* i18n

* perf: version code

* fix: ts

* hide system version

* hide system version

* hide system version

* fix: ts

* fix: ts

---------

Co-authored-by: heheer <heheer@sealos.io>
2025-05-28 10:46:32 +08:00
heheer
331b851a78 fix has tool node condition (#4907) 2025-05-28 10:34:02 +08:00
Archer
50d235c42a fix: i18n (#4898) 2025-05-27 10:45:25 +08:00
Archer
9838593451 version doc (#4897) 2025-05-27 10:33:35 +08:00
Archer
c25cd48e72 perf: chunk trigger and paragraph split (#4893)
* perf: chunk trigger and paragraph split

* update max size computed

* perf: i18n

* remove table
2025-05-26 18:57:22 +08:00
Archer
874300a56a fix: chinese name export (#4890)
* fix: chinese name export

* fix: xlsx white space

* doc

* doc
2025-05-25 21:19:29 +08:00
Archer
1dea2b71b4 perf: human check;perf: recursion get node response (#4888)
* perf: human check

* version

* perf: recursion get node response
2025-05-25 20:55:29 +08:00
Archer
a8673344b1 Test add menu (#4887)
* Feature: Add additional dataset options and their descriptions, updat… (#4874)

* Feature: Add additional dataset options and their descriptions, update menu components to support submenu functionality

* Optimize the menu component by removing the sub-menu position attribute, introducing the MyPopover component to support sub-menu functionality, and adding new dataset options and their descriptions in the dataset list.

---------

Co-authored-by: dreamer6680 <146868355@qq.com>

* api dataset tip

* remove invalid code

---------

Co-authored-by: dreamer6680 <1468683855@qq.com>
Co-authored-by: dreamer6680 <146868355@qq.com>
2025-05-25 20:16:03 +08:00
Archer
9709ae7a4f feat: The workflow quickly adds applications (#4882)
* feat: add node by handle (#4860)

* feat: add node by handle

* fix

* fix edge filter

* fix

* move utils

* move context

* scale handle

* move postion to handle params & optimize handle scale (#4878)

* move position to handle params

* close button scale

* perf: node template ui

* remove handle scale (#4880)

* feat: handle connect

* add mouse down duration check (#4881)

* perf: long press time

* tool handle size

* optimize add node by handle (#4883)

---------

Co-authored-by: heheer <heheer@sealos.io>
2025-05-23 19:20:12 +08:00
Archer
fae76e887a perf: dataset import params code (#4875)
* perf: dataset import params code

* perf: api dataset code

* model
2025-05-23 10:40:25 +08:00
dreamer6680
9af92d1eae Open Yufu Feishu Knowledge Base Permissions (#4867)
* add feishu yuque dataset

* Open Yufu Feishu Knowledge Base Permissions

* Refactor the dataset request module, optimize the import path, and fix the type definition

---------

Co-authored-by: dreamer6680 <146868355@qq.com>
2025-05-22 23:19:55 +08:00
Archer
6a6719e93d perf: isPc check;perf: dataset max token checker (#4872)
* perf: isPc check

* perf: dataset max token checker

* perf: dataset max token checker
2025-05-22 18:40:29 +08:00
Compasafe
50481f4ca8 fix: 修改语音组件中判断isPc的逻辑 (#4854)
* fix: 修改语音组件中判断isPc的逻辑

* fix: 修改语音组件中判断isPc的逻辑
2025-05-22 16:29:53 +08:00
Archer
88bd3aaa9e perf: backup import (#4866)
* i18n

* remove invalid code

* perf: backup import

* backup tip

* fix: indexsize invalid
2025-05-22 15:53:51 +08:00
Archer
dd3c251603 fix: stream response (#4853) 2025-05-21 10:21:20 +08:00
Archer
aa55f059d4 perf: chat history api;perf: full text error (#4852)
* perf: chat history api

* perf: i18n

* perf: full text
2025-05-20 22:31:32 +08:00
dreamer6680
89c9a02650 change ui of price (#4851)
Co-authored-by: dreamer6680 <146868355@qq.com>
2025-05-20 20:51:07 +08:00
heheer
0f3bfa280a fix quote reader duplicate rendering (#4845) 2025-05-20 20:21:00 +08:00
dependabot[bot]
593ebfd269 chore(deps): bump multer from 1.4.5-lts.1 to 2.0.0 (#4839)
Bumps [multer](https://github.com/expressjs/multer) from 1.4.5-lts.1 to 2.0.0.
- [Release notes](https://github.com/expressjs/multer/releases)
- [Changelog](https://github.com/expressjs/multer/blob/v2.0.0/CHANGELOG.md)
- [Commits](https://github.com/expressjs/multer/compare/v1.4.5-lts.1...v2.0.0)

---
updated-dependencies:
- dependency-name: multer
  dependency-version: 2.0.0
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-05-20 13:58:47 +08:00
John Chen
f6dc2204f5 fix:修正docker-compose-pgvecto.yml文件中,健康检查参数错误 (#4841) 2025-05-20 13:57:32 +08:00
Archer
d44c338059 perf: confirm ux (#4843)
* perf: delete tip ux

* perf: confirm ux
2025-05-20 13:41:56 +08:00
Archer
1dac2b70ec perf: stream timeout;feat: hnsw max_scan_tuples config;fix: fulltext search merge error (#4838)
* perf: stream timeout

* feat: hnsw max_scan_tuples config

* fix: fulltext search merge error

* perf: jieba code
2025-05-20 09:59:24 +08:00
Archer
9fef3e15fb Update doc (#4831)
* doc

* doc

* version update
2025-05-18 23:16:31 +08:00
Archer
2d2d0fffe9 Test apidataset (#4830)
* Dataset (#4822)

* apidataset support to basepath

* Resolve the error of the Feishu Knowledge Base modification configuration page not supporting baseurl bug.

* apibasepath

* add

* perf: api dataset

---------

Co-authored-by: dreamer6680 <1468683855@qq.com>
2025-05-17 22:41:10 +08:00
heheer
c6e0b5a1e7 offiaccount welcome text (#4827)
* offiaccount welcome text

* fix

* Update Image.tsx

---------

Co-authored-by: Archer <545436317@qq.com>
2025-05-17 22:03:18 +08:00
dependabot[bot]
932aa28a1f chore(deps): bump undici in /plugins/webcrawler/SPIDER (#4825)
Bumps [undici](https://github.com/nodejs/undici) from 6.21.1 to 6.21.3.
- [Release notes](https://github.com/nodejs/undici/releases)
- [Commits](https://github.com/nodejs/undici/compare/v6.21.1...v6.21.3)

---
updated-dependencies:
- dependency-name: undici
  dependency-version: 6.21.3
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-05-17 01:16:31 +08:00
heheer
9c59bc2c17 fix: handle optional indexes in InputDataModal (#4828) 2025-05-16 15:07:33 +08:00
Archer
e145f63554 feat: chat error msg (#4826)
* perf: i18n

* feat: chat error msg

* feat: doc
2025-05-16 12:07:11 +08:00
Archer
554b2ca8dc perf: mcp tool type (#4820) 2025-05-15 18:14:32 +08:00
Archer
4e83840c14 perf: tool call check (#4818)
* i18n

* tool call

* fix: mcp create permission;Plugin unauth tip

* fix: mcp create permission;Plugin unauth tip

* fix: Cite modal permission

* remove invalide cite

* perf: prompt

* filter fulltext search

* fix: ts

* fix: ts

* fix: ts
2025-05-15 15:51:34 +08:00
heheer
a6c80684d1 fix version match (#4814) 2025-05-14 17:45:31 +08:00
Archer
a4db03a3b7 feat: session id (#4817)
* feat: session id

* feat: Add default index
2025-05-14 17:24:02 +08:00
Archer
cba8f773fe New license (#4809)
* feat: new-license

* perf: volumn watch

* Set use client
2025-05-14 13:55:09 +08:00
Archer
bd93f28d6f update doc (#4806) 2025-05-13 21:24:35 +08:00
337 changed files with 7913 additions and 4457 deletions

View File

@@ -21,7 +21,7 @@
"i18n-ally.namespace": true,
"i18n-ally.pathMatcher": "{locale}/{namespaces}.json",
"i18n-ally.extract.targetPickingStrategy": "most-similar-by-key",
"i18n-ally.translate.engines": ["google"],
"i18n-ally.translate.engines": ["deepl","google"],
"[typescript]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},

View File

@@ -132,15 +132,15 @@ services:
# fastgpt
sandbox:
container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
networks:
- fastgpt
restart: always
fastgpt-mcp-server:
container_name: fastgpt-mcp-server
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
ports:
- 3005:3000
networks:
@@ -150,8 +150,8 @@ services:
- FASTGPT_ENDPOINT=http://fastgpt:3000
fastgpt:
container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
ports:
- 3000:3000
networks:

View File

@@ -109,15 +109,15 @@ services:
# fastgpt
sandbox:
container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
networks:
- fastgpt
restart: always
fastgpt-mcp-server:
container_name: fastgpt-mcp-server
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
ports:
- 3005:3000
networks:
@@ -127,8 +127,8 @@ services:
- FASTGPT_ENDPOINT=http://fastgpt:3000
fastgpt:
container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
ports:
- 3000:3000
networks:

View File

@@ -23,7 +23,7 @@ services:
volumes:
- ./pg/data:/var/lib/postgresql/data
healthcheck:
test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy']
test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'postgres']
interval: 5s
timeout: 5s
retries: 10
@@ -96,15 +96,15 @@ services:
# fastgpt
sandbox:
container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
networks:
- fastgpt
restart: always
fastgpt-mcp-server:
container_name: fastgpt-mcp-server
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
ports:
- 3005:3000
networks:
@@ -114,8 +114,8 @@ services:
- FASTGPT_ENDPOINT=http://fastgpt:3000
fastgpt:
container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
ports:
- 3000:3000
networks:

View File

@@ -72,15 +72,15 @@ services:
sandbox:
container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
networks:
- fastgpt
restart: always
fastgpt-mcp-server:
container_name: fastgpt-mcp-server
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
ports:
- 3005:3000
networks:
@@ -90,8 +90,8 @@ services:
- FASTGPT_ENDPOINT=http://fastgpt:3000
fastgpt:
container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
ports:
- 3000:3000
networks:

Binary file not shown.

After

Width:  |  Height:  |  Size: 386 KiB

View File

@@ -959,10 +959,16 @@ curl --location --request POST 'http://localhost:3000/api/core/chat/getHistories
{{< markdownify >}}
{{% alert icon=" " context="success" %}}
目前仅能获取到当前 API key 的创建者的对话。
- appId - 应用 Id
- offset - 偏移量,即从第几条数据开始取
- pageSize - 记录数量
- source - 对话源。source=api表示获取通过 API 创建的对话(不会获取到页面上的对话记录)
- startCreateTime - 开始创建时间(可选)
- endCreateTime - 结束创建时间(可选)
- startUpdateTime - 开始更新时间(可选)
- endUpdateTime - 结束更新时间(可选)
{{% /alert %}}
{{< /markdownify >}}

View File

@@ -0,0 +1,50 @@
---
title: 'V4.9.10'
description: 'FastGPT V4.9.10 更新说明'
icon: 'upgrade'
draft: false
toc: true
weight: 790
---
## 升级指南
重要提示:本次更新会重新构建全文索引,构建期间全文检索结果会为空。4c16g 700 万组全文索引大致消耗 25 分钟。如需无缝升级,需自行做表同步工程。
### 1. 做好数据备份
### 2. 更新镜像 tag
- 更新 FastGPT 镜像 tag: v4.9.10-fix2
- 更新 FastGPT 商业版镜像 tag: v4.9.10-fix2
- mcp_server 无需更新
- Sandbox 无需更新
- AIProxy 无需更新
## 🚀 新增内容
1. 支持 PG 设置`systemEnv.hnswMaxScanTuples`参数,提高迭代搜索的数据总量。
2. 知识库预处理参数增加 “分块条件”,可控制某些情况下不进行分块处理。
3. 知识库预处理参数增加 “段落优先” 模式,可控制最大段落深度。原“长度优先”模式,不再内嵌段落优先逻辑。
4. 工作流调整为单向接入和接出,支持快速的添加下一步节点。
5. 开放飞书和语雀知识库到开源版。
6. gemini 和 claude 最新模型预设。
## ⚙️ 优化
1. LLM stream调用默认超时调大。
2. 部分确认交互优化。
3. 纠正原先知识库的“表格数据集”名称,改成“备份导入”。同时支持知识库索引的导出和导入。
4. 工作流知识库引用上限,如果工作流中没有相关 AI 节点,则交互模式改成纯手动输入,并且上限为 1000万。
5. 语音输入,移动端判断逻辑,准确判断是否为手机,而不是小屏。
6. 优化上下文截取算法,至少保证留下一组 Human 信息。
## 🐛 修复
1. 全文检索多知识库时,得分排序不正确。
2. 流响应捕获 finish_reason 可能不正确。
3. 工具调用模式,未保存思考输出。
4. 知识库 indexSize 参数未生效。
5. 工作流嵌套 2 层后,获取预览引用、上下文不正确。
6. xlsx 转成 Markdown 时候,前面会多出一个空格。
7. 读取 Markdown 文件时,Base64 图片未进行额外转换保存。

View File

@@ -0,0 +1,25 @@
---
title: 'V4.9.11(进行中)'
description: 'FastGPT V4.9.11 更新说明'
icon: 'upgrade'
draft: false
toc: true
weight: 789
---
## 🚀 新增内容
1. 工作流中增加节点搜索功能。
2. 工作流中,子流程版本控制,可选择“保持最新版本”,无需手动更新。
## ⚙️ 优化
1. 原文缓存改用 gridfs 存储,提高上限。
## 🐛 修复
1. 工作流中,管理员声明的全局系统工具,无法进行版本管理。
2. 工具调用节点前,有交互节点时,上下文异常。
3. 修复备份导入,小于 1000 字时,无法分块问题。
4. 自定义 PDF 解析,无法保存 base64 图片。

View File

@@ -1,5 +1,5 @@
---
title: 'V4.9.8(进行中)'
title: 'V4.9.8'
description: 'FastGPT V4.9.8 更新说明'
icon: 'upgrade'
draft: false
@@ -7,6 +7,17 @@ toc: true
weight: 792
---
## 升级指南
### 1. 做好数据备份
### 2. 更新镜像 tag
- 更新 FastGPT 镜像 tag: v4.9.8
- 更新 FastGPT 商业版镜像 tag: v4.9.8
- mcp_server 无需更新
- Sandbox 无需更新
- AIProxy 无需更新
## 🚀 新增内容

View File

@@ -0,0 +1,43 @@
---
title: 'V4.9.9'
description: 'FastGPT V4.9.9 更新说明'
icon: 'upgrade'
draft: false
toc: true
weight: 791
---
## 升级指南
### 1. 做好数据备份
### 2. 商业版用户替换新 License
商业版用户可以联系 FastGPT 团队支持同学,获取 License 替换方案。替换后,可以直接升级系统,管理后台会提示输入新 License。
### 3. 更新镜像 tag
- 更新 FastGPT 镜像 tag: v4.9.9
- 更新 FastGPT 商业版镜像 tag: v4.9.9
- mcp_server 无需更新
- Sandbox 无需更新
- AIProxy 无需更新
## 🚀 新增内容
1. 切换 SessionId 来替代 JWT 实现登录鉴权,可控制最大登录客户端数量。
2. 新的商业版 License 管理模式。
3. 公众号调用,显示记录 chat 对话错误,方便排查。
4. API 知识库支持 BasePath 选择,需增加 API 接口,具体可见[API 知识库介绍](/docs/guide/knowledge_base/api_dataset/#4-获取文件详细信息用于获取文件信息)。
## ⚙️ 优化
1. 优化工具调用,新工具的判断逻辑。
2. 调整 Cite 引用提示词。
## 🐛 修复
1. 无法正常获取应用历史保存/发布记录。
2. 成员创建 MCP 工具权限问题。
3. 来源引用展示,存在 ID 传递错误,导致提示无权操作该文件。
4. 回答标注前端数据报错。

View File

@@ -185,3 +185,40 @@ curl --location --request GET '{{baseURL}}/v1/file/read?id=xx' \
{{< /tabs >}}
### 4. 获取文件详细信息(用于获取文件信息)
{{< tabs tabTotal="2" >}}
{{< tab tabName="请求示例" >}}
{{< markdownify >}}
id 为文件的 id。
```bash
curl --location --request GET '{{baseURL}}/v1/file/detail?id=xx' \
--header 'Authorization: Bearer {{authorization}}'
```
{{< /markdownify >}}
{{< /tab >}}
{{< tab tabName="响应示例" >}}
{{< markdownify >}}
```json
{
"code": 200,
"success": true,
"message": "",
"data": {
"id": "docs",
"parentId": "",
"name": "docs"
}
}
```
{{< /markdownify >}}
{{< /tab >}}
{{< /tabs >}}

View File

@@ -28,7 +28,6 @@ FastGPT 商业版是基于 FastGPT 开源版的增强版本,增加了一些独
| 应用发布安全配置 | ❌ | ✅ | ✅ |
| 内容审核 | ❌ | ✅ | ✅ |
| web站点同步 | ❌ | ✅ | ✅ |
| 主流文档库接入(目前支持:语雀、飞书) | ❌ | ✅ | ✅ |
| 增强训练模式 | ❌ | ✅ | ✅ |
| 第三方应用快速接入(飞书、公众号) | ❌ | ✅ | ✅ |
| 管理后台 | ❌ | ✅ | 不需要 |

View File

@@ -132,7 +132,9 @@ weight: 506
### 公众号没响应
检查应用对话日志,如果有对话日志,但是微信公众号无响应,则是白名单 IP未成功。
添加白名单IP 后,通常需要等待几分钟微信更新。
添加白名单IP 后,通常需要等待几分钟微信更新。可以在对话日志中,找到错误日志。
![](/imgs/official_account_faq.png)
### 如何新开一个聊天记录

2
env.d.ts vendored
View File

@@ -4,7 +4,6 @@ declare global {
LOG_DEPTH: string;
DEFAULT_ROOT_PSW: string;
DB_MAX_LINK: string;
TOKEN_KEY: string;
FILE_TOKEN_KEY: string;
ROOT_KEY: string;
OPENAI_BASE_URL: string;
@@ -37,6 +36,7 @@ declare global {
CONFIG_JSON_PATH?: string;
PASSWORD_LOGIN_LOCK_SECONDS?: string;
PASSWORD_EXPIRED_MONTH?: string;
MAX_LOGIN_SESSION?: string;
}
}
}

View File

@@ -27,7 +27,7 @@ const datasetErr = [
},
{
statusText: DatasetErrEnum.unExist,
message: 'core.dataset.error.unExistDataset'
message: i18nT('common:core.dataset.error.unExistDataset')
},
{
statusText: DatasetErrEnum.unExistCollection,

View File

@@ -2,13 +2,28 @@ import { type ErrType } from '../errorCode';
import { i18nT } from '../../../../web/i18n/utils';
/* dataset: 509000 */
export enum SystemErrEnum {
communityVersionNumLimit = 'communityVersionNumLimit'
communityVersionNumLimit = 'communityVersionNumLimit',
licenseAppAmountLimit = 'licenseAppAmountLimit',
licenseDatasetAmountLimit = 'licenseDatasetAmountLimit',
licenseUserAmountLimit = 'licenseUserAmountLimit'
}
/*
  Table pairing each SystemErrEnum status with its translated message key.
  Every key lives under `common:code_error.system_error.*` and is passed to i18nT as a literal string.
*/
const systemErr = [
{
statusText: SystemErrEnum.communityVersionNumLimit,
message: i18nT('common:code_error.system_error.community_version_num_limit')
},
{
statusText: SystemErrEnum.licenseAppAmountLimit,
message: i18nT('common:code_error.system_error.license_app_amount_limit')
},
{
statusText: SystemErrEnum.licenseDatasetAmountLimit,
message: i18nT('common:code_error.system_error.license_dataset_amount_limit')
},
{
statusText: SystemErrEnum.licenseUserAmountLimit,
message: i18nT('common:code_error.system_error.license_user_amount_limit')
}
];

View File

@@ -7,6 +7,10 @@ export const CUSTOM_SPLIT_SIGN = '-----CUSTOM_SPLIT_SIGN-----';
type SplitProps = {
text: string;
chunkSize: number;
paragraphChunkDeep?: number; // Paragraph deep
paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
maxSize?: number;
overlapRatio?: number;
customReg?: string[];
@@ -108,6 +112,8 @@ const commonSplit = (props: SplitProps): SplitResponse => {
let {
text = '',
chunkSize,
paragraphChunkDeep = 5,
paragraphChunkMinSize = 100,
maxSize = defaultMaxChunkSize,
overlapRatio = 0.15,
customReg = []
@@ -123,7 +129,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
text = text.replace(/(```[\s\S]*?```|~~~[\s\S]*?~~~)/g, function (match) {
return match.replace(/\n/g, codeBlockMarker);
});
// 2. 表格处理 - 单独提取表格出来,进行表头合并
// 2. Markdown 表格处理 - 单独提取表格出来,进行表头合并
const tableReg =
/(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n?)*)(?:\n|$)/g;
const tableDataList = text.match(tableReg);
@@ -143,25 +149,40 @@ const commonSplit = (props: SplitProps): SplitResponse => {
text = text.replace(/(\r?\n|\r){3,}/g, '\n\n\n');
// The larger maxLen is, the next sentence is less likely to trigger splitting
const markdownIndex = 4;
const forbidOverlapIndex = 8;
const customRegLen = customReg.length;
const markdownIndex = paragraphChunkDeep - 1;
const forbidOverlapIndex = customRegLen + markdownIndex + 4;
// One heading-split rule per markdown level (#, ##, ...), capped at 8 levels.
// A falsy depth (undefined / 0 / NaN) produces no heading rules at all.
const markdownHeaderRules = ((deep?: number): { reg: RegExp; maxLen: number }[] => {
  const headerRules: { reg: RegExp; maxLen: number }[] = [];
  if (!deep) return headerRules;

  const levelLimit = Math.min(deep, 8);
  let level = 1;
  while (level <= levelLimit) {
    // Matches a whole heading line: `level` hashes, one whitespace char, then text up to the newline
    const prefix = '#'.repeat(level);
    headerRules.push({
      reg: new RegExp(`^(${prefix}\\s[^\\n]+\\n)`, 'gm'),
      maxLen: chunkSize
    });
    level += 1;
  }
  return headerRules;
})(paragraphChunkDeep);
const stepReges: { reg: RegExp | string; maxLen: number }[] = [
...customReg.map((text) => ({
reg: text.replaceAll('\\n', '\n'),
maxLen: chunkSize
})),
{ reg: /^(#\s[^\n]+\n)/gm, maxLen: chunkSize },
{ reg: /^(##\s[^\n]+\n)/gm, maxLen: chunkSize },
{ reg: /^(###\s[^\n]+\n)/gm, maxLen: chunkSize },
{ reg: /^(####\s[^\n]+\n)/gm, maxLen: chunkSize },
{ reg: /^(#####\s[^\n]+\n)/gm, maxLen: chunkSize },
...markdownHeaderRules,
{ reg: /([\n](```[\s\S]*?```|~~~[\s\S]*?~~~))/g, maxLen: maxSize }, // code block
// HTML Table tag 尽可能保障完整
{
reg: /(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n)*)/g,
maxLen: Math.min(chunkSize * 1.5, maxSize)
}, // Table 尽可能保证完整性
maxLen: chunkSize
}, // Markdown Table 尽可能保证完整性
{ reg: /(\n{2,})/g, maxLen: chunkSize },
{ reg: /([\n])/g, maxLen: chunkSize },
// ------ There's no overlap on the top
@@ -172,12 +193,10 @@ const commonSplit = (props: SplitProps): SplitResponse => {
{ reg: /([]|,\s)/g, maxLen: chunkSize }
];
const customRegLen = customReg.length;
const checkIsCustomStep = (step: number) => step < customRegLen;
const checkIsMarkdownSplit = (step: number) =>
step >= customRegLen && step <= markdownIndex + customRegLen;
const checkForbidOverlap = (step: number) => step <= forbidOverlapIndex + customRegLen;
const checkForbidOverlap = (step: number) => step <= forbidOverlapIndex;
// if use markdown title split, Separate record title
const getSplitTexts = ({ text, step }: { text: string; step: number }) => {
@@ -301,6 +320,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
const splitTexts = getSplitTexts({ text, step });
const chunks: string[] = [];
for (let i = 0; i < splitTexts.length; i++) {
const item = splitTexts[i];
@@ -443,7 +463,6 @@ const commonSplit = (props: SplitProps): SplitResponse => {
*/
export const splitText2Chunks = (props: SplitProps): SplitResponse => {
let { text = '' } = props;
const start = Date.now();
const splitWithCustomSign = text.split(CUSTOM_SPLIT_SIGN);
const splitResult = splitWithCustomSign.map((item) => {

View File

@@ -70,6 +70,9 @@ export type FastGPTFeConfigsType = {
show_publish_dingtalk?: boolean;
show_publish_offiaccount?: boolean;
show_dataset_enhance?: boolean;
show_batch_eval?: boolean;
concatMd?: string;
docUrl?: string;
openAPIDocUrl?: string;
@@ -127,9 +130,11 @@ export type SystemEnvType = {
vectorMaxProcess: number;
qaMaxProcess: number;
vlmMaxProcess: number;
hnswEfSearch: number;
tokenWorkers: number; // token count max worker
hnswEfSearch: number;
hnswMaxScanTuples: number;
oneapiUrl?: string;
chatApiKey?: string;
@@ -142,3 +147,21 @@ export type customPdfParseType = {
doc2xKey?: string;
price?: number;
};
// License data: validity window, licensed company, optional quota limits and per-feature switches.
export type LicenseDataType = {
startTime: string;
expiredTime: string;
company: string;
description?: string; // Description
hosts?: string[]; // Valid domains for the admin console
maxUsers?: number; // Maximum number of users; unlimited when omitted
maxApps?: number; // Maximum number of apps; unlimited when omitted
maxDatasets?: number; // Maximum number of datasets; unlimited when omitted
functions: {
sso: boolean;
pay: boolean;
customTemplates: boolean;
datasetEnhance: boolean;
batchEval: boolean;
};
};

View File

@@ -2,6 +2,248 @@ import { type PromptTemplateItem } from '../type.d';
import { i18nT } from '../../../../web/i18n/utils';
import { getPromptByVersion } from './utils';
/*
  Quote prompt templates that embed both the retrieved content ({{quote}}) and the
  user question ({{question}}) in a single prompt.
  Entries: standard (<Cites> with [id](CITE) citation rules), QA (<QA> pairs), and a
  strict variant of each that forbids using knowledge outside the quoted content.
  Each `value` is a map of version string -> template text.
  NOTE(review): presumably resolved through getPromptByVersion — confirm against callers.
*/
export const Prompt_userQuotePromptList: PromptTemplateItem[] = [
{
title: i18nT('app:template.standard_template'),
desc: '',
value: {
['4.9.7']: `## 任务描述
你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
## 追溯展示规则
- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
- 不要把示例作为知识点。
- 不要伪造 id返回的 id 必须都存在 <Cites></Cites> 中!
## 通用规则
- 如果你不清楚答案,你需要澄清。
- 避免提及你是从 <Cites></Cites> 获取的知识。
- 保持答案与 <Cites></Cites> 中描述的一致。
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
- 使用与问题相同的语言回答。
<Cites>
{{quote}}
</Cites>
## 用户问题
{{question}}
## 回答
`
}
},
{
title: i18nT('app:template.qa_template'),
desc: '',
value: {
['4.9.7']: `## 任务描述
作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
## 回答要求
- 选择其中一个或多个问答对进行回答。
- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
- 如果没有相关的问答对,你需要澄清。
- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
- 使用与问题相同的语言回答。
<QA>
{{quote}}
</QA>
## 用户问题
{{question}}
## 回答
`
}
},
{
title: i18nT('app:template.standard_strict'),
desc: '',
value: {
['4.9.7']: `## 任务描述
你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
## 追溯展示规则
- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
- 不要把示例作为知识点。
- 不要伪造 id返回的 id 必须都存在 <Cites></Cites> 中!
## 通用规则
- 如果你不清楚答案,你需要澄清。
- 避免提及你是从 <Cites></Cites> 获取的知识。
- 保持答案与 <Cites></Cites> 中描述的一致。
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
- 使用与问题相同的语言回答。
## 严格要求
你只能使用 <Cites></Cites> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <Cites></Cites> 中的内容一致。
<Cites>
{{quote}}
</Cites>
## 用户问题
{{question}}
## 回答
`
}
},
{
title: i18nT('app:template.hard_strict'),
desc: '',
value: {
['4.9.7']: `## 任务描述
作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
## 回答要求
- 选择其中一个或多个问答对进行回答。
- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
- 如果没有相关的问答对,你需要澄清。
- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
- 使用与问题相同的语言回答。
## 严格要求
你只能使用 <QA></QA> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <QA></QA> 中的内容一致。
<QA>
{{quote}}
</QA>
## 用户问题
{{question}}
## 回答
`
}
}
];
/*
  Quote prompt templates that embed only the retrieved content ({{quote}});
  unlike Prompt_userQuotePromptList they contain no {{question}} / answer section.
  Entries: standard (<Cites> with [id](CITE) citation rules), QA (<QA> pairs), and a
  strict variant of each that forbids using knowledge outside the quoted content.
  Each `value` is a map of version string -> template text.
  NOTE(review): presumably resolved through getPromptByVersion — confirm against callers.
*/
export const Prompt_systemQuotePromptList: PromptTemplateItem[] = [
{
title: i18nT('app:template.standard_template'),
desc: '',
value: {
['4.9.7']: `## 任务描述
你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
## 追溯展示规则
- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
- 不要把示例作为知识点。
- 不要伪造 id返回的 id 必须都存在 <Cites></Cites> 中!
## 通用规则
- 如果你不清楚答案,你需要澄清。
- 避免提及你是从 <Cites></Cites> 获取的知识。
- 保持答案与 <Cites></Cites> 中描述的一致。
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
- 使用与问题相同的语言回答。
<Cites>
{{quote}}
</Cites>`
}
},
{
title: i18nT('app:template.qa_template'),
desc: '',
value: {
// NOTE(review): keyed '4.9.8' while every sibling template in this list (and the
// matching entry in Prompt_userQuotePromptList) is keyed '4.9.7' — confirm this is intentional.
['4.9.8']: `## 任务描述
作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
## 回答要求
- 选择其中一个或多个问答对进行回答。
- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
- 如果没有相关的问答对,你需要澄清。
- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
- 使用与问题相同的语言回答。
<QA>
{{quote}}
</QA>`
}
},
{
title: i18nT('app:template.standard_strict'),
desc: '',
value: {
['4.9.7']: `## 任务描述
你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
## 追溯展示规则
- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
- 不要把示例作为知识点。
- 不要伪造 id返回的 id 必须都存在 <Cites></Cites> 中!
## 通用规则
- 如果你不清楚答案,你需要澄清。
- 避免提及你是从 <Cites></Cites> 获取的知识。
- 保持答案与 <Cites></Cites> 中描述的一致。
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
- 使用与问题相同的语言回答。
## 严格要求
你只能使用 <Cites></Cites> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <Cites></Cites> 中的内容一致。
<Cites>
{{quote}}
</Cites>`
}
},
{
title: i18nT('app:template.hard_strict'),
desc: '',
value: {
['4.9.7']: `## 任务描述
作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
## 回答要求
- 选择其中一个或多个问答对进行回答。
- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
- 如果没有相关的问答对,你需要澄清。
- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
- 使用与问题相同的语言回答。
## 严格要求
你只能使用 <QA></QA> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <QA></QA> 中的内容一致。
<QA>
{{quote}}
</QA>`
}
}
];
export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
{
title: i18nT('app:template.standard_template'),
@@ -10,11 +252,6 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
['4.9.7']: `{
"id": "{{id}}",
"sourceName": "{{source}}",
"content": "{{q}}\n{{a}}"
}
`,
['4.9.2']: `{
"sourceName": "{{source}}",
"updateTime": "{{updateTime}}",
"content": "{{q}}\n{{a}}"
}
@@ -25,7 +262,7 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
title: i18nT('app:template.qa_template'),
desc: i18nT('app:template.qa_template_des'),
value: {
['4.9.2']: `<Question>
['4.9.7']: `<Question>
{{q}}
</Question>
<Answer>
@@ -40,11 +277,6 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
['4.9.7']: `{
"id": "{{id}}",
"sourceName": "{{source}}",
"content": "{{q}}\n{{a}}"
}
`,
['4.9.2']: `{
"sourceName": "{{source}}",
"updateTime": "{{updateTime}}",
"content": "{{q}}\n{{a}}"
}
@@ -55,7 +287,7 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
title: i18nT('app:template.hard_strict'),
desc: i18nT('app:template.hard_strict_des'),
value: {
['4.9.2']: `<Question>
['4.9.7']: `<Question>
{{q}}
</Question>
<Answer>
@@ -64,263 +296,12 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
}
}
];
export const getQuoteTemplate = (version?: string) => {
const defaultTemplate = Prompt_QuoteTemplateList[0].value;
return getPromptByVersion(version, defaultTemplate);
};
export const Prompt_userQuotePromptList: PromptTemplateItem[] = [
{
title: i18nT('app:template.standard_template'),
desc: '',
value: {
['4.9.7']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
<Reference>
{{quote}}
</Reference>
回答要求:
- 如果你不清楚答案,你需要澄清。
- 避免提及你是从 <Reference></Reference> 获取的知识。
- 保持答案与 <Reference></Reference> 中描述的一致。
- 使用 Markdown 语法优化回答格式。
- 使用与问题相同的语言回答。
- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。`,
['4.9.2']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
<Reference>
{{quote}}
</Reference>
回答要求:
- 如果你不清楚答案,你需要澄清。
- 避免提及你是从 <Reference></Reference> 获取的知识。
- 保持答案与 <Reference></Reference> 中描述的一致。
- 使用 Markdown 语法优化回答格式。
- 使用与问题相同的语言回答。
问题:"""{{question}}"""`
}
},
{
title: i18nT('app:template.qa_template'),
desc: '',
value: {
['4.9.2']: `使用 <QA></QA> 标记中的问答对进行回答。
<QA>
{{quote}}
</QA>
回答要求:
- 选择其中一个或多个问答对进行回答。
- 回答的内容应尽可能与 <答案></答案> 中的内容一致。
- 如果没有相关的问答对,你需要澄清。
- 避免提及你是从 QA 获取的知识,只需要回复答案。
问题:"""{{question}}"""`
}
},
{
title: i18nT('app:template.standard_strict'),
desc: '',
value: {
['4.9.7']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
<Reference>
{{quote}}
</Reference>
思考流程:
1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
2. 如果有关,你按下面的要求回答。
3. 如果无关,你直接拒绝回答本次问题。
回答要求:
- 避免提及你是从 <Reference></Reference> 获取的知识。
- 保持答案与 <Reference></Reference> 中描述的一致。
- 使用 Markdown 语法优化回答格式。
- 使用与问题相同的语言回答。
- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。
问题:"""{{question}}"""`,
['4.9.2']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
<Reference>
{{quote}}
</Reference>
思考流程:
1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
2. 如果有关,你按下面的要求回答。
3. 如果无关,你直接拒绝回答本次问题。
回答要求:
- 避免提及你是从 <Reference></Reference> 获取的知识。
- 保持答案与 <Reference></Reference> 中描述的一致。
- 使用 Markdown 语法优化回答格式。
- 使用与问题相同的语言回答。
问题:"""{{question}}"""`
}
},
{
title: i18nT('app:template.hard_strict'),
desc: '',
value: {
['4.9.2']: `忘记你已有的知识,仅使用 <QA></QA> 标记中的问答对进行回答。
<QA>
{{quote}}
</QA>
思考流程:
1. 判断问题是否与 <QA></QA> 标记中的内容有关。
2. 如果无关,你直接拒绝回答本次问题。
3. 判断是否有相近或相同的问题。
4. 如果有相同的问题,直接输出对应答案。
5. 如果只有相近的问题,请把相近的问题和答案一起输出。
回答要求:
- 如果没有相关的问答对,你需要澄清。
- 回答的内容应尽可能与 <QA></QA> 标记中的内容一致。
- 避免提及你是从 QA 获取的知识,只需要回复答案。
- 使用 Markdown 语法优化回答格式。
- 使用与问题相同的语言回答。
问题:"""{{question}}"""`
}
}
];
export const Prompt_systemQuotePromptList: PromptTemplateItem[] = [
{
title: i18nT('app:template.standard_template'),
desc: '',
value: {
['4.9.7']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
<Reference>
{{quote}}
</Reference>
回答要求:
- 如果你不清楚答案,你需要澄清。
- 避免提及你是从 <Reference></Reference> 获取的知识。
- 保持答案与 <Reference></Reference> 中描述的一致。
- 使用 Markdown 语法优化回答格式。
- 使用与问题相同的语言回答。
- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。`,
['4.9.2']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
<Reference>
{{quote}}
</Reference>
回答要求:
- 如果你不清楚答案,你需要澄清。
- 避免提及你是从 <Reference></Reference> 获取的知识。
- 保持答案与 <Reference></Reference> 中描述的一致。
- 使用 Markdown 语法优化回答格式。
- 使用与问题相同的语言回答。`
}
},
{
title: i18nT('app:template.qa_template'),
desc: '',
value: {
['4.9.2']: `使用 <QA></QA> 标记中的问答对进行回答。
<QA>
{{quote}}
</QA>
回答要求:
- 选择其中一个或多个问答对进行回答。
- 回答的内容应尽可能与 <答案></答案> 中的内容一致。
- 如果没有相关的问答对,你需要澄清。
- 避免提及你是从 QA 获取的知识,只需要回复答案。`
}
},
{
title: i18nT('app:template.standard_strict'),
desc: '',
value: {
['4.9.7']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
<Reference>
{{quote}}
</Reference>
思考流程:
1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
2. 如果有关,你按下面的要求回答。
3. 如果无关,你直接拒绝回答本次问题。
回答要求:
- 避免提及你是从 <Reference></Reference> 获取的知识。
- 保持答案与 <Reference></Reference> 中描述的一致。
- 使用 Markdown 语法优化回答格式。
- 使用与问题相同的语言回答。
- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。
问题:"""{{question}}"""`,
['4.9.2']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
<Reference>
{{quote}}
</Reference>
思考流程:
1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
2. 如果有关,你按下面的要求回答。
3. 如果无关,你直接拒绝回答本次问题。
回答要求:
- 避免提及你是从 <Reference></Reference> 获取的知识。
- 保持答案与 <Reference></Reference> 中描述的一致。
- 使用 Markdown 语法优化回答格式。
- 使用与问题相同的语言回答。`
}
},
{
title: i18nT('app:template.hard_strict'),
desc: '',
value: {
['4.9.2']: `忘记你已有的知识,仅使用 <QA></QA> 标记中的问答对进行回答。
<QA>
{{quote}}
</QA>
思考流程:
1. 判断问题是否与 <QA></QA> 标记中的内容有关。
2. 如果无关,你直接拒绝回答本次问题。
3. 判断是否有相近或相同的问题。
4. 如果有相同的问题,直接输出对应答案。
5. 如果只有相近的问题,请把相近的问题和答案一起输出。
回答要求:
- 如果没有相关的问答对,你需要澄清。
- 回答的内容应尽可能与 <QA></QA> 标记中的内容一致。
- 避免提及你是从 QA 获取的知识,只需要回复答案。
- 使用 Markdown 语法优化回答格式。
- 使用与问题相同的语言回答。`
}
}
];
export const getQuotePrompt = (version?: string, role: 'user' | 'system' = 'user') => {
const quotePromptTemplates =
role === 'user' ? Prompt_userQuotePromptList : Prompt_systemQuotePromptList;
@@ -333,7 +314,7 @@ export const getQuotePrompt = (version?: string, role: 'user' | 'system' = 'user
// Document quote prompt
export const getDocumentQuotePrompt = (version?: string) => {
const promptMap = {
['4.9.2']: `将 <FilesContent></FilesContent> 中的内容作为本次对话的参考:
['4.9.7']: `将 <FilesContent></FilesContent> 中的内容作为本次对话的参考:
<FilesContent>
{{quote}}
</FilesContent>

View File

@@ -1,14 +1,19 @@
export const getDatasetSearchToolResponsePrompt = () => {
return `## Role
你是一个知识库回答助手,可以 "quotes" 中的内容作为本次对话的参考。为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记。
你是一个知识库回答助手,可以 "cites" 中的内容作为本次对话的参考。为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容
## Rules
## 追溯展示规则
- 使用 **[id](CITE)** 格式来引用 "cites" 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
- 不要把示例作为知识点。
- 不要伪造 id返回的 id 必须都存在 cites 中!
## 通用规则
- 如果你不清楚答案,你需要澄清。
- 避免提及你是从 "quotes" 获取的知识。
- 保持答案与 "quotes" 中描述的一致。
- 避免提及你是从 "cites" 获取的知识。
- 保持答案与 "cites" 中描述的一致。
- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
- 使用与问题相同的语言回答。
- 使用 [id](CITE) 格式来引用 "quotes" 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
- 在每段话结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
- 每段话至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。`;
- 使用与问题相同的语言回答。`;
};

View File

@@ -60,5 +60,3 @@ export enum AppTemplateTypeEnum {
// special type
contribute = 'contribute'
}
export const defaultDatasetMaxTokens = 16000;

View File

@@ -5,7 +5,7 @@ import {
FlowNodeTypeEnum
} from '../../workflow/node/constant';
import { nanoid } from 'nanoid';
import { type ToolType } from '../type';
import { type McpToolConfigType } from '../type';
import { i18nT } from '../../../../web/i18n/utils';
import { type RuntimeNodeItemType } from '../../workflow/runtime/type';
@@ -16,7 +16,7 @@ export const getMCPToolSetRuntimeNode = ({
avatar
}: {
url: string;
toolList: ToolType[];
toolList: McpToolConfigType[];
name?: string;
avatar?: string;
}): RuntimeNodeItemType => {
@@ -45,7 +45,7 @@ export const getMCPToolRuntimeNode = ({
url,
avatar = 'core/app/type/mcpToolsFill'
}: {
tool: ToolType;
tool: McpToolConfigType;
url: string;
avatar?: string;
}): RuntimeNodeItemType => {
@@ -65,7 +65,7 @@ export const getMCPToolRuntimeNode = ({
...Object.entries(tool.inputSchema?.properties || {}).map(([key, value]) => ({
key,
label: key,
valueType: value.type as WorkflowIOValueTypeEnum,
valueType: value.type as WorkflowIOValueTypeEnum, // TODO: 这里需要做一个映射
description: value.description,
toolDescription: value.description || key,
required: tool.inputSchema?.required?.includes(key) || false,

View File

@@ -16,16 +16,6 @@ import { FlowNodeInputTypeEnum } from '../../core/workflow/node/constant';
import type { WorkflowTemplateBasicType } from '@fastgpt/global/core/workflow/type';
import type { SourceMemberType } from '../../support/user/type';
export type ToolType = {
name: string;
description: string;
inputSchema: {
type: string;
properties?: Record<string, { type: string; description?: string }>;
required?: string[];
};
};
export type AppSchema = {
_id: string;
parentId?: ParentIdType;
@@ -117,6 +107,16 @@ export type AppSimpleEditFormType = {
chatConfig: AppChatConfigType;
};
/**
 * Description of a single MCP tool: its name, a human-readable description,
 * and the schema of the inputs it accepts.
 */
export type McpToolConfigType = {
// Tool name.
name: string;
// Human-readable description of the tool.
description: string;
// Schema describing the tool's inputs.
inputSchema: {
// Type of the schema itself (presumably 'object', JSON-Schema style — confirm against the MCP server response).
type: string;
// Input name -> declared value type and optional description.
properties?: Record<string, { type: string; description?: string }>;
// Names of the inputs that must be provided.
required?: string[];
};
};
/* app chat config type */
export type AppChatConfigType = {
welcomeText?: string;

View File

@@ -9,6 +9,9 @@ import { type WorkflowTemplateBasicType } from '../workflow/type';
import { AppTypeEnum } from './constants';
import { AppErrEnum } from '../../common/error/code/app';
import { PluginErrEnum } from '../../common/error/code/plugin';
import { i18nT } from '../../../web/i18n/utils';
import appErrList from '../../common/error/code/app';
import pluginErrList from '../../common/error/code/plugin';
export const getDefaultAppForm = (): AppSimpleEditFormType => {
return {
@@ -189,17 +192,10 @@ export const getAppType = (config?: WorkflowTemplateBasicType | AppSimpleEditFor
return '';
};
export const checkAppUnExistError = (error?: string) => {
const unExistError: Array<string> = [
AppErrEnum.unAuthApp,
AppErrEnum.unExist,
PluginErrEnum.unAuth,
PluginErrEnum.unExist
];
export const formatToolError = (error?: any) => {
if (!error || typeof error !== 'string') return;
if (!!error && unExistError.includes(error)) {
return error;
} else {
return undefined;
}
const errorText = appErrList[error]?.message || pluginErrList[error]?.message;
return errorText || error;
};

View File

@@ -26,6 +26,7 @@ export type ChatSchema = {
teamId: string;
tmbId: string;
appId: string;
createTime: Date;
updateTime: Date;
title: string;
customTitle: string;
@@ -112,6 +113,7 @@ export type ChatItemSchema = (UserChatItemType | SystemChatItemType | AIChatItem
appId: string;
time: Date;
durationSeconds?: number;
errorMsg?: string;
};
export type AdminFbkType = {
@@ -143,6 +145,7 @@ export type ChatSiteItemType = (UserChatItemType | SystemChatItemType | AIChatIt
responseData?: ChatHistoryItemResType[];
time?: Date;
durationSeconds?: number;
errorMsg?: string;
} & ChatBoxInputType &
ResponseTagItemType;

View File

@@ -1,9 +1,11 @@
import type { DatasetDataIndexItemType, DatasetSchemaType } from './type';
import type { ChunkSettingsType, DatasetDataIndexItemType, DatasetSchemaType } from './type';
import type {
DatasetCollectionTypeEnum,
DatasetCollectionDataProcessModeEnum,
ChunkSettingModeEnum,
DataChunkSplitModeEnum
DataChunkSplitModeEnum,
ChunkTriggerConfigTypeEnum,
ParagraphChunkAIModeEnum
} from './constants';
import type { LLMModelItemType } from '../ai/model.d';
import type { ParentIdType } from 'common/parentFolder/type';
@@ -32,26 +34,16 @@ export type DatasetUpdateBody = {
};
/* ================= collection ===================== */
export type DatasetCollectionChunkMetadataType = {
// Input + store params
type DatasetCollectionStoreDataType = ChunkSettingsType & {
parentId?: string;
customPdfParse?: boolean;
trainingType?: DatasetCollectionDataProcessModeEnum;
imageIndex?: boolean;
autoIndexes?: boolean;
chunkSettingMode?: ChunkSettingModeEnum;
chunkSplitMode?: DataChunkSplitModeEnum;
chunkSize?: number;
indexSize?: number;
chunkSplitter?: string;
qaPrompt?: string;
metadata?: Record<string, any>;
customPdfParse?: boolean;
};
// create collection params
export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
export type CreateDatasetCollectionParams = DatasetCollectionStoreDataType & {
datasetId: string;
name: string;
type: DatasetCollectionTypeEnum;
@@ -72,7 +64,7 @@ export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType &
nextSyncTime?: Date;
};
export type ApiCreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
export type ApiCreateDatasetCollectionParams = DatasetCollectionStoreDataType & {
datasetId: string;
tags?: string[];
};
@@ -90,7 +82,7 @@ export type ApiDatasetCreateDatasetCollectionParams = ApiCreateDatasetCollection
export type FileIdCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
fileId: string;
};
export type reTrainingDatasetFileCollectionParams = DatasetCollectionChunkMetadataType & {
export type reTrainingDatasetFileCollectionParams = DatasetCollectionStoreDataType & {
datasetId: string;
collectionId: string;
};
@@ -147,6 +139,7 @@ export type PushDatasetDataProps = {
collectionId: string;
data: PushDatasetDataChunkProps[];
trainingType?: DatasetCollectionDataProcessModeEnum;
indexSize?: number;
autoIndexes?: boolean;
imageIndex?: boolean;
prompt?: string;

View File

@@ -120,6 +120,8 @@ export const DatasetCollectionSyncResultMap = {
/**
 * Data processing modes of a dataset collection (the value stored as `trainingType`).
 * Display labels and tooltips for each member live in `DatasetCollectionDataProcessModeMap`.
 */
export enum DatasetCollectionDataProcessModeEnum {
chunk = 'chunk',
qa = 'qa',
backup = 'backup',
auto = 'auto' // abandoned mode; NOTE(review): presumably kept only so existing records still resolve — confirm
}
export const DatasetCollectionDataProcessModeMap = {
@@ -131,21 +133,35 @@ export const DatasetCollectionDataProcessModeMap = {
label: i18nT('common:core.dataset.training.QA mode'),
tooltip: i18nT('common:core.dataset.import.QA Import Tip')
},
[DatasetCollectionDataProcessModeEnum.backup]: {
label: i18nT('dataset:backup_mode'),
tooltip: i18nT('dataset:backup_mode')
},
[DatasetCollectionDataProcessModeEnum.auto]: {
label: i18nT('common:core.dataset.training.Auto mode'),
tooltip: i18nT('common:core.dataset.training.Auto mode Tip')
}
};
/**
 * Condition that decides whether text gets chunked (stored as `chunkTriggerType`
 * in the chunk settings).
 * NOTE(review): member semantics are inferred from their names — confirm against the splitter:
 * - minSize: presumably chunk only when the text exceeds `chunkTriggerMinSize`.
 * - forceChunk: presumably always chunk.
 * - maxSize: presumably chunk only when the text exceeds a model-derived maximum.
 */
export enum ChunkTriggerConfigTypeEnum {
minSize = 'minSize',
forceChunk = 'forceChunk',
maxSize = 'maxSize'
}
/**
 * Where chunk parameters come from.
 * - auto: system-chosen parameters.
 * - custom: user-supplied parameters (split mode, chunk size, splitter, ...).
 */
export enum ChunkSettingModeEnum {
auto = 'auto',
custom = 'custom'
}
/**
 * Strategy used to split text into chunks when chunk settings are customised.
 * - paragraph: split by paragraph structure.
 * - size: split by chunk size.
 * - char: split on a custom splitter string.
 */
export enum DataChunkSplitModeEnum {
paragraph = 'paragraph',
size = 'size',
char = 'char'
}
/**
 * AI mode for paragraph splitting (stored as `paragraphChunkAIMode` in the chunk settings).
 * NOTE(review): member semantics are inferred from their names — presumably `auto` lets the
 * system decide whether AI-assisted paragraph detection runs and `force` always runs it; confirm.
 */
export enum ParagraphChunkAIModeEnum {
auto = 'auto',
force = 'force'
}
/* ------------ data -------------- */
@@ -154,7 +170,6 @@ export enum ImportDataSourceEnum {
fileLocal = 'fileLocal',
fileLink = 'fileLink',
fileCustom = 'fileCustom',
csvTable = 'csvTable',
externalFile = 'externalFile',
apiDataset = 'apiDataset',
reTraining = 'reTraining'

View File

@@ -32,7 +32,7 @@ export const DatasetDataIndexMap: Record<
color: 'red'
},
[DatasetDataIndexTypeEnum.image]: {
label: i18nT('common:data_index_image'),
label: i18nT('dataset:data_index_image'),
color: 'purple'
}
};

View File

@@ -118,9 +118,8 @@ export const computeChunkSize = (params: {
return getLLMMaxChunkSize(params.llmModel);
}
return Math.min(params.chunkSize || chunkAutoChunkSize, getLLMMaxChunkSize(params.llmModel));
return Math.min(params.chunkSize ?? chunkAutoChunkSize, getLLMMaxChunkSize(params.llmModel));
};
export const computeChunkSplitter = (params: {
chunkSettingMode?: ChunkSettingModeEnum;
chunkSplitMode?: DataChunkSplitModeEnum;
@@ -129,8 +128,21 @@ export const computeChunkSplitter = (params: {
if (params.chunkSettingMode === ChunkSettingModeEnum.auto) {
return undefined;
}
if (params.chunkSplitMode === DataChunkSplitModeEnum.size) {
if (params.chunkSplitMode !== DataChunkSplitModeEnum.char) {
return undefined;
}
return params.chunkSplitter;
};
/**
 * Resolve the paragraph depth to use when splitting.
 *
 * - Auto chunk settings: always 5.
 * - Custom settings with paragraph split mode: the caller-supplied `paragraphChunkDeep`
 *   (may be undefined when the caller did not set it).
 * - Any other split mode: 0.
 */
export const computeParagraphChunkDeep = (params: {
  chunkSettingMode?: ChunkSettingModeEnum;
  chunkSplitMode?: DataChunkSplitModeEnum;
  paragraphChunkDeep?: number;
}) => {
  const { chunkSettingMode, chunkSplitMode, paragraphChunkDeep } = params;

  if (chunkSettingMode === ChunkSettingModeEnum.auto) {
    return 5;
  }

  return chunkSplitMode === DataChunkSplitModeEnum.paragraph ? paragraphChunkDeep : 0;
};

View File

@@ -8,26 +8,42 @@ import type {
DatasetStatusEnum,
DatasetTypeEnum,
SearchScoreTypeEnum,
TrainingModeEnum
TrainingModeEnum,
ChunkSettingModeEnum,
ChunkTriggerConfigTypeEnum
} from './constants';
import type { DatasetPermission } from '../../support/permission/dataset/controller';
import { Permission } from '../../support/permission/controller';
import type { APIFileServer, FeishuServer, YuqueServer } from './apiDataset';
import type { SourceMemberType } from 'support/user/type';
import type { DatasetDataIndexTypeEnum } from './data/constants';
import type { ChunkSettingModeEnum } from './constants';
export type ChunkSettingsType = {
trainingType: DatasetCollectionDataProcessModeEnum;
autoIndexes?: boolean;
trainingType?: DatasetCollectionDataProcessModeEnum;
// Chunk trigger
chunkTriggerType?: ChunkTriggerConfigTypeEnum;
chunkTriggerMinSize?: number; // maxSize from agent model, not store
// Data enhance
dataEnhanceCollectionName?: boolean; // Auto add collection name to data
// Index enhance
imageIndex?: boolean;
autoIndexes?: boolean;
chunkSettingMode?: ChunkSettingModeEnum;
// Chunk setting
chunkSettingMode?: ChunkSettingModeEnum; // 系统参数/自定义参数
chunkSplitMode?: DataChunkSplitModeEnum;
chunkSize?: number;
// Paragraph split
paragraphChunkAIMode?: ParagraphChunkAIModeEnum;
paragraphChunkDeep?: number; // Paragraph deep
paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
// Size split
chunkSize?: number; // chunk/qa chunk size, Paragraph max chunk size.
// Char split
chunkSplitter?: string; // chunk/qa chunk splitter
indexSize?: number;
chunkSplitter?: string;
qaPrompt?: string;
};
@@ -66,7 +82,7 @@ export type DatasetSchemaType = {
defaultPermission?: number;
};
export type DatasetCollectionSchemaType = {
export type DatasetCollectionSchemaType = ChunkSettingsType & {
_id: string;
teamId: string;
tmbId: string;
@@ -101,18 +117,7 @@ export type DatasetCollectionSchemaType = {
// Parse settings
customPdfParse?: boolean;
// Chunk settings
autoIndexes?: boolean;
imageIndex?: boolean;
trainingType: DatasetCollectionDataProcessModeEnum;
chunkSettingMode?: ChunkSettingModeEnum;
chunkSplitMode?: DataChunkSplitModeEnum;
chunkSize?: number;
indexSize?: number;
chunkSplitter?: string;
qaPrompt?: string;
};
export type DatasetCollectionTagsSchemaType = {
@@ -175,6 +180,7 @@ export type DatasetTrainingSchemaType = {
q: string;
a: string;
chunkIndex: number;
indexSize?: number;
weight: number;
indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
retryCount: number;

View File

@@ -40,5 +40,6 @@ export function getSourceNameIcon({
/**
 * Estimate how many data entries a training job will count as, by scaling the
 * number of input items with a per-mode multiplier (qa ×20, auto ×5, image ×2,
 * every other mode ×1).
 */
export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {
  const itemCount = data.length;

  switch (mode) {
    case TrainingModeEnum.qa:
      return itemCount * 20;
    case TrainingModeEnum.auto:
      return itemCount * 5;
    case TrainingModeEnum.image:
      return itemCount * 2;
    default:
      return itemCount;
  }
};

View File

@@ -7,7 +7,7 @@ import type {
} from '../../chat/type';
import { NodeOutputItemType } from '../../chat/type';
import type { FlowNodeInputItemType, FlowNodeOutputItemType } from '../type/io.d';
import type { StoreNodeItemType } from '../type/node';
import type { NodeToolConfigType, StoreNodeItemType } from '../type/node';
import type { DispatchNodeResponseKeyEnum } from './constants';
import type { StoreEdgeItemType } from '../type/edge';
import type { NodeInputKeyEnum } from '../constants';
@@ -102,6 +102,9 @@ export type RuntimeNodeItemType = {
pluginId?: string; // workflow id / plugin id
version?: string;
// tool
toolConfig?: NodeToolConfigType;
};
export type RuntimeEdgeItemType = StoreEdgeItemType & {
@@ -114,7 +117,7 @@ export type DispatchNodeResponseType = {
runningTime?: number;
query?: string;
textOutput?: string;
error?: Record<string, any>;
error?: Record<string, any> | string;
customInputs?: Record<string, any>;
customOutputs?: Record<string, any>;
nodeInputs?: Record<string, any>;

View File

@@ -20,11 +20,17 @@ import { RuntimeNodeItemType } from '../runtime/type';
import { PluginTypeEnum } from '../../plugin/constants';
import { RuntimeEdgeItemType, StoreEdgeItemType } from './edge';
import { NextApiResponse } from 'next';
import { AppDetailType, AppSchema } from '../../app/type';
import type { AppDetailType, AppSchema, McpToolConfigType } from '../../app/type';
import type { ParentIdType } from 'common/parentFolder/type';
import { AppTypeEnum } from 'core/app/constants';
import { AppTypeEnum } from '../../app/constants';
import type { WorkflowInteractiveResponseType } from '../template/system/interactive/type';
/**
 * Tool configuration that can be attached to a workflow node.
 * Currently the only variant is an MCP tool.
 */
export type NodeToolConfigType = {
// MCP tool definition plus a url (presumably the MCP server endpoint to call — confirm against the dispatcher).
mcpTool?: McpToolConfigType & {
url: string;
};
};
export type FlowNodeCommonType = {
parentNodeId?: string;
flowNodeType: FlowNodeTypeEnum; // render node card
@@ -46,12 +52,13 @@ export type FlowNodeCommonType = {
// plugin data
pluginId?: string;
isFolder?: boolean;
// pluginType?: AppTypeEnum;
pluginData?: PluginDataType;
// tool data
toolData?: NodeToolConfigType;
};
export type PluginDataType = {
version?: string;
diagram?: string;
userGuide?: string;
courseUrl?: string;
@@ -118,6 +125,7 @@ export type FlowNodeItemType = FlowNodeTemplateType & {
nodeId: string;
parentNodeId?: string;
isError?: boolean;
searchedText?: string;
debugResult?: {
status: 'running' | 'success' | 'skipped' | 'failed';
message?: string;

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "4816",
"name": "钉钉 webhook",
"avatar": "plugins/dingding",
"intro": "向钉钉机器人发起 webhook 请求。",

View File

@@ -1,6 +1,5 @@
{
"author": "Menghuan1918",
"version": "488",
"name": "PDF识别",
"avatar": "plugins/doc2x",
"intro": "将PDF文件发送至Doc2X进行解析返回结构化的LaTeX公式的文本(markdown)支持传入String类型的URL或者流程输出中的文件链接变量",

View File

@@ -1,6 +1,5 @@
{
"author": "Menghuan1918",
"version": "488",
"name": "Doc2X服务",
"avatar": "plugins/doc2x",
"intro": "将传入的图片或PDF文件发送至Doc2X进行解析返回带LaTeX公式的markdown格式的文本。",

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "4816",
"name": "企业微信 webhook",
"avatar": "plugins/qiwei",
"intro": "向企业微信机器人发起 webhook 请求。只能内部群使用。",

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "4811",
"name": "Bing搜索",
"avatar": "core/workflow/template/bing",
"intro": "在Bing中搜索。",

View File

@@ -1,6 +1,5 @@
{
"author": "silencezhang",
"version": "4811",
"name": "数据库连接",
"avatar": "core/workflow/template/datasource",
"intro": "可连接常用数据库并执行sql",

View File

@@ -1,6 +1,5 @@
{
"author": "collin",
"version": "4817",
"name": "流程等待",
"avatar": "core/workflow/template/sleep",
"intro": "让工作流等待指定时间后运行",

View File

@@ -1,6 +1,5 @@
{
"author": "silencezhang",
"version": "4817",
"name": "基础图表",
"avatar": "core/workflow/template/baseChart",
"intro": "根据数据生成图表可根据chartType生成柱状图折线图饼图",

View File

@@ -1,6 +1,5 @@
{
"author": "silencezhang",
"version": "486",
"name": "BI图表功能",
"avatar": "core/workflow/template/BI",
"intro": "BI图表功能可以生成一些常用的图表如饼图柱状图折线图等",

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "486",
"name": "DuckDuckGo 网络搜索",
"avatar": "core/workflow/template/duckduckgo",
"intro": "使用 DuckDuckGo 进行网络搜索",

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "486",
"name": "DuckDuckGo 图片搜索",
"avatar": "core/workflow/template/duckduckgo",
"intro": "使用 DuckDuckGo 进行图片搜索",

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "486",
"name": "DuckDuckGo 新闻检索",
"avatar": "core/workflow/template/duckduckgo",
"intro": "使用 DuckDuckGo 进行新闻检索",

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "486",
"name": "DuckDuckGo 视频搜索",
"avatar": "core/workflow/template/duckduckgo",
"intro": "使用 DuckDuckGo 进行视频搜索",

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "486",
"name": "DuckDuckGo服务",
"avatar": "core/workflow/template/duckduckgo",
"intro": "DuckDuckGo 服务,包含网络搜索、图片搜索、新闻搜索等。",

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "488",
"name": "飞书 webhook",
"avatar": "core/app/templates/plugin-feishu",
"intro": "向飞书机器人发起 webhook 请求。",

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "486",
"name": "网页内容抓取",
"avatar": "core/workflow/template/fetchUrl",
"intro": "可获取一个网页链接内容,并以 Markdown 格式输出,仅支持获取静态网站。",

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "481",
"templateType": "tools",
"name": "获取当前时间",
"avatar": "core/workflow/template/getTime",

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "4811",
"name": "Google搜索",
"avatar": "core/workflow/template/google",
"intro": "在google中搜索。",

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "486",
"name": "数学公式执行",
"avatar": "core/workflow/template/mathCall",
"intro": "用于执行数学表达式的工具,通过 js 的 expr-eval 库运行表达式并返回结果。",

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "4816",
"name": "Search XNG 搜索",
"avatar": "core/workflow/template/searxng",
"intro": "使用 Search XNG 服务进行搜索。",

View File

@@ -1,6 +1,5 @@
{
"author": "cloudpense",
"version": "1.0.0",
"name": "Email 邮件发送",
"avatar": "plugins/email",
"intro": "通过SMTP协议发送电子邮件(nodemailer)",

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "489",
"name": "文本加工",
"avatar": "/imgs/workflow/textEditor.svg",
"intro": "可对固定或传入的文本进行加工后输出,非字符串类型数据最终会转成字符串类型。",

View File

@@ -1,6 +1,5 @@
{
"author": "",
"version": "4811",
"name": "Wiki搜索",
"avatar": "core/workflow/template/wiki",
"intro": "在Wiki中查询释义。",

View File

@@ -6,12 +6,6 @@ import type {
} from '../../core/dataset/search/controller';
import type { AuthOpenApiLimitProps } from '../../support/openapi/auth';
import type { CreateUsageProps, ConcatUsageProps } from '@fastgpt/global/support/wallet/usage/api';
import type {
GetProApiDatasetFileContentParams,
GetProApiDatasetFileDetailParams,
GetProApiDatasetFileListParams,
GetProApiDatasetFilePreviewUrlParams
} from '../../core/dataset/apiDataset/proApi';
declare global {
var textCensorHandler: (params: { text: string }) => Promise<{ code: number; message?: string }>;
@@ -19,16 +13,4 @@ declare global {
var authOpenApiHandler: (data: AuthOpenApiLimitProps) => Promise<any>;
var createUsageHandler: (data: CreateUsageProps) => any;
var concatUsageHandler: (data: ConcatUsageProps) => any;
// API dataset
var getProApiDatasetFileList: (data: GetProApiDatasetFileListParams) => Promise<APIFileItem[]>;
var getProApiDatasetFileContent: (
data: GetProApiDatasetFileContentParams
) => Promise<ApiFileReadContentResponse>;
var getProApiDatasetFilePreviewUrl: (
data: GetProApiDatasetFilePreviewUrlParams
) => Promise<string>;
var getProApiDatasetFileDetail: (
data: GetProApiDatasetFileDetailParams
) => Promise<ApiDatasetDetailResponse>;
}

View File

@@ -0,0 +1,178 @@
import { retryFn } from '@fastgpt/global/common/system/utils';
import { connectionMongo } from '../../mongo';
import { MongoRawTextBufferSchema, bucketName } from './schema';
import { addLog } from '../../system/log';
import { setCron } from '../../system/cron';
import { checkTimerLock } from '../../system/timerLock/utils';
import { TimerIdEnum } from '../../system/timerLock/constants';
/** Create a GridFSBucket on the current mongoose connection, bound to the raw-text buffer bucket. */
const getGridBucket = () => {
  const { mongo, connection } = connectionMongo;
  return new mongo.GridFSBucket(connection.db!, { bucketName });
};
/**
 * Store a piece of raw text in the GridFS raw-text buffer bucket.
 *
 * This is best-effort: a failed upload is retried through `retryFn`, and if it
 * still fails the error is logged and an empty string is returned instead of
 * throwing.
 *
 * @param sourceId - Key of the buffered text; used as the GridFS filename and stored in `metadata.sourceId`.
 * @param sourceName - Display name of the source, stored in `metadata.sourceName`.
 * @param text - Raw text to store.
 * @param expiredTime - Stored in `metadata.expiredTime`; the cleanup cron in this file removes files whose expiredTime has passed.
 * @returns The GridFS file id on success, or `''` when the upload failed.
 */
export const addRawTextBuffer = async ({
  sourceId,
  sourceName,
  text,
  expiredTime
}: {
  sourceId: string;
  sourceName: string;
  text: string;
  expiredTime: Date;
}) => {
  const gridBucket = getGridBucket();
  const metadata = {
    sourceId,
    sourceName,
    expiredTime
  };

  const buffer = Buffer.from(text);
  const fileSize = buffer.length;

  // Chunk size: as large as possible, but at most 14MB and at least 128KB.
  const chunkSizeBytes = (() => {
    // Ideal chunk size: file size / target chunk count (10), capped at 14MB per chunk.
    const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);

    // Never go below 128KB.
    const minChunkSize = 128 * 1024;

    // Round up to the next multiple of 64KB to keep chunk sizes tidy.
    const chunkSize = Math.max(idealChunkSize, minChunkSize);
    return Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
  })();

  try {
    return await retryFn(async () => {
      // An upload stream is single-use: once it has ended or errored it cannot be
      // written again, so every attempt must open a fresh one.
      const uploadStream = gridBucket.openUploadStream(sourceId, {
        metadata,
        chunkSizeBytes
      });

      return new Promise((resolve, reject) => {
        uploadStream.on('finish', () => {
          resolve(uploadStream.id);
        });
        // Reject so that retryFn can actually see the failure and retry.
        uploadStream.on('error', reject);
        uploadStream.end(buffer);
      });
    });
  } catch (error) {
    // Callers treat buffering as optional, so report the failure without throwing.
    addLog.error('addRawTextBuffer error', error);
    return '';
  }
};
/**
 * Read a buffered raw text back from GridFS.
 *
 * @param sourceId - Value of `metadata.sourceId` the text was stored under.
 * @returns The text and its source name, or `null` when no buffer exists or the
 *          download stream fails (the failure is logged, not thrown).
 */
export const getRawTextBuffer = async (sourceId: string) => {
  const gridBucket = getGridBucket();

  return retryFn(async () => {
    const fileDoc = await MongoRawTextBufferSchema.findOne(
      { 'metadata.sourceId': sourceId },
      '_id metadata'
    ).lean();
    if (!fileDoc) {
      return null;
    }

    // Stream the file content and collect the pieces.
    const downloadStream = gridBucket.openDownloadStream(fileDoc._id);
    const parts: Buffer[] = [];

    return new Promise<{
      text: string;
      sourceName: string;
    } | null>((resolve) => {
      downloadStream.on('data', (part) => {
        parts.push(part);
      });

      downloadStream.on('end', () => {
        // Decode only after joining, so multi-byte characters split across chunks stay intact.
        const text = Buffer.concat(parts).toString('utf8');
        const sourceName = fileDoc.metadata?.sourceName || '';
        resolve({ text, sourceName });
      });

      downloadStream.on('error', (error) => {
        addLog.error('getRawTextBuffer error', error);
        resolve(null);
      });
    });
  });
};
/**
 * Delete the buffered raw text stored under `sourceId`.
 *
 * @returns `true` when a buffer file was found and deleted, `false` when none exists.
 */
export const deleteRawTextBuffer = async (sourceId: string): Promise<boolean> => {
  const gridBucket = getGridBucket();

  return retryFn(async () => {
    const fileDoc = await MongoRawTextBufferSchema.findOne({ 'metadata.sourceId': sourceId });

    if (fileDoc) {
      await gridBucket.delete(fileDoc._id);
    }
    return Boolean(fileDoc);
  });
};
/**
 * Set a new expiry time on the buffered raw text stored under `sourceId`.
 *
 * @returns The result of the underlying `updateOne` call.
 */
export const updateRawTextBufferExpiredTime = async ({
  sourceId,
  expiredTime
}: {
  sourceId: string;
  expiredTime: Date;
}) => {
  const filter = { 'metadata.sourceId': sourceId };
  const update = { $set: { 'metadata.expiredTime': expiredTime } };

  return retryFn(async () => {
    return MongoRawTextBufferSchema.updateOne(filter, update);
  });
};
/**
 * Register a cron job (every 10 minutes) that deletes buffered raw texts whose
 * `metadata.expiredTime` is in the past. A timer lock is checked before each run,
 * and the run is skipped when the lock is not obtained.
 */
export const clearExpiredRawTextBufferCron = async () => {
  // Delete every expired buffer file, one after another.
  const removeExpiredBuffers = async () => {
    addLog.debug('Clear expired raw text buffer start');
    const gridBucket = getGridBucket();

    return retryFn(async () => {
      const expiredFiles = await MongoRawTextBufferSchema.find(
        { 'metadata.expiredTime': { $lt: new Date() } },
        '_id'
      ).lean();

      for (const { _id } of expiredFiles) {
        await gridBucket.delete(_id);
      }
      addLog.debug('Clear expired raw text buffer end');
    });
  };

  setCron('*/10 * * * *', async () => {
    const lockAcquired = await checkTimerLock({
      timerId: TimerIdEnum.clearExpiredRawTextBuffer,
      lockMinuted: 9
    });
    if (!lockAcquired) return;

    try {
      await removeExpiredBuffers();
    } catch (error) {
      addLog.error('clearExpiredRawTextBufferCron error', error);
    }
  });
};

View File

@@ -1,33 +1,22 @@
import { getMongoModel, Schema } from '../../mongo';
import { type RawTextBufferSchemaType } from './type';
import { getMongoModel, type Types, Schema } from '../../mongo';
export const collectionName = 'buffer_rawtexts';
export const bucketName = 'buffer_rawtext';
const RawTextBufferSchema = new Schema({
sourceId: {
type: String,
required: true
},
rawText: {
type: String,
default: ''
},
createTime: {
type: Date,
default: () => new Date()
},
metadata: Object
metadata: {
sourceId: { type: String, required: true },
sourceName: { type: String, required: true },
expiredTime: { type: Date, required: true }
}
});
RawTextBufferSchema.index({ 'metadata.sourceId': 'hashed' });
RawTextBufferSchema.index({ 'metadata.expiredTime': -1 });
try {
RawTextBufferSchema.index({ sourceId: 1 });
// 20 minutes
RawTextBufferSchema.index({ createTime: 1 }, { expireAfterSeconds: 20 * 60 });
} catch (error) {
console.log(error);
}
export const MongoRawTextBuffer = getMongoModel<RawTextBufferSchemaType>(
collectionName,
RawTextBufferSchema
);
export const MongoRawTextBufferSchema = getMongoModel<{
_id: Types.ObjectId;
metadata: {
sourceId: string;
sourceName: string;
expiredTime: Date;
};
}>(`${bucketName}.files`, RawTextBufferSchema);

View File

@@ -1,8 +0,0 @@
export type RawTextBufferSchemaType = {
sourceId: string;
rawText: string;
createTime: Date;
metadata?: {
filename: string;
};
};

View File

@@ -6,13 +6,13 @@ import { type DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema';
import { detectFileEncoding, detectFileEncodingByPath } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { MongoRawTextBuffer } from '../../buffer/rawText/schema';
import { readRawContentByFileBuffer } from '../read/utils';
import { gridFsStream2Buffer, stream2Encoding } from './utils';
import { addLog } from '../../system/log';
import { readFromSecondary } from '../../mongo/utils';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import { Readable } from 'stream';
import { addRawTextBuffer, getRawTextBuffer } from '../../buffer/rawText/controller';
import { addMinutes } from 'date-fns';
export function getGFSCollection(bucket: `${BucketNameEnum}`) {
MongoDatasetFileSchema;
@@ -210,28 +210,26 @@ export const readFileContentFromMongo = async ({
tmbId,
bucketName,
fileId,
isQAImport = false,
customPdfParse = false
customPdfParse = false,
getFormatText
}: {
teamId: string;
tmbId: string;
bucketName: `${BucketNameEnum}`;
fileId: string;
isQAImport?: boolean;
customPdfParse?: boolean;
getFormatText?: boolean; // 数据类型都尽可能转化成 markdown 格式
}): Promise<{
rawText: string;
filename: string;
}> => {
const bufferId = `${fileId}-${customPdfParse}`;
const bufferId = `${String(fileId)}-${customPdfParse}`;
// read buffer
const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: bufferId }, undefined, {
...readFromSecondary
}).lean();
const fileBuffer = await getRawTextBuffer(bufferId);
if (fileBuffer) {
return {
rawText: fileBuffer.rawText,
filename: fileBuffer.metadata?.filename || ''
rawText: fileBuffer.text,
filename: fileBuffer?.sourceName
};
}
@@ -254,8 +252,8 @@ export const readFileContentFromMongo = async ({
// Get raw text
const { rawText } = await readRawContentByFileBuffer({
customPdfParse,
getFormatText,
extension,
isQAImport,
teamId,
tmbId,
buffer: fileBuffers,
@@ -265,16 +263,13 @@ export const readFileContentFromMongo = async ({
}
});
// < 14M
if (fileBuffers.length < 14 * 1024 * 1024 && rawText.trim()) {
MongoRawTextBuffer.create({
sourceId: bufferId,
rawText,
metadata: {
filename: file.filename
}
});
}
// Add buffer
addRawTextBuffer({
sourceId: bufferId,
sourceName: file.filename,
text: rawText,
expiredTime: addMinutes(new Date(), 20)
});
return {
rawText,

View File

@@ -1,16 +1,16 @@
import { Schema, getMongoModel } from '../../mongo';
const DatasetFileSchema = new Schema({});
const ChatFileSchema = new Schema({});
const DatasetFileSchema = new Schema({
metadata: Object
});
const ChatFileSchema = new Schema({
metadata: Object
});
try {
DatasetFileSchema.index({ uploadDate: -1 });
DatasetFileSchema.index({ uploadDate: -1 });
ChatFileSchema.index({ uploadDate: -1 });
ChatFileSchema.index({ 'metadata.chatId': 1 });
} catch (error) {
console.log(error);
}
ChatFileSchema.index({ uploadDate: -1 });
ChatFileSchema.index({ 'metadata.chatId': 1 });
export const MongoDatasetFileSchema = getMongoModel('dataset.files', DatasetFileSchema);
export const MongoChatFileSchema = getMongoModel('chat.files', ChatFileSchema);

View File

@@ -1,5 +1,57 @@
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { PassThrough } from 'stream';
import { getGridBucket } from './controller';
import { type BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { retryFn } from '@fastgpt/global/common/system/utils';
/**
 * Store a text string as a new file in the given GridFS bucket.
 *
 * The text is encoded with `Buffer.from` (UTF-8 by default) and written in one
 * `end()` call. The GridFS chunk size is derived from the encoded byte size:
 * roughly one tenth of the file, clamped to [128KB, 14MB] and rounded up to a
 * multiple of 64KB.
 *
 * The upload runs inside `retryFn` (presumably re-invoking the callback when it
 * rejects - confirm against its implementation). A fresh upload stream is opened
 * for every attempt, because a stream that has already been ended or has errored
 * cannot be written to again.
 *
 * @param bucket - Name of the GridFS bucket to write into.
 * @param filename - File name recorded for the stored file.
 * @param text - Text content to store.
 * @param metadata - Arbitrary metadata attached to the stored file.
 * @returns An object holding the id of the created file as a string.
 */
export const createFileFromText = async ({
  bucket,
  filename,
  text,
  metadata
}: {
  bucket: `${BucketNameEnum}`;
  filename: string;
  text: string;
  metadata: Record<string, any>;
}) => {
  const gridBucket = getGridBucket(bucket);

  const buffer = Buffer.from(text);
  const fileSize = buffer.length;

  // Chunk size: as large as practical, but no more than 14MB and no less than 128KB.
  const chunkSizeBytes = (() => {
    // Ideal chunk size: file size / target chunk count (10), capped at 14MB per chunk.
    const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);

    // Never go below 128KB.
    const minChunkSize = 128 * 1024;
    const chunkSize = Math.max(idealChunkSize, minChunkSize);

    // Round up to the nearest multiple of 64KB to keep the value tidy.
    return Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
  })();

  return retryFn(
    () =>
      new Promise<{ fileId: string }>((resolve, reject) => {
        // Open the stream inside the retried callback so each attempt starts clean.
        const uploadStream = gridBucket.openUploadStream(filename, {
          metadata,
          chunkSizeBytes
        });

        // Register listeners before writing so no event can be missed.
        uploadStream.on('finish', () => {
          resolve({ fileId: String(uploadStream.id) });
        });
        uploadStream.on('error', reject);

        uploadStream.end(buffer);
      })
  );
};
export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
return new Promise<Buffer>((resolve, reject) => {

View File

@@ -16,6 +16,7 @@ export type readRawTextByLocalFileParams = {
path: string;
encoding: string;
customPdfParse?: boolean;
getFormatText?: boolean;
metadata?: Record<string, any>;
};
export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams) => {
@@ -27,8 +28,8 @@ export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParam
return readRawContentByFileBuffer({
extension,
isQAImport: false,
customPdfParse: params.customPdfParse,
getFormatText: params.getFormatText,
teamId: params.teamId,
tmbId: params.tmbId,
encoding: params.encoding,
@@ -46,7 +47,7 @@ export const readRawContentByFileBuffer = async ({
encoding,
metadata,
customPdfParse = false,
isQAImport = false
getFormatText = true
}: {
teamId: string;
tmbId: string;
@@ -57,8 +58,10 @@ export const readRawContentByFileBuffer = async ({
metadata?: Record<string, any>;
customPdfParse?: boolean;
isQAImport: boolean;
}): Promise<ReadFileResponse> => {
getFormatText?: boolean;
}): Promise<{
rawText: string;
}> => {
const systemParse = () =>
runWorker<ReadFileResponse>(WorkerNameEnum.readFile, {
extension,
@@ -107,7 +110,7 @@ export const readRawContentByFileBuffer = async ({
return {
rawText: text,
formatText: rawText,
formatText: text,
imageList
};
};
@@ -149,7 +152,7 @@ export const readRawContentByFileBuffer = async ({
return await systemParse();
})();
addLog.debug(`Parse file success, time: ${Date.now() - start}ms. Uploading file image.`);
addLog.debug(`Parse file success, time: ${Date.now() - start}ms. `);
// markdown data format
if (imageList) {
@@ -176,16 +179,7 @@ export const readRawContentByFileBuffer = async ({
});
}
if (['csv', 'xlsx'].includes(extension)) {
// qa data
if (isQAImport) {
rawText = rawText || '';
} else {
rawText = formatText || rawText;
}
}
addLog.debug(`Upload file success, time: ${Date.now() - start}ms`);
addLog.debug(`Upload file image success, time: ${Date.now() - start}ms`);
return { rawText, formatText, imageList };
return { rawText: getFormatText ? formatText || rawText : rawText };
};

View File

@@ -1,7 +1,10 @@
import { getGlobalRedisCacheConnection } from './index';
import { getGlobalRedisConnection } from './index';
import { addLog } from '../system/log';
import { retryFn } from '@fastgpt/global/common/system/utils';
const redisPrefix = 'cache:';
const getCacheKey = (key: string) => `${redisPrefix}${key}`;
export enum CacheKeyEnum {
team_vector_count = 'team_vector_count'
}
@@ -13,12 +16,12 @@ export const setRedisCache = async (
) => {
return await retryFn(async () => {
try {
const redis = getGlobalRedisCacheConnection();
const redis = getGlobalRedisConnection();
if (expireSeconds) {
await redis.set(key, data, 'EX', expireSeconds);
await redis.set(getCacheKey(key), data, 'EX', expireSeconds);
} else {
await redis.set(key, data);
await redis.set(getCacheKey(key), data);
}
} catch (error) {
addLog.error('Set cache error:', error);
@@ -28,11 +31,11 @@ export const setRedisCache = async (
};
export const getRedisCache = async (key: string) => {
const redis = getGlobalRedisCacheConnection();
return await retryFn(() => redis.get(key));
const redis = getGlobalRedisConnection();
return await retryFn(() => redis.get(getCacheKey(key)));
};
export const delRedisCache = async (key: string) => {
const redis = getGlobalRedisCacheConnection();
await retryFn(() => redis.del(key));
const redis = getGlobalRedisConnection();
await retryFn(() => redis.del(getCacheKey(key)));
};

View File

@@ -27,17 +27,26 @@ export const newWorkerRedisConnection = () => {
return redis;
};
export const getGlobalRedisCacheConnection = () => {
if (global.redisCache) return global.redisCache;
export const FASTGPT_REDIS_PREFIX = 'fastgpt:';
export const getGlobalRedisConnection = () => {
if (global.redisClient) return global.redisClient;
global.redisCache = new Redis(REDIS_URL, { keyPrefix: 'fastgpt:cache:' });
global.redisClient = new Redis(REDIS_URL, { keyPrefix: FASTGPT_REDIS_PREFIX });
global.redisCache.on('connect', () => {
global.redisClient.on('connect', () => {
addLog.info('Redis connected');
});
global.redisCache.on('error', (error) => {
global.redisClient.on('error', (error) => {
addLog.error('Redis connection error', error);
});
return global.redisCache;
return global.redisClient;
};
/**
 * List the Redis keys stored under `<key>:`.
 *
 * The lookup pattern is built with `FASTGPT_REDIS_PREFIX` spelled out explicitly,
 * and that prefix is removed from each returned key before it is handed back.
 *
 * @param key - Key namespace to search (without the global prefix or trailing colon).
 * @returns The matching keys with the first occurrence of the global prefix removed.
 */
export const getAllKeysByPrefix = async (key: string) => {
  const client = getGlobalRedisConnection();
  const pattern = `${FASTGPT_REDIS_PREFIX}${key}:*`;
  const matchedKeys = await client.keys(pattern);

  return matchedKeys.map((fullKey) => fullKey.replace(FASTGPT_REDIS_PREFIX, ''));
};

View File

@@ -1,5 +1,5 @@
import type Redis from 'ioredis';
declare global {
var redisCache: Redis | null;
var redisClient: Redis | null;
}

View File

@@ -10,6 +10,7 @@ let jieba: Jieba | undefined;
})();
const stopWords = new Set([
'\n',
'--',
'?',
'“',
@@ -1519,8 +1520,7 @@ const stopWords = new Set([
]);
export async function jiebaSplit({ text }: { text: string }) {
text = text.replace(/[#*`_~>[\](){}|]/g, '').replace(/\S*https?\S*/gi, '');
text = text.replace(/[#*`_~>[\](){}|]|\S*https?\S*/g, '').trim();
const tokens = (await jieba!.cutAsync(text, true)) as string[];
return (

View File

@@ -2,26 +2,44 @@ import { SystemConfigsTypeEnum } from '@fastgpt/global/common/system/config/cons
import { MongoSystemConfigs } from './schema';
import { type FastGPTConfigFileType } from '@fastgpt/global/common/system/types';
import { FastGPTProUrl } from '../constants';
import { type LicenseDataType } from '@fastgpt/global/common/system/types';
export const getFastGPTConfigFromDB = async () => {
export const getFastGPTConfigFromDB = async (): Promise<{
fastgptConfig: FastGPTConfigFileType;
licenseData?: LicenseDataType;
}> => {
if (!FastGPTProUrl) {
return {
config: {} as FastGPTConfigFileType
fastgptConfig: {} as FastGPTConfigFileType
};
}
const res = await MongoSystemConfigs.findOne({
type: SystemConfigsTypeEnum.fastgpt
}).sort({
createTime: -1
});
const [fastgptConfig, licenseConfig] = await Promise.all([
MongoSystemConfigs.findOne({
type: SystemConfigsTypeEnum.fastgpt
}).sort({
createTime: -1
}),
MongoSystemConfigs.findOne({
type: SystemConfigsTypeEnum.license
}).sort({
createTime: -1
})
]);
const config = res?.value || {};
const config = fastgptConfig?.value || {};
const licenseData = licenseConfig?.value?.data as LicenseDataType | undefined;
const fastgptConfigTime = fastgptConfig?.createTime.getTime().toString();
const licenseConfigTime = licenseConfig?.createTime.getTime().toString();
// 利用配置文件的创建时间(更新时间)来做缓存,如果前端命中缓存,则不需要再返回配置文件
global.systemInitBufferId = res ? res.createTime.getTime().toString() : undefined;
global.systemInitBufferId = fastgptConfigTime
? `${fastgptConfigTime}-${licenseConfigTime}`
: undefined;
return {
config: config as FastGPTConfigFileType
fastgptConfig: config as FastGPTConfigFileType,
licenseData
};
};

View File

@@ -57,14 +57,19 @@ export const addLog = {
level === LogLevelEnum.error && console.error(obj);
// store
// store log
if (level >= STORE_LOG_LEVEL && connectionMongo.connection.readyState === 1) {
// store log
getMongoLog().create({
text: msg,
level,
metadata: obj
});
(async () => {
try {
await getMongoLog().create({
text: msg,
level,
metadata: obj
});
} catch (error) {
console.error('store log error', error);
}
})();
}
},
debug(msg: string, obj?: Record<string, any>) {

View File

@@ -5,7 +5,8 @@ export enum TimerIdEnum {
clearExpiredSubPlan = 'clearExpiredSubPlan',
updateStandardPlan = 'updateStandardPlan',
scheduleTriggerApp = 'scheduleTriggerApp',
notification = 'notification'
notification = 'notification',
clearExpiredRawTextBuffer = 'clearExpiredRawTextBuffer'
}
export enum LockNotificationEnum {

View File

@@ -188,6 +188,7 @@ export class PgVectorCtrl {
const results: any = await PgClient.query(
`BEGIN;
SET LOCAL hnsw.ef_search = ${global.systemEnv?.hnswEfSearch || 100};
SET LOCAL hnsw.max_scan_tuples = ${global.systemEnv?.hnswMaxScanTuples || 100000};
SET LOCAL hnsw.iterative_scan = relaxed_order;
WITH relaxed_results AS MATERIALIZED (
select id, collection_id, vector <#> '[${vector}]' AS score
@@ -199,7 +200,7 @@ export class PgVectorCtrl {
) SELECT id, collection_id, score FROM relaxed_results ORDER BY score;
COMMIT;`
);
const rows = results?.[3]?.rows as PgSearchRawType[];
const rows = results?.[results.length - 2]?.rows as PgSearchRawType[];
if (!Array.isArray(rows)) {
return {

View File

@@ -78,7 +78,7 @@ export const createChatCompletion = async ({
}
body.model = modelConstantsData.model;
const formatTimeout = timeout ? timeout : body.stream ? 60000 : 600000;
const formatTimeout = timeout ? timeout : 600000;
const ai = getAIApi({
userKey,
timeout: formatTimeout

View File

@@ -1,6 +1,54 @@
{
"provider": "Claude",
"list": [
{
"model": "claude-sonnet-4-20250514",
"name": "claude-sonnet-4-20250514",
"maxContext": 200000,
"maxResponse": 8000,
"quoteMaxToken": 100000,
"maxTemperature": 1,
"showTopP": true,
"showStopSign": true,
"vision": true,
"toolChoice": true,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": true,
"usedInClassify": true,
"customCQPrompt": "",
"usedInExtractFields": true,
"usedInQueryExtension": true,
"customExtractPrompt": "",
"usedInToolCall": true,
"defaultConfig": {},
"fieldMap": {},
"type": "llm"
},
{
"model": "claude-opus-4-20250514",
"name": "claude-opus-4-20250514",
"maxContext": 200000,
"maxResponse": 4096,
"quoteMaxToken": 100000,
"maxTemperature": 1,
"showTopP": true,
"showStopSign": true,
"vision": true,
"toolChoice": true,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": true,
"usedInClassify": true,
"customCQPrompt": "",
"usedInExtractFields": true,
"usedInQueryExtension": true,
"customExtractPrompt": "",
"usedInToolCall": true,
"defaultConfig": {},
"fieldMap": {},
"type": "llm"
},
{
"model": "claude-3-7-sonnet-20250219",
"name": "claude-3-7-sonnet-20250219",

View File

@@ -25,6 +25,30 @@
"showTopP": true,
"showStopSign": true
},
{
"model": "gemini-2.5-flash-preview-04-17",
"name": "gemini-2.5-flash-preview-04-17",
"maxContext": 1000000,
"maxResponse": 8000,
"quoteMaxToken": 60000,
"maxTemperature": 1,
"vision": true,
"toolChoice": true,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": true,
"usedInClassify": true,
"customCQPrompt": "",
"usedInExtractFields": true,
"usedInQueryExtension": true,
"customExtractPrompt": "",
"usedInToolCall": true,
"defaultConfig": {},
"fieldMap": {},
"type": "llm",
"showTopP": true,
"showStopSign": true
},
{
"model": "gemini-2.0-flash",
"name": "gemini-2.0-flash",

View File

@@ -18,15 +18,17 @@ import json5 from 'json5';
*/
export const computedMaxToken = ({
maxToken,
model
model,
min
}: {
maxToken?: number;
model: LLMModelItemType;
min?: number;
}) => {
if (maxToken === undefined) return;
maxToken = Math.min(maxToken, model.maxResponse);
return maxToken;
return Math.max(maxToken, min || 0);
};
// FastGPT temperature range: [0,10], ai temperature:[0,2],[0,1]……
@@ -135,12 +137,14 @@ export const llmStreamResponseToAnswerText = async (
// Tool calls
if (responseChoice?.tool_calls?.length) {
responseChoice.tool_calls.forEach((toolCall) => {
const index = toolCall.index;
responseChoice.tool_calls.forEach((toolCall, i) => {
const index = toolCall.index ?? i;
if (toolCall.id || callingTool) {
// 有 id代表新 call 工具
if (toolCall.id) {
// Call new tool
const hasNewTool = toolCall?.function?.name || callingTool;
if (hasNewTool) {
// 有 function name代表新 call 工具
if (toolCall?.function?.name) {
callingTool = {
name: toolCall.function?.name || '',
arguments: toolCall.function?.arguments || ''
@@ -176,7 +180,7 @@ export const llmStreamResponseToAnswerText = async (
}
}
return {
text: parseReasoningContent(answer)[1],
text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
usage,
toolCalls
};
@@ -190,8 +194,9 @@ export const llmUnStreamResponseToAnswerText = async (
}> => {
const answer = response.choices?.[0]?.message?.content || '';
const toolCalls = response.choices?.[0]?.message?.tool_calls;
return {
text: answer,
text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
usage: response.usage,
toolCalls
};
@@ -221,7 +226,9 @@ export const parseReasoningContent = (text: string): [string, string] => {
};
export const removeDatasetCiteText = (text: string, retainDatasetCite: boolean) => {
return retainDatasetCite ? text : text.replace(/\[([a-f0-9]{24})\](?:\([^\)]*\)?)?/g, '');
return retainDatasetCite
? text.replace(/\[id\]\(CITE\)/g, '')
: text.replace(/\[([a-f0-9]{24})\](?:\([^\)]*\)?)?/g, '').replace(/\[id\]\(CITE\)/g, '');
};
// Parse llm stream part
@@ -236,6 +243,12 @@ export const parseLLMStreamResponse = () => {
let citeBuffer = '';
const maxCiteBufferLength = 32; // [Object](CITE)总长度为32
// Buffer
let buffer_finishReason: CompletionFinishReason = null;
let buffer_usage: CompletionUsage = getLLMDefaultUsage();
let buffer_reasoningContent = '';
let buffer_content = '';
/*
parseThinkTag - 只控制是否主动解析 <think></think>,如果接口已经解析了,则不再解析。
retainDatasetCite -
@@ -253,6 +266,7 @@ export const parseLLMStreamResponse = () => {
};
finish_reason?: CompletionFinishReason;
}[];
usage?: CompletionUsage;
};
parseThinkTag?: boolean;
retainDatasetCite?: boolean;
@@ -262,72 +276,71 @@ export const parseLLMStreamResponse = () => {
responseContent: string;
finishReason: CompletionFinishReason;
} => {
const finishReason = part.choices?.[0]?.finish_reason || null;
const content = part.choices?.[0]?.delta?.content || '';
// @ts-ignore
const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
const isStreamEnd = !!finishReason;
const data = (() => {
buffer_usage = part.usage || buffer_usage;
// Parse think
const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } = (() => {
if (reasoningContent || !parseThinkTag) {
isInThinkTag = false;
return { reasoningContent, content };
}
const finishReason = part.choices?.[0]?.finish_reason || null;
buffer_finishReason = finishReason || buffer_finishReason;
if (!content) {
return {
reasoningContent: '',
content: ''
};
}
const content = part.choices?.[0]?.delta?.content || '';
// @ts-ignore
const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
const isStreamEnd = !!buffer_finishReason;
// 如果不在 think 标签中,或者有 reasoningContent(接口已解析),则返回 reasoningContent 和 content
if (isInThinkTag === false) {
return {
reasoningContent: '',
content
};
}
// Parse think
const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } =
(() => {
if (reasoningContent || !parseThinkTag) {
isInThinkTag = false;
return { reasoningContent, content };
}
// 检测是否为 think 标签开头的数据
if (isInThinkTag === undefined) {
// Parse content think and answer
startTagBuffer += content;
// 太少内容时候,暂时不解析
if (startTagBuffer.length < thinkStartChars.length) {
if (isStreamEnd) {
const tmpContent = startTagBuffer;
startTagBuffer = '';
// 如果不在 think 标签中,或者有 reasoningContent(接口已解析),则返回 reasoningContent 和 content
if (isInThinkTag === false) {
return {
reasoningContent: '',
content: tmpContent
content
};
}
return {
reasoningContent: '',
content: ''
};
}
if (startTagBuffer.startsWith(thinkStartChars)) {
isInThinkTag = true;
return {
reasoningContent: startTagBuffer.slice(thinkStartChars.length),
content: ''
};
}
// 检测是否为 think 标签开头的数据
if (isInThinkTag === undefined) {
// Parse content think and answer
startTagBuffer += content;
// 太少内容时候,暂时不解析
if (startTagBuffer.length < thinkStartChars.length) {
if (isStreamEnd) {
const tmpContent = startTagBuffer;
startTagBuffer = '';
return {
reasoningContent: '',
content: tmpContent
};
}
return {
reasoningContent: '',
content: ''
};
}
// 如果未命中 think 标签,则认为不在 think 标签中,返回 buffer 内容作为 content
isInThinkTag = false;
return {
reasoningContent: '',
content: startTagBuffer
};
}
if (startTagBuffer.startsWith(thinkStartChars)) {
isInThinkTag = true;
return {
reasoningContent: startTagBuffer.slice(thinkStartChars.length),
content: ''
};
}
// 确认是 think 标签内容,开始返回 think 内容,并实时检测 </think>
/*
// 如果未命中 think 标签,则认为不在 think 标签中,返回 buffer 内容作为 content
isInThinkTag = false;
return {
reasoningContent: '',
content: startTagBuffer
};
}
// 确认是 think 标签内容,开始返回 think 内容,并实时检测 </think>
/*
检测 </think> 方案。
存储所有疑似 </think> 的内容,直到检测到完整的 </think> 标签或超出 </think> 长度。
content 返回值包含以下几种情况:
@@ -338,124 +351,145 @@ export const parseLLMStreamResponse = () => {
</think>abc - 完全命中尾标签
k>abc - 命中一部分尾标签
*/
// endTagBuffer 专门用来记录疑似尾标签的内容
if (endTagBuffer) {
endTagBuffer += content;
if (endTagBuffer.includes(thinkEndChars)) {
isInThinkTag = false;
const answer = endTagBuffer.slice(thinkEndChars.length);
return {
reasoningContent: '',
content: answer
};
} else if (endTagBuffer.length >= thinkEndChars.length) {
// 缓存内容超出尾标签长度,且仍未命中 </think>,则认为本次猜测 </think> 失败,仍处于 think 阶段。
const tmp = endTagBuffer;
endTagBuffer = '';
return {
reasoningContent: tmp,
content: ''
};
}
return {
reasoningContent: '',
content: ''
};
} else if (content.includes(thinkEndChars)) {
// 返回内容,完整命中</think>,直接结束
isInThinkTag = false;
const [think, answer] = content.split(thinkEndChars);
return {
reasoningContent: think,
content: answer
};
} else {
// 无 buffer且未命中 </think>,开始疑似 </think> 检测。
for (let i = 1; i < thinkEndChars.length; i++) {
const partialEndTag = thinkEndChars.slice(0, i);
// 命中一部分尾标签
if (content.endsWith(partialEndTag)) {
const think = content.slice(0, -partialEndTag.length);
endTagBuffer += partialEndTag;
// endTagBuffer 专门用来记录疑似尾标签的内容
if (endTagBuffer) {
endTagBuffer += content;
if (endTagBuffer.includes(thinkEndChars)) {
isInThinkTag = false;
const answer = endTagBuffer.slice(thinkEndChars.length);
return {
reasoningContent: '',
content: answer
};
} else if (endTagBuffer.length >= thinkEndChars.length) {
// 缓存内容超出尾标签长度,且仍未命中 </think>,则认为本次猜测 </think> 失败,仍处于 think 阶段。
const tmp = endTagBuffer;
endTagBuffer = '';
return {
reasoningContent: tmp,
content: ''
};
}
return {
reasoningContent: think,
reasoningContent: '',
content: ''
};
} else if (content.includes(thinkEndChars)) {
// 返回内容,完整命中</think>,直接结束
isInThinkTag = false;
const [think, answer] = content.split(thinkEndChars);
return {
reasoningContent: think,
content: answer
};
} else {
// 无 buffer且未命中 </think>,开始疑似 </think> 检测。
for (let i = 1; i < thinkEndChars.length; i++) {
const partialEndTag = thinkEndChars.slice(0, i);
// 命中一部分尾标签
if (content.endsWith(partialEndTag)) {
const think = content.slice(0, -partialEndTag.length);
endTagBuffer += partialEndTag;
return {
reasoningContent: think,
content: ''
};
}
}
}
}
// 完全未命中尾标签,还是 think 阶段。
return {
reasoningContent: content,
content: ''
};
})();
// Parse dataset cite
if (retainDatasetCite) {
return {
reasoningContent: parsedThinkReasoningContent,
content: parsedThinkContent,
responseContent: parsedThinkContent,
finishReason: buffer_finishReason
};
}
// 完全未命中尾标签,还是 think 阶段。
return {
reasoningContent: content,
content: ''
};
})();
// 缓存包含 [ 的字符串,直到超出 maxCiteBufferLength 再一次性返回
const parseCite = (text: string) => {
// 结束时,返回所有剩余内容
if (isStreamEnd) {
const content = citeBuffer + text;
return {
content: removeDatasetCiteText(content, false)
};
}
// 新内容包含 [,初始化缓冲数据
if (text.includes('[')) {
const index = text.indexOf('[');
const beforeContent = citeBuffer + text.slice(0, index);
citeBuffer = text.slice(index);
// beforeContent 可能是:普通字符串,带 [ 的字符串
return {
content: removeDatasetCiteText(beforeContent, false)
};
}
// 处于 Cite 缓冲区,判断是否满足条件
else if (citeBuffer) {
citeBuffer += text;
// 检查缓冲区长度是否达到完整Quote长度或已经流结束
if (citeBuffer.length >= maxCiteBufferLength) {
const content = removeDatasetCiteText(citeBuffer, false);
citeBuffer = '';
return {
content
};
} else {
// 暂时不返回内容
return { content: '' };
}
}
return {
content: text
};
};
const { content: pasedCiteContent } = parseCite(parsedThinkContent);
// Parse dataset cite
if (retainDatasetCite) {
return {
reasoningContent: parsedThinkReasoningContent,
content: parsedThinkContent,
responseContent: parsedThinkContent,
finishReason
responseContent: pasedCiteContent,
finishReason: buffer_finishReason
};
}
})();
// 缓存包含 [ 的字符串,直到超出 maxCiteBufferLength 再一次性返回
const parseCite = (text: string) => {
// 结束时,返回所有剩余内容
if (isStreamEnd) {
const content = citeBuffer + text;
return {
content: removeDatasetCiteText(content, false)
};
}
buffer_reasoningContent += data.reasoningContent;
buffer_content += data.content;
// 新内容包含 [,初始化缓冲数据
if (text.includes('[')) {
const index = text.indexOf('[');
const beforeContent = citeBuffer + text.slice(0, index);
citeBuffer = text.slice(index);
// beforeContent 可能是:普通字符串,带 [ 的字符串
return {
content: removeDatasetCiteText(beforeContent, false)
};
}
// 处于 Cite 缓冲区,判断是否满足条件
else if (citeBuffer) {
citeBuffer += text;
// 检查缓冲区长度是否达到完整Quote长度或已经流结束
if (citeBuffer.length >= maxCiteBufferLength) {
const content = removeDatasetCiteText(citeBuffer, false);
citeBuffer = '';
return {
content
};
} else {
// 暂时不返回内容
return { content: '' };
}
}
return {
content: text
};
};
const { content: pasedCiteContent } = parseCite(parsedThinkContent);
return data;
};
const getResponseData = () => {
return {
reasoningContent: parsedThinkReasoningContent,
content: parsedThinkContent,
responseContent: pasedCiteContent,
finishReason
finish_reason: buffer_finishReason,
usage: buffer_usage,
reasoningContent: buffer_reasoningContent,
content: buffer_content
};
};
const updateFinishReason = (finishReason: CompletionFinishReason) => {
buffer_finishReason = finishReason;
};
return {
parsePart
parsePart,
getResponseData,
updateFinishReason
};
};

View File

@@ -11,40 +11,6 @@ export const beforeUpdateAppFormat = <T extends AppSchema['modules'] | undefined
nodes: T;
isPlugin: boolean;
}) => {
if (nodes) {
// Check dataset maxTokens
if (isPlugin) {
let maxTokens = 16000;
nodes.forEach((item) => {
if (
item.flowNodeType === FlowNodeTypeEnum.chatNode ||
item.flowNodeType === FlowNodeTypeEnum.tools
) {
const model =
item.inputs.find((item) => item.key === NodeInputKeyEnum.aiModel)?.value || '';
const chatModel = getLLMModel(model);
const quoteMaxToken = chatModel.quoteMaxToken || 16000;
maxTokens = Math.max(maxTokens, quoteMaxToken);
}
});
nodes.forEach((item) => {
if (item.flowNodeType === FlowNodeTypeEnum.datasetSearchNode) {
item.inputs.forEach((input) => {
if (input.key === NodeInputKeyEnum.datasetMaxTokens) {
const val = input.value as number;
if (val > maxTokens) {
input.value = maxTokens;
}
}
});
}
});
}
}
return {
nodes
};

View File

@@ -1,7 +1,7 @@
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
import { type ToolType } from '@fastgpt/global/core/app/type';
import { type McpToolConfigType } from '@fastgpt/global/core/app/type';
import { addLog } from '../../common/system/log';
import { retryFn } from '@fastgpt/global/common/system/utils';
@@ -41,7 +41,7 @@ export class MCPClient {
* Get available tools list
* @returns List of tools
*/
public async getTools(): Promise<ToolType[]> {
public async getTools(): Promise<McpToolConfigType[]> {
try {
const client = await this.getConnection();
const response = await client.listTools();

View File

@@ -30,8 +30,7 @@ import { Types } from 'mongoose';
community: community-id
commercial: commercial-id
*/
export async function splitCombinePluginId(id: string) {
export function splitCombineToolId(id: string) {
const splitRes = id.split('-');
if (splitRes.length === 1) {
// app id
@@ -42,7 +41,7 @@ export async function splitCombinePluginId(id: string) {
}
const [source, pluginId] = id.split('-') as [PluginSourceEnum, string];
if (!source || !pluginId) return Promise.reject('pluginId not found');
if (!source || !pluginId) throw new Error('pluginId not found');
return { source, pluginId: id };
}
@@ -54,7 +53,7 @@ const getSystemPluginTemplateById = async (
versionId?: string
): Promise<ChildAppType> => {
const item = getSystemPluginTemplates().find((plugin) => plugin.id === pluginId);
if (!item) return Promise.reject(PluginErrEnum.unAuth);
if (!item) return Promise.reject(PluginErrEnum.unExist);
const plugin = cloneDeep(item);
@@ -64,10 +63,10 @@ const getSystemPluginTemplateById = async (
{ pluginId: plugin.id, 'customConfig.associatedPluginId': plugin.associatedPluginId },
'associatedPluginId'
).lean();
if (!systemPlugin) return Promise.reject(PluginErrEnum.unAuth);
if (!systemPlugin) return Promise.reject(PluginErrEnum.unExist);
const app = await MongoApp.findById(plugin.associatedPluginId).lean();
if (!app) return Promise.reject(PluginErrEnum.unAuth);
if (!app) return Promise.reject(PluginErrEnum.unExist);
const version = versionId
? await getAppVersionById({
@@ -77,6 +76,12 @@ const getSystemPluginTemplateById = async (
})
: await getAppLatestVersion(plugin.associatedPluginId, app);
if (!version.versionId) return Promise.reject('App version not found');
const isLatest = version.versionId
? await checkIsLatestVersion({
appId: plugin.associatedPluginId,
versionId: version.versionId
})
: true;
return {
...plugin,
@@ -85,12 +90,19 @@ const getSystemPluginTemplateById = async (
edges: version.edges,
chatConfig: version.chatConfig
},
version: versionId || String(version.versionId),
version: versionId ? version?.versionId : '',
versionLabel: version?.versionName,
isLatestVersion: isLatest,
teamId: String(app.teamId),
tmbId: String(app.tmbId)
};
}
return plugin;
return {
...plugin,
version: undefined,
isLatestVersion: true
};
};
/* Format plugin to workflow preview node data */
@@ -102,11 +114,11 @@ export async function getChildAppPreviewNode({
versionId?: string;
}): Promise<FlowNodeTemplateType> {
const app: ChildAppType = await (async () => {
const { source, pluginId } = await splitCombinePluginId(appId);
const { source, pluginId } = splitCombineToolId(appId);
if (source === PluginSourceEnum.personal) {
const item = await MongoApp.findById(appId).lean();
if (!item) return Promise.reject('plugin not found');
if (!item) return Promise.reject(PluginErrEnum.unExist);
const version = await getAppVersionById({ appId, versionId, app: item });
@@ -132,8 +144,8 @@ export async function getChildAppPreviewNode({
},
templateType: FlowNodeTemplateTypeEnum.teamApp,
version: version.versionId,
versionLabel: version?.versionName || '',
version: versionId ? version?.versionId : '',
versionLabel: version?.versionName,
isLatestVersion: isLatest,
originCost: 0,
@@ -142,7 +154,7 @@ export async function getChildAppPreviewNode({
pluginOrder: 0
};
} else {
return getSystemPluginTemplateById(pluginId);
return getSystemPluginTemplateById(pluginId, versionId);
}
})();
@@ -216,12 +228,12 @@ export async function getChildAppRuntimeById(
id: string,
versionId?: string
): Promise<PluginRuntimeType> {
const app: ChildAppType = await (async () => {
const { source, pluginId } = await splitCombinePluginId(id);
const app = await (async () => {
const { source, pluginId } = splitCombineToolId(id);
if (source === PluginSourceEnum.personal) {
const item = await MongoApp.findById(id).lean();
if (!item) return Promise.reject('plugin not found');
if (!item) return Promise.reject(PluginErrEnum.unExist);
const version = await getAppVersionById({
appId: id,
@@ -244,8 +256,6 @@ export async function getChildAppRuntimeById(
},
templateType: FlowNodeTemplateTypeEnum.teamApp,
// 用不到
version: item?.pluginData?.nodeVersion,
originCost: 0,
currentCost: 0,
hasTokenFee: false,

View File

@@ -1,6 +1,6 @@
import { type ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
import { type PluginRuntimeType } from '@fastgpt/global/core/plugin/type';
import { splitCombinePluginId } from './controller';
import { splitCombineToolId } from './controller';
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
/*
@@ -20,7 +20,7 @@ export const computedPluginUsage = async ({
childrenUsage: ChatNodeUsageType[];
error?: boolean;
}) => {
const { source } = await splitCombinePluginId(plugin.id);
const { source } = splitCombineToolId(plugin.id);
const childrenUsages = childrenUsage.reduce((sum, item) => sum + (item.totalPoints || 0), 0);
if (source !== PluginSourceEnum.personal) {

View File

@@ -119,6 +119,7 @@ const AppSchema = new Schema({
defaultPermission: Number
});
AppSchema.index({ type: 1 });
AppSchema.index({ teamId: 1, updateTime: -1 });
AppSchema.index({ teamId: 1, type: 1 });
AppSchema.index(

View File

@@ -1,14 +1,13 @@
import { MongoDataset } from '../dataset/schema';
import { getEmbeddingModel } from '../ai/model';
import {
AppNodeFlowNodeTypeMap,
FlowNodeTypeEnum
} from '@fastgpt/global/core/workflow/node/constant';
import { FlowNodeTypeEnum } from '@fastgpt/global/core/workflow/node/constant';
import { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
import type { StoreNodeItemType } from '@fastgpt/global/core/workflow/type/node';
import { MongoAppVersion } from './version/schema';
import { checkIsLatestVersion } from './version/controller';
import { Types } from '../../common/mongo';
import { getChildAppPreviewNode, splitCombineToolId } from './plugin/controller';
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
import { authAppByTmbId } from '../../support/permission/app/auth';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { getErrText } from '@fastgpt/global/common/error/utils';
export async function listAppDatasetDataByTeamIdAndDatasetIds({
teamId,
@@ -33,52 +32,58 @@ export async function listAppDatasetDataByTeamIdAndDatasetIds({
export async function rewriteAppWorkflowToDetail({
nodes,
teamId,
isRoot
isRoot,
ownerTmbId
}: {
nodes: StoreNodeItemType[];
teamId: string;
isRoot: boolean;
ownerTmbId: string;
}) {
const datasetIdSet = new Set<string>();
// Add node(App Type) versionlabel and latest sign
const appNodes = nodes.filter((node) => AppNodeFlowNodeTypeMap[node.flowNodeType]);
const versionIds = appNodes
.filter((node) => node.version && Types.ObjectId.isValid(node.version))
.map((node) => node.version);
if (versionIds.length > 0) {
const versionDataList = await MongoAppVersion.find(
{
_id: { $in: versionIds }
},
'_id versionName appId time'
).lean();
/* Add node(App Type) versionlabel and latest sign ==== */
await Promise.all(
nodes.map(async (node) => {
if (!node.pluginId) return;
const { source } = splitCombineToolId(node.pluginId);
const versionMap: Record<string, any> = {};
try {
const [preview] = await Promise.all([
getChildAppPreviewNode({
appId: node.pluginId,
versionId: node.version
}),
...(source === PluginSourceEnum.personal
? [
authAppByTmbId({
tmbId: ownerTmbId,
appId: node.pluginId,
per: ReadPermissionVal
})
]
: [])
]);
const isLatestChecks = await Promise.all(
versionDataList.map(async (version) => {
const isLatest = await checkIsLatestVersion({
appId: version.appId,
versionId: version._id
});
return { versionId: String(version._id), isLatest };
})
);
const isLatestMap = new Map(isLatestChecks.map((item) => [item.versionId, item.isLatest]));
versionDataList.forEach((version) => {
versionMap[String(version._id)] = version;
});
appNodes.forEach((node) => {
if (!node.version) return;
const versionData = versionMap[String(node.version)];
if (versionData) {
node.versionLabel = versionData.versionName;
node.isLatestVersion = isLatestMap.get(String(node.version)) || false;
node.pluginData = {
diagram: preview.diagram,
userGuide: preview.userGuide,
courseUrl: preview.courseUrl,
name: preview.name,
avatar: preview.avatar
};
node.versionLabel = preview.versionLabel;
node.isLatestVersion = preview.isLatestVersion;
node.version = preview.version;
} catch (error) {
node.pluginData = {
error: getErrText(error)
};
}
});
}
})
);
/* Add node(App Type) versionlabel and latest sign ==== */
// Get all dataset ids from nodes
nodes.forEach((node) => {

View File

@@ -68,6 +68,9 @@ export const checkIsLatestVersion = async ({
appId: string;
versionId: string;
}) => {
if (!Types.ObjectId.isValid(versionId)) {
return false;
}
const version = await MongoAppVersion.findOne(
{
appId,

View File

@@ -61,6 +61,7 @@ const ChatItemSchema = new Schema({
type: Array,
default: []
},
errorMsg: String,
userGoodFeedback: {
type: String
},

View File

@@ -34,6 +34,10 @@ const ChatSchema = new Schema({
ref: AppCollectionName,
required: true
},
createTime: {
type: Date,
default: () => new Date()
},
updateTime: {
type: Date,
default: () => new Date()

View File

@@ -32,6 +32,7 @@ type Props = {
content: [UserChatItemType & { dataId?: string }, AIChatItemType & { dataId?: string }];
metadata?: Record<string, any>;
durationSeconds: number; //s
errorMsg?: string;
};
export async function saveChat({
@@ -50,6 +51,7 @@ export async function saveChat({
outLinkUid,
content,
durationSeconds,
errorMsg,
metadata = {}
}: Props) {
if (!chatId || chatId === 'NO_RECORD_HISTORIES') return;
@@ -104,7 +106,8 @@ export async function saveChat({
return {
...item,
[DispatchNodeResponseKeyEnum.nodeResponse]: nodeResponse,
durationSeconds
durationSeconds,
errorMsg
};
}
return item;

View File

@@ -65,8 +65,8 @@ export const filterGPTMessageByMaxContext = async ({
if (lastMessage.role === ChatCompletionRequestMessageRoleEnum.User) {
const tokens = await countGptMessagesTokens([lastMessage, ...tmpChats]);
maxContext -= tokens;
// 该轮信息整体 tokens 超出范围,这段数据不要了
if (maxContext < 0) {
// 该轮信息整体 tokens 超出范围,这段数据不要了。但是至少保证一组。
if (maxContext < 0 && chats.length > 0) {
break;
}

View File

@@ -2,7 +2,9 @@ import type {
APIFileListResponse,
ApiFileReadContentResponse,
APIFileReadResponse,
APIFileServer
ApiDatasetDetailResponse,
APIFileServer,
APIFileItem
} from '@fastgpt/global/core/dataset/apiDataset';
import axios, { type Method } from 'axios';
import { addLog } from '../../../common/system/log';
@@ -89,7 +91,7 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
`/v1/file/list`,
{
searchKey,
parentId
parentId: parentId || apiServer.basePath
},
'POST'
);
@@ -144,7 +146,8 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
tmbId,
url: previewUrl,
relatedId: apiFileId,
customPdfParse
customPdfParse,
getFormatText: true
});
return {
title,
@@ -164,9 +167,34 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
return url;
};
/**
 * Fetch the detail record of one file from the custom API dataset server.
 *
 * @param apiFileId - Id of the file, sent as the `id` field of the `/v1/file/detail` request.
 * @returns The file's id, name and parent id; a `null` parent id is returned as an empty string.
 * @throws Rejects with 'File not found' when the request resolves to an empty value.
 */
const getFileDetail = async ({
  apiFileId
}: {
  apiFileId: string;
}): Promise<ApiDatasetDetailResponse> => {
  const detail = await request<ApiDatasetDetailResponse>(
    `/v1/file/detail`,
    { id: apiFileId },
    'GET'
  );

  // Guard clause: nothing came back for this id.
  if (!detail) {
    return Promise.reject('File not found');
  }

  return {
    id: detail.id,
    name: detail.name,
    parentId: detail.parentId === null ? '' : detail.parentId
  };
};
return {
getFileContent,
listFiles,
getFilePreviewUrl
getFilePreviewUrl,
getFileDetail
};
};

View File

@@ -0,0 +1,27 @@
import type {
APIFileServer,
YuqueServer,
FeishuServer
} from '@fastgpt/global/core/dataset/apiDataset';
import { useApiDatasetRequest } from './api';
import { useYuqueDatasetRequest } from '../yuqueDataset/api';
import { useFeishuDatasetRequest } from '../feishuDataset/api';
/**
 * Build the dataset request client that matches the configured third-party server.
 *
 * Servers are checked in a fixed order: custom API server, then Yuque, then Feishu.
 * The first one that is set decides which client is created.
 *
 * @param data - Optional server configurations.
 * @returns The request client created for the first configured server.
 * @throws Rejects with 'Can not find api dataset server' when no server is configured.
 */
export const getApiDatasetRequest = async (data: {
  apiServer?: APIFileServer;
  yuqueServer?: YuqueServer;
  feishuServer?: FeishuServer;
}) => {
  if (data.apiServer) {
    return useApiDatasetRequest({ apiServer: data.apiServer });
  }

  if (data.yuqueServer) {
    return useYuqueDatasetRequest({ yuqueServer: data.yuqueServer });
  }

  if (data.feishuServer) {
    return useFeishuDatasetRequest({ feishuServer: data.feishuServer });
  }

  return Promise.reject('Can not find api dataset server');
};

View File

@@ -1,30 +0,0 @@
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import { type FeishuServer, type YuqueServer } from '@fastgpt/global/core/dataset/apiDataset';
export enum ProApiDatasetOperationTypeEnum {
LIST = 'list',
READ = 'read',
CONTENT = 'content',
DETAIL = 'detail'
}
export type ProApiDatasetCommonParams = {
feishuServer?: FeishuServer;
yuqueServer?: YuqueServer;
};
export type GetProApiDatasetFileListParams = ProApiDatasetCommonParams & {
parentId?: ParentIdType;
};
export type GetProApiDatasetFileContentParams = ProApiDatasetCommonParams & {
apiFileId: string;
};
export type GetProApiDatasetFilePreviewUrlParams = ProApiDatasetCommonParams & {
apiFileId: string;
};
export type GetProApiDatasetFileDetailParams = ProApiDatasetCommonParams & {
apiFileId: string;
};

View File

@@ -34,15 +34,17 @@ import { getTrainingModeByCollection } from './utils';
import {
computeChunkSize,
computeChunkSplitter,
computeParagraphChunkDeep,
getLLMMaxChunkSize
} from '@fastgpt/global/core/dataset/training/utils';
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';
export const createCollectionAndInsertData = async ({
dataset,
rawText,
relatedId,
createCollectionParams,
isQAImport = false,
backupParse = false,
billId,
session
}: {
@@ -50,8 +52,8 @@ export const createCollectionAndInsertData = async ({
rawText: string;
relatedId?: string;
createCollectionParams: CreateOneCollectionParams;
backupParse?: boolean;
isQAImport?: boolean;
billId?: string;
session?: ClientSession;
}) => {
@@ -73,15 +75,33 @@ export const createCollectionAndInsertData = async ({
llmModel: getLLMModel(dataset.agentModel)
});
const chunkSplitter = computeChunkSplitter(createCollectionParams);
const paragraphChunkDeep = computeParagraphChunkDeep(createCollectionParams);
if (
trainingType === DatasetCollectionDataProcessModeEnum.qa ||
trainingType === DatasetCollectionDataProcessModeEnum.backup
) {
delete createCollectionParams.chunkTriggerType;
delete createCollectionParams.chunkTriggerMinSize;
delete createCollectionParams.dataEnhanceCollectionName;
delete createCollectionParams.imageIndex;
delete createCollectionParams.autoIndexes;
delete createCollectionParams.indexSize;
delete createCollectionParams.qaPrompt;
}
// 1. split chunks
const chunks = rawText2Chunks({
rawText,
chunkTriggerType: createCollectionParams.chunkTriggerType,
chunkTriggerMinSize: createCollectionParams.chunkTriggerMinSize,
chunkSize,
paragraphChunkDeep,
paragraphChunkMinSize: createCollectionParams.paragraphChunkMinSize,
maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)),
overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : [],
isQAImport
backupParse
});
// 2. auth limit
@@ -102,6 +122,7 @@ export const createCollectionAndInsertData = async ({
const { _id: collectionId } = await createOneCollection({
...createCollectionParams,
trainingType,
paragraphChunkDeep,
chunkSize,
chunkSplitter,
@@ -157,6 +178,10 @@ export const createCollectionAndInsertData = async ({
billId: traingBillId,
data: chunks.map((item, index) => ({
...item,
indexes: item.indexes?.map((text) => ({
type: DatasetDataIndexTypeEnum.custom,
text
})),
chunkIndex: index
})),
session
@@ -198,46 +223,19 @@ export type CreateOneCollectionParams = CreateDatasetCollectionParams & {
tmbId: string;
session?: ClientSession;
};
export async function createOneCollection({
teamId,
tmbId,
name,
parentId,
datasetId,
type,
export async function createOneCollection({ session, ...props }: CreateOneCollectionParams) {
const {
teamId,
parentId,
datasetId,
tags,
createTime,
updateTime,
hashRawText,
rawTextLength,
metadata = {},
tags,
nextSyncTime,
fileId,
rawLink,
externalFileId,
externalFileUrl,
apiFileId,
// Parse settings
customPdfParse,
imageIndex,
autoIndexes,
// Chunk settings
trainingType,
chunkSettingMode,
chunkSplitMode,
chunkSize,
indexSize,
chunkSplitter,
qaPrompt,
session
}: CreateOneCollectionParams) {
fileId,
rawLink,
externalFileId,
externalFileUrl,
apiFileId
} = props;
// Create collection tags
const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session });
@@ -245,41 +243,18 @@ export async function createOneCollection({
const [collection] = await MongoDatasetCollection.create(
[
{
...props,
teamId,
tmbId,
parentId: parentId || null,
datasetId,
name,
type,
rawTextLength,
hashRawText,
tags: collectionTags,
metadata,
createTime,
updateTime,
nextSyncTime,
...(fileId ? { fileId } : {}),
...(rawLink ? { rawLink } : {}),
...(externalFileId ? { externalFileId } : {}),
...(externalFileUrl ? { externalFileUrl } : {}),
...(apiFileId ? { apiFileId } : {}),
// Parse settings
customPdfParse,
imageIndex,
autoIndexes,
// Chunk settings
trainingType,
chunkSettingMode,
chunkSplitMode,
chunkSize,
indexSize,
chunkSplitter,
qaPrompt
...(apiFileId ? { apiFileId } : {})
}
],
{ session, ordered: true }

View File

@@ -34,9 +34,9 @@ const DatasetDataTextSchema = new Schema({
try {
DatasetDataTextSchema.index(
{ teamId: 1, datasetId: 1, fullTextToken: 'text' },
{ teamId: 1, fullTextToken: 'text' },
{
name: 'teamId_1_datasetId_1_fullTextToken_text',
name: 'teamId_1_fullTextToken_text',
default_language: 'none'
}
);

View File

@@ -0,0 +1,208 @@
import type {
APIFileItem,
ApiFileReadContentResponse,
ApiDatasetDetailResponse,
FeishuServer
} from '@fastgpt/global/core/dataset/apiDataset';
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import axios, { type Method } from 'axios';
import { addLog } from '../../../common/system/log';
// Response envelope handled by `checkRes`, which returns only the `data` field.
type ResponseDataType = {
success: boolean;
message: string;
data: any;
};
// Payload of `GET /open-apis/drive/v1/files` as read by `listFiles`.
type FeishuFileListResponse = {
files: {
// Used as the file id in the listing.
token: string;
// Used as the parent id in the listing.
parent_token: string;
name: string;
// `listFiles` keeps only 'folder' and 'docx' entries.
type: string;
// Multiplied by 1000 before building a Date — presumably Unix seconds; TODO confirm.
modified_time: number;
// Multiplied by 1000 before building a Date — presumably Unix seconds; TODO confirm.
created_time: number;
url: string;
owner_id: string;
}[];
// When true, `listFiles` requests the next page using `next_page_token`.
has_more: boolean;
next_page_token: string;
};
// Feishu API host; can be overridden with the FEISHU_BASE_URL environment variable.
const feishuBaseUrl = process.env.FEISHU_BASE_URL || 'https://open.feishu.cn';
export const useFeishuDatasetRequest = ({ feishuServer }: { feishuServer: FeishuServer }) => {
const instance = axios.create({
baseURL: feishuBaseUrl,
timeout: 60000
});
// Request interceptor: requests that carry no Authorization header get a tenant access
// token, obtained with the configured app id and app secret, attached before being sent.
instance.interceptors.request.use(async (config) => {
  if (!config.headers.Authorization) {
    const { data } = await axios.post<{ tenant_access_token: string }>(
      `${feishuBaseUrl}/open-apis/auth/v3/tenant_access_token/internal`,
      {
        app_id: feishuServer.appId,
        app_secret: feishuServer.appSecret
      }
    );

    // Fail fast with a clear message instead of sending "Bearer undefined",
    // which would only surface later as an opaque authorization failure.
    if (!data?.tenant_access_token) {
      throw new Error('Failed to get feishu tenant access token');
    }

    config.headers['Authorization'] = `Bearer ${data.tenant_access_token}`;
    config.headers['Content-Type'] = 'application/json; charset=utf-8';
  }
  return config;
});
/**
 * Check a response body and unwrap its `data` field.
 *
 * @param data - Parsed response body.
 * @returns The `data` field of the body.
 * @throws Rejects with '服务器异常' when the body is undefined.
 */
const checkRes = (data: ResponseDataType) => {
  if (data === undefined) {
    // Log under the Feishu label so the entry is attributed to this client.
    addLog.info('feishu dataset data is empty');
    return Promise.reject('服务器异常');
  }
  return data.data;
};
/**
 * Turn any request failure into a rejected promise with a normalised reason.
 *
 * Reason selection, first match wins:
 *  - falsy error            -> `{ message: '未知错误' }`
 *  - string error           -> `{ message: err }`
 *  - string `err.message`   -> `{ message: err.message }`
 *  - string `err.data`      -> `{ message: err.data }`
 *  - truthy `err.response.data` -> that value as-is
 *  - anything else          -> the original error
 *
 * @param err - The caught error in whatever shape it arrived.
 * @returns A promise that always rejects.
 */
const responseError = (err: any) => {
  console.log('error->', '请求错误', err);

  const reason = (() => {
    if (!err) return { message: '未知错误' };
    if (typeof err === 'string') return { message: err };
    if (typeof err.message === 'string') return { message: err.message };
    if (typeof err.data === 'string') return { message: err.data };
    return err?.response?.data ? err.response.data : err;
  })();

  return Promise.reject(reason);
};
/**
 * Send one request through the shared axios instance.
 *
 * Fields of `data` whose value is `undefined` are deleted from the object in place.
 * For 'POST' and 'PUT' the payload travels as the request body; for every other
 * method it is sent as query parameters.
 *
 * @param url - Path relative to the instance base URL.
 * @param data - Payload object.
 * @param method - HTTP method.
 * @returns The value unwrapped by `checkRes`; failures are routed through `responseError`.
 */
const request = <T>(url: string, data: any, method: Method): Promise<T> => {
  // Drop undefined fields
  for (const field in data) {
    if (data[field] === undefined) {
      delete data[field];
    }
  }

  const sendsBody = ['POST', 'PUT'].includes(method);

  return instance
    .request({
      url,
      method,
      data: sendsBody ? data : undefined,
      params: sendsBody ? undefined : data
    })
    .then((res) => checkRes(res.data))
    .catch((err) => responseError(err));
};
/**
 * List the folders and docx documents directly inside a Feishu folder.
 *
 * Pages of up to 200 entries are requested one after another until the API
 * reports that no more pages exist.
 *
 * @param parentId - Folder token to list; falls back to `feishuServer.folderToken` when empty.
 * @returns Entries of type 'folder' or 'docx', mapped to API file items.
 */
const listFiles = async ({ parentId }: { parentId?: ParentIdType }): Promise<APIFileItem[]> => {
  const collected: FeishuFileListResponse['files'] = [];
  let pageToken: string | undefined;

  while (true) {
    const page = await request<FeishuFileListResponse>(
      `/open-apis/drive/v1/files`,
      {
        folder_token: parentId || feishuServer.folderToken,
        page_size: 200,
        page_token: pageToken
      },
      'GET'
    );

    collected.push(...page.files);

    if (!page.has_more) break;
    pageToken = page.next_page_token;
  }

  const supportedTypes = ['folder', 'docx'];

  return collected
    .filter((file) => supportedTypes.includes(file.type))
    .map((file) => {
      const isFolder = file.type === 'folder';
      return {
        id: file.token,
        parentId: file.parent_token,
        name: file.name,
        type: isFolder ? ('folder' as const) : ('file' as const),
        hasChild: isFolder,
        updateTime: new Date(file.modified_time * 1000),
        createTime: new Date(file.created_time * 1000)
      };
    });
};
/**
 * Read the raw text and the title of a Feishu docx document.
 *
 * The raw content and the document metadata are requested concurrently.
 *
 * @param apiFileId - Document id used in both request paths.
 * @returns The document title and its raw text.
 */
const getFileContent = async ({
  apiFileId
}: {
  apiFileId: string;
}): Promise<ApiFileReadContentResponse> => {
  const rawContentRequest = request<{ content: string }>(
    `/open-apis/docx/v1/documents/${apiFileId}/raw_content`,
    {},
    'GET'
  );
  const documentRequest = request<{ document: { title: string } }>(
    `/open-apis/docx/v1/documents/${apiFileId}`,
    {},
    'GET'
  );

  const [rawContent, documentInfo] = await Promise.all([rawContentRequest, documentRequest]);

  return {
    title: documentInfo.document?.title,
    rawText: rawContent.content
  };
};
/**
 * Resolve the preview URL of a Feishu docx document.
 *
 * @param apiFileId - Document token to look up.
 * @returns The `url` of the first meta entry returned by the batch query.
 * @throws Rejects with a descriptive message when the query returns no meta entry,
 *         instead of failing with a TypeError on `metas[0]`.
 */
const getFilePreviewUrl = async ({ apiFileId }: { apiFileId: string }): Promise<string> => {
  const result = await request<{ metas?: { url: string }[] }>(
    `/open-apis/drive/v1/metas/batch_query`,
    {
      request_docs: [
        {
          doc_token: apiFileId,
          doc_type: 'docx'
        }
      ],
      with_url: true
    },
    'POST'
  );

  const meta = result?.metas?.[0];
  if (!meta) {
    return Promise.reject('Can not get feishu file preview url');
  }

  return meta.url;
};
/**
 * Get the detail record of a Feishu docx document.
 *
 * @param apiFileId - Document id used in the request path and echoed back as `id`.
 * @returns The document title as `name`, the given id, and a `null` parent id.
 */
const getFileDetail = async ({
  apiFileId
}: {
  apiFileId: string;
}): Promise<ApiDatasetDetailResponse> => {
  const response = await request<{ document: { title: string } }>(
    `/open-apis/docx/v1/documents/${apiFileId}`,
    {},
    'GET'
  );
  const name = response.document?.title;

  return {
    name,
    parentId: null,
    id: apiFileId
  };
};
return {
getFileContent,
listFiles,
getFilePreviewUrl,
getFileDetail
};
};

View File

@@ -1,8 +1,10 @@
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import {
ChunkTriggerConfigTypeEnum,
DatasetSourceReadTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { readFileContentFromMongo } from '../../common/file/gridfs/controller';
import { urlsFetch } from '../../common/string/cheerio';
import { parseCsvTable2Chunks } from './training/utils';
import { type TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import axios from 'axios';
import { readRawContentByFileBuffer } from '../../common/file/read/utils';
@@ -12,19 +14,22 @@ import {
type FeishuServer,
type YuqueServer
} from '@fastgpt/global/core/dataset/apiDataset';
import { useApiDatasetRequest } from './apiDataset/api';
import { getApiDatasetRequest } from './apiDataset';
import Papa from 'papaparse';
export const readFileRawTextByUrl = async ({
teamId,
tmbId,
url,
customPdfParse,
getFormatText,
relatedId
}: {
teamId: string;
tmbId: string;
url: string;
customPdfParse?: boolean;
getFormatText?: boolean;
relatedId: string; // externalFileId / apiFileId
}) => {
const response = await axios({
@@ -38,7 +43,7 @@ export const readFileRawTextByUrl = async ({
const { rawText } = await readRawContentByFileBuffer({
customPdfParse,
isQAImport: false,
getFormatText,
extension,
teamId,
tmbId,
@@ -62,21 +67,21 @@ export const readDatasetSourceRawText = async ({
tmbId,
type,
sourceId,
isQAImport,
selector,
externalFileId,
apiServer,
feishuServer,
yuqueServer,
customPdfParse
customPdfParse,
getFormatText
}: {
teamId: string;
tmbId: string;
type: DatasetSourceReadTypeEnum;
sourceId: string;
customPdfParse?: boolean;
getFormatText?: boolean;
isQAImport?: boolean; // csv data
selector?: string; // link selector
externalFileId?: string; // external file dataset
apiServer?: APIFileServer; // api dataset
@@ -92,8 +97,8 @@ export const readDatasetSourceRawText = async ({
tmbId,
bucketName: BucketNameEnum.dataset,
fileId: sourceId,
isQAImport,
customPdfParse
customPdfParse,
getFormatText
});
return {
title: filename,
@@ -161,38 +166,82 @@ export const readApiServerFileContent = async ({
title?: string;
rawText: string;
}> => {
if (apiServer) {
return useApiDatasetRequest({ apiServer }).getFileContent({
teamId,
tmbId,
apiFileId,
customPdfParse
});
}
if (feishuServer || yuqueServer) {
return global.getProApiDatasetFileContent({
feishuServer,
return (
await getApiDatasetRequest({
apiServer,
yuqueServer,
apiFileId
});
}
return Promise.reject('No apiServer or feishuServer or yuqueServer');
feishuServer
})
).getFileContent({
teamId,
tmbId,
apiFileId,
customPdfParse
});
};
export const rawText2Chunks = ({
rawText,
isQAImport,
chunkTriggerType = ChunkTriggerConfigTypeEnum.minSize,
chunkTriggerMinSize = 1000,
backupParse,
chunkSize = 512,
...splitProps
}: {
rawText: string;
isQAImport?: boolean;
} & TextSplitProps) => {
if (isQAImport) {
const { chunks } = parseCsvTable2Chunks(rawText);
return chunks;
chunkTriggerType?: ChunkTriggerConfigTypeEnum;
chunkTriggerMinSize?: number; // maxSize from agent model, not store
backupParse?: boolean;
tableParse?: boolean;
} & TextSplitProps): {
q: string;
a: string;
indexes?: string[];
}[] => {
/**
 * Parse a dataset backup CSV into chunks.
 *
 * The first row is skipped. For every following row, column 0 becomes `q`,
 * column 1 becomes `a`, and all remaining columns become `indexes`.
 * Rows where both `q` and `a` are empty are dropped.
 *
 * The file content is deliberately not logged: it can be arbitrarily large.
 */
const parseDatasetBackup2Chunks = (rawText: string) => {
  const csvArr = Papa.parse(rawText).data as string[][];

  const chunks = csvArr
    .slice(1)
    .map((item) => ({
      q: item[0] || '',
      a: item[1] || '',
      indexes: item.slice(2)
    }))
    .filter((item) => item.q || item.a);

  return {
    chunks
  };
};
if (backupParse) {
return parseDatasetBackup2Chunks(rawText).chunks;
}
// Chunk condition
// 1. 选择最大值条件,只有超过了最大值(默认为模型的最大值*0.7),才会触发分块
if (chunkTriggerType === ChunkTriggerConfigTypeEnum.maxSize) {
const textLength = rawText.trim().length;
const maxSize = splitProps.maxSize ? splitProps.maxSize * 0.7 : 16000;
if (textLength < maxSize) {
return [
{
q: rawText,
a: ''
}
];
}
}
// 2. 选择最小值条件,只有超过最小值(手动决定)才会触发分块
if (chunkTriggerType !== ChunkTriggerConfigTypeEnum.forceChunk) {
const textLength = rawText.trim().length;
if (textLength < chunkTriggerMinSize) {
return [{ q: rawText, a: '' }];
}
}
const { chunks } = splitText2Chunks({
@@ -203,6 +252,7 @@ export const rawText2Chunks = ({
return chunks.map((item) => ({
q: item,
a: ''
a: '',
indexes: []
}));
};

View File

@@ -1,10 +1,12 @@
import { getMongoModel, Schema } from '../../common/mongo';
import {
ChunkSettingModeEnum,
ChunkTriggerConfigTypeEnum,
DataChunkSplitModeEnum,
DatasetCollectionDataProcessModeEnum,
DatasetTypeEnum,
DatasetTypeMap
DatasetTypeMap,
ParagraphChunkAIModeEnum
} from '@fastgpt/global/core/dataset/constants';
import {
TeamCollectionName,
@@ -15,12 +17,22 @@ import type { DatasetSchemaType } from '@fastgpt/global/core/dataset/type.d';
export const DatasetCollectionName = 'datasets';
export const ChunkSettings = {
imageIndex: Boolean,
autoIndexes: Boolean,
trainingType: {
type: String,
enum: Object.values(DatasetCollectionDataProcessModeEnum)
},
chunkTriggerType: {
type: String,
enum: Object.values(ChunkTriggerConfigTypeEnum)
},
chunkTriggerMinSize: Number,
dataEnhanceCollectionName: Boolean,
imageIndex: Boolean,
autoIndexes: Boolean,
chunkSettingMode: {
type: String,
enum: Object.values(ChunkSettingModeEnum)
@@ -29,6 +41,12 @@ export const ChunkSettings = {
type: String,
enum: Object.values(DataChunkSplitModeEnum)
},
paragraphChunkAIMode: {
type: String,
enum: Object.values(ParagraphChunkAIModeEnum)
},
paragraphChunkDeep: Number,
paragraphChunkMinSize: Number,
chunkSize: Number,
chunkSplitter: String,
@@ -115,14 +133,13 @@ const DatasetSchema = new Schema({
// abandoned
autoSync: Boolean,
externalReadUrl: {
type: String
},
externalReadUrl: String,
defaultPermission: Number
});
try {
DatasetSchema.index({ teamId: 1 });
DatasetSchema.index({ type: 1 });
} catch (error) {
console.log(error);
}

View File

@@ -27,6 +27,7 @@ import { type ChatItemType } from '@fastgpt/global/core/chat/type';
import type { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
import { datasetSearchQueryExtension } from './utils';
import type { RerankModelItemType } from '@fastgpt/global/core/ai/model.d';
import { addLog } from '../../../common/system/log';
export type SearchDatasetDataProps = {
histories: ChatItemType[];
@@ -474,7 +475,7 @@ export async function searchDatasetData(
).lean()
]);
const set = new Map<string, number>();
const set = new Set<string>();
const formatResult = results
.map((item, index) => {
const collection = collections.find((col) => String(col._id) === String(item.collectionId));
@@ -507,7 +508,7 @@ export async function searchDatasetData(
.filter((item) => {
if (!item) return false;
if (set.has(item.id)) return false;
set.set(item.id, 1);
set.add(item.id);
return true;
})
.map((item, index) => {
@@ -544,113 +545,125 @@ export async function searchDatasetData(
};
}
const searchResults = (
await Promise.all(
datasetIds.map(async (id) => {
return MongoDatasetDataText.aggregate(
[
{
$match: {
teamId: new Types.ObjectId(teamId),
datasetId: new Types.ObjectId(id),
$text: { $search: await jiebaSplit({ text: query }) },
...(filterCollectionIdList
? {
collectionId: {
$in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
}
}
: {}),
...(forbidCollectionIdList && forbidCollectionIdList.length > 0
? {
collectionId: {
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
}
}
: {})
}
},
{
$sort: {
score: { $meta: 'textScore' }
}
},
{
$limit: limit
},
{
$project: {
_id: 1,
collectionId: 1,
dataId: 1,
score: { $meta: 'textScore' }
}
}
],
{
...readFromSecondary
try {
const searchResults = (await MongoDatasetDataText.aggregate(
[
{
$match: {
teamId: new Types.ObjectId(teamId),
$text: { $search: await jiebaSplit({ text: query }) },
datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) },
...(filterCollectionIdList
? {
collectionId: {
$in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
}
}
: {}),
...(forbidCollectionIdList && forbidCollectionIdList.length > 0
? {
collectionId: {
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
}
}
: {})
}
},
{
$sort: {
score: { $meta: 'textScore' }
}
},
{
$limit: limit
},
{
$project: {
_id: 1,
collectionId: 1,
dataId: 1,
score: { $meta: 'textScore' }
}
);
})
)
).flat() as (DatasetDataTextSchemaType & { score: number })[];
// Get data and collections
const [dataList, collections] = await Promise.all([
MongoDatasetData.find(
{
_id: { $in: searchResults.map((item) => item.dataId) }
},
'_id datasetId collectionId updateTime q a chunkIndex indexes',
{ ...readFromSecondary }
).lean(),
MongoDatasetCollection.find(
{
_id: { $in: searchResults.map((item) => item.collectionId) }
},
'_id name fileId rawLink apiFileId externalFileId externalFileUrl',
{ ...readFromSecondary }
).lean()
]);
return {
fullTextRecallResults: searchResults
.map((item, index) => {
const collection = collections.find(
(col) => String(col._id) === String(item.collectionId)
);
if (!collection) {
console.log('Collection is not found', item);
return;
}
const data = dataList.find((data) => String(data._id) === String(item.dataId));
if (!data) {
console.log('Data is not found', item);
return;
}
],
{
...readFromSecondary
}
)) as (DatasetDataTextSchemaType & { score: number })[];
return {
id: String(data._id),
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
updateTime: data.updateTime,
q: data.q,
a: data.a,
chunkIndex: data.chunkIndex,
indexes: data.indexes,
...getCollectionSourceData(collection),
score: [
{
type: SearchScoreTypeEnum.fullText,
value: item.score || 0,
index
}
]
};
})
.filter(Boolean) as SearchDataResponseItemType[],
tokenLen: 0
};
// Get data and collections
const [dataList, collections] = await Promise.all([
MongoDatasetData.find(
{
_id: { $in: searchResults.map((item) => item.dataId) }
},
'_id datasetId collectionId updateTime q a chunkIndex indexes',
{ ...readFromSecondary }
).lean(),
MongoDatasetCollection.find(
{
_id: { $in: searchResults.map((item) => item.collectionId) }
},
'_id name fileId rawLink apiFileId externalFileId externalFileUrl',
{ ...readFromSecondary }
).lean()
]);
return {
fullTextRecallResults: searchResults
.map((item, index) => {
const collection = collections.find(
(col) => String(col._id) === String(item.collectionId)
);
if (!collection) {
console.log('Collection is not found', item);
return;
}
const data = dataList.find((data) => String(data._id) === String(item.dataId));
if (!data) {
console.log('Data is not found', item);
return;
}
return {
id: String(data._id),
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
updateTime: data.updateTime,
q: data.q,
a: data.a,
chunkIndex: data.chunkIndex,
indexes: data.indexes,
...getCollectionSourceData(collection),
score: [
{
type: SearchScoreTypeEnum.fullText,
value: item.score || 0,
index
}
]
};
})
.filter((item) => {
if (!item) return false;
return true;
})
.map((item, index) => {
if (!item) return;
return {
...item,
score: item.score.map((item) => ({ ...item, index }))
};
}) as SearchDataResponseItemType[],
tokenLen: 0
};
} catch (error) {
addLog.error('Full text search error', error);
return {
fullTextRecallResults: [],
tokenLen: 0
};
}
};
const multiQueryRecall = async ({
embeddingLimit,

View File

@@ -1,6 +1,5 @@
export enum ImportDataSourceEnum {
fileLocal = 'fileLocal',
fileLink = 'fileLink',
fileCustom = 'fileCustom',
tableLocal = 'tableLocal'
fileCustom = 'fileCustom'
}

View File

@@ -1,16 +0,0 @@
import Papa from 'papaparse';
export const parseCsvTable2Chunks = (rawText: string) => {
const csvArr = Papa.parse(rawText).data as string[][];
const chunks = csvArr
.map((item) => ({
q: item[0] || '',
a: item[1] || ''
}))
.filter((item) => item.q || item.a);
return {
chunks
};
};

View File

@@ -0,0 +1,304 @@
import type {
APIFileItem,
ApiFileReadContentResponse,
YuqueServer,
ApiDatasetDetailResponse
} from '@fastgpt/global/core/dataset/apiDataset';
import axios, { type Method } from 'axios';
import { addLog } from '../../../common/system/log';
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
// Response envelope handled by `checkRes`, which returns only the `data` field.
type ResponseDataType = {
success: boolean;
message: string;
data: any;
};
// Items returned by `GET /api/v2/groups/{userId}/repos`; `listFiles` maps each one to a folder.
type YuqueRepoListResponse = {
id: string;
name: string;
title: string;
book_id: string | null;
type: string;
updated_at: Date;
created_at: Date;
slug?: string;
}[];
// Items returned by `GET /api/v2/repos/{repoId}/toc`.
type YuqueTocListResponse = {
uuid: string;
// 'TITLE' entries are listed as folders, everything else as files; 'LINK' entries are skipped at the top level.
type: string;
title: string;
url: string;
slug: string;
id: string;
doc_id: string;
prev_uuid: string;
sibling_uuid: string;
// A truthy value marks the entry as having children (`hasChild`).
child_uuid: string;
// Falsy for entries that `listFiles` treats as top-level items of a repository.
parent_uuid: string;
}[];
// Yuque host; can be overridden with the YUQUE_DATASET_BASE_URL environment variable.
const yuqueBaseUrl = process.env.YUQUE_DATASET_BASE_URL || 'https://www.yuque.com';
export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServer }) => {
const instance = axios.create({
baseURL: yuqueBaseUrl,
timeout: 60000, // 超时时间
headers: {
'X-Auth-Token': yuqueServer.token
}
});
/**
 * Check a response body and unwrap its `data` field.
 *
 * @param data - Parsed response body.
 * @returns The `data` field of the body.
 * @throws Rejects with '服务器异常' when the body is undefined.
 */
const checkRes = (data: ResponseDataType) => {
  if (data !== undefined) {
    return data.data;
  }

  addLog.info('yuque dataset data is empty');
  return Promise.reject('服务器异常');
};
/**
 * Turn any request failure into a rejected promise with a normalised reason.
 *
 * Reason selection, first match wins:
 *  - falsy error            -> `{ message: '未知错误' }`
 *  - string error           -> `{ message: err }`
 *  - string `err.message`   -> `{ message: err.message }`
 *  - string `err.data`      -> `{ message: err.data }`
 *  - truthy `err.response.data` -> that value as-is
 *  - anything else          -> the original error
 *
 * @param err - The caught error in whatever shape it arrived.
 * @returns A promise that always rejects.
 */
const responseError = (err: any) => {
  console.log('error->', '请求错误', err);

  const pickReason = () => {
    if (!err) return { message: '未知错误' };
    if (typeof err === 'string') return { message: err };
    if (typeof err.message === 'string') return { message: err.message };
    if (typeof err.data === 'string') return { message: err.data };
    return err?.response?.data ? err.response.data : err;
  };

  return Promise.reject(pickReason());
};
/**
 * Send one request through the shared axios instance.
 *
 * Fields of `data` whose value is `undefined` are deleted from the object in place.
 * For 'POST' and 'PUT' the payload travels as the request body; for every other
 * method it is sent as query parameters.
 *
 * @param url - Path relative to the instance base URL.
 * @param data - Payload object.
 * @param method - HTTP method.
 * @returns The value unwrapped by `checkRes`; failures are routed through `responseError`.
 */
const request = <T>(url: string, data: any, method: Method): Promise<T> => {
  // Drop undefined fields
  for (const field in data) {
    if (data[field] === undefined) {
      delete data[field];
    }
  }

  const sendsBody = ['POST', 'PUT'].includes(method);

  return instance
    .request({
      url,
      method,
      data: sendsBody ? data : undefined,
      params: sendsBody ? undefined : data
    })
    .then((res) => checkRes(res.data))
    .catch((err) => responseError(err));
};
/**
 * List the children of a Yuque node.
 *
 * Three cases, decided by the effective parent id (`parentId`, or
 * `yuqueServer.basePath` when `parentId` is empty):
 *  1. No parent id: list every repository of the group as a folder.
 *  2. Parent id is a repository id (a number or a numeric string): list the
 *     repository's top-level TOC entries, skipping 'LINK' entries.
 *  3. Otherwise the parent id is a composite `repoId-id-uuid` TOC id: list the
 *     TOC entries whose `parent_uuid` equals that uuid.
 *
 * Numeric strings are accepted as repository ids, matching the check used by
 * `getFileDetail`; a string id would otherwise fall into case 3 and match nothing.
 *
 * @param parentId - Node whose children are listed.
 * @returns The child items of the node.
 * @throws Rejects with 'Invalid file data format' when a repository entry lacks id, name or type.
 */
const listFiles = async ({ parentId }: { parentId?: ParentIdType }) => {
  // Fall back to the configured base path when no parent is given
  const targetId = parentId || yuqueServer.basePath;

  // Case 1: root level, list all repositories page by page
  if (!targetId) {
    const limit = 100;
    let offset = 0;
    let repos: YuqueRepoListResponse = [];

    while (true) {
      const page = await request<YuqueRepoListResponse>(
        `/api/v2/groups/${yuqueServer.userId}/repos`,
        {
          offset,
          limit
        },
        'GET'
      );

      if (!page || page.length === 0) break;

      repos = [...repos, ...page];
      // A short page means this was the last one
      if (page.length < limit) break;

      offset += limit;
    }

    const files: APIFileItem[] = repos.map((item) => ({
      id: item.id,
      name: item.name,
      parentId: null,
      type: 'folder' as const,
      updateTime: item.updated_at,
      createTime: item.created_at,
      hasChild: true,
      slug: item.slug
    }));

    if (files.some((file) => !file.id || !file.name || typeof file.type === 'undefined')) {
      return Promise.reject('Invalid file data format');
    }

    return files;
  }

  // Shared mapping from a TOC entry to a list item; the id is `repoId-id-uuid`
  const toTocFile = (repoId: string, item: YuqueTocListResponse[number]) => ({
    id: `${repoId}-${item.id}-${item.uuid}`,
    name: item.title,
    parentId: item.parent_uuid,
    type: item.type === 'TITLE' ? ('folder' as const) : ('file' as const),
    updateTime: new Date(),
    createTime: new Date(),
    uuid: item.uuid,
    slug: item.slug,
    hasChild: !!item.child_uuid
  });

  // Case 2: the parent is a repository (number or numeric string)
  if (typeof targetId === 'number' || !isNaN(Number(targetId))) {
    const repoId = String(targetId);
    const toc = await request<YuqueTocListResponse>(`/api/v2/repos/${repoId}/toc`, {}, 'GET');

    return toc
      .filter((item) => !item.parent_uuid && item.type !== 'LINK')
      .map((item) => toTocFile(repoId, item));
  }

  // Case 3: the parent is a TOC entry; the composite id splits into repoId, id, uuid
  const [repoId, , parentUuid] = String(targetId).split(/-(.*?)-(.*)/);
  const toc = await request<YuqueTocListResponse>(`/api/v2/repos/${repoId}/toc`, {}, 'GET');

  return toc
    .filter((item) => item.parent_uuid === parentUuid)
    .map((item) => toTocFile(repoId, item));
};
/**
 * Read the title and body of a Yuque document.
 *
 * @param apiFileId - Composite id; its first two parts (split by the
 *        `repo-doc-rest` pattern) are used as repository id and document id.
 * @returns The document title and its body as raw text.
 */
const getFileContent = async ({
  apiFileId
}: {
  apiFileId: string;
}): Promise<ApiFileReadContentResponse> => {
  const idParts = apiFileId.split(/-(.*?)-(.*)/);
  const repoId = idParts[0];
  const docId = idParts[1];

  const doc = await request<{ title: string; body: string }>(
    `/api/v2/repos/${repoId}/docs/${docId}`,
    {},
    'GET'
  );

  return {
    title: doc.title,
    rawText: doc.body
  };
};
/**
 * Build the preview URL of a Yuque document.
 *
 * The repository slug and the document slug are fetched one after the other
 * and joined with the base URL and the configured user id.
 *
 * @param apiFileId - Composite id; its first two parts are the repository id and the document id.
 * @returns `${yuqueBaseUrl}/${userId}/${repoSlug}/${docSlug}`.
 */
const getFilePreviewUrl = async ({ apiFileId }: { apiFileId: string }) => {
  const idParts = apiFileId.split(/-(.*?)-(.*)/);
  const repoId = idParts[0];
  const docId = idParts[1];

  // Slug of the repository that contains the document
  const repoSlug = (
    await request<{ slug: string }>(`/api/v2/repos/${repoId}`, { id: apiFileId }, 'GET')
  ).slug;

  // Slug of the document itself
  const docSlug = (
    await request<{ slug: string }>(`/api/v2/repos/${repoId}/docs/${docId}`, {}, 'GET')
  ).slug;

  return `${yuqueBaseUrl}/${yuqueServer.userId}/${repoSlug}/${docSlug}`;
};
/**
 * Get the detail record of a Yuque node.
 *
 * A numeric id is treated as a repository: the group's repositories are paged
 * through and the matching one is returned with a `null` parent.
 * Any other id is treated as a composite `repoId-id-uuid` TOC id: the entry is
 * looked up by uuid in the repository TOC. Its parent is the composite id of
 * the parent entry, or the repository id when the entry has no `parent_uuid`.
 *
 * @param apiFileId - Repository id or composite TOC id.
 * @returns Id, name and parent id of the node.
 * @throws Rejects with '文件不存在' when no matching repository or TOC entry exists.
 */
const getFileDetail = async ({
  apiFileId
}: {
  apiFileId: string;
}): Promise<ApiDatasetDetailResponse> => {
  const looksLikeRepoId = typeof apiFileId === 'number' || !isNaN(Number(apiFileId));

  // TOC entry: resolve it inside its repository
  if (!looksLikeRepoId) {
    const [repoId, , tocUuid] = apiFileId.split(/-(.*?)-(.*)/);
    const toc = await request<YuqueTocListResponse>(`/api/v2/repos/${repoId}/toc`, {}, 'GET');

    const entry = toc.find((item) => item.uuid === tocUuid);
    if (!entry) {
      return Promise.reject('文件不存在');
    }

    const parentEntry = toc.find((item) => item.uuid === entry.parent_uuid);

    return {
      id: entry.id,
      name: entry.title,
      // Without a parent_uuid the entry sits at the repository root
      parentId: entry.parent_uuid ? `${repoId}-${parentEntry?.id}-${parentEntry?.uuid}` : repoId
    };
  }

  // Repository: page through the group's repositories and pick the matching id
  const pageSize = 100;
  const repos: YuqueRepoListResponse = [];

  for (let offset = 0; ; offset += pageSize) {
    const page = await request<YuqueRepoListResponse>(
      `/api/v2/groups/${yuqueServer.userId}/repos`,
      {
        offset,
        limit: pageSize
      },
      'GET'
    );

    if (!page || page.length === 0) break;

    repos.push(...page);
    // A short page means this was the last one
    if (page.length < pageSize) break;
  }

  const repo = repos.find((item) => Number(item.id) === Number(apiFileId));
  if (!repo) {
    return Promise.reject('文件不存在');
  }

  return {
    id: repo.id,
    name: repo.name,
    parentId: null
  };
};
return {
getFileContent,
listFiles,
getFilePreviewUrl,
getFileDetail
};
};

Some files were not shown because too many files have changed in this diff Show More