Compare commits: v4.9.8-alp...test-html (46 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | d7a722a609 |  |
|  | 0f866fc552 |  |
|  | 05c7ba4483 |  |
|  | fa80ce3a77 |  |
|  | 830358aa72 |  |
|  | 02b214b3ec |  |
|  | a171c7b11c |  |
|  | 802de11363 |  |
|  | b4ecfb0b79 |  |
|  | 331b851a78 |  |
|  | 50d235c42a |  |
|  | 9838593451 |  |
|  | c25cd48e72 |  |
|  | 874300a56a |  |
|  | 1dea2b71b4 |  |
|  | a8673344b1 |  |
|  | 9709ae7a4f |  |
|  | fae76e887a |  |
|  | 9af92d1eae |  |
|  | 6a6719e93d |  |
|  | 50481f4ca8 |  |
|  | 88bd3aaa9e |  |
|  | dd3c251603 |  |
|  | aa55f059d4 |  |
|  | 89c9a02650 |  |
|  | 0f3bfa280a |  |
|  | 593ebfd269 |  |
|  | f6dc2204f5 |  |
|  | d44c338059 |  |
|  | 1dac2b70ec |  |
|  | 9fef3e15fb |  |
|  | 2d2d0fffe9 |  |
|  | c6e0b5a1e7 |  |
|  | 932aa28a1f |  |
|  | 9c59bc2c17 |  |
|  | e145f63554 |  |
|  | 554b2ca8dc |  |
|  | 4e83840c14 |  |
|  | a6c80684d1 |  |
|  | a4db03a3b7 |  |
|  | cba8f773fe |  |
|  | bd93f28d6f |  |
|  | 2063cb6314 |  |
|  | 12acaf491c |  |
|  | 3688842cc7 |  |
|  | 398d131bac |  |
.vscode/settings.json (vendored, 2 changes)

```diff
@@ -21,7 +21,7 @@
   "i18n-ally.namespace": true,
   "i18n-ally.pathMatcher": "{locale}/{namespaces}.json",
   "i18n-ally.extract.targetPickingStrategy": "most-similar-by-key",
-  "i18n-ally.translate.engines": ["google"],
+  "i18n-ally.translate.engines": ["deepl","google"],
   "[typescript]": {
     "editor.defaultFormatter": "esbenp.prettier-vscode"
   },
```
```diff
@@ -132,15 +132,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -150,8 +150,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:
```
```diff
@@ -109,15 +109,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -127,8 +127,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:
```
```diff
@@ -23,7 +23,7 @@ services:
     volumes:
       - ./pg/data:/var/lib/postgresql/data
     healthcheck:
-      test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy']
+      test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'postgres']
       interval: 5s
       timeout: 5s
       retries: 10
@@ -96,15 +96,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -114,8 +114,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:
```
```diff
@@ -72,15 +72,15 @@ services:

   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -90,8 +90,8 @@ services:
      - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:
```
docSite/assets/imgs/official_account_faq.png (new binary file, 386 KiB; binary file not shown)
```diff
@@ -959,10 +959,16 @@ curl --location --request POST 'http://localhost:3000/api/core/chat/getHistories
 {{< markdownify >}}

 {{% alert icon=" " context="success" %}}
+目前仅能获取到当前 API key 的创建者的对话。
+
 - appId - 应用 Id
 - offset - 偏移量,即从第几条数据开始取
 - pageSize - 记录数量
 - source - 对话源。source=api,表示获取通过 API 创建的对话(不会获取到页面上的对话记录)
+- startCreateTime - 开始创建时间(可选)
+- endCreateTime - 结束创建时间(可选)
+- startUpdateTime - 开始更新时间(可选)
+- endUpdateTime - 结束更新时间(可选)
 {{% /alert %}}

 {{< /markdownify >}}
```
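The four new optional time parameters make it possible to page through only a window of conversations. A minimal request-body sketch in TypeScript; the diff documents the parameter names but not their format, so the ISO 8601 strings below are an assumption:

```typescript
// Hypothetical getHistories body; the appId value and date format are assumptions.
const body = {
  appId: 'your-app-id',
  offset: 0,
  pageSize: 20,
  source: 'api',
  startUpdateTime: '2025-06-01T00:00:00.000Z', // only chats updated on/after this
  endUpdateTime: '2025-06-30T23:59:59.999Z' // ...and on/before this
};
```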
docSite/content/zh-cn/docs/development/upgrading/4910.md (new file, 50 lines)

```diff
@@ -0,0 +1,50 @@
+---
+title: 'V4.9.10'
+description: 'FastGPT V4.9.10 更新说明'
+icon: 'upgrade'
+draft: false
+toc: true
+weight: 790
+---
+
+## 升级指南
+
+重要提示:本次更新会重新构建全文索引,构建期间,全文检索结果会为空,4c16g 700 万组全文索引大致消耗 25 分钟。如需无缝升级,需自行做表同步工程。
+
+### 1. 做好数据备份
+
+### 2. 更新镜像 tag
+
+- 更新 FastGPT 镜像 tag: v4.9.10-fix2
+- 更新 FastGPT 商业版镜像 tag: v4.9.10-fix2
+- mcp_server 无需更新
+- Sandbox 无需更新
+- AIProxy 无需更新
+
+## 🚀 新增内容
+
+1. 支持 PG 设置`systemEnv.hnswMaxScanTuples`参数,提高迭代搜索的数据总量。
+2. 知识库预处理参数增加 “分块条件”,可控制某些情况下不进行分块处理。
+3. 知识库预处理参数增加 “段落优先” 模式,可控制最大段落深度。原“长度优先”模式,不再内嵌段落优先逻辑。
+4. 工作流调整为单向接入和接出,支持快速的添加下一步节点。
+5. 开放飞书和语雀知识库到开源版。
+6. gemini 和 claude 最新模型预设。
+
+## ⚙️ 优化
+
+1. LLM stream调用,默认超时调大。
+2. 部分确认交互优化。
+3. 纠正原先知识库的“表格数据集”名称,改成“备份导入”。同时支持知识库索引的导出和导入。
+4. 工作流知识库引用上限,如果工作流中没有相关 AI 节点,则交互模式改成纯手动输入,并且上限为 1000万。
+5. 语音输入,移动端判断逻辑,准确判断是否为手机,而不是小屏。
+6. 优化上下文截取算法,至少保证留下一组 Human 信息。
+
+## 🐛 修复
+
+1. 全文检索多知识库时排序得分排序不正确。
+2. 流响应捕获 finish_reason 可能不正确。
+3. 工具调用模式,未保存思考输出。
+4. 知识库 indexSize 参数未生效。
+5. 工作流嵌套 2 层后,获取预览引用、上下文不正确。
+6. xlsx 转成 Markdown 时候,前面会多出一个空格。
+7. 读取 Markdown 文件时,Base64 图片未进行额外抓换保存。
```
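Item 1 of the new-features list introduces `systemEnv.hnswMaxScanTuples`. A sketch of where it would sit, expressed against the `SystemEnvType` shape added later in this compare; the concrete values are placeholders, not release defaults:

```typescript
// Placeholder values; only the field names come from this compare.
const systemEnv: Partial<SystemEnvType> = {
  hnswEfSearch: 100, // existing pg vector search breadth knob
  hnswMaxScanTuples: 100000 // new: cap on tuples scanned by pg iterative search
};
```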
docSite/content/zh-cn/docs/development/upgrading/4911.md (new file, 25 lines)

```diff
@@ -0,0 +1,25 @@
+---
+title: 'V4.9.11(进行中)'
+description: 'FastGPT V4.9.11 更新说明'
+icon: 'upgrade'
+draft: false
+toc: true
+weight: 789
+---
+
+
+## 🚀 新增内容
+
+1. 工作流中增加节点搜索功能。
+2. 工作流中,子流程版本控制,可选择“保持最新版本”,无需手动更新。
+
+## ⚙️ 优化
+
+1. 原文缓存改用 gridfs 存储,提高上限。
+
+## 🐛 修复
+
+1. 工作流中,管理员声明的全局系统工具,无法进行版本管理。
+2. 工具调用节点前,有交互节点时,上下文异常。
+3. 修复备份导入,小于 1000 字时,无法分块问题。
+4. 自定义 PDF 解析,无法保存 base64 图片。
```
```diff
@@ -1,5 +1,5 @@
 ---
-title: 'V4.9.8(进行中)'
+title: 'V4.9.8'
 description: 'FastGPT V4.9.8 更新说明'
 icon: 'upgrade'
 draft: false
@@ -7,6 +7,17 @@ toc: true
 weight: 792
 ---
+
+## 升级指南
+
+### 1. 做好数据备份
+
+### 2. 更新镜像 tag
+
+- 更新 FastGPT 镜像 tag: v4.9.8
+- 更新 FastGPT 商业版镜像 tag: v4.9.8
+- mcp_server 无需更新
+- Sandbox 无需更新
+- AIProxy 无需更新
+
 ## 🚀 新增内容

```
docSite/content/zh-cn/docs/development/upgrading/499.md (new file, 43 lines)

```diff
@@ -0,0 +1,43 @@
+---
+title: 'V4.9.9'
+description: 'FastGPT V4.9.9 更新说明'
+icon: 'upgrade'
+draft: false
+toc: true
+weight: 791
+---
+
+## 升级指南
+
+### 1. 做好数据备份
+
+### 2. 商业版用户替换新 License
+
+商业版用户可以联系 FastGPT 团队支持同学,获取 License 替换方案。替换后,可以直接升级系统,管理后台会提示输入新 License。
+
+### 3. 更新镜像 tag
+
+- 更新 FastGPT 镜像 tag: v4.9.9
+- 更新 FastGPT 商业版镜像 tag: v4.9.9
+- mcp_server 无需更新
+- Sandbox 无需更新
+- AIProxy 无需更新
+
+## 🚀 新增内容
+
+1. 切换 SessionId 来替代 JWT 实现登录鉴权,可控制最大登录客户端数量。
+2. 新的商业版 License 管理模式。
+3. 公众号调用,显示记录 chat 对话错误,方便排查。
+4. API 知识库支持 BasePath 选择,需增加 API 接口,具体可见[API 知识库介绍](/docs/guide/knowledge_base/api_dataset/#4-获取文件详细信息用于获取文件信息)
+
+## ⚙️ 优化
+
+1. 优化工具调用,新工具的判断逻辑。
+2. 调整 Cite 引用提示词。
+
+## 🐛 修复
+
+1. 无法正常获取应用历史保存/发布记录。
+2. 成员创建 MCP 工具权限问题。
+3. 来源引用展示,存在 ID 传递错误,导致提示无权操作该文件。
+4. 回答标注前端数据报错。
```
````diff
@@ -43,7 +43,7 @@ type ResponseType = {
 // 文件列表中,单项的文件类型
 type FileListItem = {
   id: string;
-  parentId: string | null;
+  parentId: string //也可能为 null 或者 undefined 类型;
   name: string;
   type: 'file' | 'folder';
   updateTime: Date;
@@ -59,7 +59,7 @@ type FileListItem = {
 {{< markdownify >}}

 {{% alert icon=" " context="success" %}}
-- parentId - 父级 id,可选,或者 null。
+- parentId - 父级 id,可选,或者 null | undefined。
 - searchKey - 检索词,可选
 {{% /alert %}}

@@ -68,7 +68,7 @@ curl --location --request POST '{{baseURL}}/v1/file/list' \
 --header 'Authorization: Bearer {{authorization}}' \
 --header 'Content-Type: application/json' \
 --data-raw '{
-    "parentId": null,
+    "parentId": "",
     "searchKey": ""
 }'
 ```
````
````diff
@@ -185,3 +185,40 @@ curl --location --request GET '{{baseURL}}/v1/file/read?id=xx' \
 {{< /tabs >}}
+
+
+### 4. 获取文件详细信息(用于获取文件信息)
+
+{{< tabs tabTotal="2" >}}
+{{< tab tabName="请求示例" >}}
+{{< markdownify >}}
+
+id 为文件的 id。
+
+```bash
+curl --location --request GET '{{baseURL}}/v1/file/detail?id=xx' \
+--header 'Authorization: Bearer {{authorization}}'
+```
+
+{{< /markdownify >}}
+{{< /tab >}}
+
+{{< tab tabName="响应示例" >}}
+{{< markdownify >}}
+
+```json
+{
+  "code": 200,
+  "success": true,
+  "message": "",
+  "data": {
+    "id": "docs",
+    "parentId": "",
+    "name": "docs"
+  }
+}
+```
+
+{{< /markdownify >}}
+{{< /tab >}}
+{{< /tabs >}}
````
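For reference, the same detail call expressed in TypeScript; `baseURL` and `authorization` are the same placeholders the curl example uses:

```typescript
// Sketch of the new /v1/file/detail endpoint; mirrors the curl request above.
async function getFileDetail(baseURL: string, authorization: string, id: string) {
  const res = await fetch(`${baseURL}/v1/file/detail?id=${encodeURIComponent(id)}`, {
    headers: { Authorization: `Bearer ${authorization}` }
  });
  return (await res.json()).data; // e.g. { id: 'docs', parentId: '', name: 'docs' }
}
```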
```diff
@@ -28,7 +28,6 @@ FastGPT 商业版是基于 FastGPT 开源版的增强版本,增加了一些独
 | 应用发布安全配置 | ❌ | ✅ | ✅ |
 | 内容审核 | ❌ | ✅ | ✅ |
 | web站点同步 | ❌ | ✅ | ✅ |
-| 主流文档库接入(目前支持:语雀、飞书) | ❌ | ✅ | ✅ |
 | 增强训练模式 | ❌ | ✅ | ✅ |
 | 第三方应用快速接入(飞书、公众号) | ❌ | ✅ | ✅ |
 | 管理后台 | ❌ | ✅ | 不需要 |
```
```diff
@@ -132,7 +132,9 @@ weight: 506
 ### 公众号没响应

 检查应用对话日志,如果有对话日志,但是微信公众号无响应,则是白名单 IP未成功。
-添加白名单IP 后,通常需要等待几分钟微信更新。
+添加白名单IP 后,通常需要等待几分钟微信更新。可以在对话日志中,找点错误日志。
+
+

 ### 如何新开一个聊天记录
```
env.d.ts (vendored, 2 changes)

```diff
@@ -4,7 +4,6 @@ declare global {
       LOG_DEPTH: string;
       DEFAULT_ROOT_PSW: string;
       DB_MAX_LINK: string;
-      TOKEN_KEY: string;
       FILE_TOKEN_KEY: string;
       ROOT_KEY: string;
       OPENAI_BASE_URL: string;
@@ -37,6 +36,7 @@ declare global {
       CONFIG_JSON_PATH?: string;
       PASSWORD_LOGIN_LOCK_SECONDS?: string;
       PASSWORD_EXPIRED_MONTH?: string;
+      MAX_LOGIN_SESSION?: string;
     }
   }
 }
```
```diff
@@ -27,7 +27,7 @@ const datasetErr = [
   },
   {
     statusText: DatasetErrEnum.unExist,
-    message: 'core.dataset.error.unExistDataset'
+    message: i18nT('common:core.dataset.error.unExistDataset')
   },
   {
     statusText: DatasetErrEnum.unExistCollection,
```
```diff
@@ -2,13 +2,28 @@ import { type ErrType } from '../errorCode';
 import { i18nT } from '../../../../web/i18n/utils';
 /* dataset: 509000 */
 export enum SystemErrEnum {
-  communityVersionNumLimit = 'communityVersionNumLimit'
+  communityVersionNumLimit = 'communityVersionNumLimit',
+  licenseAppAmountLimit = 'licenseAppAmountLimit',
+  licenseDatasetAmountLimit = 'licenseDatasetAmountLimit',
+  licenseUserAmountLimit = 'licenseUserAmountLimit'
 }

 const systemErr = [
   {
     statusText: SystemErrEnum.communityVersionNumLimit,
     message: i18nT('common:code_error.system_error.community_version_num_limit')
+  },
+  {
+    statusText: SystemErrEnum.licenseAppAmountLimit,
+    message: i18nT('common:code_error.system_error.license_app_amount_limit')
+  },
+  {
+    statusText: SystemErrEnum.licenseDatasetAmountLimit,
+    message: i18nT('common:code_error.system_error.license_dataset_amount_limit')
+  },
+  {
+    statusText: SystemErrEnum.licenseUserAmountLimit,
+    message: i18nT('common:code_error.system_error.license_user_amount_limit')
   }
 ];

```
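A sketch of how one of the new license-limit codes would resolve to its message, mirroring the lookup shape of the `systemErr` array above; the `findMessage` helper is hypothetical, not part of the diff:

```typescript
// Hypothetical helper; systemErr is the array defined in the diff above.
const findMessage = (statusText: SystemErrEnum) =>
  systemErr.find((item) => item.statusText === statusText)?.message;

findMessage(SystemErrEnum.licenseUserAmountLimit);
// -> i18nT('common:code_error.system_error.license_user_amount_limit')
```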
```diff
@@ -5,7 +5,7 @@ export const checkPasswordRule = (password: string) => {
     /[A-Z]/, // Contains uppercase letters
     /[!@#$%^&*()_+=-]/ // Contains special characters
   ];
-  const validChars = /^[\dA-Za-z!@#$%^&*()_+=-]{6,100}$/;
+  const validChars = /^[\dA-Za-z!@#$%^&*()_+=-]{8,100}$/;

   // Check length and valid characters
   if (!validChars.test(password)) return false;
```
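The only functional change here is the length floor moving from 6 to 8 characters. A quick sketch of the effect, using just the changed regex:

```typescript
// Only the length bound changed; the allowed character set is the same as before.
const validChars = /^[\dA-Za-z!@#$%^&*()_+=-]{8,100}$/;

console.log(validChars.test('abc123')); // false: 6 chars passed the old rule, fails now
console.log(validChars.test('abc12345')); // true: 8+ chars still pass this gate
```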
````diff
@@ -7,6 +7,10 @@ export const CUSTOM_SPLIT_SIGN = '-----CUSTOM_SPLIT_SIGN-----';
 type SplitProps = {
   text: string;
   chunkSize: number;
+
+  paragraphChunkDeep?: number; // Paragraph deep
+  paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
+
   maxSize?: number;
   overlapRatio?: number;
   customReg?: string[];
@@ -108,6 +112,8 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   let {
     text = '',
     chunkSize,
+    paragraphChunkDeep = 5,
+    paragraphChunkMinSize = 100,
     maxSize = defaultMaxChunkSize,
     overlapRatio = 0.15,
     customReg = []
@@ -123,7 +129,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   text = text.replace(/(```[\s\S]*?```|~~~[\s\S]*?~~~)/g, function (match) {
     return match.replace(/\n/g, codeBlockMarker);
   });
-  // 2. 表格处理 - 单独提取表格出来,进行表头合并
+  // 2. Markdown 表格处理 - 单独提取表格出来,进行表头合并
   const tableReg =
     /(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n?)*)(?:\n|$)/g;
   const tableDataList = text.match(tableReg);
@@ -143,25 +149,40 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   text = text.replace(/(\r?\n|\r){3,}/g, '\n\n\n');

   // The larger maxLen is, the next sentence is less likely to trigger splitting
-  const markdownIndex = 4;
-  const forbidOverlapIndex = 8;
+  const customRegLen = customReg.length;
+  const markdownIndex = paragraphChunkDeep - 1;
+  const forbidOverlapIndex = customRegLen + markdownIndex + 4;
+
+  const markdownHeaderRules = ((deep?: number): { reg: RegExp; maxLen: number }[] => {
+    if (!deep || deep === 0) return [];
+
+    const maxDeep = Math.min(deep, 8); // Maximum 8 levels
+    const rules: { reg: RegExp; maxLen: number }[] = [];
+
+    for (let i = 1; i <= maxDeep; i++) {
+      const hashSymbols = '#'.repeat(i);
+      rules.push({
+        reg: new RegExp(`^(${hashSymbols}\\s[^\\n]+\\n)`, 'gm'),
+        maxLen: chunkSize
+      });
+    }
+
+    return rules;
+  })(paragraphChunkDeep);

   const stepReges: { reg: RegExp | string; maxLen: number }[] = [
     ...customReg.map((text) => ({
       reg: text.replaceAll('\\n', '\n'),
       maxLen: chunkSize
     })),
-    { reg: /^(#\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(##\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(###\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(####\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(#####\s[^\n]+\n)/gm, maxLen: chunkSize },
+    ...markdownHeaderRules,
     { reg: /([\n](```[\s\S]*?```|~~~[\s\S]*?~~~))/g, maxLen: maxSize }, // code block
+    // HTML Table tag 尽可能保障完整
     {
       reg: /(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n)*)/g,
-      maxLen: Math.min(chunkSize * 1.5, maxSize)
-    }, // Table 尽可能保证完整性
+      maxLen: chunkSize
+    }, // Markdown Table 尽可能保证完整性
     { reg: /(\n{2,})/g, maxLen: chunkSize },
     { reg: /([\n])/g, maxLen: chunkSize },
     // ------ There's no overlap on the top
@@ -172,12 +193,10 @@ const commonSplit = (props: SplitProps): SplitResponse => {
     { reg: /([,]|,\s)/g, maxLen: chunkSize }
   ];

-  const customRegLen = customReg.length;
   const checkIsCustomStep = (step: number) => step < customRegLen;
   const checkIsMarkdownSplit = (step: number) =>
     step >= customRegLen && step <= markdownIndex + customRegLen;
-  const checkForbidOverlap = (step: number) => step <= forbidOverlapIndex + customRegLen;
+  const checkForbidOverlap = (step: number) => step <= forbidOverlapIndex;

   // if use markdown title split, Separate record title
   const getSplitTexts = ({ text, step }: { text: string; step: number }) => {
@@ -301,6 +320,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   const splitTexts = getSplitTexts({ text, step });

   const chunks: string[] = [];
+
   for (let i = 0; i < splitTexts.length; i++) {
     const item = splitTexts[i];

@@ -443,7 +463,6 @@ const commonSplit = (props: SplitProps): SplitResponse => {
 */
 export const splitText2Chunks = (props: SplitProps): SplitResponse => {
   let { text = '' } = props;
-  const start = Date.now();
   const splitWithCustomSign = text.split(CUSTOM_SPLIT_SIGN);

   const splitResult = splitWithCustomSign.map((item) => {
````
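The hard-coded `#` through `#####` rules are replaced by a generator driven by `paragraphChunkDeep`. A standalone sketch of what that generator yields for a depth of 3, with the logic copied from the diff and wrapped so it runs on its own:

```typescript
// Standalone rewrite of the header-rule generator from the diff above.
const chunkSize = 512; // placeholder; supplied via SplitProps in the real code

const markdownHeaderRules = ((deep?: number): { reg: RegExp; maxLen: number }[] => {
  if (!deep || deep === 0) return [];
  const maxDeep = Math.min(deep, 8); // capped at 8 heading levels
  const rules: { reg: RegExp; maxLen: number }[] = [];
  for (let i = 1; i <= maxDeep; i++) {
    const hashSymbols = '#'.repeat(i);
    rules.push({ reg: new RegExp(`^(${hashSymbols}\\s[^\\n]+\\n)`, 'gm'), maxLen: chunkSize });
  }
  return rules;
})(3);

console.log(markdownHeaderRules.map((r) => r.reg.source));
// [ '^(#\\s[^\\n]+\\n)', '^(##\\s[^\\n]+\\n)', '^(###\\s[^\\n]+\\n)' ]
```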
packages/global/common/system/types/index.d.ts (vendored, 25 changes)

```diff
@@ -70,6 +70,9 @@ export type FastGPTFeConfigsType = {
   show_publish_dingtalk?: boolean;
   show_publish_offiaccount?: boolean;

+  show_dataset_enhance?: boolean;
+  show_batch_eval?: boolean;
+
   concatMd?: string;
   docUrl?: string;
   openAPIDocUrl?: string;
@@ -127,9 +130,11 @@ export type SystemEnvType = {
   vectorMaxProcess: number;
   qaMaxProcess: number;
   vlmMaxProcess: number;
-  hnswEfSearch: number;
   tokenWorkers: number; // token count max worker
+
+  hnswEfSearch: number;
+  hnswMaxScanTuples: number;

   oneapiUrl?: string;
   chatApiKey?: string;
@@ -142,3 +147,21 @@ export type customPdfParseType = {
   doc2xKey?: string;
   price?: number;
 };
+
+export type LicenseDataType = {
+  startTime: string;
+  expiredTime: string;
+  company: string;
+  description?: string; // 描述
+  hosts?: string[]; // 管理端有效域名
+  maxUsers?: number; // 最大用户数,不填默认不上限
+  maxApps?: number; // 最大应用数,不填默认不上限
+  maxDatasets?: number; // 最大数据集数,不填默认不上限
+  functions: {
+    sso: boolean;
+    pay: boolean;
+    customTemplates: boolean;
+    datasetEnhance: boolean;
+    batchEval: boolean;
+  };
+};
```
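For orientation, a hypothetical object satisfying the new `LicenseDataType`; every value below is made up for illustration, assuming the type is imported from this file:

```typescript
const license: LicenseDataType = {
  startTime: '2025-01-01',
  expiredTime: '2026-01-01',
  company: 'Example Co.',
  maxUsers: 50, // omit to leave the user count uncapped
  functions: {
    sso: true,
    pay: false,
    customTemplates: true,
    datasetEnhance: true,
    batchEval: false
  }
};
```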
````diff
@@ -2,6 +2,248 @@ import { type PromptTemplateItem } from '../type.d';
 import { i18nT } from '../../../../web/i18n/utils';
 import { getPromptByVersion } from './utils';

+export const Prompt_userQuotePromptList: PromptTemplateItem[] = [
+  {
+    title: i18nT('app:template.standard_template'),
+    desc: '',
+    value: {
+      ['4.9.7']: `## 任务描述
+你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
+同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
+
+## 追溯展示规则
+
+- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
+- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
+- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
+- 不要把示例作为知识点。
+- 不要伪造 id,返回的 id 必须都存在 <Cites></Cites> 中!
+
+## 通用规则
+
+- 如果你不清楚答案,你需要澄清。
+- 避免提及你是从 <Cites></Cites> 获取的知识。
+- 保持答案与 <Cites></Cites> 中描述的一致。
+- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
+- 使用与问题相同的语言回答。
+
+<Cites>
+{{quote}}
+</Cites>
+
+## 用户问题
+
+{{question}}
+
+## 回答
+`
+    }
+  },
+  {
+    title: i18nT('app:template.qa_template'),
+    desc: '',
+    value: {
+      ['4.9.7']: `## 任务描述
+作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
+
+## 回答要求
+- 选择其中一个或多个问答对进行回答。
+- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
+- 如果没有相关的问答对,你需要澄清。
+- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
+- 使用与问题相同的语言回答。
+
+<QA>
+{{quote}}
+</QA>
+
+## 用户问题
+
+{{question}}
+
+## 回答
+`
+    }
+  },
+  {
+    title: i18nT('app:template.standard_strict'),
+    desc: '',
+    value: {
+      ['4.9.7']: `## 任务描述
+你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
+同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
+
+## 追溯展示规则
+
+- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
+- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
+- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
+- 不要把示例作为知识点。
+- 不要伪造 id,返回的 id 必须都存在 <Cites></Cites> 中!
+
+## 通用规则
+
+- 如果你不清楚答案,你需要澄清。
+- 避免提及你是从 <Cites></Cites> 获取的知识。
+- 保持答案与 <Cites></Cites> 中描述的一致。
+- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
+- 使用与问题相同的语言回答。
+
+## 严格要求
+
+你只能使用 <Cites></Cites> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <Cites></Cites> 中的内容一致。
+
+<Cites>
+{{quote}}
+</Cites>
+
+## 用户问题
+
+{{question}}
+
+## 回答
+`
+    }
+  },
+  {
+    title: i18nT('app:template.hard_strict'),
+    desc: '',
+    value: {
+      ['4.9.7']: `## 任务描述
+作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
+
+## 回答要求
+- 选择其中一个或多个问答对进行回答。
+- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
+- 如果没有相关的问答对,你需要澄清。
+- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
+- 使用与问题相同的语言回答。
+
+## 严格要求
+
+你只能使用 <QA></QA> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <QA></QA> 中的内容一致。
+
+<QA>
+{{quote}}
+</QA>
+
+## 用户问题
+
+{{question}}
+
+## 回答
+`
+    }
+  }
+];
+
+export const Prompt_systemQuotePromptList: PromptTemplateItem[] = [
+  {
+    title: i18nT('app:template.standard_template'),
+    desc: '',
+    value: {
+      ['4.9.7']: `## 任务描述
+你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
+同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
+
+## 追溯展示规则
+
+- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
+- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
+- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
+- 不要把示例作为知识点。
+- 不要伪造 id,返回的 id 必须都存在 <Cites></Cites> 中!
+
+## 通用规则
+
+- 如果你不清楚答案,你需要澄清。
+- 避免提及你是从 <Cites></Cites> 获取的知识。
+- 保持答案与 <Cites></Cites> 中描述的一致。
+- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
+- 使用与问题相同的语言回答。
+
+<Cites>
+{{quote}}
+</Cites>`
+    }
+  },
+  {
+    title: i18nT('app:template.qa_template'),
+    desc: '',
+    value: {
+      ['4.9.8']: `## 任务描述
+作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
+
+## 回答要求
+- 选择其中一个或多个问答对进行回答。
+- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
+- 如果没有相关的问答对,你需要澄清。
+- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
+- 使用与问题相同的语言回答。
+
+<QA>
+{{quote}}
+</QA>`
+    }
+  },
+  {
+    title: i18nT('app:template.standard_strict'),
+    desc: '',
+    value: {
+      ['4.9.7']: `## 任务描述
+你是一个知识库回答助手,可以使用 <Cites></Cites> 中的内容作为你本次回答的参考。
+同时,为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。
+
+## 追溯展示规则
+
+- 使用 [id](CITE) 的格式来引用 <Cites></Cites> 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
+- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
+- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
+- 不要把示例作为知识点。
+- 不要伪造 id,返回的 id 必须都存在 <Cites></Cites> 中!
+
+## 通用规则
+
+- 如果你不清楚答案,你需要澄清。
+- 避免提及你是从 <Cites></Cites> 获取的知识。
+- 保持答案与 <Cites></Cites> 中描述的一致。
+- 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
+- 使用与问题相同的语言回答。
+
+## 严格要求
+
+你只能使用 <Cites></Cites> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <Cites></Cites> 中的内容一致。
+
+<Cites>
+{{quote}}
+</Cites>`
+    }
+  },
+  {
+    title: i18nT('app:template.hard_strict'),
+    desc: '',
+    value: {
+      ['4.9.7']: `## 任务描述
+作为一个问答助手,你会使用 <QA></QA> 标记中的提供的数据对进行内容回答。
+
+## 回答要求
+- 选择其中一个或多个问答对进行回答。
+- 回答的内容应尽可能与 <Answer></Answer> 中的内容一致。
+- 如果没有相关的问答对,你需要澄清。
+- 避免提及你是从 <QA></QA> 获取的知识,只需要回复答案。
+- 使用与问题相同的语言回答。
+
+## 严格要求
+
+你只能使用 <QA></QA> 标记中的内容作为参考,不能使用自身的知识,并且回答的内容需严格与 <QA></QA> 中的内容一致。
+
+<QA>
+{{quote}}
+</QA>`
+    }
+  }
+];
+
 export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
   {
     title: i18nT('app:template.standard_template'),
@@ -10,11 +252,6 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
     ['4.9.7']: `{
   "id": "{{id}}",
   "sourceName": "{{source}}",
-  "content": "{{q}}\n{{a}}"
-}
-`,
-    ['4.9.2']: `{
-  "sourceName": "{{source}}",
   "updateTime": "{{updateTime}}",
   "content": "{{q}}\n{{a}}"
 }
@@ -25,7 +262,7 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
     title: i18nT('app:template.qa_template'),
     desc: i18nT('app:template.qa_template_des'),
     value: {
-      ['4.9.2']: `<Question>
+      ['4.9.7']: `<Question>
 {{q}}
 </Question>
 <Answer>
@@ -40,11 +277,6 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
     ['4.9.7']: `{
   "id": "{{id}}",
   "sourceName": "{{source}}",
-  "content": "{{q}}\n{{a}}"
-}
-`,
-    ['4.9.2']: `{
-  "sourceName": "{{source}}",
   "updateTime": "{{updateTime}}",
   "content": "{{q}}\n{{a}}"
 }
@@ -55,7 +287,7 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
     title: i18nT('app:template.hard_strict'),
     desc: i18nT('app:template.hard_strict_des'),
     value: {
-      ['4.9.2']: `<Question>
+      ['4.9.7']: `<Question>
 {{q}}
 </Question>
 <Answer>
@@ -64,263 +296,12 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
     }
   }
 ];

 export const getQuoteTemplate = (version?: string) => {
   const defaultTemplate = Prompt_QuoteTemplateList[0].value;

   return getPromptByVersion(version, defaultTemplate);
 };

-export const Prompt_userQuotePromptList: PromptTemplateItem[] = [
-  {
-    title: i18nT('app:template.standard_template'),
-    desc: '',
-    value: {
-      ['4.9.7']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-回答要求:
-- 如果你不清楚答案,你需要澄清。
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。
-- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
-- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
-- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。`,
-      ['4.9.2']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-回答要求:
-- 如果你不清楚答案,你需要澄清。
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。
-
-问题:"""{{question}}"""`
-    }
-  },
-  {
-    title: i18nT('app:template.qa_template'),
-    desc: '',
-    value: {
-      ['4.9.2']: `使用 <QA></QA> 标记中的问答对进行回答。
-
-<QA>
-{{quote}}
-</QA>
-
-回答要求:
-- 选择其中一个或多个问答对进行回答。
-- 回答的内容应尽可能与 <答案></答案> 中的内容一致。
-- 如果没有相关的问答对,你需要澄清。
-- 避免提及你是从 QA 获取的知识,只需要回复答案。
-
-问题:"""{{question}}"""`
-    }
-  },
-  {
-    title: i18nT('app:template.standard_strict'),
-    desc: '',
-    value: {
-      ['4.9.7']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-思考流程:
-1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
-2. 如果有关,你按下面的要求回答。
-3. 如果无关,你直接拒绝回答本次问题。
-
-回答要求:
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。
-- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
-- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
-- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。
-
-问题:"""{{question}}"""`,
-      ['4.9.2']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-思考流程:
-1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
-2. 如果有关,你按下面的要求回答。
-3. 如果无关,你直接拒绝回答本次问题。
-
-回答要求:
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。
-
-问题:"""{{question}}"""`
-    }
-  },
-  {
-    title: i18nT('app:template.hard_strict'),
-    desc: '',
-    value: {
-      ['4.9.2']: `忘记你已有的知识,仅使用 <QA></QA> 标记中的问答对进行回答。
-
-<QA>
-{{quote}}
-</QA>
-
-思考流程:
-1. 判断问题是否与 <QA></QA> 标记中的内容有关。
-2. 如果无关,你直接拒绝回答本次问题。
-3. 判断是否有相近或相同的问题。
-4. 如果有相同的问题,直接输出对应答案。
-5. 如果只有相近的问题,请把相近的问题和答案一起输出。
-
-回答要求:
-- 如果没有相关的问答对,你需要澄清。
-- 回答的内容应尽可能与 <QA></QA> 标记中的内容一致。
-- 避免提及你是从 QA 获取的知识,只需要回复答案。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。
-
-问题:"""{{question}}"""`
-    }
-  }
-];
-
-export const Prompt_systemQuotePromptList: PromptTemplateItem[] = [
-  {
-    title: i18nT('app:template.standard_template'),
-    desc: '',
-    value: {
-      ['4.9.7']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-回答要求:
-- 如果你不清楚答案,你需要澄清。
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。
-- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
-- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
-- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。`,
-      ['4.9.2']: `使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-回答要求:
-- 如果你不清楚答案,你需要澄清。
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。`
-    }
-  },
-  {
-    title: i18nT('app:template.qa_template'),
-    desc: '',
-    value: {
-      ['4.9.2']: `使用 <QA></QA> 标记中的问答对进行回答。
-
-<QA>
-{{quote}}
-</QA>
-
-回答要求:
-- 选择其中一个或多个问答对进行回答。
-- 回答的内容应尽可能与 <答案></答案> 中的内容一致。
-- 如果没有相关的问答对,你需要澄清。
-- 避免提及你是从 QA 获取的知识,只需要回复答案。`
-    }
-  },
-  {
-    title: i18nT('app:template.standard_strict'),
-    desc: '',
-    value: {
-      ['4.9.7']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-思考流程:
-1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
-2. 如果有关,你按下面的要求回答。
-3. 如果无关,你直接拒绝回答本次问题。
-
-回答要求:
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。
-- 使用 [id](CITE) 格式来引用<Reference></Reference>中的知识,其中 CITE 是固定常量, id 为引文中的 id。
-- 在每段结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
-- 每段至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。
-
-问题:"""{{question}}"""`,
-      ['4.9.2']: `忘记你已有的知识,仅使用 <Reference></Reference> 标记中的内容作为本次对话的参考:
-
-<Reference>
-{{quote}}
-</Reference>
-
-思考流程:
-1. 判断问题是否与 <Reference></Reference> 标记中的内容有关。
-2. 如果有关,你按下面的要求回答。
-3. 如果无关,你直接拒绝回答本次问题。
-
-回答要求:
-- 避免提及你是从 <Reference></Reference> 获取的知识。
-- 保持答案与 <Reference></Reference> 中描述的一致。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。`
-    }
-  },
-  {
-    title: i18nT('app:template.hard_strict'),
-    desc: '',
-    value: {
-      ['4.9.2']: `忘记你已有的知识,仅使用 <QA></QA> 标记中的问答对进行回答。
-
-<QA>
-{{quote}}
-</QA>
-
-思考流程:
-1. 判断问题是否与 <QA></QA> 标记中的内容有关。
-2. 如果无关,你直接拒绝回答本次问题。
-3. 判断是否有相近或相同的问题。
-4. 如果有相同的问题,直接输出对应答案。
-5. 如果只有相近的问题,请把相近的问题和答案一起输出。
-
-回答要求:
-- 如果没有相关的问答对,你需要澄清。
-- 回答的内容应尽可能与 <QA></QA> 标记中的内容一致。
-- 避免提及你是从 QA 获取的知识,只需要回复答案。
-- 使用 Markdown 语法优化回答格式。
-- 使用与问题相同的语言回答。`
-    }
-  }
-];
-
 export const getQuotePrompt = (version?: string, role: 'user' | 'system' = 'user') => {
   const quotePromptTemplates =
     role === 'user' ? Prompt_userQuotePromptList : Prompt_systemQuotePromptList;
@@ -333,7 +314,7 @@ export const getQuotePrompt = (version?: string, role: 'user' | 'system' = 'user
 // Document quote prompt
 export const getDocumentQuotePrompt = (version?: string) => {
   const promptMap = {
-    ['4.9.2']: `将 <FilesContent></FilesContent> 中的内容作为本次对话的参考:
+    ['4.9.7']: `将 <FilesContent></FilesContent> 中的内容作为本次对话的参考:
 <FilesContent>
 {{quote}}
 </FilesContent>
````
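A quick sketch of how these lists are consumed, based on the `getQuotePrompt` signature visible in the context lines; the usage itself is inferred, not shown in the diff:

```typescript
// Selects the cite-style standard template for the user role at version 4.9.7.
const prompt = getQuotePrompt('4.9.7', 'user');
// The returned template still contains its {{quote}} and {{question}} placeholders.
```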
```diff
@@ -1,14 +1,19 @@
 export const getDatasetSearchToolResponsePrompt = () => {
   return `## Role
-你是一个知识库回答助手,可以 "quotes" 中的内容作为本次对话的参考。为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记。
+你是一个知识库回答助手,可以 "cites" 中的内容作为本次对话的参考。为了使回答结果更加可信并且可追溯,你需要在每段话结尾添加引用标记,标识参考了哪些内容。

-## Rules
+## 追溯展示规则
+
+- 使用 **[id](CITE)** 格式来引用 "cites" 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
+- 在 **每段话结尾** 自然地整合引用。例如: "Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)。"。
+- 每段话**至少包含一个引用**,多个引用时按顺序排列,例如:"Nginx是一款轻量级的Web服务器、反向代理服务器[67e517e74767063e882d6861](CITE)[67e517e74767063e882d6862](CITE)。\n 它的特点是非常轻量[67e517e74767063e882d6863](CITE)。"
+- 不要把示例作为知识点。
+- 不要伪造 id,返回的 id 必须都存在 cites 中!
+
+## 通用规则
 - 如果你不清楚答案,你需要澄清。
-- 避免提及你是从 "quotes" 获取的知识。
-- 保持答案与 "quotes" 中描述的一致。
+- 避免提及你是从 "cites" 获取的知识。
+- 保持答案与 "cites" 中描述的一致。
 - 使用 Markdown 语法优化回答格式。尤其是图片、表格、序列号等内容,需严格完整输出。
-- 使用与问题相同的语言回答。
-- 使用 [id](CITE) 格式来引用 "quotes" 中的知识,其中 CITE 是固定常量, id 为引文中的 id。
-- 在每段话结尾自然地整合引用。例如: "FastGPT 是一个基于大语言模型(LLM)的知识库问答系统[67e517e74767063e882d6861](CITE)。"
-- 每段话至少包含一个引用,也可根据内容需要加入多个引用,按顺序排列。`;
+- 使用与问题相同的语言回答。`;
 };
```
```diff
@@ -60,5 +60,3 @@ export enum AppTemplateTypeEnum {
   // special type
   contribute = 'contribute'
 }
-
-export const defaultDatasetMaxTokens = 16000;
```
```diff
@@ -5,7 +5,7 @@ import {
   FlowNodeTypeEnum
 } from '../../workflow/node/constant';
 import { nanoid } from 'nanoid';
-import { type ToolType } from '../type';
+import { type McpToolConfigType } from '../type';
 import { i18nT } from '../../../../web/i18n/utils';
 import { type RuntimeNodeItemType } from '../../workflow/runtime/type';

@@ -16,7 +16,7 @@ export const getMCPToolSetRuntimeNode = ({
   avatar
 }: {
   url: string;
-  toolList: ToolType[];
+  toolList: McpToolConfigType[];
   name?: string;
   avatar?: string;
 }): RuntimeNodeItemType => {
@@ -45,7 +45,7 @@ export const getMCPToolRuntimeNode = ({
   url,
   avatar = 'core/app/type/mcpToolsFill'
 }: {
-  tool: ToolType;
+  tool: McpToolConfigType;
   url: string;
   avatar?: string;
 }): RuntimeNodeItemType => {
@@ -65,7 +65,7 @@ export const getMCPToolRuntimeNode = ({
   ...Object.entries(tool.inputSchema?.properties || {}).map(([key, value]) => ({
     key,
     label: key,
-    valueType: value.type as WorkflowIOValueTypeEnum,
+    valueType: value.type as WorkflowIOValueTypeEnum, // TODO: 这里需要做一个映射
     description: value.description,
     toolDescription: value.description || key,
     required: tool.inputSchema?.required?.includes(key) || false,
```
packages/global/core/app/type.d.ts (vendored, 20 changes)

```diff
@@ -16,16 +16,6 @@ import { FlowNodeInputTypeEnum } from '../../core/workflow/node/constant';
 import type { WorkflowTemplateBasicType } from '@fastgpt/global/core/workflow/type';
 import type { SourceMemberType } from '../../support/user/type';

-export type ToolType = {
-  name: string;
-  description: string;
-  inputSchema: {
-    type: string;
-    properties?: Record<string, { type: string; description?: string }>;
-    required?: string[];
-  };
-};
-
 export type AppSchema = {
   _id: string;
   parentId?: ParentIdType;
@@ -117,6 +107,16 @@ export type AppSimpleEditFormType = {
   chatConfig: AppChatConfigType;
 };

+export type McpToolConfigType = {
+  name: string;
+  description: string;
+  inputSchema: {
+    type: string;
+    properties?: Record<string, { type: string; description?: string }>;
+    required?: string[];
+  };
+};
+
 /* app chat config type */
 export type AppChatConfigType = {
   welcomeText?: string;
```
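The type keeps the exact shape of the old `ToolType`; only its name and position changed. A hypothetical value satisfying it, assuming the type is imported from this file; the tool name and schema below are made up for illustration:

```typescript
// Illustrative MCP tool config matching the relocated McpToolConfigType.
const weatherTool: McpToolConfigType = {
  name: 'get_weather',
  description: 'Look up the current weather for a city',
  inputSchema: {
    type: 'object',
    properties: {
      city: { type: 'string', description: 'City name' }
    },
    required: ['city']
  }
};
```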
```diff
@@ -9,6 +9,9 @@ import { type WorkflowTemplateBasicType } from '../workflow/type';
 import { AppTypeEnum } from './constants';
 import { AppErrEnum } from '../../common/error/code/app';
 import { PluginErrEnum } from '../../common/error/code/plugin';
+import { i18nT } from '../../../web/i18n/utils';
+import appErrList from '../../common/error/code/app';
+import pluginErrList from '../../common/error/code/plugin';

 export const getDefaultAppForm = (): AppSimpleEditFormType => {
   return {
@@ -189,17 +192,10 @@ export const getAppType = (config?: WorkflowTemplateBasicType | AppSimpleEditFor
   return '';
 };

-export const checkAppUnExistError = (error?: string) => {
-  const unExistError: Array<string> = [
-    AppErrEnum.unAuthApp,
-    AppErrEnum.unExist,
-    PluginErrEnum.unAuth,
-    PluginErrEnum.unExist
-  ];
-
-  if (!!error && unExistError.includes(error)) {
-    return error;
-  } else {
-    return undefined;
-  }
+export const formatToolError = (error?: any) => {
+  if (!error || typeof error !== 'string') return;
+
+  const errorText = appErrList[error]?.message || pluginErrList[error]?.message;
+
+  return errorText || error;
 };
```
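The new `formatToolError` inverts the old helper's contract: instead of passing through only the known "not exist" codes, it maps any known app or plugin error code to its message and falls back to the raw string. A usage sketch with illustrative inputs:

```typescript
// A known app/plugin error code resolves to its registered message...
formatToolError(AppErrEnum.unExist);
// ...an unknown string falls through unchanged...
formatToolError('some unexpected error'); // -> 'some unexpected error'
// ...and a missing or non-string error yields undefined.
formatToolError(undefined); // -> undefined
```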
packages/global/core/chat/type.d.ts (vendored, 3 changes)

```diff
@@ -26,6 +26,7 @@ export type ChatSchema = {
   teamId: string;
   tmbId: string;
   appId: string;
+  createTime: Date;
   updateTime: Date;
   title: string;
   customTitle: string;
@@ -112,6 +113,7 @@ export type ChatItemSchema = (UserChatItemType | SystemChatItemType | AIChatItem
   appId: string;
   time: Date;
   durationSeconds?: number;
+  errorMsg?: string;
 };

 export type AdminFbkType = {
@@ -143,6 +145,7 @@ export type ChatSiteItemType = (UserChatItemType | SystemChatItemType | AIChatIt
   responseData?: ChatHistoryItemResType[];
   time?: Date;
   durationSeconds?: number;
+  errorMsg?: string;
 } & ChatBoxInputType &
   ResponseTagItemType;

```
packages/global/core/dataset/api.d.ts (31 changes, vendored)
@@ -1,9 +1,11 @@
-import type { DatasetDataIndexItemType, DatasetSchemaType } from './type';
+import type { ChunkSettingsType, DatasetDataIndexItemType, DatasetSchemaType } from './type';
 import type {
   DatasetCollectionTypeEnum,
   DatasetCollectionDataProcessModeEnum,
   ChunkSettingModeEnum,
-  DataChunkSplitModeEnum
+  DataChunkSplitModeEnum,
+  ChunkTriggerConfigTypeEnum,
+  ParagraphChunkAIModeEnum
 } from './constants';
 import type { LLMModelItemType } from '../ai/model.d';
 import type { ParentIdType } from 'common/parentFolder/type';
@@ -32,26 +34,16 @@ export type DatasetUpdateBody = {
 };

 /* ================= collection ===================== */
-export type DatasetCollectionChunkMetadataType = {
+// Input + store params
+type DatasetCollectionStoreDataType = ChunkSettingsType & {
   parentId?: string;
-  customPdfParse?: boolean;
-  trainingType?: DatasetCollectionDataProcessModeEnum;
-  imageIndex?: boolean;
-  autoIndexes?: boolean;
-
-  chunkSettingMode?: ChunkSettingModeEnum;
-  chunkSplitMode?: DataChunkSplitModeEnum;
-
-  chunkSize?: number;
-  indexSize?: number;
-
-  chunkSplitter?: string;
-  qaPrompt?: string;
   metadata?: Record<string, any>;
+
+  customPdfParse?: boolean;
 };

 // create collection params
-export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
+export type CreateDatasetCollectionParams = DatasetCollectionStoreDataType & {
   datasetId: string;
   name: string;
   type: DatasetCollectionTypeEnum;
@@ -72,7 +64,7 @@ export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType &
   nextSyncTime?: Date;
 };

-export type ApiCreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
+export type ApiCreateDatasetCollectionParams = DatasetCollectionStoreDataType & {
   datasetId: string;
   tags?: string[];
 };
@@ -90,7 +82,7 @@ export type ApiDatasetCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams
 export type FileIdCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
   fileId: string;
 };
-export type reTrainingDatasetFileCollectionParams = DatasetCollectionChunkMetadataType & {
+export type reTrainingDatasetFileCollectionParams = DatasetCollectionStoreDataType & {
   datasetId: string;
   collectionId: string;
 };
@@ -147,6 +139,7 @@ export type PushDatasetDataProps = {
   collectionId: string;
   data: PushDatasetDataChunkProps[];
   trainingType?: DatasetCollectionDataProcessModeEnum;
+  indexSize?: number;
   autoIndexes?: boolean;
   imageIndex?: boolean;
   prompt?: string;
@@ -120,6 +120,8 @@ export const DatasetCollectionSyncResultMap = {
 export enum DatasetCollectionDataProcessModeEnum {
   chunk = 'chunk',
   qa = 'qa',
+  backup = 'backup',
+
   auto = 'auto' // abandon
 }
 export const DatasetCollectionDataProcessModeMap = {
@@ -131,21 +133,35 @@ export const DatasetCollectionDataProcessModeMap = {
     label: i18nT('common:core.dataset.training.QA mode'),
     tooltip: i18nT('common:core.dataset.import.QA Import Tip')
   },
+  [DatasetCollectionDataProcessModeEnum.backup]: {
+    label: i18nT('dataset:backup_mode'),
+    tooltip: i18nT('dataset:backup_mode')
+  },
   [DatasetCollectionDataProcessModeEnum.auto]: {
     label: i18nT('common:core.dataset.training.Auto mode'),
     tooltip: i18nT('common:core.dataset.training.Auto mode Tip')
   }
 };

+export enum ChunkTriggerConfigTypeEnum {
+  minSize = 'minSize',
+  forceChunk = 'forceChunk',
+  maxSize = 'maxSize'
+}
 export enum ChunkSettingModeEnum {
   auto = 'auto',
   custom = 'custom'
 }

 export enum DataChunkSplitModeEnum {
+  paragraph = 'paragraph',
   size = 'size',
   char = 'char'
 }
+export enum ParagraphChunkAIModeEnum {
+  auto = 'auto',
+  force = 'force'
+}

 /* ------------ data -------------- */

@@ -154,7 +170,6 @@ export enum ImportDataSourceEnum {
   fileLocal = 'fileLocal',
   fileLink = 'fileLink',
   fileCustom = 'fileCustom',
-  csvTable = 'csvTable',
   externalFile = 'externalFile',
   apiDataset = 'apiDataset',
   reTraining = 'reTraining'
@@ -32,7 +32,7 @@ export const DatasetDataIndexMap: Record<
     color: 'red'
   },
   [DatasetDataIndexTypeEnum.image]: {
-    label: i18nT('common:data_index_image'),
+    label: i18nT('dataset:data_index_image'),
     color: 'purple'
   }
 };
@@ -118,9 +118,8 @@ export const computeChunkSize = (params: {
     return getLLMMaxChunkSize(params.llmModel);
   }

-  return Math.min(params.chunkSize || chunkAutoChunkSize, getLLMMaxChunkSize(params.llmModel));
+  return Math.min(params.chunkSize ?? chunkAutoChunkSize, getLLMMaxChunkSize(params.llmModel));
 };

 export const computeChunkSplitter = (params: {
   chunkSettingMode?: ChunkSettingModeEnum;
   chunkSplitMode?: DataChunkSplitModeEnum;
@@ -129,8 +128,21 @@ export const computeChunkSplitter = (params: {
   if (params.chunkSettingMode === ChunkSettingModeEnum.auto) {
     return undefined;
   }
-  if (params.chunkSplitMode === DataChunkSplitModeEnum.size) {
+  if (params.chunkSplitMode !== DataChunkSplitModeEnum.char) {
     return undefined;
   }
   return params.chunkSplitter;
 };
+export const computeParagraphChunkDeep = (params: {
+  chunkSettingMode?: ChunkSettingModeEnum;
+  chunkSplitMode?: DataChunkSplitModeEnum;
+  paragraphChunkDeep?: number;
+}) => {
+  if (params.chunkSettingMode === ChunkSettingModeEnum.auto) {
+    return 5;
+  }
+  if (params.chunkSplitMode === DataChunkSplitModeEnum.paragraph) {
+    return params.paragraphChunkDeep;
+  }
+  return 0;
+};
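Together the three helpers resolve the effective chunking parameters: `auto` mode pins defaults (auto chunk size, no custom splitter, paragraph depth 5), while `custom` mode honors the stored settings. Note also that `params.chunkSize || chunkAutoChunkSize` became `??`, so an explicit `0` now passes through instead of being coerced to the default. A hypothetical combined resolver mirroring the logic above (enum string values assumed from the constants file):

    const resolveChunkParams = (p: {
      chunkSettingMode?: 'auto' | 'custom';
      chunkSplitMode?: 'paragraph' | 'size' | 'char';
      paragraphChunkDeep?: number;
      chunkSplitter?: string;
    }) => ({
      // A custom splitter only applies to char-based splitting.
      splitter:
        p.chunkSettingMode === 'auto' || p.chunkSplitMode !== 'char' ? undefined : p.chunkSplitter,
      // Paragraph depth: fixed 5 in auto mode, the stored depth in paragraph mode,
      // and 0 (disabled) for size/char splitting.
      paragraphDeep:
        p.chunkSettingMode === 'auto'
          ? 5
          : p.chunkSplitMode === 'paragraph'
            ? p.paragraphChunkDeep
            : 0
    });

    // resolveChunkParams({ chunkSettingMode: 'custom', chunkSplitMode: 'char', chunkSplitter: '\n---' })
    // => { splitter: '\n---', paragraphDeep: 0 }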
packages/global/core/dataset/type.d.ts (48 changes, vendored)
@@ -8,26 +8,42 @@ import type {
   DatasetStatusEnum,
   DatasetTypeEnum,
   SearchScoreTypeEnum,
-  TrainingModeEnum
+  TrainingModeEnum,
+  ChunkSettingModeEnum,
+  ChunkTriggerConfigTypeEnum
 } from './constants';
 import type { DatasetPermission } from '../../support/permission/dataset/controller';
-import { Permission } from '../../support/permission/controller';
 import type { APIFileServer, FeishuServer, YuqueServer } from './apiDataset';
 import type { SourceMemberType } from 'support/user/type';
 import type { DatasetDataIndexTypeEnum } from './data/constants';
-import type { ChunkSettingModeEnum } from './constants';

 export type ChunkSettingsType = {
-  trainingType: DatasetCollectionDataProcessModeEnum;
-  autoIndexes?: boolean;
+  trainingType?: DatasetCollectionDataProcessModeEnum;
+
+  // Chunk trigger
+  chunkTriggerType?: ChunkTriggerConfigTypeEnum;
+  chunkTriggerMinSize?: number; // maxSize from agent model, not store
+
+  // Data enhance
+  dataEnhanceCollectionName?: boolean; // Auto add collection name to data
+
+  // Index enhance
   imageIndex?: boolean;
+  autoIndexes?: boolean;

-  chunkSettingMode?: ChunkSettingModeEnum;
+  // Chunk setting
+  chunkSettingMode?: ChunkSettingModeEnum; // system params / custom params
   chunkSplitMode?: DataChunkSplitModeEnum;
-
-  chunkSize?: number;
+  // Paragraph split
+  paragraphChunkAIMode?: ParagraphChunkAIModeEnum;
+  paragraphChunkDeep?: number; // Paragraph deep
+  paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
+  // Size split
+  chunkSize?: number; // chunk/qa chunk size, Paragraph max chunk size.
+  // Char split
+  chunkSplitter?: string; // chunk/qa chunk splitter
   indexSize?: number;
-  chunkSplitter?: string;
   qaPrompt?: string;
 };
@@ -66,7 +82,7 @@ export type DatasetSchemaType = {
   defaultPermission?: number;
 };

-export type DatasetCollectionSchemaType = {
+export type DatasetCollectionSchemaType = ChunkSettingsType & {
   _id: string;
   teamId: string;
   tmbId: string;
@@ -101,18 +117,7 @@ export type DatasetCollectionSchemaType = {

   // Parse settings
   customPdfParse?: boolean;
-  // Chunk settings
-  autoIndexes?: boolean;
-  imageIndex?: boolean;
   trainingType: DatasetCollectionDataProcessModeEnum;
-
-  chunkSettingMode?: ChunkSettingModeEnum;
-  chunkSplitMode?: DataChunkSplitModeEnum;
-
-  chunkSize?: number;
-  indexSize?: number;
-  chunkSplitter?: string;
-  qaPrompt?: string;
 };

 export type DatasetCollectionTagsSchemaType = {
@@ -175,6 +180,7 @@ export type DatasetTrainingSchemaType = {
   q: string;
   a: string;
   chunkIndex: number;
+  indexSize?: number;
   weight: number;
   indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
   retryCount: number;
@@ -40,5 +40,6 @@ export function getSourceNameIcon({
 export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {
   if (mode === TrainingModeEnum.qa) return data.length * 20;
   if (mode === TrainingModeEnum.auto) return data.length * 5;
+  if (mode === TrainingModeEnum.image) return data.length * 2;
   return data.length;
 };
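The new `image` branch budgets two training items per record, between plain chunking (×1) and `auto` (×5). A quick illustration of the multipliers:

    // Illustrative mirror of predictDataLimitLength; enum string values assumed.
    const predict = (mode: 'chunk' | 'qa' | 'auto' | 'image', data: unknown[]) => {
      if (mode === 'qa') return data.length * 20;
      if (mode === 'auto') return data.length * 5;
      if (mode === 'image') return data.length * 2; // new in this change
      return data.length;
    };

    predict('image', new Array(10)); // => 20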
@@ -7,7 +7,7 @@ import type {
 } from '../../chat/type';
 import { NodeOutputItemType } from '../../chat/type';
 import type { FlowNodeInputItemType, FlowNodeOutputItemType } from '../type/io.d';
-import type { StoreNodeItemType } from '../type/node';
+import type { NodeToolConfigType, StoreNodeItemType } from '../type/node';
 import type { DispatchNodeResponseKeyEnum } from './constants';
 import type { StoreEdgeItemType } from '../type/edge';
 import type { NodeInputKeyEnum } from '../constants';
@@ -102,6 +102,9 @@ export type RuntimeNodeItemType = {

   pluginId?: string; // workflow id / plugin id
   version?: string;
+
+  // tool
+  toolConfig?: NodeToolConfigType;
 };

 export type RuntimeEdgeItemType = StoreEdgeItemType & {
@@ -114,7 +117,7 @@ export type DispatchNodeResponseType = {
   runningTime?: number;
   query?: string;
   textOutput?: string;
-  error?: Record<string, any>;
+  error?: Record<string, any> | string;
   customInputs?: Record<string, any>;
   customOutputs?: Record<string, any>;
   nodeInputs?: Record<string, any>;
packages/global/core/workflow/type/node.d.ts (16 changes, vendored)
@@ -20,11 +20,17 @@ import { RuntimeNodeItemType } from '../runtime/type';
 import { PluginTypeEnum } from '../../plugin/constants';
 import { RuntimeEdgeItemType, StoreEdgeItemType } from './edge';
 import { NextApiResponse } from 'next';
-import { AppDetailType, AppSchema } from '../../app/type';
+import type { AppDetailType, AppSchema, McpToolConfigType } from '../../app/type';
 import type { ParentIdType } from 'common/parentFolder/type';
-import { AppTypeEnum } from 'core/app/constants';
+import { AppTypeEnum } from '../../app/constants';
 import type { WorkflowInteractiveResponseType } from '../template/system/interactive/type';

+export type NodeToolConfigType = {
+  mcpTool?: McpToolConfigType & {
+    url: string;
+  };
+};
+
 export type FlowNodeCommonType = {
   parentNodeId?: string;
   flowNodeType: FlowNodeTypeEnum; // render node card
@@ -46,12 +52,13 @@ export type FlowNodeCommonType = {
   // plugin data
   pluginId?: string;
   isFolder?: boolean;
-  // pluginType?: AppTypeEnum;
   pluginData?: PluginDataType;
+
+  // tool data
+  toolData?: NodeToolConfigType;
 };

 export type PluginDataType = {
-  version?: string;
   diagram?: string;
   userGuide?: string;
   courseUrl?: string;
@@ -118,6 +125,7 @@ export type FlowNodeItemType = FlowNodeTemplateType & {
   nodeId: string;
   parentNodeId?: string;
   isError?: boolean;
+  searchedText?: string;
   debugResult?: {
     status: 'running' | 'success' | 'skipped' | 'failed';
     message?: string;
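With `toolData` on stored nodes and `toolConfig` on runtime nodes, an MCP tool node can now carry its endpoint next to the tool schema. A hypothetical `NodeToolConfigType` value (all field values invented for illustration):

    const toolData /* : NodeToolConfigType */ = {
      mcpTool: {
        // McpToolConfigType fields
        name: 'search-docs',
        description: 'Search the documentation index',
        inputSchema: {
          type: 'object',
          properties: { query: { type: 'string', description: 'search terms' } },
          required: ['query']
        },
        // endpoint added by the intersection type above
        url: 'https://example.com/mcp'
      }
    };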
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4816",
   "name": "钉钉 webhook",
   "avatar": "plugins/dingding",
   "intro": "向钉钉机器人发起 webhook 请求。",
@@ -1,6 +1,5 @@
 {
   "author": "Menghuan1918",
-  "version": "488",
   "name": "PDF识别",
   "avatar": "plugins/doc2x",
   "intro": "将PDF文件发送至Doc2X进行解析,返回结构化的LaTeX公式的文本(markdown),支持传入String类型的URL或者流程输出中的文件链接变量",
@@ -1,6 +1,5 @@
 {
   "author": "Menghuan1918",
-  "version": "488",
   "name": "Doc2X服务",
   "avatar": "plugins/doc2x",
   "intro": "将传入的图片或PDF文件发送至Doc2X进行解析,返回带LaTeX公式的markdown格式的文本。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4816",
   "name": "企业微信 webhook",
   "avatar": "plugins/qiwei",
   "intro": "向企业微信机器人发起 webhook 请求。只能内部群使用。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4811",
   "name": "Bing搜索",
   "avatar": "core/workflow/template/bing",
   "intro": "在Bing中搜索。",
@@ -1,6 +1,5 @@
 {
   "author": "silencezhang",
-  "version": "4811",
   "name": "数据库连接",
   "avatar": "core/workflow/template/datasource",
   "intro": "可连接常用数据库,并执行sql",
@@ -1,6 +1,5 @@
 {
   "author": "collin",
-  "version": "4817",
   "name": "流程等待",
   "avatar": "core/workflow/template/sleep",
   "intro": "让工作流等待指定时间后运行",
@@ -1,6 +1,5 @@
 {
   "author": "silencezhang",
-  "version": "4817",
   "name": "基础图表",
   "avatar": "core/workflow/template/baseChart",
   "intro": "根据数据生成图表,可根据chartType生成柱状图,折线图,饼图",
@@ -1,6 +1,5 @@
 {
   "author": "silencezhang",
-  "version": "486",
   "name": "BI图表功能",
   "avatar": "core/workflow/template/BI",
   "intro": "BI图表功能,可以生成一些常用的图表,如饼图,柱状图,折线图等",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 网络搜索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行网络搜索",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 图片搜索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行图片搜索",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 新闻检索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行新闻检索",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 视频搜索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行视频搜索",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo服务",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "DuckDuckGo 服务,包含网络搜索、图片搜索、新闻搜索等。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "488",
   "name": "飞书 webhook",
   "avatar": "core/app/templates/plugin-feishu",
   "intro": "向飞书机器人发起 webhook 请求。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "网页内容抓取",
   "avatar": "core/workflow/template/fetchUrl",
   "intro": "可获取一个网页链接内容,并以 Markdown 格式输出,仅支持获取静态网站。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "481",
   "templateType": "tools",
   "name": "获取当前时间",
   "avatar": "core/workflow/template/getTime",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4811",
   "name": "Google搜索",
   "avatar": "core/workflow/template/google",
   "intro": "在google中搜索。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "数学公式执行",
   "avatar": "core/workflow/template/mathCall",
   "intro": "用于执行数学表达式的工具,通过 js 的 expr-eval 库运行表达式并返回结果。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4816",
   "name": "Search XNG 搜索",
   "avatar": "core/workflow/template/searxng",
   "intro": "使用 Search XNG 服务进行搜索。",
@@ -1,6 +1,5 @@
 {
   "author": "cloudpense",
-  "version": "1.0.0",
   "name": "Email 邮件发送",
   "avatar": "plugins/email",
   "intro": "通过SMTP协议发送电子邮件(nodemailer)",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "489",
   "name": "文本加工",
   "avatar": "/imgs/workflow/textEditor.svg",
   "intro": "可对固定或传入的文本进行加工后输出,非字符串类型数据最终会转成字符串类型。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4811",
   "name": "Wiki搜索",
   "avatar": "core/workflow/template/wiki",
   "intro": "在Wiki中查询释义。",
packages/service/common/api/type.d.ts (18 changes, vendored)
@@ -6,12 +6,6 @@ import type {
 } from '../../core/dataset/search/controller';
 import type { AuthOpenApiLimitProps } from '../../support/openapi/auth';
 import type { CreateUsageProps, ConcatUsageProps } from '@fastgpt/global/support/wallet/usage/api';
-import type {
-  GetProApiDatasetFileContentParams,
-  GetProApiDatasetFileDetailParams,
-  GetProApiDatasetFileListParams,
-  GetProApiDatasetFilePreviewUrlParams
-} from '../../core/dataset/apiDataset/proApi';

 declare global {
   var textCensorHandler: (params: { text: string }) => Promise<{ code: number; message?: string }>;
@@ -19,16 +13,4 @@ declare global {
   var authOpenApiHandler: (data: AuthOpenApiLimitProps) => Promise<any>;
   var createUsageHandler: (data: CreateUsageProps) => any;
   var concatUsageHandler: (data: ConcatUsageProps) => any;
-
-  // API dataset
-  var getProApiDatasetFileList: (data: GetProApiDatasetFileListParams) => Promise<APIFileItem[]>;
-  var getProApiDatasetFileContent: (
-    data: GetProApiDatasetFileContentParams
-  ) => Promise<ApiFileReadContentResponse>;
-  var getProApiDatasetFilePreviewUrl: (
-    data: GetProApiDatasetFilePreviewUrlParams
-  ) => Promise<string>;
-  var getProApiDatasetFileDetail: (
-    data: GetProApiDatasetFileDetailParams
-  ) => Promise<ApiDatasetDetailResponse>;
 }
packages/service/common/buffer/rawText/controller.ts (178 lines, new file)
@@ -0,0 +1,178 @@
+import { retryFn } from '@fastgpt/global/common/system/utils';
+import { connectionMongo } from '../../mongo';
+import { MongoRawTextBufferSchema, bucketName } from './schema';
+import { addLog } from '../../system/log';
+import { setCron } from '../../system/cron';
+import { checkTimerLock } from '../../system/timerLock/utils';
+import { TimerIdEnum } from '../../system/timerLock/constants';
+
+const getGridBucket = () => {
+  return new connectionMongo.mongo.GridFSBucket(connectionMongo.connection.db!, {
+    bucketName: bucketName
+  });
+};
+
+export const addRawTextBuffer = async ({
+  sourceId,
+  sourceName,
+  text,
+  expiredTime
+}: {
+  sourceId: string;
+  sourceName: string;
+  text: string;
+  expiredTime: Date;
+}) => {
+  const gridBucket = getGridBucket();
+  const metadata = {
+    sourceId,
+    sourceName,
+    expiredTime
+  };
+
+  const buffer = Buffer.from(text);
+
+  const fileSize = buffer.length;
+  // Chunk size: as large as possible, but at most 14MB and at least 128KB
+  const chunkSizeBytes = (() => {
+    // Ideal chunk size: file size / target chunk count (10), with each chunk under 14MB
+    const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);
+
+    // Ensure the chunk size is at least 128KB
+    const minChunkSize = 128 * 1024; // 128KB
+
+    // Take the larger of the ideal and minimum chunk sizes
+    let chunkSize = Math.max(idealChunkSize, minChunkSize);
+
+    // Round the chunk size up to the nearest multiple of 64KB to keep it tidy
+    chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
+
+    return chunkSize;
+  })();
+
+  const uploadStream = gridBucket.openUploadStream(sourceId, {
+    metadata,
+    chunkSizeBytes
+  });
+
+  return retryFn(async () => {
+    return new Promise((resolve, reject) => {
+      uploadStream.end(buffer);
+      uploadStream.on('finish', () => {
+        resolve(uploadStream.id);
+      });
+      uploadStream.on('error', (error) => {
+        addLog.error('addRawTextBuffer error', error);
+        resolve('');
+      });
+    });
+  });
+};
+
+export const getRawTextBuffer = async (sourceId: string) => {
+  const gridBucket = getGridBucket();
+
+  return retryFn(async () => {
+    const bufferData = await MongoRawTextBufferSchema.findOne(
+      {
+        'metadata.sourceId': sourceId
+      },
+      '_id metadata'
+    ).lean();
+    if (!bufferData) {
+      return null;
+    }
+
+    // Read file content
+    const downloadStream = gridBucket.openDownloadStream(bufferData._id);
+    const chunks: Buffer[] = [];
+
+    return new Promise<{
+      text: string;
+      sourceName: string;
+    } | null>((resolve, reject) => {
+      downloadStream.on('data', (chunk) => {
+        chunks.push(chunk);
+      });
+
+      downloadStream.on('end', () => {
+        const buffer = Buffer.concat(chunks);
+        const text = buffer.toString('utf8');
+        resolve({
+          text,
+          sourceName: bufferData.metadata?.sourceName || ''
+        });
+      });
+
+      downloadStream.on('error', (error) => {
+        addLog.error('getRawTextBuffer error', error);
+        resolve(null);
+      });
+    });
+  });
+};
+
+export const deleteRawTextBuffer = async (sourceId: string): Promise<boolean> => {
+  const gridBucket = getGridBucket();
+
+  return retryFn(async () => {
+    const buffer = await MongoRawTextBufferSchema.findOne({ 'metadata.sourceId': sourceId });
+    if (!buffer) {
+      return false;
+    }
+
+    await gridBucket.delete(buffer._id);
+    return true;
+  });
+};
+
+export const updateRawTextBufferExpiredTime = async ({
+  sourceId,
+  expiredTime
+}: {
+  sourceId: string;
+  expiredTime: Date;
+}) => {
+  return retryFn(async () => {
+    return MongoRawTextBufferSchema.updateOne(
+      { 'metadata.sourceId': sourceId },
+      { $set: { 'metadata.expiredTime': expiredTime } }
+    );
+  });
+};
+
+export const clearExpiredRawTextBufferCron = async () => {
+  const clearExpiredRawTextBuffer = async () => {
+    addLog.debug('Clear expired raw text buffer start');
+    const gridBucket = getGridBucket();
+
+    return retryFn(async () => {
+      const data = await MongoRawTextBufferSchema.find(
+        {
+          'metadata.expiredTime': { $lt: new Date() }
+        },
+        '_id'
+      ).lean();
+
+      for (const item of data) {
+        await gridBucket.delete(item._id);
+      }
+      addLog.debug('Clear expired raw text buffer end');
+    });
+  };
+
+  setCron('*/10 * * * *', async () => {
+    if (
+      await checkTimerLock({
+        timerId: TimerIdEnum.clearExpiredRawTextBuffer,
+        lockMinuted: 9
+      })
+    ) {
+      try {
+        await clearExpiredRawTextBuffer();
+      } catch (error) {
+        addLog.error('clearExpiredRawTextBufferCron error', error);
+      }
+    }
+  });
+};
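The new controller moves the raw-text cache from a single Mongo document (bounded by the 16MB BSON limit) into a GridFS bucket, with expiry handled by the cron above instead of a TTL index. A minimal usage sketch, assuming the exports of this new module (file name and expiry value illustrative):

    import { addMinutes } from 'date-fns';
    import { addRawTextBuffer, getRawTextBuffer } from './controller';

    const cacheParsedFile = async (sourceId: string, text: string) => {
      // Cache the parsed text for 20 minutes, matching the policy used later in this change.
      await addRawTextBuffer({
        sourceId,
        sourceName: 'report.pdf', // illustrative
        text,
        expiredTime: addMinutes(new Date(), 20)
      });

      const hit = await getRawTextBuffer(sourceId);
      return hit?.text;
    };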
@@ -1,33 +1,22 @@
-import { getMongoModel, Schema } from '../../mongo';
-import { type RawTextBufferSchemaType } from './type';
+import { getMongoModel, type Types, Schema } from '../../mongo';

-export const collectionName = 'buffer_rawtexts';
+export const bucketName = 'buffer_rawtext';

 const RawTextBufferSchema = new Schema({
-  sourceId: {
-    type: String,
-    required: true
-  },
-  rawText: {
-    type: String,
-    default: ''
-  },
-  createTime: {
-    type: Date,
-    default: () => new Date()
-  },
-  metadata: Object
+  metadata: {
+    sourceId: { type: String, required: true },
+    sourceName: { type: String, required: true },
+    expiredTime: { type: Date, required: true }
+  }
 });
+RawTextBufferSchema.index({ 'metadata.sourceId': 'hashed' });
+RawTextBufferSchema.index({ 'metadata.expiredTime': -1 });

-try {
-  RawTextBufferSchema.index({ sourceId: 1 });
-  // 20 minutes
-  RawTextBufferSchema.index({ createTime: 1 }, { expireAfterSeconds: 20 * 60 });
-} catch (error) {
-  console.log(error);
-}
-
-export const MongoRawTextBuffer = getMongoModel<RawTextBufferSchemaType>(
-  collectionName,
-  RawTextBufferSchema
-);
+export const MongoRawTextBufferSchema = getMongoModel<{
+  _id: Types.ObjectId;
+  metadata: {
+    sourceId: string;
+    sourceName: string;
+    expiredTime: Date;
+  };
+}>(`${bucketName}.files`, RawTextBufferSchema);
@@ -1,8 +0,0 @@
-export type RawTextBufferSchemaType = {
-  sourceId: string;
-  rawText: string;
-  createTime: Date;
-  metadata?: {
-    filename: string;
-  };
-};
@@ -6,13 +6,13 @@ import { type DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
 import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema';
 import { detectFileEncoding, detectFileEncodingByPath } from '@fastgpt/global/common/file/tools';
 import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
-import { MongoRawTextBuffer } from '../../buffer/rawText/schema';
 import { readRawContentByFileBuffer } from '../read/utils';
 import { gridFsStream2Buffer, stream2Encoding } from './utils';
 import { addLog } from '../../system/log';
-import { readFromSecondary } from '../../mongo/utils';
 import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
 import { Readable } from 'stream';
+import { addRawTextBuffer, getRawTextBuffer } from '../../buffer/rawText/controller';
+import { addMinutes } from 'date-fns';

 export function getGFSCollection(bucket: `${BucketNameEnum}`) {
   MongoDatasetFileSchema;
@@ -210,28 +210,26 @@ export const readFileContentFromMongo = async ({
   teamId,
   tmbId,
   bucketName,
   fileId,
-  isQAImport = false,
-  customPdfParse = false
+  customPdfParse = false,
+  getFormatText
 }: {
   teamId: string;
   tmbId: string;
   bucketName: `${BucketNameEnum}`;
   fileId: string;
-  isQAImport?: boolean;
   customPdfParse?: boolean;
+  getFormatText?: boolean; // convert the data to markdown format where possible
 }): Promise<{
   rawText: string;
   filename: string;
 }> => {
-  const bufferId = `${fileId}-${customPdfParse}`;
+  const bufferId = `${String(fileId)}-${customPdfParse}`;
   // read buffer
-  const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: bufferId }, undefined, {
-    ...readFromSecondary
-  }).lean();
+  const fileBuffer = await getRawTextBuffer(bufferId);
   if (fileBuffer) {
     return {
-      rawText: fileBuffer.rawText,
-      filename: fileBuffer.metadata?.filename || ''
+      rawText: fileBuffer.text,
+      filename: fileBuffer?.sourceName
     };
   }

@@ -254,8 +252,8 @@ export const readFileContentFromMongo = async ({
   // Get raw text
   const { rawText } = await readRawContentByFileBuffer({
     customPdfParse,
+    getFormatText,
     extension,
-    isQAImport,
     teamId,
     tmbId,
     buffer: fileBuffers,
@@ -265,16 +263,13 @@ export const readFileContentFromMongo = async ({
     }
   });

-  // < 14M
-  if (fileBuffers.length < 14 * 1024 * 1024 && rawText.trim()) {
-    MongoRawTextBuffer.create({
-      sourceId: bufferId,
-      rawText,
-      metadata: {
-        filename: file.filename
-      }
-    });
-  }
+  // Add buffer
+  addRawTextBuffer({
+    sourceId: bufferId,
+    sourceName: file.filename,
+    text: rawText,
+    expiredTime: addMinutes(new Date(), 20)
+  });

   return {
     rawText,
@@ -1,16 +1,16 @@
 import { Schema, getMongoModel } from '../../mongo';

-const DatasetFileSchema = new Schema({});
-const ChatFileSchema = new Schema({});
+const DatasetFileSchema = new Schema({
+  metadata: Object
+});
+const ChatFileSchema = new Schema({
+  metadata: Object
+});

-try {
-  DatasetFileSchema.index({ uploadDate: -1 });
-
-  ChatFileSchema.index({ uploadDate: -1 });
-  ChatFileSchema.index({ 'metadata.chatId': 1 });
-} catch (error) {
-  console.log(error);
-}
+DatasetFileSchema.index({ uploadDate: -1 });
+
+ChatFileSchema.index({ uploadDate: -1 });
+ChatFileSchema.index({ 'metadata.chatId': 1 });

 export const MongoDatasetFileSchema = getMongoModel('dataset.files', DatasetFileSchema);
 export const MongoChatFileSchema = getMongoModel('chat.files', ChatFileSchema);
@@ -1,5 +1,57 @@
 import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
 import { PassThrough } from 'stream';
+import { getGridBucket } from './controller';
+import { type BucketNameEnum } from '@fastgpt/global/common/file/constants';
+import { retryFn } from '@fastgpt/global/common/system/utils';
+
+export const createFileFromText = async ({
+  bucket,
+  filename,
+  text,
+  metadata
+}: {
+  bucket: `${BucketNameEnum}`;
+  filename: string;
+  text: string;
+  metadata: Record<string, any>;
+}) => {
+  const gridBucket = getGridBucket(bucket);
+
+  const buffer = Buffer.from(text);
+
+  const fileSize = buffer.length;
+  // Chunk size: as large as possible, but at most 14MB and at least 128KB
+  const chunkSizeBytes = (() => {
+    // Ideal chunk size: file size / target chunk count (10), with each chunk under 14MB
+    const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);
+
+    // Ensure the chunk size is at least 128KB
+    const minChunkSize = 128 * 1024; // 128KB
+
+    // Take the larger of the ideal and minimum chunk sizes
+    let chunkSize = Math.max(idealChunkSize, minChunkSize);
+
+    // Round the chunk size up to the nearest multiple of 64KB to keep it tidy
+    chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
+
+    return chunkSize;
+  })();
+
+  const uploadStream = gridBucket.openUploadStream(filename, {
+    metadata,
+    chunkSizeBytes
+  });
+
+  return retryFn(async () => {
+    return new Promise<{ fileId: string }>((resolve, reject) => {
+      uploadStream.end(buffer);
+      uploadStream.on('finish', () => {
+        resolve({ fileId: String(uploadStream.id) });
+      });
+      uploadStream.on('error', reject);
+    });
+  });
+};
+
 export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
   return new Promise<Buffer>((resolve, reject) => {
@@ -16,6 +16,7 @@ export type readRawTextByLocalFileParams = {
   path: string;
   encoding: string;
   customPdfParse?: boolean;
+  getFormatText?: boolean;
   metadata?: Record<string, any>;
 };
 export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams) => {
@@ -27,8 +28,8 @@ export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams

   return readRawContentByFileBuffer({
     extension,
-    isQAImport: false,
     customPdfParse: params.customPdfParse,
+    getFormatText: params.getFormatText,
     teamId: params.teamId,
     tmbId: params.tmbId,
     encoding: params.encoding,
@@ -46,7 +47,7 @@ export const readRawContentByFileBuffer = async ({
   encoding,
   metadata,
   customPdfParse = false,
-  isQAImport = false
+  getFormatText = true
 }: {
   teamId: string;
   tmbId: string;
@@ -57,8 +58,10 @@ export const readRawContentByFileBuffer = async ({
   metadata?: Record<string, any>;

   customPdfParse?: boolean;
-  isQAImport: boolean;
-}): Promise<ReadFileResponse> => {
+  getFormatText?: boolean;
+}): Promise<{
+  rawText: string;
+}> => {
   const systemParse = () =>
     runWorker<ReadFileResponse>(WorkerNameEnum.readFile, {
       extension,
@@ -107,7 +110,7 @@ export const readRawContentByFileBuffer = async ({

     return {
       rawText: text,
-      formatText: rawText,
+      formatText: text,
       imageList
     };
   };
@@ -149,7 +152,7 @@ export const readRawContentByFileBuffer = async ({
     return await systemParse();
   })();

-  addLog.debug(`Parse file success, time: ${Date.now() - start}ms. Uploading file image.`);
+  addLog.debug(`Parse file success, time: ${Date.now() - start}ms. `);

   // markdown data format
   if (imageList) {
@@ -176,16 +179,7 @@ export const readRawContentByFileBuffer = async ({
     });
   }

-  if (['csv', 'xlsx'].includes(extension)) {
-    // qa data
-    if (isQAImport) {
-      rawText = rawText || '';
-    } else {
-      rawText = formatText || rawText;
-    }
-  }
-
-  addLog.debug(`Upload file image success, time: ${Date.now() - start}ms`);
+  addLog.debug(`Upload file success, time: ${Date.now() - start}ms`);

-  return { rawText, formatText, imageList };
+  return { rawText: getFormatText ? formatText || rawText : rawText };
 };
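The `isQAImport` special-casing for csv/xlsx files is gone; callers now opt in or out of markdown-formatted output with `getFormatText`, which defaults to `true`. A simplified mirror of the final return (sample values invented):

    const pickRawText = (getFormatText: boolean, rawText: string, formatText?: string) =>
      getFormatText ? formatText || rawText : rawText;

    pickRawText(true, 'a,b,c', '| a | b | c |');  // '| a | b | c |' (markdown table)
    pickRawText(false, 'a,b,c', '| a | b | c |'); // 'a,b,c' (raw text kept)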
@@ -1,7 +1,10 @@
-import { getGlobalRedisCacheConnection } from './index';
+import { getGlobalRedisConnection } from './index';
 import { addLog } from '../system/log';
 import { retryFn } from '@fastgpt/global/common/system/utils';

+const redisPrefix = 'cache:';
+const getCacheKey = (key: string) => `${redisPrefix}${key}`;
+
 export enum CacheKeyEnum {
   team_vector_count = 'team_vector_count'
 }
@@ -13,12 +16,12 @@ export const setRedisCache = async (
 ) => {
   return await retryFn(async () => {
     try {
-      const redis = getGlobalRedisCacheConnection();
+      const redis = getGlobalRedisConnection();

       if (expireSeconds) {
-        await redis.set(key, data, 'EX', expireSeconds);
+        await redis.set(getCacheKey(key), data, 'EX', expireSeconds);
       } else {
-        await redis.set(key, data);
+        await redis.set(getCacheKey(key), data);
       }
     } catch (error) {
       addLog.error('Set cache error:', error);
@@ -28,11 +31,11 @@ export const setRedisCache = async (
 };

 export const getRedisCache = async (key: string) => {
-  const redis = getGlobalRedisCacheConnection();
-  return await retryFn(() => redis.get(key));
+  const redis = getGlobalRedisConnection();
+  return await retryFn(() => redis.get(getCacheKey(key)));
 };

 export const delRedisCache = async (key: string) => {
-  const redis = getGlobalRedisCacheConnection();
-  await retryFn(() => redis.del(key));
+  const redis = getGlobalRedisConnection();
+  await retryFn(() => redis.del(getCacheKey(key)));
 };
@@ -27,17 +27,26 @@ export const newWorkerRedisConnection = () => {
   return redis;
 };

-export const getGlobalRedisCacheConnection = () => {
-  if (global.redisCache) return global.redisCache;
+export const FASTGPT_REDIS_PREFIX = 'fastgpt:';
+export const getGlobalRedisConnection = () => {
+  if (global.redisClient) return global.redisClient;

-  global.redisCache = new Redis(REDIS_URL, { keyPrefix: 'fastgpt:cache:' });
+  global.redisClient = new Redis(REDIS_URL, { keyPrefix: FASTGPT_REDIS_PREFIX });

-  global.redisCache.on('connect', () => {
+  global.redisClient.on('connect', () => {
     addLog.info('Redis connected');
   });
-  global.redisCache.on('error', (error) => {
+  global.redisClient.on('error', (error) => {
     addLog.error('Redis connection error', error);
   });

-  return global.redisCache;
+  return global.redisClient;
+};
+
+export const getAllKeysByPrefix = async (key: string) => {
+  const redis = getGlobalRedisConnection();
+  const keys = (await redis.keys(`${FASTGPT_REDIS_PREFIX}${key}:*`)).map((key) =>
+    key.replace(FASTGPT_REDIS_PREFIX, '')
+  );
+  return keys;
 };
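After the rename, every key goes through one shared connection carrying the `fastgpt:` ioredis `keyPrefix`, and cache entries layer their own `cache:` prefix on top; `getAllKeysByPrefix` scans with the full prefix but strips the connection part before returning. A small sketch of the layering (key names illustrative):

    const FASTGPT_REDIS_PREFIX = 'fastgpt:';
    const getCacheKey = (key: string) => `cache:${key}`;

    // Key actually stored in Redis for a cache entry:
    FASTGPT_REDIS_PREFIX + getCacheKey('team_vector_count:team1');
    // => 'fastgpt:cache:team_vector_count:team1'

    // What getAllKeysByPrefix('cache:team_vector_count') would return after
    // stripping the connection prefix:
    ['fastgpt:cache:team_vector_count:team1'].map((k) => k.replace(FASTGPT_REDIS_PREFIX, ''));
    // => ['cache:team_vector_count:team1']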
packages/service/common/redis/type.d.ts (2 changes, vendored)
@@ -1,5 +1,5 @@
 import type Redis from 'ioredis';

 declare global {
-  var redisCache: Redis | null;
+  var redisClient: Redis | null;
 }
@@ -10,6 +10,7 @@ let jieba: Jieba | undefined;
 })();

 const stopWords = new Set([
+  '\n',
   '--',
   '?',
   '“',
@@ -1519,8 +1520,7 @@ const stopWords = new Set([
 ]);

 export async function jiebaSplit({ text }: { text: string }) {
-  text = text.replace(/[#*`_~>[\](){}|]/g, '').replace(/\S*https?\S*/gi, '');
+  text = text.replace(/[#*`_~>[\](){}|]|\S*https?\S*/g, '').trim();

   const tokens = (await jieba!.cutAsync(text, true)) as string[];

   return (
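The two chained `replace` calls collapse into one alternation and the result is trimmed; note the old URL pattern carried the `i` flag, so the combined `g`-only regex now only strips lowercase `http(s)` tokens. A quick check of the new pattern (sample input invented):

    const clean = (text: string) =>
      text.replace(/[#*`_~>[\](){}|]|\S*https?\S*/g, '').trim();

    clean('# Title see https://example.com *now* ');
    // => 'Title see  now'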
@@ -2,26 +2,44 @@ import { SystemConfigsTypeEnum } from '@fastgpt/global/common/system/config/constants';
 import { MongoSystemConfigs } from './schema';
 import { type FastGPTConfigFileType } from '@fastgpt/global/common/system/types';
 import { FastGPTProUrl } from '../constants';
+import { type LicenseDataType } from '@fastgpt/global/common/system/types';

-export const getFastGPTConfigFromDB = async () => {
+export const getFastGPTConfigFromDB = async (): Promise<{
+  fastgptConfig: FastGPTConfigFileType;
+  licenseData?: LicenseDataType;
+}> => {
   if (!FastGPTProUrl) {
     return {
-      config: {} as FastGPTConfigFileType
+      fastgptConfig: {} as FastGPTConfigFileType
     };
   }

-  const res = await MongoSystemConfigs.findOne({
-    type: SystemConfigsTypeEnum.fastgpt
-  }).sort({
-    createTime: -1
-  });
+  const [fastgptConfig, licenseConfig] = await Promise.all([
+    MongoSystemConfigs.findOne({
+      type: SystemConfigsTypeEnum.fastgpt
+    }).sort({
+      createTime: -1
+    }),
+    MongoSystemConfigs.findOne({
+      type: SystemConfigsTypeEnum.license
+    }).sort({
+      createTime: -1
+    })
+  ]);

-  const config = res?.value || {};
+  const config = fastgptConfig?.value || {};
+  const licenseData = licenseConfig?.value?.data as LicenseDataType | undefined;

+  const fastgptConfigTime = fastgptConfig?.createTime.getTime().toString();
+  const licenseConfigTime = licenseConfig?.createTime.getTime().toString();
   // Use the config file's create time (i.e. its update time) as a cache id; if the front end hits the cache, the config file does not need to be returned again
-  global.systemInitBufferId = res ? res.createTime.getTime().toString() : undefined;
+  global.systemInitBufferId = fastgptConfigTime
+    ? `${fastgptConfigTime}-${licenseConfigTime}`
+    : undefined;

   return {
-    config: config as FastGPTConfigFileType
+    fastgptConfig: config as FastGPTConfigFileType,
+    licenseData
   };
 };
@@ -57,14 +57,19 @@ export const addLog = {
|
|||||||
|
|
||||||
level === LogLevelEnum.error && console.error(obj);
|
level === LogLevelEnum.error && console.error(obj);
|
||||||
|
|
||||||
// store
|
|
||||||
if (level >= STORE_LOG_LEVEL && connectionMongo.connection.readyState === 1) {
|
|
||||||
// store log
|
// store log
|
||||||
getMongoLog().create({
|
if (level >= STORE_LOG_LEVEL && connectionMongo.connection.readyState === 1) {
|
||||||
|
(async () => {
|
||||||
|
try {
|
||||||
|
await getMongoLog().create({
|
||||||
text: msg,
|
text: msg,
|
||||||
level,
|
level,
|
||||||
metadata: obj
|
metadata: obj
|
||||||
});
|
});
|
||||||
|
} catch (error) {
|
||||||
|
console.error('store log error', error);
|
||||||
|
}
|
||||||
|
})();
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
debug(msg: string, obj?: Record<string, any>) {
|
debug(msg: string, obj?: Record<string, any>) {
|
||||||
|
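Note: the Mongo write is now wrapped in a fire-and-forget async IIFE so a storage failure can't take down the logging call path. The same pattern in isolation (names illustrative):

const fireAndForget = (task: () => Promise<void>) => {
  void (async () => {
    try {
      await task();
    } catch (error) {
      console.error('background task error', error);
    }
  })();
};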
@@ -5,7 +5,8 @@ export enum TimerIdEnum {
   clearExpiredSubPlan = 'clearExpiredSubPlan',
   updateStandardPlan = 'updateStandardPlan',
   scheduleTriggerApp = 'scheduleTriggerApp',
-  notification = 'notification'
+  notification = 'notification',
+  clearExpiredRawTextBuffer = 'clearExpiredRawTextBuffer'
 }

 export enum LockNotificationEnum {
@@ -188,6 +188,7 @@ export class PgVectorCtrl {
     const results: any = await PgClient.query(
       `BEGIN;
         SET LOCAL hnsw.ef_search = ${global.systemEnv?.hnswEfSearch || 100};
+        SET LOCAL hnsw.max_scan_tuples = ${global.systemEnv?.hnswMaxScanTuples || 100000};
         SET LOCAL hnsw.iterative_scan = relaxed_order;
         WITH relaxed_results AS MATERIALIZED (
           select id, collection_id, vector <#> '[${vector}]' AS score
@@ -199,7 +200,7 @@ export class PgVectorCtrl {
         ) SELECT id, collection_id, score FROM relaxed_results ORDER BY score;
       COMMIT;`
     );
-    const rows = results?.[3]?.rows as PgSearchRawType[];
+    const rows = results?.[results.length - 2]?.rows as PgSearchRawType[];

     if (!Array.isArray(rows)) {
       return {
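Note: node-postgres returns one result object per statement when the query text contains multiple statements, so the hardcoded results[3] broke as soon as a third SET was added. Indexing from the end keeps the SELECT stable (assuming COMMIT stays the final statement):

// BEGIN, SET, SET, SET, SELECT, COMMIT -> results.length === 6
const selectRows = results[results.length - 2]?.rows; // always the SELECT before COMMIT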
@@ -78,7 +78,7 @@ export const createChatCompletion = async ({
   }
   body.model = modelConstantsData.model;

-  const formatTimeout = timeout ? timeout : body.stream ? 60000 : 600000;
+  const formatTimeout = timeout ? timeout : 600000;
   const ai = getAIApi({
     userKey,
     timeout: formatTimeout
@@ -1,6 +1,54 @@
 {
   "provider": "Claude",
   "list": [
+    {
+      "model": "claude-sonnet-4-20250514",
+      "name": "claude-sonnet-4-20250514",
+      "maxContext": 200000,
+      "maxResponse": 8000,
+      "quoteMaxToken": 100000,
+      "maxTemperature": 1,
+      "showTopP": true,
+      "showStopSign": true,
+      "vision": true,
+      "toolChoice": true,
+      "functionCall": false,
+      "defaultSystemChatPrompt": "",
+      "datasetProcess": true,
+      "usedInClassify": true,
+      "customCQPrompt": "",
+      "usedInExtractFields": true,
+      "usedInQueryExtension": true,
+      "customExtractPrompt": "",
+      "usedInToolCall": true,
+      "defaultConfig": {},
+      "fieldMap": {},
+      "type": "llm"
+    },
+    {
+      "model": "claude-opus-4-20250514",
+      "name": "claude-opus-4-20250514",
+      "maxContext": 200000,
+      "maxResponse": 4096,
+      "quoteMaxToken": 100000,
+      "maxTemperature": 1,
+      "showTopP": true,
+      "showStopSign": true,
+      "vision": true,
+      "toolChoice": true,
+      "functionCall": false,
+      "defaultSystemChatPrompt": "",
+      "datasetProcess": true,
+      "usedInClassify": true,
+      "customCQPrompt": "",
+      "usedInExtractFields": true,
+      "usedInQueryExtension": true,
+      "customExtractPrompt": "",
+      "usedInToolCall": true,
+      "defaultConfig": {},
+      "fieldMap": {},
+      "type": "llm"
+    },
     {
       "model": "claude-3-7-sonnet-20250219",
       "name": "claude-3-7-sonnet-20250219",
@@ -25,6 +25,30 @@
     "showTopP": true,
     "showStopSign": true
   },
+  {
+    "model": "gemini-2.5-flash-preview-04-17",
+    "name": "gemini-2.5-flash-preview-04-17",
+    "maxContext": 1000000,
+    "maxResponse": 8000,
+    "quoteMaxToken": 60000,
+    "maxTemperature": 1,
+    "vision": true,
+    "toolChoice": true,
+    "functionCall": false,
+    "defaultSystemChatPrompt": "",
+    "datasetProcess": true,
+    "usedInClassify": true,
+    "customCQPrompt": "",
+    "usedInExtractFields": true,
+    "usedInQueryExtension": true,
+    "customExtractPrompt": "",
+    "usedInToolCall": true,
+    "defaultConfig": {},
+    "fieldMap": {},
+    "type": "llm",
+    "showTopP": true,
+    "showStopSign": true
+  },
   {
     "model": "gemini-2.0-flash",
     "name": "gemini-2.0-flash",
@@ -18,15 +18,17 @@ import json5 from 'json5';
  */
 export const computedMaxToken = ({
   maxToken,
-  model
+  model,
+  min
 }: {
   maxToken?: number;
   model: LLMModelItemType;
+  min?: number;
 }) => {
   if (maxToken === undefined) return;

   maxToken = Math.min(maxToken, model.maxResponse);
-  return maxToken;
+  return Math.max(maxToken, min || 0);
 };

 // FastGPT temperature range: [0,10], ai temperature:[0,2],{0,1]……
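Note: the new `min` parameter is a floor applied after the maxResponse cap, so it wins even when it exceeds the model's cap. A reduced standalone sketch:

const computedMaxTokenSketch = ({
  maxToken,
  model,
  min
}: {
  maxToken?: number;
  model: { maxResponse: number };
  min?: number;
}) => {
  if (maxToken === undefined) return;
  // Cap to the model's max response first, then apply the floor.
  return Math.max(Math.min(maxToken, model.maxResponse), min || 0);
};

computedMaxTokenSketch({ maxToken: 100, model: { maxResponse: 4096 }, min: 2000 }); // 2000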
@@ -135,12 +137,14 @@ export const llmStreamResponseToAnswerText = async (

   // Tool calls
   if (responseChoice?.tool_calls?.length) {
-    responseChoice.tool_calls.forEach((toolCall) => {
-      const index = toolCall.index;
+    responseChoice.tool_calls.forEach((toolCall, i) => {
+      const index = toolCall.index ?? i;

-      if (toolCall.id || callingTool) {
-        // An id means a new tool call
-        if (toolCall.id) {
+      // Call new tool
+      const hasNewTool = toolCall?.function?.name || callingTool;
+      if (hasNewTool) {
+        // A function name means a new tool call
+        if (toolCall?.function?.name) {
           callingTool = {
             name: toolCall.function?.name || '',
             arguments: toolCall.function?.arguments || ''
@@ -176,7 +180,7 @@ export const llmStreamResponseToAnswerText = async (
     }
   }
   return {
-    text: parseReasoningContent(answer)[1],
+    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
     usage,
     toolCalls
   };
@@ -190,8 +194,9 @@ export const llmUnStreamResponseToAnswerText = async (
 }> => {
   const answer = response.choices?.[0]?.message?.content || '';
   const toolCalls = response.choices?.[0]?.message?.tool_calls;
+
   return {
-    text: answer,
+    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
     usage: response.usage,
     toolCalls
   };
@@ -221,7 +226,9 @@ export const parseReasoningContent = (text: string): [string, string] => {
 };

 export const removeDatasetCiteText = (text: string, retainDatasetCite: boolean) => {
-  return retainDatasetCite ? text : text.replace(/\[([a-f0-9]{24})\](?:\([^\)]*\)?)?/g, '');
+  return retainDatasetCite
+    ? text.replace(/\[id\]\(CITE\)/g, '')
+    : text.replace(/\[([a-f0-9]{24})\](?:\([^\)]*\)?)?/g, '').replace(/\[id\]\(CITE\)/g, '');
 };

 // Parse llm stream part
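Note: both branches now strip the literal [id](CITE) placeholder; only the `retainDatasetCite === false` branch also strips real 24-hex cite markers. Illustrative inputs (the id below is a made-up 24-hex string):

const id = '0123456789abcdef01234567';
removeDatasetCiteText(`a [${id}](q) b [id](CITE)`, true);  // 'a [0123456789abcdef01234567](q) b '
removeDatasetCiteText(`a [${id}](q) b [id](CITE)`, false); // 'a  b '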
@@ -236,6 +243,12 @@ export const parseLLMStreamResponse = () => {
   let citeBuffer = '';
   const maxCiteBufferLength = 32; // total length of [Object](CITE) is 32

+  // Buffer
+  let buffer_finishReason: CompletionFinishReason = null;
+  let buffer_usage: CompletionUsage = getLLMDefaultUsage();
+  let buffer_reasoningContent = '';
+  let buffer_content = '';
+
   /*
     parseThinkTag - only controls whether <think></think> is actively parsed; if the API has already parsed it, it is not parsed again.
     retainDatasetCite -
@@ -253,6 +266,7 @@ export const parseLLMStreamResponse = () => {
       };
       finish_reason?: CompletionFinishReason;
     }[];
+    usage?: CompletionUsage;
   };
   parseThinkTag?: boolean;
   retainDatasetCite?: boolean;
@@ -262,26 +276,25 @@ export const parseLLMStreamResponse = () => {
     responseContent: string;
     finishReason: CompletionFinishReason;
   } => {
+    const data = (() => {
+      buffer_usage = part.usage || buffer_usage;
+
       const finishReason = part.choices?.[0]?.finish_reason || null;
+      buffer_finishReason = finishReason || buffer_finishReason;
+
       const content = part.choices?.[0]?.delta?.content || '';
       // @ts-ignore
       const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
-      const isStreamEnd = !!finishReason;
+      const isStreamEnd = !!buffer_finishReason;

       // Parse think
-      const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } = (() => {
+      const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } =
+        (() => {
           if (reasoningContent || !parseThinkTag) {
             isInThinkTag = false;
             return { reasoningContent, content };
           }

-          if (!content) {
-            return {
-              reasoningContent: '',
-              content: ''
-            };
-          }
-
           // If not inside a think tag, or reasoningContent exists (already parsed by the API), return reasoningContent and content
           if (isInThinkTag === false) {
             return {
@@ -398,7 +411,7 @@ export const parseLLMStreamResponse = () => {
         reasoningContent: parsedThinkReasoningContent,
         content: parsedThinkContent,
         responseContent: parsedThinkContent,
-        finishReason
+        finishReason: buffer_finishReason
       };
     }

@@ -451,11 +464,32 @@ export const parseLLMStreamResponse = () => {
       reasoningContent: parsedThinkReasoningContent,
       content: parsedThinkContent,
       responseContent: pasedCiteContent,
-      finishReason
+      finishReason: buffer_finishReason
+      };
+    })();
+
+    buffer_reasoningContent += data.reasoningContent;
+    buffer_content += data.content;
+
+    return data;
+  };
+
+  const getResponseData = () => {
+    return {
+      finish_reason: buffer_finishReason,
+      usage: buffer_usage,
+      reasoningContent: buffer_reasoningContent,
+      content: buffer_content
     };
   };

+  const updateFinishReason = (finishReason: CompletionFinishReason) => {
+    buffer_finishReason = finishReason;
+  };
+
   return {
-    parsePart
+    parsePart,
+    getResponseData,
+    updateFinishReason
   };
 };
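Note: a hedged consumer sketch of the new accumulator API; the parsePart argument shape is assumed from the props type above:

const parser = parseLLMStreamResponse();
for await (const part of stream) {
  const { responseContent } = parser.parsePart({ part, retainDatasetCite: false });
  // forward responseContent to the client as it arrives
}
// After the stream ends, read the buffered totals in one place:
const { finish_reason, usage, reasoningContent, content } = parser.getResponseData();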
@@ -11,40 +11,6 @@ export const beforeUpdateAppFormat = <T extends AppSchema['modules'] | undefined
   nodes: T;
   isPlugin: boolean;
 }) => {
-  if (nodes) {
-    // Check dataset maxTokens
-    if (isPlugin) {
-      let maxTokens = 16000;
-
-      nodes.forEach((item) => {
-        if (
-          item.flowNodeType === FlowNodeTypeEnum.chatNode ||
-          item.flowNodeType === FlowNodeTypeEnum.tools
-        ) {
-          const model =
-            item.inputs.find((item) => item.key === NodeInputKeyEnum.aiModel)?.value || '';
-          const chatModel = getLLMModel(model);
-          const quoteMaxToken = chatModel.quoteMaxToken || 16000;
-
-          maxTokens = Math.max(maxTokens, quoteMaxToken);
-        }
-      });
-
-      nodes.forEach((item) => {
-        if (item.flowNodeType === FlowNodeTypeEnum.datasetSearchNode) {
-          item.inputs.forEach((input) => {
-            if (input.key === NodeInputKeyEnum.datasetMaxTokens) {
-              const val = input.value as number;
-              if (val > maxTokens) {
-                input.value = maxTokens;
-              }
-            }
-          });
-        }
-      });
-    }
-  }
-
   return {
     nodes
   };
@@ -1,7 +1,7 @@
 import { Client } from '@modelcontextprotocol/sdk/client/index.js';
 import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
 import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
-import { type ToolType } from '@fastgpt/global/core/app/type';
+import { type McpToolConfigType } from '@fastgpt/global/core/app/type';
 import { addLog } from '../../common/system/log';
 import { retryFn } from '@fastgpt/global/common/system/utils';

@@ -41,7 +41,7 @@ export class MCPClient {
    * Get available tools list
    * @returns List of tools
    */
-  public async getTools(): Promise<ToolType[]> {
+  public async getTools(): Promise<McpToolConfigType[]> {
     try {
       const client = await this.getConnection();
       const response = await client.listTools();
@@ -22,8 +22,7 @@ import {
 import { type PluginRuntimeType } from '@fastgpt/global/core/plugin/type';
 import { MongoSystemPlugin } from './systemPluginSchema';
 import { PluginErrEnum } from '@fastgpt/global/common/error/code/plugin';
-import { MongoAppVersion } from '../version/schema';
-import { i18nT } from '../../../../web/i18n/utils';
+import { Types } from 'mongoose';

 /*
   plugin id rule:
@@ -31,8 +30,7 @@ import { i18nT } from '../../../../web/i18n/utils';
   community: community-id
   commercial: commercial-id
 */
-export async function splitCombinePluginId(id: string) {
+export function splitCombineToolId(id: string) {
   const splitRes = id.split('-');
   if (splitRes.length === 1) {
     // app id
@@ -43,7 +41,7 @@ export async function splitCombinePluginId(id: string) {
   }

   const [source, pluginId] = id.split('-') as [PluginSourceEnum, string];
-  if (!source || !pluginId) return Promise.reject('pluginId not found');
+  if (!source || !pluginId) throw new Error('pluginId not found');

   return { source, pluginId: id };
 }
@@ -55,7 +53,7 @@ const getSystemPluginTemplateById = async (
   versionId?: string
 ): Promise<ChildAppType> => {
   const item = getSystemPluginTemplates().find((plugin) => plugin.id === pluginId);
-  if (!item) return Promise.reject(PluginErrEnum.unAuth);
+  if (!item) return Promise.reject(PluginErrEnum.unExist);

   const plugin = cloneDeep(item);

@@ -65,10 +63,10 @@ const getSystemPluginTemplateById = async (
       { pluginId: plugin.id, 'customConfig.associatedPluginId': plugin.associatedPluginId },
       'associatedPluginId'
     ).lean();
-    if (!systemPlugin) return Promise.reject(PluginErrEnum.unAuth);
+    if (!systemPlugin) return Promise.reject(PluginErrEnum.unExist);

     const app = await MongoApp.findById(plugin.associatedPluginId).lean();
-    if (!app) return Promise.reject(PluginErrEnum.unAuth);
+    if (!app) return Promise.reject(PluginErrEnum.unExist);

     const version = versionId
       ? await getAppVersionById({
@@ -78,6 +76,12 @@ const getSystemPluginTemplateById = async (
         })
       : await getAppLatestVersion(plugin.associatedPluginId, app);
     if (!version.versionId) return Promise.reject('App version not found');
+    const isLatest = version.versionId
+      ? await checkIsLatestVersion({
+          appId: plugin.associatedPluginId,
+          versionId: version.versionId
+        })
+      : true;

     return {
       ...plugin,
@@ -86,12 +90,19 @@ const getSystemPluginTemplateById = async (
         edges: version.edges,
         chatConfig: version.chatConfig
       },
-      version: versionId || String(version.versionId),
+      version: versionId ? version?.versionId : '',
+      versionLabel: version?.versionName,
+      isLatestVersion: isLatest,
       teamId: String(app.teamId),
       tmbId: String(app.tmbId)
     };
   }
-  return plugin;
+
+  return {
+    ...plugin,
+    version: undefined,
+    isLatestVersion: true
+  };
 };

 /* Format plugin to workflow preview node data */
@@ -103,25 +114,19 @@ export async function getChildAppPreviewNode({
   versionId?: string;
 }): Promise<FlowNodeTemplateType> {
   const app: ChildAppType = await (async () => {
-    const { source, pluginId } = await splitCombinePluginId(appId);
+    const { source, pluginId } = splitCombineToolId(appId);

     if (source === PluginSourceEnum.personal) {
       const item = await MongoApp.findById(appId).lean();
-      if (!item) return Promise.reject('plugin not found');
+      if (!item) return Promise.reject(PluginErrEnum.unExist);

       const version = await getAppVersionById({ appId, versionId, app: item });

-      if (!version.versionId) return Promise.reject(i18nT('common:app_not_version'));
-
-      const versionData = await MongoAppVersion.findById(
-        version.versionId,
-        '_id versionName appId time'
-      ).lean();
-
-      const isLatest = versionData
+      const isLatest =
+        version.versionId && Types.ObjectId.isValid(version.versionId)
           ? await checkIsLatestVersion({
               appId,
-              versionId: versionData._id
+              versionId: version.versionId
             })
           : true;

@@ -139,8 +144,8 @@ export async function getChildAppPreviewNode({
       },
       templateType: FlowNodeTemplateTypeEnum.teamApp,

-      version: version.versionId,
-      versionLabel: versionData?.versionName || '',
+      version: versionId ? version?.versionId : '',
+      versionLabel: version?.versionName,
       isLatestVersion: isLatest,

       originCost: 0,
@@ -149,7 +154,7 @@ export async function getChildAppPreviewNode({
       pluginOrder: 0
     };
   } else {
-    return getSystemPluginTemplateById(pluginId);
+    return getSystemPluginTemplateById(pluginId, versionId);
   }
   })();

@@ -223,12 +228,12 @@ export async function getChildAppRuntimeById(
   id: string,
   versionId?: string
 ): Promise<PluginRuntimeType> {
-  const app: ChildAppType = await (async () => {
-    const { source, pluginId } = await splitCombinePluginId(id);
+  const app = await (async () => {
+    const { source, pluginId } = splitCombineToolId(id);

     if (source === PluginSourceEnum.personal) {
       const item = await MongoApp.findById(id).lean();
-      if (!item) return Promise.reject('plugin not found');
+      if (!item) return Promise.reject(PluginErrEnum.unExist);

       const version = await getAppVersionById({
         appId: id,
@@ -251,8 +256,6 @@ export async function getChildAppRuntimeById(
       },
       templateType: FlowNodeTemplateTypeEnum.teamApp,

-      // unused
-      version: item?.pluginData?.nodeVersion,
       originCost: 0,
       currentCost: 0,
       hasTokenFee: false,
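Note: the rename from splitCombinePluginId also drops the needless async. For a combined id the sync form returns the source prefix plus the full id (the tool name below is hypothetical):

const { source, pluginId } = splitCombineToolId('community-searchTool');
// source === 'community', pluginId === 'community-searchTool'
// Malformed ids now throw synchronously instead of returning a rejected promise.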
@@ -1,6 +1,6 @@
 import { type ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
 import { type PluginRuntimeType } from '@fastgpt/global/core/plugin/type';
-import { splitCombinePluginId } from './controller';
+import { splitCombineToolId } from './controller';
 import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';

 /*
@@ -20,7 +20,7 @@ export const computedPluginUsage = async ({
   childrenUsage: ChatNodeUsageType[];
   error?: boolean;
 }) => {
-  const { source } = await splitCombinePluginId(plugin.id);
+  const { source } = splitCombineToolId(plugin.id);
   const childrenUsages = childrenUsage.reduce((sum, item) => sum + (item.totalPoints || 0), 0);

   if (source !== PluginSourceEnum.personal) {
@@ -119,6 +119,7 @@ const AppSchema = new Schema({
   defaultPermission: Number
 });

+AppSchema.index({ type: 1 });
 AppSchema.index({ teamId: 1, updateTime: -1 });
 AppSchema.index({ teamId: 1, type: 1 });
 AppSchema.index(
@@ -1,14 +1,13 @@
 import { MongoDataset } from '../dataset/schema';
 import { getEmbeddingModel } from '../ai/model';
-import {
-  AppNodeFlowNodeTypeMap,
-  FlowNodeTypeEnum
-} from '@fastgpt/global/core/workflow/node/constant';
+import { FlowNodeTypeEnum } from '@fastgpt/global/core/workflow/node/constant';
 import { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
 import type { StoreNodeItemType } from '@fastgpt/global/core/workflow/type/node';
-import { MongoAppVersion } from './version/schema';
-import { checkIsLatestVersion } from './version/controller';
-import { Types } from '../../common/mongo';
+import { getChildAppPreviewNode, splitCombineToolId } from './plugin/controller';
+import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
+import { authAppByTmbId } from '../../support/permission/app/auth';
+import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
+import { getErrText } from '@fastgpt/global/common/error/utils';

 export async function listAppDatasetDataByTeamIdAndDatasetIds({
   teamId,
@@ -33,52 +32,58 @@ export async function listAppDatasetDataByTeamIdAndDatasetIds({
 export async function rewriteAppWorkflowToDetail({
   nodes,
   teamId,
-  isRoot
+  isRoot,
+  ownerTmbId
 }: {
   nodes: StoreNodeItemType[];
   teamId: string;
   isRoot: boolean;
+  ownerTmbId: string;
 }) {
   const datasetIdSet = new Set<string>();

-  // Add node(App Type) versionlabel and latest sign
-  const appNodes = nodes.filter((node) => AppNodeFlowNodeTypeMap[node.flowNodeType]);
-  const versionIds = appNodes
-    .filter((node) => node.version && Types.ObjectId.isValid(node.version))
-    .map((node) => node.version);
-  if (versionIds.length > 0) {
-    const versionDataList = await MongoAppVersion.find(
-      {
-        _id: { $in: versionIds }
-      },
-      '_id versionName appId time'
-    ).lean();
-
-    const versionMap: Record<string, any> = {};
-    const isLatestChecks = await Promise.all(
-      versionDataList.map(async (version) => {
-        const isLatest = await checkIsLatestVersion({
-          appId: version.appId,
-          versionId: version._id
-        });
-        return { versionId: String(version._id), isLatest };
-      })
-    );
-    const isLatestMap = new Map(isLatestChecks.map((item) => [item.versionId, item.isLatest]));
-    versionDataList.forEach((version) => {
-      versionMap[String(version._id)] = version;
-    });
-    appNodes.forEach((node) => {
-      if (!node.version) return;
-      const versionData = versionMap[String(node.version)];
-      if (versionData) {
-        node.versionLabel = versionData.versionName;
-        node.isLatestVersion = isLatestMap.get(String(node.version)) || false;
-      }
-    });
-  }
+  /* Add node(App Type) versionlabel and latest sign ==== */
+  await Promise.all(
+    nodes.map(async (node) => {
+      if (!node.pluginId) return;
+      const { source } = splitCombineToolId(node.pluginId);
+
+      try {
+        const [preview] = await Promise.all([
+          getChildAppPreviewNode({
+            appId: node.pluginId,
+            versionId: node.version
+          }),
+          ...(source === PluginSourceEnum.personal
+            ? [
+                authAppByTmbId({
+                  tmbId: ownerTmbId,
+                  appId: node.pluginId,
+                  per: ReadPermissionVal
+                })
+              ]
+            : [])
+        ]);
+
+        node.pluginData = {
+          diagram: preview.diagram,
+          userGuide: preview.userGuide,
+          courseUrl: preview.courseUrl,
+          name: preview.name,
+          avatar: preview.avatar
+        };
+        node.versionLabel = preview.versionLabel;
+        node.isLatestVersion = preview.isLatestVersion;
+        node.version = preview.version;
+      } catch (error) {
+        node.pluginData = {
+          error: getErrText(error)
+        };
+      }
+    })
+  );
+  /* Add node(App Type) versionlabel and latest sign ==== */

   // Get all dataset ids from nodes
   nodes.forEach((node) => {
@@ -15,6 +15,7 @@ export const getAppLatestVersion = async (appId: string, app?: AppSchema) => {
   if (version) {
     return {
       versionId: version._id,
+      versionName: version.versionName,
       nodes: version.nodes,
       edges: version.edges,
       chatConfig: version.chatConfig || app?.chatConfig || {}
@@ -22,6 +23,7 @@ export const getAppLatestVersion = async (appId: string, app?: AppSchema) => {
   }
   return {
     versionId: app?.pluginData?.nodeVersion,
+    versionName: app?.name,
     nodes: app?.modules || [],
     edges: app?.edges || [],
     chatConfig: app?.chatConfig || {}
@@ -47,6 +49,7 @@ export const getAppVersionById = async ({
   if (version) {
     return {
       versionId: version._id,
+      versionName: version.versionName,
       nodes: version.nodes,
       edges: version.edges,
       chatConfig: version.chatConfig || app?.chatConfig || {}
@@ -65,6 +68,9 @@ export const checkIsLatestVersion = async ({
   appId: string;
   versionId: string;
 }) => {
+  if (!Types.ObjectId.isValid(versionId)) {
+    return false;
+  }
   const version = await MongoAppVersion.findOne(
     {
       appId,
@@ -61,6 +61,7 @@ const ChatItemSchema = new Schema({
     type: Array,
     default: []
   },
+  errorMsg: String,
   userGoodFeedback: {
     type: String
   },
@@ -34,6 +34,10 @@ const ChatSchema = new Schema({
     ref: AppCollectionName,
     required: true
   },
+  createTime: {
+    type: Date,
+    default: () => new Date()
+  },
   updateTime: {
     type: Date,
     default: () => new Date()
@@ -32,6 +32,7 @@ type Props = {
   content: [UserChatItemType & { dataId?: string }, AIChatItemType & { dataId?: string }];
   metadata?: Record<string, any>;
   durationSeconds: number; //s
+  errorMsg?: string;
 };

 export async function saveChat({
@@ -50,6 +51,7 @@ export async function saveChat({
   outLinkUid,
   content,
   durationSeconds,
+  errorMsg,
   metadata = {}
 }: Props) {
   if (!chatId || chatId === 'NO_RECORD_HISTORIES') return;
@@ -104,7 +106,8 @@ export async function saveChat({
       return {
         ...item,
         [DispatchNodeResponseKeyEnum.nodeResponse]: nodeResponse,
-        durationSeconds
+        durationSeconds,
+        errorMsg
       };
     }
     return item;
@@ -65,8 +65,8 @@ export const filterGPTMessageByMaxContext = async ({
|
|||||||
if (lastMessage.role === ChatCompletionRequestMessageRoleEnum.User) {
|
if (lastMessage.role === ChatCompletionRequestMessageRoleEnum.User) {
|
||||||
const tokens = await countGptMessagesTokens([lastMessage, ...tmpChats]);
|
const tokens = await countGptMessagesTokens([lastMessage, ...tmpChats]);
|
||||||
maxContext -= tokens;
|
maxContext -= tokens;
|
||||||
// 该轮信息整体 tokens 超出范围,这段数据不要了
|
// 该轮信息整体 tokens 超出范围,这段数据不要了。但是至少保证一组。
|
||||||
if (maxContext < 0) {
|
if (maxContext < 0 && chats.length > 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
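Note: the added `chats.length > 0` guard means the budget check can no longer discard every round: on the first iteration nothing has been kept yet, so the newest round survives even if it alone exceeds maxContext (assuming `chats` is the accumulator of kept rounds, per the comment). The guard pattern in isolation:

if (remainingTokens < 0 && kept.length > 0) {
  break; // over budget AND we already kept at least one round
}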
@@ -2,7 +2,9 @@ import type {
   APIFileListResponse,
   ApiFileReadContentResponse,
   APIFileReadResponse,
-  APIFileServer
+  ApiDatasetDetailResponse,
+  APIFileServer,
+  APIFileItem
 } from '@fastgpt/global/core/dataset/apiDataset';
 import axios, { type Method } from 'axios';
 import { addLog } from '../../../common/system/log';
@@ -89,7 +91,7 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
       `/v1/file/list`,
       {
         searchKey,
-        parentId
+        parentId: parentId || apiServer.basePath
       },
       'POST'
     );
@@ -144,7 +146,8 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
       tmbId,
       url: previewUrl,
       relatedId: apiFileId,
-      customPdfParse
+      customPdfParse,
+      getFormatText: true
     });
     return {
       title,
@@ -164,9 +167,34 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
     return url;
   };

+  const getFileDetail = async ({
+    apiFileId
+  }: {
+    apiFileId: string;
+  }): Promise<ApiDatasetDetailResponse> => {
+    const fileData = await request<ApiDatasetDetailResponse>(
+      `/v1/file/detail`,
+      {
+        id: apiFileId
+      },
+      'GET'
+    );
+
+    if (fileData) {
+      return {
+        id: fileData.id,
+        name: fileData.name,
+        parentId: fileData.parentId === null ? '' : fileData.parentId
+      };
+    }
+
+    return Promise.reject('File not found');
+  };
+
   return {
     getFileContent,
     listFiles,
-    getFilePreviewUrl
+    getFilePreviewUrl,
+    getFileDetail
   };
 };
packages/service/core/dataset/apiDataset/index.ts (27 lines, Normal file)
@@ -0,0 +1,27 @@
+import type {
+  APIFileServer,
+  YuqueServer,
+  FeishuServer
+} from '@fastgpt/global/core/dataset/apiDataset';
+import { useApiDatasetRequest } from './api';
+import { useYuqueDatasetRequest } from '../yuqueDataset/api';
+import { useFeishuDatasetRequest } from '../feishuDataset/api';
+
+export const getApiDatasetRequest = async (data: {
+  apiServer?: APIFileServer;
+  yuqueServer?: YuqueServer;
+  feishuServer?: FeishuServer;
+}) => {
+  const { apiServer, yuqueServer, feishuServer } = data;
+
+  if (apiServer) {
+    return useApiDatasetRequest({ apiServer });
+  }
+  if (yuqueServer) {
+    return useYuqueDatasetRequest({ yuqueServer });
+  }
+  if (feishuServer) {
+    return useFeishuDatasetRequest({ feishuServer });
+  }
+  return Promise.reject('Can not find api dataset server');
+};
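Note: a hedged usage sketch of the new dispatcher; exactly one server config is expected, and a missing one rejects:

const api = await getApiDatasetRequest({ feishuServer });
const files = await api.listFiles({});
const detail = await api.getFileDetail({ apiFileId: files[0].id });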
@@ -1,30 +0,0 @@
-import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
-import { type FeishuServer, type YuqueServer } from '@fastgpt/global/core/dataset/apiDataset';
-
-export enum ProApiDatasetOperationTypeEnum {
-  LIST = 'list',
-  READ = 'read',
-  CONTENT = 'content',
-  DETAIL = 'detail'
-}
-
-export type ProApiDatasetCommonParams = {
-  feishuServer?: FeishuServer;
-  yuqueServer?: YuqueServer;
-};
-
-export type GetProApiDatasetFileListParams = ProApiDatasetCommonParams & {
-  parentId?: ParentIdType;
-};
-
-export type GetProApiDatasetFileContentParams = ProApiDatasetCommonParams & {
-  apiFileId: string;
-};
-
-export type GetProApiDatasetFilePreviewUrlParams = ProApiDatasetCommonParams & {
-  apiFileId: string;
-};
-
-export type GetProApiDatasetFileDetailParams = ProApiDatasetCommonParams & {
-  apiFileId: string;
-};
@@ -34,15 +34,17 @@ import { getTrainingModeByCollection } from './utils';
 import {
   computeChunkSize,
   computeChunkSplitter,
+  computeParagraphChunkDeep,
   getLLMMaxChunkSize
 } from '@fastgpt/global/core/dataset/training/utils';
+import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';

 export const createCollectionAndInsertData = async ({
   dataset,
   rawText,
   relatedId,
   createCollectionParams,
-  isQAImport = false,
+  backupParse = false,
   billId,
   session
 }: {
@@ -50,8 +52,8 @@ export const createCollectionAndInsertData = async ({
   rawText: string;
   relatedId?: string;
   createCollectionParams: CreateOneCollectionParams;
+  backupParse?: boolean;

-  isQAImport?: boolean;
   billId?: string;
   session?: ClientSession;
 }) => {
@@ -73,15 +75,33 @@ export const createCollectionAndInsertData = async ({
     llmModel: getLLMModel(dataset.agentModel)
   });
   const chunkSplitter = computeChunkSplitter(createCollectionParams);
+  const paragraphChunkDeep = computeParagraphChunkDeep(createCollectionParams);
+
+  if (
+    trainingType === DatasetCollectionDataProcessModeEnum.qa ||
+    trainingType === DatasetCollectionDataProcessModeEnum.backup
+  ) {
+    delete createCollectionParams.chunkTriggerType;
+    delete createCollectionParams.chunkTriggerMinSize;
+    delete createCollectionParams.dataEnhanceCollectionName;
+    delete createCollectionParams.imageIndex;
+    delete createCollectionParams.autoIndexes;
+    delete createCollectionParams.indexSize;
+    delete createCollectionParams.qaPrompt;
+  }

   // 1. split chunks
   const chunks = rawText2Chunks({
     rawText,
+    chunkTriggerType: createCollectionParams.chunkTriggerType,
+    chunkTriggerMinSize: createCollectionParams.chunkTriggerMinSize,
     chunkSize,
+    paragraphChunkDeep,
+    paragraphChunkMinSize: createCollectionParams.paragraphChunkMinSize,
     maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)),
     overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0,
     customReg: chunkSplitter ? [chunkSplitter] : [],
-    isQAImport
+    backupParse
   });

   // 2. auth limit
@@ -102,6 +122,7 @@ export const createCollectionAndInsertData = async ({
   const { _id: collectionId } = await createOneCollection({
     ...createCollectionParams,
     trainingType,
+    paragraphChunkDeep,
     chunkSize,
     chunkSplitter,

@@ -157,6 +178,10 @@ export const createCollectionAndInsertData = async ({
       billId: traingBillId,
       data: chunks.map((item, index) => ({
         ...item,
+        indexes: item.indexes?.map((text) => ({
+          type: DatasetDataIndexTypeEnum.custom,
+          text
+        })),
         chunkIndex: index
       })),
       session
@@ -198,46 +223,19 @@ export type CreateOneCollectionParams = CreateDatasetCollectionParams & {
   tmbId: string;
   session?: ClientSession;
 };
-export async function createOneCollection({
-  teamId,
-  tmbId,
-  name,
-  parentId,
-  datasetId,
-  type,
-
-  createTime,
-  updateTime,
-
-  hashRawText,
-  rawTextLength,
-  metadata = {},
-  tags,
-
-  nextSyncTime,
-
-  fileId,
-  rawLink,
-  externalFileId,
-  externalFileUrl,
-  apiFileId,
-
-  // Parse settings
-  customPdfParse,
-  imageIndex,
-  autoIndexes,
-
-  // Chunk settings
-  trainingType,
-  chunkSettingMode,
-  chunkSplitMode,
-  chunkSize,
-  indexSize,
-  chunkSplitter,
-  qaPrompt,
-
-  session
-}: CreateOneCollectionParams) {
+export async function createOneCollection({ session, ...props }: CreateOneCollectionParams) {
+  const {
+    teamId,
+    parentId,
+    datasetId,
+    tags,
+    fileId,
+    rawLink,
+    externalFileId,
+    externalFileUrl,
+    apiFileId
+  } = props;
   // Create collection tags
   const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session });

@@ -245,41 +243,18 @@ export async function createOneCollection({
   const [collection] = await MongoDatasetCollection.create(
     [
       {
+        ...props,
         teamId,
-        tmbId,
         parentId: parentId || null,
         datasetId,
-        name,
-        type,
-
-        rawTextLength,
-        hashRawText,
         tags: collectionTags,
-        metadata,
-
-        createTime,
-        updateTime,
-        nextSyncTime,
-
         ...(fileId ? { fileId } : {}),
         ...(rawLink ? { rawLink } : {}),
         ...(externalFileId ? { externalFileId } : {}),
         ...(externalFileUrl ? { externalFileUrl } : {}),
-        ...(apiFileId ? { apiFileId } : {}),
-
-        // Parse settings
-        customPdfParse,
-        imageIndex,
-        autoIndexes,
-
-        // Chunk settings
-        trainingType,
-        chunkSettingMode,
-        chunkSplitMode,
-        chunkSize,
-        indexSize,
-        chunkSplitter,
-        qaPrompt
+        ...(apiFileId ? { apiFileId } : {})
       }
     ],
     { session, ordered: true }
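Note: the createOneCollection refactor replaces the long destructured parameter list with a rest-spread pass-through; only fields that need normalization are pulled out, and everything else flows into the Mongo document untouched. The shape of the resulting document, in sketch form:

const doc = {
  ...props,                          // all caller-provided collection fields, as-is
  parentId: props.parentId || null,  // normalize a missing parent to null
  tags: collectionTags               // raw tags resolved via createOrGetCollectionTags
};

A likely motivation for this design: new collection fields (such as paragraphChunkDeep above) no longer require a matching edit to the parameter list and the create call.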
@@ -34,9 +34,9 @@ const DatasetDataTextSchema = new Schema({

 try {
   DatasetDataTextSchema.index(
-    { teamId: 1, datasetId: 1, fullTextToken: 'text' },
+    { teamId: 1, fullTextToken: 'text' },
     {
-      name: 'teamId_1_datasetId_1_fullTextToken_text',
+      name: 'teamId_1_fullTextToken_text',
       default_language: 'none'
     }
   );
packages/service/core/dataset/feishuDataset/api.ts (208 lines, Normal file)
@@ -0,0 +1,208 @@
+import type {
+  APIFileItem,
+  ApiFileReadContentResponse,
+  ApiDatasetDetailResponse,
+  FeishuServer
+} from '@fastgpt/global/core/dataset/apiDataset';
+import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
+import axios, { type Method } from 'axios';
+import { addLog } from '../../../common/system/log';
+
+type ResponseDataType = {
+  success: boolean;
+  message: string;
+  data: any;
+};
+
+type FeishuFileListResponse = {
+  files: {
+    token: string;
+    parent_token: string;
+    name: string;
+    type: string;
+    modified_time: number;
+    created_time: number;
+    url: string;
+    owner_id: string;
+  }[];
+  has_more: boolean;
+  next_page_token: string;
+};
+
+const feishuBaseUrl = process.env.FEISHU_BASE_URL || 'https://open.feishu.cn';
+
+export const useFeishuDatasetRequest = ({ feishuServer }: { feishuServer: FeishuServer }) => {
+  const instance = axios.create({
+    baseURL: feishuBaseUrl,
+    timeout: 60000
+  });
+
+  // Add a request interceptor
+  instance.interceptors.request.use(async (config) => {
+    if (!config.headers.Authorization) {
+      const { data } = await axios.post<{ tenant_access_token: string }>(
+        `${feishuBaseUrl}/open-apis/auth/v3/tenant_access_token/internal`,
+        {
+          app_id: feishuServer.appId,
+          app_secret: feishuServer.appSecret
+        }
+      );
+
+      config.headers['Authorization'] = `Bearer ${data.tenant_access_token}`;
+      config.headers['Content-Type'] = 'application/json; charset=utf-8';
+    }
+    return config;
+  });
+
+  /**
+   * Check response data
+   */
+  const checkRes = (data: ResponseDataType) => {
+    if (data === undefined) {
+      addLog.info('yuque dataset data is empty');
+      return Promise.reject('服务器异常');
+    }
+    return data.data;
+  };
+  const responseError = (err: any) => {
+    console.log('error->', '请求错误', err);
+
+    if (!err) {
+      return Promise.reject({ message: '未知错误' });
+    }
+    if (typeof err === 'string') {
+      return Promise.reject({ message: err });
+    }
+    if (typeof err.message === 'string') {
+      return Promise.reject({ message: err.message });
+    }
+    if (typeof err.data === 'string') {
+      return Promise.reject({ message: err.data });
+    }
+    if (err?.response?.data) {
+      return Promise.reject(err?.response?.data);
+    }
+    return Promise.reject(err);
+  };
+
+  const request = <T>(url: string, data: any, method: Method): Promise<T> => {
+    /* Strip undefined values */
+    for (const key in data) {
+      if (data[key] === undefined) {
+        delete data[key];
+      }
+    }
+
+    return instance
+      .request({
+        url,
+        method,
+        data: ['POST', 'PUT'].includes(method) ? data : undefined,
+        params: !['POST', 'PUT'].includes(method) ? data : undefined
+      })
+      .then((res) => checkRes(res.data))
+      .catch((err) => responseError(err));
+  };
+
+  const listFiles = async ({ parentId }: { parentId?: ParentIdType }): Promise<APIFileItem[]> => {
+    const fetchFiles = async (pageToken?: string): Promise<FeishuFileListResponse['files']> => {
+      const data = await request<FeishuFileListResponse>(
+        `/open-apis/drive/v1/files`,
+        {
+          folder_token: parentId || feishuServer.folderToken,
+          page_size: 200,
+          page_token: pageToken
+        },
+        'GET'
+      );
+
+      if (data.has_more) {
+        const nextFiles = await fetchFiles(data.next_page_token);
+        return [...data.files, ...nextFiles];
+      }
+
+      return data.files;
+    };
+
+    const allFiles = await fetchFiles();
+
+    return allFiles
+      .filter((file) => ['folder', 'docx'].includes(file.type))
+      .map((file) => ({
+        id: file.token,
+        parentId: file.parent_token,
+        name: file.name,
+        type: file.type === 'folder' ? ('folder' as const) : ('file' as const),
+        hasChild: file.type === 'folder',
+        updateTime: new Date(file.modified_time * 1000),
+        createTime: new Date(file.created_time * 1000)
+      }));
+  };
+
+  const getFileContent = async ({
+    apiFileId
+  }: {
+    apiFileId: string;
+  }): Promise<ApiFileReadContentResponse> => {
+    const [{ content }, { document }] = await Promise.all([
+      request<{ content: string }>(
+        `/open-apis/docx/v1/documents/${apiFileId}/raw_content`,
+        {},
+        'GET'
+      ),
+      request<{ document: { title: string } }>(
+        `/open-apis/docx/v1/documents/${apiFileId}`,
+        {},
+        'GET'
+      )
+    ]);
+
+    return {
+      title: document?.title,
+      rawText: content
+    };
+  };
+
+  const getFilePreviewUrl = async ({ apiFileId }: { apiFileId: string }): Promise<string> => {
+    const { metas } = await request<{ metas: { url: string }[] }>(
+      `/open-apis/drive/v1/metas/batch_query`,
+      {
+        request_docs: [
+          {
+            doc_token: apiFileId,
+            doc_type: 'docx'
+          }
+        ],
+        with_url: true
+      },
+      'POST'
+    );
+
+    return metas[0].url;
+  };
+
+  const getFileDetail = async ({
+    apiFileId
+  }: {
+    apiFileId: string;
+  }): Promise<ApiDatasetDetailResponse> => {
+    const { document } = await request<{ document: { title: string } }>(
+      `/open-apis/docx/v1/documents/${apiFileId}`,
+      {},
+      'GET'
+    );
+
+    return {
+      name: document?.title,
+      parentId: null,
+      id: apiFileId
+    };
+  };
+
+  return {
+    getFileContent,
+    listFiles,
+    getFilePreviewUrl,
+    getFileDetail
+  };
+};
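Note: a hedged usage sketch of the new Feishu client; credential values below are placeholders:

const feishu = useFeishuDatasetRequest({
  feishuServer: { appId: 'cli_xxx', appSecret: '***', folderToken: 'fldtoken' }
});
const files = await feishu.listFiles({});
const { title, rawText } = await feishu.getFileContent({ apiFileId: files[0].id });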
@@ -1,8 +1,10 @@
 import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
-import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
+import {
+  ChunkTriggerConfigTypeEnum,
+  DatasetSourceReadTypeEnum
+} from '@fastgpt/global/core/dataset/constants';
 import { readFileContentFromMongo } from '../../common/file/gridfs/controller';
 import { urlsFetch } from '../../common/string/cheerio';
-import { parseCsvTable2Chunks } from './training/utils';
 import { type TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
 import axios from 'axios';
 import { readRawContentByFileBuffer } from '../../common/file/read/utils';
@@ -12,19 +14,22 @@ import {
   type FeishuServer,
   type YuqueServer
 } from '@fastgpt/global/core/dataset/apiDataset';
-import { useApiDatasetRequest } from './apiDataset/api';
+import { getApiDatasetRequest } from './apiDataset';
+import Papa from 'papaparse';
 
 export const readFileRawTextByUrl = async ({
   teamId,
   tmbId,
   url,
   customPdfParse,
+  getFormatText,
   relatedId
 }: {
   teamId: string;
   tmbId: string;
   url: string;
   customPdfParse?: boolean;
+  getFormatText?: boolean;
   relatedId: string; // externalFileId / apiFileId
 }) => {
   const response = await axios({
@@ -38,7 +43,7 @@ export const readFileRawTextByUrl = async ({
 
   const { rawText } = await readRawContentByFileBuffer({
     customPdfParse,
-    isQAImport: false,
+    getFormatText,
     extension,
     teamId,
     tmbId,
@@ -62,21 +67,21 @@ export const readDatasetSourceRawText = async ({
   tmbId,
   type,
   sourceId,
-  isQAImport,
   selector,
   externalFileId,
   apiServer,
   feishuServer,
   yuqueServer,
-  customPdfParse
+  customPdfParse,
+  getFormatText
 }: {
   teamId: string;
   tmbId: string;
   type: DatasetSourceReadTypeEnum;
   sourceId: string;
   customPdfParse?: boolean;
+  getFormatText?: boolean;
 
-  isQAImport?: boolean; // csv data
   selector?: string; // link selector
   externalFileId?: string; // external file dataset
   apiServer?: APIFileServer; // api dataset
@@ -92,8 +97,8 @@ export const readDatasetSourceRawText = async ({
       tmbId,
       bucketName: BucketNameEnum.dataset,
       fileId: sourceId,
-      isQAImport,
-      customPdfParse
+      customPdfParse,
+      getFormatText
     });
     return {
       title: filename,
@@ -161,38 +166,82 @@ export const readApiServerFileContent = async ({
   title?: string;
   rawText: string;
 }> => {
-  if (apiServer) {
-    return useApiDatasetRequest({ apiServer }).getFileContent({
-      teamId,
-      tmbId,
-      apiFileId,
-      customPdfParse
-    });
-  }
-
-  if (feishuServer || yuqueServer) {
-    return global.getProApiDatasetFileContent({
-      feishuServer,
-      yuqueServer,
-      apiFileId
-    });
-  }
-
-  return Promise.reject('No apiServer or feishuServer or yuqueServer');
+  return (
+    await getApiDatasetRequest({
+      apiServer,
+      yuqueServer,
+      feishuServer
+    })
+  ).getFileContent({
+    teamId,
+    tmbId,
+    apiFileId,
+    customPdfParse
+  });
 };
 
 export const rawText2Chunks = ({
   rawText,
-  isQAImport,
+  chunkTriggerType = ChunkTriggerConfigTypeEnum.minSize,
+  chunkTriggerMinSize = 1000,
+  backupParse,
   chunkSize = 512,
   ...splitProps
 }: {
   rawText: string;
-  isQAImport?: boolean;
-} & TextSplitProps) => {
-  if (isQAImport) {
-    const { chunks } = parseCsvTable2Chunks(rawText);
-    return chunks;
+  chunkTriggerType?: ChunkTriggerConfigTypeEnum;
+  chunkTriggerMinSize?: number; // maxSize from agent model, not store
+  backupParse?: boolean;
+  tableParse?: boolean;
+} & TextSplitProps): {
+  q: string;
+  a: string;
+  indexes?: string[];
+}[] => {
+  const parseDatasetBackup2Chunks = (rawText: string) => {
+    const csvArr = Papa.parse(rawText).data as string[][];
+    console.log(rawText, csvArr);
+
+    const chunks = csvArr
+      .slice(1)
+      .map((item) => ({
+        q: item[0] || '',
+        a: item[1] || '',
+        indexes: item.slice(2)
+      }))
+      .filter((item) => item.q || item.a);
+
+    return {
+      chunks
+    };
+  };
+
+  if (backupParse) {
+    return parseDatasetBackup2Chunks(rawText).chunks;
+  }
+
+  // Chunk condition
+  // 1. Max-size trigger: only split once the text exceeds the maximum (default: 0.7 * the model's max size)
+  if (chunkTriggerType === ChunkTriggerConfigTypeEnum.maxSize) {
+    const textLength = rawText.trim().length;
+    const maxSize = splitProps.maxSize ? splitProps.maxSize * 0.7 : 16000;
+    if (textLength < maxSize) {
+      return [
+        {
+          q: rawText,
+          a: ''
+        }
+      ];
+    }
+  }
+  // 2. Min-size trigger: only split once the text exceeds the manually configured minimum
+  if (chunkTriggerType !== ChunkTriggerConfigTypeEnum.forceChunk) {
+    const textLength = rawText.trim().length;
+    if (textLength < chunkTriggerMinSize) {
+      return [{ q: rawText, a: '' }];
+    }
   }
 
   const { chunks } = splitText2Chunks({
@@ -203,6 +252,7 @@ export const rawText2Chunks = ({
 
   return chunks.map((item) => ({
     q: item,
-    a: ''
+    a: '',
+    indexes: []
   }));
 };
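The new trigger logic decides whether `rawText2Chunks` splits at all before `splitText2Chunks` ever runs: `forceChunk` always splits, `maxSize` keeps the whole text as one chunk until it exceeds roughly 0.7x the model's maximum, and `minSize` keeps it whole below a manual threshold. A standalone sketch of that decision, with enum values assumed to mirror `ChunkTriggerConfigTypeEnum`:

// Sketch only; returns true when the text should stay a single chunk.
enum ChunkTriggerType {
  minSize = 'minSize',
  maxSize = 'maxSize',
  forceChunk = 'forceChunk'
}

const keepWhole = (
  text: string,
  trigger: ChunkTriggerType,
  opts: { minSize?: number; maxSize?: number } = {}
): boolean => {
  const len = text.trim().length;
  if (trigger === ChunkTriggerType.forceChunk) return false; // always split
  if (trigger === ChunkTriggerType.maxSize) {
    // Only pushed through the splitter once the text exceeds ~0.7x the model max.
    if (len < (opts.maxSize ? opts.maxSize * 0.7 : 16000)) return true;
  }
  return len < (opts.minSize ?? 1000); // minSize mode: short texts stay whole
};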

@@ -1,10 +1,12 @@
 import { getMongoModel, Schema } from '../../common/mongo';
 import {
   ChunkSettingModeEnum,
+  ChunkTriggerConfigTypeEnum,
   DataChunkSplitModeEnum,
   DatasetCollectionDataProcessModeEnum,
   DatasetTypeEnum,
-  DatasetTypeMap
+  DatasetTypeMap,
+  ParagraphChunkAIModeEnum
 } from '@fastgpt/global/core/dataset/constants';
 import {
   TeamCollectionName,
@@ -15,12 +17,22 @@ import type { DatasetSchemaType } from '@fastgpt/global/core/dataset/type.d';
 export const DatasetCollectionName = 'datasets';
 
 export const ChunkSettings = {
-  imageIndex: Boolean,
-  autoIndexes: Boolean,
   trainingType: {
     type: String,
     enum: Object.values(DatasetCollectionDataProcessModeEnum)
   },
+
+  chunkTriggerType: {
+    type: String,
+    enum: Object.values(ChunkTriggerConfigTypeEnum)
+  },
+  chunkTriggerMinSize: Number,
+
+  dataEnhanceCollectionName: Boolean,
+
+  imageIndex: Boolean,
+  autoIndexes: Boolean,
+
   chunkSettingMode: {
     type: String,
     enum: Object.values(ChunkSettingModeEnum)
@@ -29,6 +41,12 @@ export const ChunkSettings = {
     type: String,
     enum: Object.values(DataChunkSplitModeEnum)
   },
+  paragraphChunkAIMode: {
+    type: String,
+    enum: Object.values(ParagraphChunkAIModeEnum)
+  },
+  paragraphChunkDeep: Number,
+  paragraphChunkMinSize: Number,
   chunkSize: Number,
   chunkSplitter: String,
 
@@ -115,14 +133,13 @@ const DatasetSchema = new Schema({
 
   // abandoned
   autoSync: Boolean,
-  externalReadUrl: {
-    type: String
-  },
+  externalReadUrl: String,
   defaultPermission: Number
 });
 
 try {
   DatasetSchema.index({ teamId: 1 });
+  DatasetSchema.index({ type: 1 });
 } catch (error) {
   console.log(error);
 }

@@ -27,6 +27,7 @@ import { type ChatItemType } from '@fastgpt/global/core/chat/type';
 import type { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
 import { datasetSearchQueryExtension } from './utils';
 import type { RerankModelItemType } from '@fastgpt/global/core/ai/model.d';
+import { addLog } from '../../../common/system/log';
 
 export type SearchDatasetDataProps = {
   histories: ChatItemType[];
@@ -474,7 +475,7 @@ export async function searchDatasetData(
     ).lean()
   ]);
 
-  const set = new Map<string, number>();
+  const set = new Set<string>();
   const formatResult = results
     .map((item, index) => {
       const collection = collections.find((col) => String(col._id) === String(item.collectionId));
@@ -507,7 +508,7 @@ export async function searchDatasetData(
     .filter((item) => {
       if (!item) return false;
       if (set.has(item.id)) return false;
-      set.set(item.id, 1);
+      set.add(item.id);
       return true;
     })
     .map((item, index) => {
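The `Map<string, number>` here was only ever used for membership tests, so `Set` states the dedup intent directly. The same filter, extracted as a generic sketch:

// Generic id-based dedup, the shape used in the filter above.
const dedupById = <T extends { id: string }>(items: T[]): T[] => {
  const seen = new Set<string>();
  return items.filter((item) => {
    if (seen.has(item.id)) return false;
    seen.add(item.id);
    return true;
  });
};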
@@ -544,16 +545,14 @@ export async function searchDatasetData(
     };
   }
 
-  const searchResults = (
-    await Promise.all(
-      datasetIds.map(async (id) => {
-        return MongoDatasetDataText.aggregate(
+  try {
+    const searchResults = (await MongoDatasetDataText.aggregate(
       [
         {
           $match: {
             teamId: new Types.ObjectId(teamId),
-            datasetId: new Types.ObjectId(id),
             $text: { $search: await jiebaSplit({ text: query }) },
+            datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) },
             ...(filterCollectionIdList
               ? {
                   collectionId: {
@@ -590,10 +589,7 @@ export async function searchDatasetData(
       {
         ...readFromSecondary
       }
-    );
-      })
-    )
-  ).flat() as (DatasetDataTextSchemaType & { score: number })[];
+    )) as (DatasetDataTextSchemaType & { score: number })[];
 
     // Get data and collections
     const [dataList, collections] = await Promise.all([
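These two hunks collapse the per-dataset `Promise.all` fan-out (one aggregate per `datasetId`, results flattened afterwards) into a single aggregate whose `$match` uses `$in`, so the full-text index is queried once. A sketch of the consolidated match stage, following the field names in the diff:

import { Types } from 'mongoose';

// One $match covering all datasets replaces N parallel pipelines.
const buildMatchStage = (teamId: string, datasetIds: string[], searchText: string) => ({
  $match: {
    teamId: new Types.ObjectId(teamId),
    $text: { $search: searchText },
    datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) }
  }
});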
@@ -648,9 +644,26 @@ export async function searchDatasetData(
           ]
         };
       })
-      .filter(Boolean) as SearchDataResponseItemType[],
+      .filter((item) => {
+        if (!item) return false;
+        return true;
+      })
+      .map((item, index) => {
+        if (!item) return;
+        return {
+          ...item,
+          score: item.score.map((item) => ({ ...item, index }))
+        };
+      }) as SearchDataResponseItemType[],
     tokenLen: 0
   };
+  } catch (error) {
+    addLog.error('Full text search error', error);
+    return {
+      fullTextRecallResults: [],
+      tokenLen: 0
+    };
+  }
 };
 const multiQueryRecall = async ({
   embeddingLimit,
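With the new `try`/`catch`, a failed full-text recall now logs via `addLog` and degrades to empty results instead of rejecting the whole hybrid search. The generic shape of that fallback, as a sketch:

// A recall failure yields an empty list rather than propagating the error.
const safeRecall = async <T>(
  recall: () => Promise<T[]>,
  onError: (error: unknown) => void
): Promise<T[]> => {
  try {
    return await recall();
  } catch (error) {
    onError(error);
    return [];
  }
};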

@@ -1,6 +1,5 @@
 export enum ImportDataSourceEnum {
   fileLocal = 'fileLocal',
   fileLink = 'fileLink',
-  fileCustom = 'fileCustom',
-  tableLocal = 'tableLocal'
+  fileCustom = 'fileCustom'
 }

@@ -1,16 +0,0 @@
-import Papa from 'papaparse';
-
-export const parseCsvTable2Chunks = (rawText: string) => {
-  const csvArr = Papa.parse(rawText).data as string[][];
-
-  const chunks = csvArr
-    .map((item) => ({
-      q: item[0] || '',
-      a: item[1] || ''
-    }))
-    .filter((item) => item.q || item.a);
-
-  return {
-    chunks
-  };
-};
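The deleted helper's role moves into `parseDatasetBackup2Chunks` inside `rawText2Chunks` (see the earlier hunk). Two behavioral differences are visible in the diff: the new parser drops the header row via `.slice(1)`, and it keeps any extra CSV columns as per-chunk `indexes`. The replacement logic, restated as a self-contained sketch:

import Papa from 'papaparse';

// Backup-CSV parsing as now done inside rawText2Chunks.
const parseBackupCsv = (rawText: string) =>
  (Papa.parse(rawText).data as string[][])
    .slice(1) // skip header row (the old helper kept it)
    .map((row) => ({ q: row[0] || '', a: row[1] || '', indexes: row.slice(2) }))
    .filter((row) => row.q || row.a);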
Some files were not shown because too many files have changed in this diff.