Compare commits

42 Commits

| Author | SHA1 | Date |
|---|---|---|
|  | 361e255af8 |  |
|  | 2b888fb0fa |  |
|  | 2128d306ad |  |
|  | e59816aba4 |  |
|  | ded0383ac4 |  |
|  | 81202c53a8 |  |
|  | e74ab643fe |  |
|  | 3b0f0a8108 |  |
|  | 165b783a95 |  |
|  | d7b9f94270 |  |
|  | 5a5367d30b |  |
|  | 8ed35ffe7e |  |
|  | 0f866fc552 |  |
|  | 05c7ba4483 |  |
|  | fa80ce3a77 |  |
|  | 830358aa72 |  |
|  | 02b214b3ec |  |
|  | a171c7b11c |  |
|  | 802de11363 |  |
|  | b4ecfb0b79 |  |
|  | 331b851a78 |  |
|  | 50d235c42a |  |
|  | 9838593451 |  |
|  | c25cd48e72 |  |
|  | 874300a56a |  |
|  | 1dea2b71b4 |  |
|  | a8673344b1 |  |
|  | 9709ae7a4f |  |
|  | fae76e887a |  |
|  | 9af92d1eae |  |
|  | 6a6719e93d |  |
|  | 50481f4ca8 |  |
|  | 88bd3aaa9e |  |
|  | dd3c251603 |  |
|  | aa55f059d4 |  |
|  | 89c9a02650 |  |
|  | 0f3bfa280a |  |
|  | 593ebfd269 |  |
|  | f6dc2204f5 |  |
|  | d44c338059 |  |
|  | 1dac2b70ec |  |
|  | 9fef3e15fb |  |
.vscode/settings.json (vendored, 5 changes)
@@ -21,11 +21,14 @@
   "i18n-ally.namespace": true,
   "i18n-ally.pathMatcher": "{locale}/{namespaces}.json",
   "i18n-ally.extract.targetPickingStrategy": "most-similar-by-key",
-  "i18n-ally.translate.engines": ["google"],
+  "i18n-ally.translate.engines": ["deepl","google"],
   "[typescript]": {
     "editor.defaultFormatter": "esbenp.prettier-vscode"
   },
   "markdown.copyFiles.destination": {
     "/docSite/content/**/*": "${documentWorkspaceFolder}/docSite/assets/imgs/"
   },
+  "[svg]": {
+    "editor.defaultFormatter": "jock.svg"
+  }
 }
@@ -132,15 +132,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.8 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.8 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.8 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.8 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -150,8 +150,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.8 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.8 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:
@@ -109,15 +109,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.8 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.8 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.8 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.8 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -127,8 +127,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.8 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.8 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:
@@ -23,7 +23,7 @@ services:
     volumes:
       - ./pg/data:/var/lib/postgresql/data
     healthcheck:
-      test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy']
+      test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'postgres']
       interval: 5s
       timeout: 5s
       retries: 10
@@ -96,15 +96,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.8 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.8 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.8 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.8 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -114,8 +114,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.8 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.8 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:
@@ -72,15 +72,15 @@ services:

   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.8 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.8 # 阿里云
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.8 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.8 # 阿里云
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
     ports:
       - 3005:3000
     networks:
@@ -90,8 +90,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.8 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.8 # 阿里云
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
     ports:
       - 3000:3000
     networks:
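All four compose variants above receive the same three image-tag bumps. A minimal upgrade sketch, assuming the edited compose file sits in the current directory and the service names match those above (the `pg` container name used for the healthcheck probe is an assumption):

```bash
# Pull the v4.9.10-fix2 images and recreate only the services whose tags changed
docker compose pull sandbox fastgpt-mcp-server fastgpt
docker compose up -d sandbox fastgpt-mcp-server fastgpt

# Optional: confirm the fixed Postgres healthcheck now reports healthy
docker inspect --format '{{.State.Health.Status}}' pg
```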
@@ -959,10 +959,16 @@ curl --location --request POST 'http://localhost:3000/api/core/chat/getHistories
 {{< markdownify >}}
 
 {{% alert icon=" " context="success" %}}
 Currently, only conversations created by the owner of the current API key can be fetched.
 
 - appId - app ID
 - offset - offset, i.e. which record to start from
 - pageSize - number of records
 - source - conversation source. source=api returns only conversations created through the API (conversations from the web page are not included)
+- startCreateTime - start of the creation-time range (optional)
+- endCreateTime - end of the creation-time range (optional)
+- startUpdateTime - start of the update-time range (optional)
+- endUpdateTime - end of the update-time range (optional)
 {{% /alert %}}
 
 {{< /markdownify >}}
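A hypothetical request using the new time-range filters (the appId value is a placeholder, and the exact timestamp format the API accepts is an assumption):

```bash
curl --location --request POST 'http://localhost:3000/api/core/chat/getHistories' \
  --header 'Authorization: Bearer {{apikey}}' \
  --header 'Content-Type: application/json' \
  --data-raw '{
    "appId": "appId",
    "offset": 0,
    "pageSize": 20,
    "source": "api",
    "startCreateTime": "2025-01-01T00:00:00.000Z",
    "endCreateTime": "2025-06-30T23:59:59.999Z"
  }'
```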
@@ -645,7 +645,7 @@ data is the collection ID.
 {{< /tab >}}
 {{< /tabs >}}
 
-### Create an external file dataset collection (commercial edition)
+### Create an external file dataset collection (deprecated)
 
 {{< tabs tabTotal="3" >}}
 {{< tab tabName="Request example" >}}
docSite/content/zh-cn/docs/development/upgrading/4910.md (new file, 50 lines)
@@ -0,0 +1,50 @@
---
title: 'V4.9.10'
description: 'FastGPT V4.9.10 release notes'
icon: 'upgrade'
draft: false
toc: true
weight: 790
---

## Upgrade guide

Important: this release rebuilds the full-text index. While the rebuild runs, full-text search returns empty results; rebuilding about 7 million full-text index entries on a 4c16g machine takes roughly 25 minutes. If you need a seamless upgrade, you must set up table synchronization yourself.

### 1. Back up your data

### 2. Update image tags

- Update the FastGPT image tag: v4.9.10-fix2
- Update the FastGPT commercial edition image tag: v4.9.10-fix2
- mcp_server: no update needed
- Sandbox: no update needed
- AIProxy: no update needed

## 🚀 New features

1. PG now supports the `systemEnv.hnswMaxScanTuples` parameter, raising the total number of rows an iterative search can scan (see the config sketch after this list).
2. Dataset preprocessing gains a "chunk trigger" condition, which can skip chunking entirely in certain cases.
3. Dataset preprocessing gains a "paragraph first" mode with a configurable maximum paragraph depth. The old "length first" mode no longer embeds paragraph-first logic.
4. Workflow nodes now use single-direction in and out connections, making it quick to append the next node.
5. Feishu and Yuque dataset sources are now available in the open-source edition.
6. Presets for the latest gemini and claude models.
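A minimal sketch of where the new parameter might live, assuming the standard FastGPT `config.json` layout with a `systemEnv` block (both values are illustrative placeholders, not recommendations):

```json
{
  "systemEnv": {
    "hnswEfSearch": 100,
    "hnswMaxScanTuples": 100000
  }
}
```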
## ⚙️ Optimizations

1. Larger default timeout for streaming LLM calls.
2. Polished several confirmation interactions.
3. Renamed the dataset's "table dataset" mode to "backup import", and added export and import of dataset indexes.
4. Dataset citation cap in workflows: if the workflow contains no related AI node, the control switches to pure manual input with a cap of 10 million.
5. Voice input: mobile detection now checks for an actual phone instead of a small screen.
6. Improved context truncation so that at least one group of Human messages is always kept.

## 🐛 Fixes

1. Incorrect score ordering in full-text search across multiple datasets.
2. finish_reason captured from stream responses could be wrong.
3. Tool-call mode did not save reasoning output.
4. The dataset indexSize parameter did not take effect.
5. With workflows nested 2 levels deep, previewed citations and context were wrong.
6. Converting xlsx to Markdown prepended an extra space.
7. When reading Markdown files, Base64 images were not converted and saved separately.
docSite/content/zh-cn/docs/development/upgrading/4911.md (new file, 25 lines)
@@ -0,0 +1,25 @@
---
title: 'V4.9.11 (in progress)'
description: 'FastGPT V4.9.11 release notes'
icon: 'upgrade'
draft: false
toc: true
weight: 789
---

## 🚀 New features

1. Node search inside workflows.
2. Sub-workflow version control in workflows: you can select "keep latest version" so manual updates are no longer needed.

## ⚙️ Optimizations

1. Raw-text cache moved to GridFS storage, raising the size limit.

## 🐛 Fixes

1. Global system tools declared by an administrator could not be version-managed in workflows.
2. Context was corrupted when an interactive node preceded a tool-call node.
3. Backup import failed to chunk content shorter than 1000 characters.
4. Custom PDF parsing could not save base64 images.
@@ -1,5 +1,5 @@
 ---
-title: 'V4.9.9 (in progress)'
+title: 'V4.9.9'
 description: 'FastGPT V4.9.9 release notes'
 icon: 'upgrade'
 draft: false
@@ -7,12 +7,28 @@ toc: true
 weight: 791
 ---
 
+## Upgrade guide
+
+### 1. Back up your data
+
+### 2. Commercial edition: replace your License
+
+Commercial edition users can contact the FastGPT support team for a License replacement plan. After replacing it, you can upgrade the system directly; the admin console will prompt for the new License.
+
+### 3. Update image tags
+
+- Update the FastGPT image tag: v4.9.9
+- Update the FastGPT commercial edition image tag: v4.9.9
+- mcp_server: no update needed
+- Sandbox: no update needed
+- AIProxy: no update needed
+
 ## 🚀 New features
 
 1. Login auth switched from JWT to SessionId, which allows capping the number of concurrently logged-in clients.
 2. New commercial edition License management model.
 3. Official-account calls now record and display chat errors, making troubleshooting easier.
 4. API datasets support BasePath selection; an extra API endpoint must be added, see the [API dataset introduction](/docs/guide/knowledge_base/api_dataset/#4-获取文件详细信息用于获取文件信息)
 
 ## ⚙️ Optimizations
 
@@ -24,3 +40,4 @@ weight: 791
 1. Could not fetch an app's saved/published history.
 2. Permission issue when members create MCP tools.
 3. Source citation display passed a wrong ID, triggering "no permission to operate this file" errors.
+4. Answer annotation raised a front-end data error.
@@ -28,7 +28,6 @@ FastGPT Commercial Edition builds on the open-source edition and adds some exclusive capabilities:
 | App publishing security config | ❌ | ✅ | ✅ |
 | Content moderation | ❌ | ✅ | ✅ |
 | Web site sync | ❌ | ✅ | ✅ |
-| Mainstream document library integration (currently: Yuque, Feishu) | ❌ | ✅ | ✅ |
 | Enhanced training mode | ❌ | ✅ | ✅ |
 | Quick third-party integration (Feishu, official accounts) | ❌ | ✅ | ✅ |
 | Admin console | ❌ | ✅ | Not needed |
@@ -4,6 +4,9 @@ import { type ErrType } from '../errorCode';
 /* dataset: 507000 */
 const startCode = 507000;
 export enum CommonErrEnum {
+  methodNotAllowed = 'methodNotAllowed',
+  systemError = 'systemError',
+  unauthorized = 'unauthorized',
   invalidParams = 'invalidParams',
   invalidResource = 'invalidResource',
   fileNotFound = 'fileNotFound',
@@ -35,6 +38,22 @@ const datasetErr = [
   {
     statusText: CommonErrEnum.inheritPermissionError,
     message: 'error.inheritPermissionError'
   },
+  {
+    statusText: CommonErrEnum.methodNotAllowed,
+    message: i18nT('common:code_error.error_message.405')
+  },
+  {
+    statusText: CommonErrEnum.systemError,
+    message: i18nT('common:code_error.error_message.500')
+  },
+  {
+    statusText: CommonErrEnum.unauthorized,
+    message: i18nT('common:code_error.error_message.403')
+  },
+  {
+    statusText: CommonErrEnum.invalidParams,
+    message: i18nT('common:code_error.error_message.422')
+  }
 ];
 export default datasetErr.reduce((acc, cur, index) => {
@@ -27,7 +27,7 @@ const datasetErr = [
   },
   {
     statusText: DatasetErrEnum.unExist,
-    message: 'core.dataset.error.unExistDataset'
+    message: i18nT('common:core.dataset.error.unExistDataset')
   },
   {
     statusText: DatasetErrEnum.unExistCollection,
@@ -7,6 +7,10 @@ export const CUSTOM_SPLIT_SIGN = '-----CUSTOM_SPLIT_SIGN-----';
 type SplitProps = {
   text: string;
   chunkSize: number;
+
+  paragraphChunkDeep?: number; // Paragraph deep
+  paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
+
   maxSize?: number;
   overlapRatio?: number;
   customReg?: string[];
@@ -108,6 +112,8 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   let {
     text = '',
     chunkSize,
+    paragraphChunkDeep = 5,
+    paragraphChunkMinSize = 100,
     maxSize = defaultMaxChunkSize,
     overlapRatio = 0.15,
     customReg = []
@@ -123,7 +129,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   text = text.replace(/(```[\s\S]*?```|~~~[\s\S]*?~~~)/g, function (match) {
     return match.replace(/\n/g, codeBlockMarker);
   });
-  // 2. Table handling: extract tables separately and merge their headers
+  // 2. Markdown table handling: extract tables separately and merge their headers
   const tableReg =
     /(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n?)*)(?:\n|$)/g;
   const tableDataList = text.match(tableReg);
@@ -143,25 +149,40 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   text = text.replace(/(\r?\n|\r){3,}/g, '\n\n\n');
 
   // The larger maxLen is, the next sentence is less likely to trigger splitting
-  const markdownIndex = 4;
-  const forbidOverlapIndex = 8;
+  const customRegLen = customReg.length;
+  const markdownIndex = paragraphChunkDeep - 1;
+  const forbidOverlapIndex = customRegLen + markdownIndex + 4;
+
+  const markdownHeaderRules = ((deep?: number): { reg: RegExp; maxLen: number }[] => {
+    if (!deep || deep === 0) return [];
+
+    const maxDeep = Math.min(deep, 8); // Maximum 8 levels
+    const rules: { reg: RegExp; maxLen: number }[] = [];
+
+    for (let i = 1; i <= maxDeep; i++) {
+      const hashSymbols = '#'.repeat(i);
+      rules.push({
+        reg: new RegExp(`^(${hashSymbols}\\s[^\\n]+\\n)`, 'gm'),
+        maxLen: chunkSize
+      });
+    }
+
+    return rules;
+  })(paragraphChunkDeep);
 
   const stepReges: { reg: RegExp | string; maxLen: number }[] = [
     ...customReg.map((text) => ({
       reg: text.replaceAll('\\n', '\n'),
       maxLen: chunkSize
     })),
-    { reg: /^(#\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(##\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(###\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(####\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(#####\s[^\n]+\n)/gm, maxLen: chunkSize },
+    ...markdownHeaderRules,
 
     { reg: /([\n](```[\s\S]*?```|~~~[\s\S]*?~~~))/g, maxLen: maxSize }, // code block
     // Keep HTML table tags as intact as possible
     {
       reg: /(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n)*)/g,
-      maxLen: Math.min(chunkSize * 1.5, maxSize)
-    }, // Keep tables as intact as possible
+      maxLen: chunkSize
+    }, // Keep Markdown tables as intact as possible
     { reg: /(\n{2,})/g, maxLen: chunkSize },
     { reg: /([\n])/g, maxLen: chunkSize },
     // ------ There's no overlap on the top
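To make the generated rules concrete, here is a standalone sketch of the same pattern (simplified from the diff above; the depth and chunk size are placeholder values):

```typescript
// Build one split rule per Markdown heading level, up to the requested depth
const buildHeaderRules = (deep: number, chunkSize: number) =>
  Array.from({ length: Math.min(deep, 8) }, (_, i) => ({
    reg: new RegExp(`^(${'#'.repeat(i + 1)}\\s[^\\n]+\\n)`, 'gm'),
    maxLen: chunkSize
  }));

// paragraphChunkDeep = 3 reproduces the first three of the old hardcoded #/##/### rules
console.log(buildHeaderRules(3, 512).map((r) => r.reg.source));
// => [ '^(#\\s[^\\n]+\\n)', '^(##\\s[^\\n]+\\n)', '^(###\\s[^\\n]+\\n)' ]
```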
@@ -172,12 +193,10 @@ const commonSplit = (props: SplitProps): SplitResponse => {
     { reg: /([,]|,\s)/g, maxLen: chunkSize }
   ];
 
-  const customRegLen = customReg.length;
   const checkIsCustomStep = (step: number) => step < customRegLen;
   const checkIsMarkdownSplit = (step: number) =>
     step >= customRegLen && step <= markdownIndex + customRegLen;
 
-  const checkForbidOverlap = (step: number) => step <= forbidOverlapIndex + customRegLen;
+  const checkForbidOverlap = (step: number) => step <= forbidOverlapIndex;
 
   // if use markdown title split, Separate record title
   const getSplitTexts = ({ text, step }: { text: string; step: number }) => {
@@ -301,6 +320,7 @@
   const splitTexts = getSplitTexts({ text, step });
 
   const chunks: string[] = [];
 
   for (let i = 0; i < splitTexts.length; i++) {
     const item = splitTexts[i];
@@ -443,7 +463,6 @@
 */
 export const splitText2Chunks = (props: SplitProps): SplitResponse => {
   let { text = '' } = props;
   const start = Date.now();
   const splitWithCustomSign = text.split(CUSTOM_SPLIT_SIGN);
 
   const splitResult = splitWithCustomSign.map((item) => {
@@ -130,9 +130,11 @@ export type SystemEnvType = {
   vectorMaxProcess: number;
   qaMaxProcess: number;
   vlmMaxProcess: number;
-  hnswEfSearch: number;
   tokenWorkers: number; // token count max worker
+
+  hnswEfSearch: number;
+  hnswMaxScanTuples: number;
 
   oneapiUrl?: string;
   chatApiKey?: string;
@@ -7,6 +7,7 @@ import {
 } from './type';
 
 export enum AppTypeEnum {
+  gate = 'gate',
   folder = 'folder',
   simple = 'simple',
   workflow = 'advanced',
@@ -60,5 +61,3 @@ export enum AppTemplateTypeEnum {
   // special type
   contribute = 'contribute'
 }
-
-export const defaultDatasetMaxTokens = 16000;
packages/global/core/app/tags.d.ts (vendored, new file, 24 lines)
@@ -0,0 +1,24 @@
import { TeamMemberStatusEnum } from 'support/user/team/constant';
import type { SourceMemberType } from 'support/user/type';

export type TagSchemaType = {
  _id: string;
  teamId: string;
  name: string;
  color: string;
  createTime: Date;
};

export type TagWithCountType = TagSchemaType & {
  count: number;
};

export type TagListItemType = {
  _id: string;
  teamId: string;
  name: string;
  color: string;
  createTime: Date;
  count?: number;
  sourceMember?: SourceMemberType;
};
packages/global/core/app/type.d.ts (vendored, 1 change)
@@ -65,6 +65,7 @@ export type AppListItemType = {
   inheritPermission?: boolean;
   private?: boolean;
   sourceMember: SourceMemberType;
+  tags?: string[];
 };
 
 export type AppDetailType = AppSchema & {
@@ -10,6 +10,8 @@ import { AppTypeEnum } from './constants';
 import { AppErrEnum } from '../../common/error/code/app';
 import { PluginErrEnum } from '../../common/error/code/plugin';
 import { i18nT } from '../../../web/i18n/utils';
+import appErrList from '../../common/error/code/app';
+import pluginErrList from '../../common/error/code/plugin';
 
 export const getDefaultAppForm = (): AppSimpleEditFormType => {
   return {
@@ -44,7 +46,7 @@ export const appWorkflow2Form = ({
   chatConfig
 }: {
   nodes: StoreNodeItemType[];
-  chatConfig: AppChatConfigType;
+  chatConfig?: AppChatConfigType;
 }) => {
   const defaultAppForm = getDefaultAppForm();
   const findInputValueByKey = (inputs: FlowNodeInputItemType[], key: string) => {
@@ -170,6 +172,10 @@ export const appWorkflow2Form = ({
     }
   });
 
+  if (chatConfig) {
+    defaultAppForm.chatConfig = chatConfig;
+  }
+
   return defaultAppForm;
 };
 
@@ -190,17 +196,10 @@ export const getAppType = (config?: WorkflowTemplateBasicType | AppSimpleEditFormType) => {
   return '';
 };
 
-export const formatToolError = (error?: string) => {
-  const unExistError: Array<string> = [
-    AppErrEnum.unAuthApp,
-    AppErrEnum.unExist,
-    PluginErrEnum.unAuth,
-    PluginErrEnum.unExist
-  ];
-
-  if (error && unExistError.includes(error)) {
-    return i18nT('app:un_auth');
-  } else {
-    return error;
-  }
+export const formatToolError = (error?: any) => {
+  if (!error || typeof error !== 'string') return;
+
+  const errorText = appErrList[error]?.message || pluginErrList[error]?.message;
+
+  return errorText || error;
 };
packages/global/core/chat/type.d.ts (vendored, 1 change)
@@ -26,6 +26,7 @@ export type ChatSchema = {
   teamId: string;
   tmbId: string;
   appId: string;
+  createTime: Date;
   updateTime: Date;
   title: string;
   customTitle: string;
packages/global/core/dataset/api.d.ts (vendored, 31 changes)
@@ -1,9 +1,11 @@
-import type { DatasetDataIndexItemType, DatasetSchemaType } from './type';
+import type { ChunkSettingsType, DatasetDataIndexItemType, DatasetSchemaType } from './type';
 import type {
   DatasetCollectionTypeEnum,
   DatasetCollectionDataProcessModeEnum,
   ChunkSettingModeEnum,
-  DataChunkSplitModeEnum
+  DataChunkSplitModeEnum,
+  ChunkTriggerConfigTypeEnum,
+  ParagraphChunkAIModeEnum
 } from './constants';
 import type { LLMModelItemType } from '../ai/model.d';
 import type { ParentIdType } from 'common/parentFolder/type';
@@ -32,26 +34,16 @@ export type DatasetUpdateBody = {
 };
 
 /* ================= collection ===================== */
-export type DatasetCollectionChunkMetadataType = {
+// Input + store params
+type DatasetCollectionStoreDataType = ChunkSettingsType & {
   parentId?: string;
-  customPdfParse?: boolean;
-  trainingType?: DatasetCollectionDataProcessModeEnum;
-  imageIndex?: boolean;
-  autoIndexes?: boolean;
-
-  chunkSettingMode?: ChunkSettingModeEnum;
-  chunkSplitMode?: DataChunkSplitModeEnum;
-
-  chunkSize?: number;
-  indexSize?: number;
-
-  chunkSplitter?: string;
-  qaPrompt?: string;
   metadata?: Record<string, any>;
+
+  customPdfParse?: boolean;
 };
 
 // create collection params
-export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
+export type CreateDatasetCollectionParams = DatasetCollectionStoreDataType & {
   datasetId: string;
   name: string;
   type: DatasetCollectionTypeEnum;
@@ -72,7 +64,7 @@ export type CreateDatasetCollectionParams = DatasetCollectionStoreDataType & {
   nextSyncTime?: Date;
 };
 
-export type ApiCreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
+export type ApiCreateDatasetCollectionParams = DatasetCollectionStoreDataType & {
   datasetId: string;
   tags?: string[];
 };
@@ -90,7 +82,7 @@ export type ApiDatasetCreateDatasetCollectionParams = ApiCreateDatasetCollection
 export type FileIdCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
   fileId: string;
 };
-export type reTrainingDatasetFileCollectionParams = DatasetCollectionChunkMetadataType & {
+export type reTrainingDatasetFileCollectionParams = DatasetCollectionStoreDataType & {
   datasetId: string;
   collectionId: string;
 };
@@ -147,6 +139,7 @@ export type PushDatasetDataProps = {
   collectionId: string;
   data: PushDatasetDataChunkProps[];
   trainingType?: DatasetCollectionDataProcessModeEnum;
+  indexSize?: number;
   autoIndexes?: boolean;
   imageIndex?: boolean;
   prompt?: string;
@@ -120,6 +120,8 @@ export const DatasetCollectionSyncResultMap = {
 export enum DatasetCollectionDataProcessModeEnum {
   chunk = 'chunk',
   qa = 'qa',
+  backup = 'backup',
+
   auto = 'auto' // abandon
 }
 export const DatasetCollectionDataProcessModeMap = {
@@ -131,21 +133,35 @@ export const DatasetCollectionDataProcessModeMap = {
     label: i18nT('common:core.dataset.training.QA mode'),
     tooltip: i18nT('common:core.dataset.import.QA Import Tip')
   },
+  [DatasetCollectionDataProcessModeEnum.backup]: {
+    label: i18nT('dataset:backup_mode'),
+    tooltip: i18nT('dataset:backup_mode')
+  },
   [DatasetCollectionDataProcessModeEnum.auto]: {
     label: i18nT('common:core.dataset.training.Auto mode'),
     tooltip: i18nT('common:core.dataset.training.Auto mode Tip')
   }
 };
 
+export enum ChunkTriggerConfigTypeEnum {
+  minSize = 'minSize',
+  forceChunk = 'forceChunk',
+  maxSize = 'maxSize'
+}
 export enum ChunkSettingModeEnum {
   auto = 'auto',
   custom = 'custom'
 }
 
 export enum DataChunkSplitModeEnum {
+  paragraph = 'paragraph',
   size = 'size',
   char = 'char'
 }
+export enum ParagraphChunkAIModeEnum {
+  auto = 'auto',
+  force = 'force'
+}
 
 /* ------------ data -------------- */
 
@@ -154,7 +170,6 @@ export enum ImportDataSourceEnum {
   fileLocal = 'fileLocal',
   fileLink = 'fileLink',
   fileCustom = 'fileCustom',
-  csvTable = 'csvTable',
   externalFile = 'externalFile',
   apiDataset = 'apiDataset',
   reTraining = 'reTraining'
@@ -32,7 +32,7 @@ export const DatasetDataIndexMap: Record<
     color: 'red'
   },
   [DatasetDataIndexTypeEnum.image]: {
-    label: i18nT('common:data_index_image'),
+    label: i18nT('dataset:data_index_image'),
     color: 'purple'
   }
 };
@@ -118,9 +118,8 @@ export const computeChunkSize = (params: {
     return getLLMMaxChunkSize(params.llmModel);
   }
 
-  return Math.min(params.chunkSize || chunkAutoChunkSize, getLLMMaxChunkSize(params.llmModel));
+  return Math.min(params.chunkSize ?? chunkAutoChunkSize, getLLMMaxChunkSize(params.llmModel));
 };
 
 export const computeChunkSplitter = (params: {
   chunkSettingMode?: ChunkSettingModeEnum;
   chunkSplitMode?: DataChunkSplitModeEnum;
@@ -129,8 +128,21 @@ export const computeChunkSplitter = (params: {
   if (params.chunkSettingMode === ChunkSettingModeEnum.auto) {
     return undefined;
   }
-  if (params.chunkSplitMode === DataChunkSplitModeEnum.size) {
+  if (params.chunkSplitMode !== DataChunkSplitModeEnum.char) {
     return undefined;
   }
   return params.chunkSplitter;
 };
+export const computeParagraphChunkDeep = (params: {
+  chunkSettingMode?: ChunkSettingModeEnum;
+  chunkSplitMode?: DataChunkSplitModeEnum;
+  paragraphChunkDeep?: number;
+}) => {
+  if (params.chunkSettingMode === ChunkSettingModeEnum.auto) {
+    return 5;
+  }
+  if (params.chunkSplitMode === DataChunkSplitModeEnum.paragraph) {
+    return params.paragraphChunkDeep;
+  }
+  return 0;
+};
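A hypothetical call showing how these helpers interact (enum values as defined in the constants diff above; the inputs are placeholders):

```typescript
// Custom settings + paragraph split: the splitter is ignored, the depth is kept
const params = {
  chunkSettingMode: ChunkSettingModeEnum.custom,
  chunkSplitMode: DataChunkSplitModeEnum.paragraph,
  paragraphChunkDeep: 4,
  chunkSplitter: '\\n'
};

computeChunkSplitter(params); // undefined: a splitter now only applies in 'char' mode
computeParagraphChunkDeep(params); // 4: paragraph mode keeps the requested depth
```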
packages/global/core/dataset/type.d.ts (vendored, 48 changes)
@@ -8,26 +8,42 @@ import type {
   DatasetStatusEnum,
   DatasetTypeEnum,
   SearchScoreTypeEnum,
-  TrainingModeEnum
+  TrainingModeEnum,
+  ChunkSettingModeEnum,
+  ChunkTriggerConfigTypeEnum
 } from './constants';
 import type { DatasetPermission } from '../../support/permission/dataset/controller';
 import { Permission } from '../../support/permission/controller';
 import type { APIFileServer, FeishuServer, YuqueServer } from './apiDataset';
 import type { SourceMemberType } from 'support/user/type';
 import type { DatasetDataIndexTypeEnum } from './data/constants';
-import type { ChunkSettingModeEnum } from './constants';
 
 export type ChunkSettingsType = {
-  trainingType: DatasetCollectionDataProcessModeEnum;
-  autoIndexes?: boolean;
+  trainingType?: DatasetCollectionDataProcessModeEnum;
+
+  // Chunk trigger
+  chunkTriggerType?: ChunkTriggerConfigTypeEnum;
+  chunkTriggerMinSize?: number; // maxSize from agent model, not store
+
+  // Data enhance
+  dataEnhanceCollectionName?: boolean; // Auto add collection name to data
+
+  // Index enhance
   imageIndex?: boolean;
+  autoIndexes?: boolean;
 
-  chunkSettingMode?: ChunkSettingModeEnum;
+  // Chunk setting
+  chunkSettingMode?: ChunkSettingModeEnum; // system params / custom params
   chunkSplitMode?: DataChunkSplitModeEnum;
 
-  chunkSize?: number;
+  // Paragraph split
+  paragraphChunkAIMode?: ParagraphChunkAIModeEnum;
+  paragraphChunkDeep?: number; // Paragraph deep
+  paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
+  // Size split
+  chunkSize?: number; // chunk/qa chunk size, Paragraph max chunk size.
+  // Char split
+  chunkSplitter?: string; // chunk/qa chunk splitter
   indexSize?: number;
-  chunkSplitter?: string;
 
   qaPrompt?: string;
 };
@@ -66,7 +82,7 @@ export type DatasetSchemaType = {
   defaultPermission?: number;
 };
 
-export type DatasetCollectionSchemaType = {
+export type DatasetCollectionSchemaType = ChunkSettingsType & {
   _id: string;
   teamId: string;
   tmbId: string;
@@ -101,18 +117,7 @@ export type DatasetCollectionSchemaType = ChunkSettingsType & {
 
   // Parse settings
   customPdfParse?: boolean;
-  // Chunk settings
-  autoIndexes?: boolean;
-  imageIndex?: boolean;
-  trainingType: DatasetCollectionDataProcessModeEnum;
-
-  chunkSettingMode?: ChunkSettingModeEnum;
-  chunkSplitMode?: DataChunkSplitModeEnum;
-
-  chunkSize?: number;
-  indexSize?: number;
-  chunkSplitter?: string;
-  qaPrompt?: string;
 };
 
 export type DatasetCollectionTagsSchemaType = {
@@ -175,6 +180,7 @@ export type DatasetTrainingSchemaType = {
   q: string;
   a: string;
   chunkIndex: number;
+  indexSize?: number;
   weight: number;
   indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
   retryCount: number;
|
||||
export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {
|
||||
if (mode === TrainingModeEnum.qa) return data.length * 20;
|
||||
if (mode === TrainingModeEnum.auto) return data.length * 5;
|
||||
if (mode === TrainingModeEnum.image) return data.length * 2;
|
||||
return data.length;
|
||||
};
|
||||
|
||||
packages/global/core/workflow/type/node.d.ts (vendored, 2 changes)
@@ -59,7 +59,6 @@ export type FlowNodeCommonType = {
 };
 
 export type PluginDataType = {
-  version?: string;
   diagram?: string;
   userGuide?: string;
   courseUrl?: string;
@@ -126,6 +125,7 @@ export type FlowNodeItemType = FlowNodeTemplateType & {
   nodeId: string;
   parentNodeId?: string;
   isError?: boolean;
+  searchedText?: string;
   debugResult?: {
     status: 'running' | 'success' | 'skipped' | 'failed';
     message?: string;
@@ -2,7 +2,18 @@ import { NullPermission, PermissionKeyEnum, PermissionList } from '../constant';
 import { type PermissionListType } from '../type';
 import { i18nT } from '../../../../web/i18n/utils';
-export enum AppPermissionKeyEnum {}
-export const AppPermissionList: PermissionListType = {
+
+export enum AppPermissionKeyEnum {
+  log = 'log',
+  quickGate = 'quickGate',
+  featuredGate = 'featuredGate'
+}
+
+export const AppLogPermission = 0b100000;
+export const GateQuickAppPermission = 0b001100;
+export const GateFeaturedAppPermission = 0b010100;
+
+export const AppPermissionList: PermissionListType<AppPermissionKeyEnum> = {
   [PermissionKeyEnum.read]: {
     ...PermissionList[PermissionKeyEnum.read],
     description: i18nT('app:permission.des.read')
@@ -13,8 +24,28 @@ export const AppPermissionList: PermissionListType<AppPermissionKeyEnum> = {
   },
   [PermissionKeyEnum.manage]: {
     ...PermissionList[PermissionKeyEnum.manage],
+    value: 0b111111,
     description: i18nT('app:permission.des.manage')
   },
+  [AppPermissionKeyEnum.log]: {
+    name: i18nT('app:permission.name.log'),
+    value: AppLogPermission,
+    checkBoxType: 'multiple',
+    description: i18nT('app:permission.des.log')
+  },
+  [AppPermissionKeyEnum.quickGate]: {
+    name: '门户快捷应用权限',
+    description: '',
+    value: GateQuickAppPermission,
+    checkBoxType: 'hiden'
+  },
+  [AppPermissionKeyEnum.featuredGate]: {
+    name: '门户推荐应用权限',
+    description: '',
+    value: GateFeaturedAppPermission,
+    checkBoxType: 'hiden'
+  }
 };
 
 export const AppDefaultPermissionVal = NullPermission;
+export const AppLogPermissionVal = AppPermissionList[AppPermissionKeyEnum.log].value;
@@ -1,7 +1,8 @@
 import { type PerConstructPros, Permission } from '../controller';
-import { AppDefaultPermissionVal } from './constant';
+import { AppDefaultPermissionVal, AppPermissionList } from './constant';
 
 export class AppPermission extends Permission {
+  hasLogPer: boolean = false;
   constructor(props?: PerConstructPros) {
     if (!props) {
       props = {
@@ -10,6 +11,13 @@ export class AppPermission extends Permission {
     } else if (!props?.per) {
       props.per = AppDefaultPermissionVal;
     }
+    props.permissionList = AppPermissionList;
     super(props);
+    this.setUpdatePermissionCallback(() => {
+      this.hasReadPer = this.checkPer(AppPermissionList.read.value);
+      this.hasWritePer = this.checkPer(AppPermissionList.write.value);
+      this.hasManagePer = this.checkPer(AppPermissionList.manage.value);
+      this.hasLogPer = this.checkPer(AppPermissionList.log.value);
+    });
   }
 }
@@ -1,5 +1,10 @@
 import { type PermissionListType, type PermissionValueType } from './type';
-import { PermissionList, NullPermission, OwnerPermissionVal } from './constant';
+import {
+  PermissionList,
+  NullPermission,
+  OwnerPermissionVal,
+  ManagePermissionVal
+} from './constant';
 
 export type PerConstructPros = {
   per?: PermissionValueType;
@@ -63,6 +68,7 @@ export class Permission {
   if (perm === OwnerPermissionVal) {
     return this.value === OwnerPermissionVal;
   }
+
   return (this.value & perm) === perm;
 }
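For readers unfamiliar with bitmask permissions, a small standalone sketch of the check used above, with values taken from the app permission constants in this diff (the read-style value is an assumption for illustration):

```typescript
const AppLogPermission = 0b100000; // from the constants above
const ManagePermission = 0b111111; // manage now carries every bit

// A permission is granted only when all of its bits are present in the value
const checkPer = (value: number, perm: number) => (value & perm) === perm;

checkPer(ManagePermission, AppLogPermission); // true: manage includes log
checkPer(0b000100, AppLogPermission); // false: a read-style value lacks the log bit
```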
packages/global/support/permission/type.d.ts (vendored, 2 changes)
@@ -18,7 +18,7 @@ export type PermissionListType<T = {}> = Record<
     name: string;
     description: string;
     value: PermissionValueType;
-    checkBoxType: 'single' | 'multiple';
+    checkBoxType: 'single' | 'multiple' | 'hiden';
   }
 >;
@@ -19,7 +19,7 @@ export const TeamPermissionList: PermissionListType<TeamPermissionKeyEnum> = {
   },
   [PermissionKeyEnum.manage]: {
     ...PermissionList[PermissionKeyEnum.manage],
-    value: 0b000001
+    value: 0b000101
   },
   [TeamPermissionKeyEnum.appCreate]: {
     checkBoxType: 'multiple',
packages/global/support/user/team/gate/api.d.ts (vendored, new file, 31 lines)
@@ -0,0 +1,31 @@
export type putUpdateGateConfigData = {
  status?: boolean;
  tools?: GateTool[];
  slogan?: string;
  placeholderText?: string;
};

export type putUpdateGateConfigResponse = {
  status?: boolean;
  tools?: string[];
  slogan?: string;
  placeholderText?: string;
};

export type putUpdateGateConfigCopyRightData = {
  name?: string;
  logo?: string;
  banner?: string;
};

export type putUpdateGateConfigCopyRightResponse = {
  name: string;
  logo: string;
  banner: string;
};

export type getGateConfigCopyRightResponse = {
  name: string;
  logo: string;
  banner: string;
};

packages/global/support/user/team/gate/type.d.ts (vendored, new file, 12 lines)
@@ -0,0 +1,12 @@
export type GateSchemaType = {
  teamId: string;
  status: boolean;
  tools: string[];
  featuredApps: string[];
  quickApps: string[];
  slogan: string;
  placeholderText: string;
  name: string;
  logo: string;
  banner: string;
};
@@ -13,6 +13,7 @@ const staticPluginList = [
   'WeWorkWebhook',
   'google',
   'bing',
+  'bocha',
   'delay'
 ];
 // Run in worker thread (Have npm packages)
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4816",
   "name": "钉钉 webhook",
   "avatar": "plugins/dingding",
   "intro": "向钉钉机器人发起 webhook 请求。",

@@ -1,6 +1,5 @@
 {
   "author": "Menghuan1918",
-  "version": "488",
   "name": "PDF识别",
   "avatar": "plugins/doc2x",
   "intro": "将PDF文件发送至Doc2X进行解析,返回结构化的LaTeX公式的文本(markdown),支持传入String类型的URL或者流程输出中的文件链接变量",

@@ -1,6 +1,5 @@
 {
   "author": "Menghuan1918",
-  "version": "488",
   "name": "Doc2X服务",
   "avatar": "plugins/doc2x",
   "intro": "将传入的图片或PDF文件发送至Doc2X进行解析,返回带LaTeX公式的markdown格式的文本。",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4816",
   "name": "企业微信 webhook",
   "avatar": "plugins/qiwei",
   "intro": "向企业微信机器人发起 webhook 请求。只能内部群使用。",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4811",
   "name": "Bing搜索",
   "avatar": "core/workflow/template/bing",
   "intro": "在Bing中搜索。",
packages/plugins/src/bocha/template.json (new file, 604 lines)
@@ -0,0 +1,604 @@
{
  "author": "",
  "name": "网络搜索",
  "avatar": "common/searchLight",
  "intro": "使用博查AI搜索引擎进行网络搜索。",
  "showStatus": true,
  "weight": 10,
  "courseUrl": "",
  "isTool": true,
  "templateType": "search",
  "workflow": {
    "nodes": [
      {
        "nodeId": "pluginInput",
        "name": "workflow:template.plugin_start",
        "intro": "workflow:intro_plugin_input",
        "avatar": "core/workflow/template/workflowStart",
        "flowNodeType": "pluginInput",
        "showStatus": false,
        "position": { "x": 636.3048409085379, "y": -238.61714728578016 },
        "version": "481",
        "inputs": [
          {
            "renderTypeList": ["input"],
            "selectedTypeIndex": 0,
            "valueType": "string",
            "canEdit": true,
            "key": "apiKey",
            "label": "apiKey",
            "description": "博查API密钥",
            "defaultValue": "",
            "required": true
          },
          {
            "renderTypeList": ["input", "reference"],
            "selectedTypeIndex": 0,
            "valueType": "string",
            "canEdit": true,
            "key": "query",
            "label": "query",
            "description": "搜索查询词",
            "defaultValue": "",
            "required": true,
            "toolDescription": "搜索查询词"
          },
          {
            "renderTypeList": ["input", "reference"],
            "selectedTypeIndex": 0,
            "valueType": "string",
            "canEdit": true,
            "key": "freshness",
            "label": "freshness",
            "description": "搜索指定时间范围内的网页。可填值:oneDay(一天内)、oneWeek(一周内)、oneMonth(一个月内)、oneYear(一年内)、noLimit(不限,默认)、YYYY-MM-DD..YYYY-MM-DD(日期范围)、YYYY-MM-DD(指定日期)",
            "defaultValue": "noLimit",
            "required": false,
            "toolDescription": "搜索时间范围"
          },
          {
            "renderTypeList": ["input", "reference"],
            "selectedTypeIndex": 0,
            "valueType": "boolean",
            "canEdit": true,
            "key": "summary",
            "label": "summary",
            "description": "是否显示文本摘要。true显示,false不显示(默认)",
            "defaultValue": false,
            "required": false,
            "toolDescription": "是否显示文本摘要"
          },
          {
            "renderTypeList": ["input", "reference"],
            "selectedTypeIndex": 0,
            "valueType": "string",
            "canEdit": true,
            "key": "include",
            "label": "include",
            "description": "指定搜索的site范围。多个域名使用|或,分隔,最多20个。例如:qq.com|m.163.com",
            "defaultValue": "",
            "required": false,
            "toolDescription": "指定搜索的site范围"
          },
          {
            "renderTypeList": ["input", "reference"],
            "selectedTypeIndex": 0,
            "valueType": "string",
            "canEdit": true,
            "key": "exclude",
            "label": "exclude",
            "description": "排除搜索的网站范围。多个域名使用|或,分隔,最多20个。例如:qq.com|m.163.com",
            "defaultValue": "",
            "required": false,
            "toolDescription": "排除搜索的网站范围"
          },
          {
            "renderTypeList": ["input", "reference"],
            "selectedTypeIndex": 0,
            "valueType": "number",
            "canEdit": true,
            "key": "count",
            "label": "count",
            "description": "返回结果的条数。可填范围:1-50,默认为10",
            "defaultValue": 10,
            "required": false,
            "min": 1,
            "max": 50,
            "toolDescription": "返回结果条数"
          }
        ],
        "outputs": [
          { "id": "apiKey", "valueType": "string", "key": "apiKey", "label": "apiKey", "type": "hidden" },
          { "id": "query", "valueType": "string", "key": "query", "label": "query", "type": "hidden" },
          { "id": "freshness", "valueType": "string", "key": "freshness", "label": "freshness", "type": "hidden" },
          { "id": "summary", "valueType": "boolean", "key": "summary", "label": "summary", "type": "hidden" },
          { "id": "include", "valueType": "string", "key": "include", "label": "include", "type": "hidden" },
          { "id": "exclude", "valueType": "string", "key": "exclude", "label": "exclude", "type": "hidden" },
          { "id": "count", "valueType": "number", "key": "count", "label": "count", "type": "hidden" }
        ]
      },
      {
        "nodeId": "pluginOutput",
        "name": "common:core.module.template.self_output",
        "intro": "workflow:intro_custom_plugin_output",
        "avatar": "core/workflow/template/pluginOutput",
        "flowNodeType": "pluginOutput",
        "showStatus": false,
        "position": { "x": 2764.1105686698083, "y": -30.617147285780163 },
        "version": "481",
        "inputs": [
          {
            "renderTypeList": ["reference"],
            "valueType": "object",
            "canEdit": true,
            "key": "result",
            "label": "result",
            "isToolOutput": true,
            "description": "",
            "value": ["nyA6oA8mF1iW", "httpRawResponse"]
          }
        ],
        "outputs": []
      },
      {
        "nodeId": "pluginConfig",
        "name": "common:core.module.template.system_config",
        "intro": "",
        "avatar": "core/workflow/template/systemConfig",
        "flowNodeType": "pluginConfig",
        "position": { "x": 184.66337662472682, "y": -216.05298493910115 },
        "version": "4811",
        "inputs": [],
        "outputs": []
      },
      {
        "nodeId": "nyA6oA8mF1iW",
        "name": "HTTP 请求",
        "intro": "调用博查搜索API",
        "avatar": "core/workflow/template/httpRequest",
        "flowNodeType": "httpRequest468",
        "showStatus": true,
        "position": { "x": 1335.0647252518884, "y": -455.9043948565971 },
        "version": "481",
        "inputs": [
          {
            "key": "system_addInputParam",
            "renderTypeList": ["addInputParam"],
            "valueType": "dynamic",
            "label": "",
            "required": false,
            "description": "common:core.module.input.description.HTTP Dynamic Input",
            "customInputConfig": {
              "selectValueTypeList": ["string", "number", "boolean", "object", "arrayString", "arrayNumber", "arrayBoolean", "arrayObject", "arrayAny", "any", "chatHistory", "datasetQuote", "dynamic", "selectDataset", "selectApp"],
              "showDescription": false,
              "showDefaultValue": true
            },
            "debugLabel": "",
            "toolDescription": ""
          },
          {
            "key": "system_httpMethod",
            "renderTypeList": ["custom"],
            "valueType": "string",
            "label": "",
            "value": "POST",
            "required": true,
            "debugLabel": "",
            "toolDescription": ""
          },
          {
            "key": "system_httpTimeout",
            "renderTypeList": ["custom"],
            "valueType": "number",
            "label": "",
            "value": 30,
            "min": 5,
            "max": 600,
            "required": true,
            "debugLabel": "",
            "toolDescription": ""
          },
          {
            "key": "system_httpReqUrl",
            "renderTypeList": ["hidden"],
            "valueType": "string",
            "label": "",
            "description": "common:core.module.input.description.Http Request Url",
            "placeholder": "https://api.ai.com/getInventory",
            "required": false,
            "value": "https://api.bochaai.com/v1/web-search",
            "debugLabel": "",
            "toolDescription": ""
          },
          {
            "key": "system_httpHeader",
            "renderTypeList": ["custom"],
            "valueType": "any",
            "value": [
              { "key": "Authorization", "type": "string", "value": "Bearer {{$pluginInput.apiKey$}}" },
              { "key": "Content-Type", "type": "string", "value": "application/json" }
            ],
            "label": "",
            "description": "common:core.module.input.description.Http Request Header",
            "placeholder": "common:core.module.input.description.Http Request Header",
            "required": false,
            "debugLabel": "",
            "toolDescription": ""
          },
          {
            "key": "system_httpParams",
            "renderTypeList": ["hidden"],
            "valueType": "any",
            "value": [],
            "label": "",
            "required": false,
            "debugLabel": "",
            "toolDescription": ""
          },
          {
            "key": "system_httpJsonBody",
            "renderTypeList": ["hidden"],
            "valueType": "any",
            "value": "{\n \"query\": \"{{query}}\",\n \"freshness\": \"{{freshness}}\",\n \"summary\": {{summary}},\n \"include\": \"{{include}}\",\n \"exclude\": \"{{exclude}}\",\n \"count\": {{count}}\n}",
            "label": "",
            "required": false,
            "debugLabel": "",
            "toolDescription": ""
          },
          {
            "key": "system_httpFormBody",
            "renderTypeList": ["hidden"],
            "valueType": "any",
            "value": [],
            "label": "",
            "required": false,
            "debugLabel": "",
            "toolDescription": ""
          },
          {
            "key": "system_httpContentType",
            "renderTypeList": ["hidden"],
            "valueType": "string",
            "value": "json",
            "label": "",
            "required": false,
            "debugLabel": "",
            "toolDescription": ""
          },
          {
            "valueType": "string",
            "renderTypeList": ["reference"],
            "key": "query",
            "label": "query",
            "toolDescription": "博查搜索检索词",
            "required": true,
            "canEdit": true,
            "editField": { "key": true, "description": true },
            "customInputConfig": {
              "selectValueTypeList": ["string", "number", "boolean", "object", "arrayString", "arrayNumber", "arrayBoolean", "arrayObject", "arrayAny", "any", "chatHistory", "datasetQuote", "dynamic", "selectApp", "selectDataset"],
              "showDescription": false,
              "showDefaultValue": true
            },
            "value": ["pluginInput", "query"]
          },
          {
            "valueType": "string",
            "renderTypeList": ["reference"],
            "key": "freshness",
            "label": "freshness",
            "toolDescription": "搜索时间范围",
            "required": false,
            "canEdit": true,
            "editField": { "key": true, "description": true },
            "customInputConfig": {
              "selectValueTypeList": ["string", "number", "boolean", "object", "arrayString", "arrayNumber", "arrayBoolean", "arrayObject", "arrayAny", "any", "chatHistory", "datasetQuote", "dynamic", "selectApp", "selectDataset"],
              "showDescription": false,
              "showDefaultValue": true
            },
            "value": ["pluginInput", "freshness"]
          },
          {
            "valueType": "boolean",
            "renderTypeList": ["reference"],
            "key": "summary",
            "label": "summary",
            "toolDescription": "是否显示文本摘要",
            "required": false,
            "canEdit": true,
            "editField": { "key": true, "description": true },
            "customInputConfig": {
              "selectValueTypeList": ["string", "number", "boolean", "object", "arrayString", "arrayNumber", "arrayBoolean", "arrayObject", "arrayAny", "any", "chatHistory", "datasetQuote", "dynamic", "selectApp", "selectDataset"],
              "showDescription": false,
              "showDefaultValue": true
            },
            "value": ["pluginInput", "summary"]
          },
          {
            "valueType": "string",
            "renderTypeList": ["reference"],
            "key": "include",
            "label": "include",
            "toolDescription": "指定搜索的site范围",
            "required": false,
            "canEdit": true,
            "editField": { "key": true, "description": true },
            "customInputConfig": {
              "selectValueTypeList": ["string", "number", "boolean", "object", "arrayString", "arrayNumber", "arrayBoolean", "arrayObject", "arrayAny", "any", "chatHistory", "datasetQuote", "dynamic", "selectApp", "selectDataset"],
              "showDescription": false,
              "showDefaultValue": true
            },
            "value": ["pluginInput", "include"]
          },
          {
            "valueType": "string",
            "renderTypeList": ["reference"],
            "key": "exclude",
            "label": "exclude",
            "toolDescription": "排除搜索的网站范围",
            "required": false,
            "canEdit": true,
            "editField": { "key": true, "description": true },
            "customInputConfig": {
              "selectValueTypeList": ["string", "number", "boolean", "object", "arrayString", "arrayNumber", "arrayBoolean", "arrayObject", "arrayAny", "any", "chatHistory", "datasetQuote", "dynamic", "selectApp", "selectDataset"],
              "showDescription": false,
              "showDefaultValue": true
            },
            "value": ["pluginInput", "exclude"]
          },
          {
            "valueType": "number",
            "renderTypeList": ["reference"],
            "key": "count",
            "label": "count",
            "toolDescription": "返回结果条数",
            "required": false,
            "canEdit": true,
            "editField": { "key": true, "description": true },
            "customInputConfig": {
              "selectValueTypeList": ["string", "number", "boolean", "object", "arrayString", "arrayNumber", "arrayBoolean", "arrayObject", "arrayAny", "any", "chatHistory", "datasetQuote", "dynamic", "selectApp", "selectDataset"],
              "showDescription": false,
              "showDefaultValue": true
            },
            "value": ["pluginInput", "count"]
          }
        ],
        "outputs": [
          {
            "id": "error",
            "key": "error",
            "label": "workflow:request_error",
            "description": "HTTP请求错误信息,成功时返回空",
            "valueType": "object",
            "type": "static"
          },
          {
            "id": "httpRawResponse",
            "key": "httpRawResponse",
            "required": true,
            "label": "workflow:raw_response",
            "description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。",
            "valueType": "any",
            "type": "static"
          },
          {
            "id": "system_addOutputParam",
            "key": "system_addOutputParam",
            "type": "dynamic",
            "valueType": "dynamic",
            "label": "",
            "editField": { "key": true, "valueType": true }
          }
        ]
      }
    ],
    "edges": [
      {
        "source": "pluginInput",
        "target": "nyA6oA8mF1iW",
        "sourceHandle": "pluginInput-source-right",
        "targetHandle": "nyA6oA8mF1iW-target-left"
      },
      {
        "source": "nyA6oA8mF1iW",
        "target": "pluginOutput",
        "sourceHandle": "nyA6oA8mF1iW-source-right",
        "targetHandle": "pluginOutput-target-left"
      }
    ]
  },
  "chatConfig": {}
}
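The template above reduces to a single authenticated POST. A sketch of the equivalent raw request, using the endpoint, headers, and body wired into the HTTP node (YOUR_API_KEY is a placeholder):

```bash
curl --location --request POST 'https://api.bochaai.com/v1/web-search' \
  --header 'Authorization: Bearer YOUR_API_KEY' \
  --header 'Content-Type: application/json' \
  --data-raw '{
    "query": "FastGPT",
    "freshness": "noLimit",
    "summary": false,
    "count": 10
  }'
```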
@@ -1,6 +1,5 @@
 {
   "author": "silencezhang",
-  "version": "4811",
   "name": "数据库连接",
   "avatar": "core/workflow/template/datasource",
   "intro": "可连接常用数据库,并执行sql",

@@ -1,6 +1,5 @@
 {
   "author": "collin",
-  "version": "4817",
   "name": "流程等待",
   "avatar": "core/workflow/template/sleep",
   "intro": "让工作流等待指定时间后运行",

@@ -1,6 +1,5 @@
 {
   "author": "silencezhang",
-  "version": "4817",
   "name": "基础图表",
   "avatar": "core/workflow/template/baseChart",
   "intro": "根据数据生成图表,可根据chartType生成柱状图,折线图,饼图",

@@ -1,6 +1,5 @@
 {
   "author": "silencezhang",
-  "version": "486",
   "name": "BI图表功能",
   "avatar": "core/workflow/template/BI",
   "intro": "BI图表功能,可以生成一些常用的图表,如饼图,柱状图,折线图等",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 网络搜索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行网络搜索",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 图片搜索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行图片搜索",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 新闻检索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行新闻检索",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 视频搜索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行视频搜索",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo服务",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "DuckDuckGo 服务,包含网络搜索、图片搜索、新闻搜索等。",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "488",
   "name": "飞书 webhook",
   "avatar": "core/app/templates/plugin-feishu",
   "intro": "向飞书机器人发起 webhook 请求。",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "网页内容抓取",
   "avatar": "core/workflow/template/fetchUrl",
   "intro": "可获取一个网页链接内容,并以 Markdown 格式输出,仅支持获取静态网站。",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "481",
   "templateType": "tools",
   "name": "获取当前时间",
   "avatar": "core/workflow/template/getTime",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4811",
   "name": "Google搜索",
   "avatar": "core/workflow/template/google",
   "intro": "在google中搜索。",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "数学公式执行",
   "avatar": "core/workflow/template/mathCall",
   "intro": "用于执行数学表达式的工具,通过 js 的 expr-eval 库运行表达式并返回结果。",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4816",
   "name": "Search XNG 搜索",
   "avatar": "core/workflow/template/searxng",
   "intro": "使用 Search XNG 服务进行搜索。",

@@ -1,6 +1,5 @@
 {
   "author": "cloudpense",
-  "version": "1.0.0",
   "name": "Email 邮件发送",
   "avatar": "plugins/email",
   "intro": "通过SMTP协议发送电子邮件(nodemailer)",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "489",
   "name": "文本加工",
   "avatar": "/imgs/workflow/textEditor.svg",
   "intro": "可对固定或传入的文本进行加工后输出,非字符串类型数据最终会转成字符串类型。",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4811",
   "name": "Wiki搜索",
   "avatar": "core/workflow/template/wiki",
   "intro": "在Wiki中查询释义。",
18
packages/service/common/api/type.d.ts
vendored
@@ -6,12 +6,6 @@ import type {
} from '../../core/dataset/search/controller';
import type { AuthOpenApiLimitProps } from '../../support/openapi/auth';
import type { CreateUsageProps, ConcatUsageProps } from '@fastgpt/global/support/wallet/usage/api';
import type {
  GetProApiDatasetFileContentParams,
  GetProApiDatasetFileDetailParams,
  GetProApiDatasetFileListParams,
  GetProApiDatasetFilePreviewUrlParams
} from '../../core/dataset/apiDataset/proApi';

declare global {
  var textCensorHandler: (params: { text: string }) => Promise<{ code: number; message?: string }>;
@@ -19,16 +13,4 @@ declare global {
  var authOpenApiHandler: (data: AuthOpenApiLimitProps) => Promise<any>;
  var createUsageHandler: (data: CreateUsageProps) => any;
  var concatUsageHandler: (data: ConcatUsageProps) => any;

  // API dataset
  var getProApiDatasetFileList: (data: GetProApiDatasetFileListParams) => Promise<APIFileItem[]>;
  var getProApiDatasetFileContent: (
    data: GetProApiDatasetFileContentParams
  ) => Promise<ApiFileReadContentResponse>;
  var getProApiDatasetFilePreviewUrl: (
    data: GetProApiDatasetFilePreviewUrlParams
  ) => Promise<string>;
  var getProApiDatasetFileDetail: (
    data: GetProApiDatasetFileDetailParams
  ) => Promise<ApiDatasetDetailResponse>;
}

178
packages/service/common/buffer/rawText/controller.ts
Normal file
@@ -0,0 +1,178 @@
import { retryFn } from '@fastgpt/global/common/system/utils';
import { connectionMongo } from '../../mongo';
import { MongoRawTextBufferSchema, bucketName } from './schema';
import { addLog } from '../../system/log';
import { setCron } from '../../system/cron';
import { checkTimerLock } from '../../system/timerLock/utils';
import { TimerIdEnum } from '../../system/timerLock/constants';

const getGridBucket = () => {
  return new connectionMongo.mongo.GridFSBucket(connectionMongo.connection.db!, {
    bucketName: bucketName
  });
};

export const addRawTextBuffer = async ({
  sourceId,
  sourceName,
  text,
  expiredTime
}: {
  sourceId: string;
  sourceName: string;
  text: string;
  expiredTime: Date;
}) => {
  const gridBucket = getGridBucket();
  const metadata = {
    sourceId,
    sourceName,
    expiredTime
  };

  const buffer = Buffer.from(text);

  const fileSize = buffer.length;
  // Chunk size: as large as possible, but at most 14MB and at least 128KB
  const chunkSizeBytes = (() => {
    // Ideal chunk size: file size ÷ target chunk count (10); each chunk must also stay under 14MB
    const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);

    // Ensure the chunk size is at least 128KB
    const minChunkSize = 128 * 1024; // 128KB

    // Take the larger of the ideal and minimum chunk sizes
    let chunkSize = Math.max(idealChunkSize, minChunkSize);

    // Round the chunk size up to the nearest multiple of 64KB to keep it tidy
    chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);

    return chunkSize;
  })();

  const uploadStream = gridBucket.openUploadStream(sourceId, {
    metadata,
    chunkSizeBytes
  });

  return retryFn(async () => {
    return new Promise((resolve, reject) => {
      uploadStream.end(buffer);
      uploadStream.on('finish', () => {
        resolve(uploadStream.id);
      });
      uploadStream.on('error', (error) => {
        addLog.error('addRawTextBuffer error', error);
        resolve('');
      });
    });
  });
};

export const getRawTextBuffer = async (sourceId: string) => {
  const gridBucket = getGridBucket();

  return retryFn(async () => {
    const bufferData = await MongoRawTextBufferSchema.findOne(
      {
        'metadata.sourceId': sourceId
      },
      '_id metadata'
    ).lean();
    if (!bufferData) {
      return null;
    }

    // Read file content
    const downloadStream = gridBucket.openDownloadStream(bufferData._id);
    const chunks: Buffer[] = [];

    return new Promise<{
      text: string;
      sourceName: string;
    } | null>((resolve, reject) => {
      downloadStream.on('data', (chunk) => {
        chunks.push(chunk);
      });

      downloadStream.on('end', () => {
        const buffer = Buffer.concat(chunks);
        const text = buffer.toString('utf8');
        resolve({
          text,
          sourceName: bufferData.metadata?.sourceName || ''
        });
      });

      downloadStream.on('error', (error) => {
        addLog.error('getRawTextBuffer error', error);
        resolve(null);
      });
    });
  });
};

export const deleteRawTextBuffer = async (sourceId: string): Promise<boolean> => {
  const gridBucket = getGridBucket();

  return retryFn(async () => {
    const buffer = await MongoRawTextBufferSchema.findOne({ 'metadata.sourceId': sourceId });
    if (!buffer) {
      return false;
    }

    await gridBucket.delete(buffer._id);
    return true;
  });
};

export const updateRawTextBufferExpiredTime = async ({
  sourceId,
  expiredTime
}: {
  sourceId: string;
  expiredTime: Date;
}) => {
  return retryFn(async () => {
    return MongoRawTextBufferSchema.updateOne(
      { 'metadata.sourceId': sourceId },
      { $set: { 'metadata.expiredTime': expiredTime } }
    );
  });
};

export const clearExpiredRawTextBufferCron = async () => {
  const clearExpiredRawTextBuffer = async () => {
    addLog.debug('Clear expired raw text buffer start');
    const gridBucket = getGridBucket();

    return retryFn(async () => {
      const data = await MongoRawTextBufferSchema.find(
        {
          'metadata.expiredTime': { $lt: new Date() }
        },
        '_id'
      ).lean();

      for (const item of data) {
        await gridBucket.delete(item._id);
      }
      addLog.debug('Clear expired raw text buffer end');
    });
  };

  setCron('*/10 * * * *', async () => {
    if (
      await checkTimerLock({
        timerId: TimerIdEnum.clearExpiredRawTextBuffer,
        lockMinuted: 9
      })
    ) {
      try {
        await clearExpiredRawTextBuffer();
      } catch (error) {
        addLog.error('clearExpiredRawTextBufferCron error', error);
      }
    }
  });
};
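The file above is the whole of the new GridFS-backed text buffer. A minimal usage sketch follows; the file name and the 20-minute TTL are assumptions, mirroring the caller added in gridfs/controller.ts further down:

import { addMinutes } from 'date-fns';
import { addRawTextBuffer, getRawTextBuffer } from './controller';

// Cache the parsed text of a file, then read it back on the next request.
const demo = async (sourceId: string, rawText: string) => {
  await addRawTextBuffer({
    sourceId,
    sourceName: 'demo.pdf', // hypothetical display name
    text: rawText,
    expiredTime: addMinutes(new Date(), 20) // assumed TTL; the cron above purges expired entries
  });

  const hit = await getRawTextBuffer(sourceId); // hit is null on a cache miss
  return hit?.text;
};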
@@ -1,33 +1,22 @@
import { getMongoModel, Schema } from '../../mongo';
import { type RawTextBufferSchemaType } from './type';
import { getMongoModel, type Types, Schema } from '../../mongo';

export const collectionName = 'buffer_rawtexts';
export const bucketName = 'buffer_rawtext';

const RawTextBufferSchema = new Schema({
  sourceId: {
    type: String,
    required: true
  },
  rawText: {
    type: String,
    default: ''
  },
  createTime: {
    type: Date,
    default: () => new Date()
  },
  metadata: Object
  metadata: {
    sourceId: { type: String, required: true },
    sourceName: { type: String, required: true },
    expiredTime: { type: Date, required: true }
  }
});
RawTextBufferSchema.index({ 'metadata.sourceId': 'hashed' });
RawTextBufferSchema.index({ 'metadata.expiredTime': -1 });

try {
  RawTextBufferSchema.index({ sourceId: 1 });
  // 20 minutes
  RawTextBufferSchema.index({ createTime: 1 }, { expireAfterSeconds: 20 * 60 });
} catch (error) {
  console.log(error);
}

export const MongoRawTextBuffer = getMongoModel<RawTextBufferSchemaType>(
  collectionName,
  RawTextBufferSchema
);
export const MongoRawTextBufferSchema = getMongoModel<{
  _id: Types.ObjectId;
  metadata: {
    sourceId: string;
    sourceName: string;
    expiredTime: Date;
  };
}>(`${bucketName}.files`, RawTextBufferSchema);

@@ -1,8 +0,0 @@
export type RawTextBufferSchemaType = {
  sourceId: string;
  rawText: string;
  createTime: Date;
  metadata?: {
    filename: string;
  };
};
@@ -6,13 +6,13 @@ import { type DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema';
import { detectFileEncoding, detectFileEncodingByPath } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { MongoRawTextBuffer } from '../../buffer/rawText/schema';
import { readRawContentByFileBuffer } from '../read/utils';
import { gridFsStream2Buffer, stream2Encoding } from './utils';
import { addLog } from '../../system/log';
import { readFromSecondary } from '../../mongo/utils';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import { Readable } from 'stream';
import { addRawTextBuffer, getRawTextBuffer } from '../../buffer/rawText/controller';
import { addMinutes } from 'date-fns';

export function getGFSCollection(bucket: `${BucketNameEnum}`) {
  MongoDatasetFileSchema;
@@ -210,28 +210,26 @@ export const readFileContentFromMongo = async ({
  tmbId,
  bucketName,
  fileId,
  isQAImport = false,
  customPdfParse = false
  customPdfParse = false,
  getFormatText
}: {
  teamId: string;
  tmbId: string;
  bucketName: `${BucketNameEnum}`;
  fileId: string;
  isQAImport?: boolean;
  customPdfParse?: boolean;
  getFormatText?: boolean; // Convert data to markdown format whenever possible
}): Promise<{
  rawText: string;
  filename: string;
}> => {
  const bufferId = `${fileId}-${customPdfParse}`;
  const bufferId = `${String(fileId)}-${customPdfParse}`;
  // read buffer
  const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: bufferId }, undefined, {
    ...readFromSecondary
  }).lean();
  const fileBuffer = await getRawTextBuffer(bufferId);
  if (fileBuffer) {
    return {
      rawText: fileBuffer.rawText,
      filename: fileBuffer.metadata?.filename || ''
      rawText: fileBuffer.text,
      filename: fileBuffer?.sourceName
    };
  }

@@ -254,8 +252,8 @@ export const readFileContentFromMongo = async ({
  // Get raw text
  const { rawText } = await readRawContentByFileBuffer({
    customPdfParse,
    getFormatText,
    extension,
    isQAImport,
    teamId,
    tmbId,
    buffer: fileBuffers,
@@ -265,16 +263,13 @@ export const readFileContentFromMongo = async ({
    }
  });

  // < 14M
  if (fileBuffers.length < 14 * 1024 * 1024 && rawText.trim()) {
    MongoRawTextBuffer.create({
      sourceId: bufferId,
      rawText,
      metadata: {
        filename: file.filename
      }
    });
  }
  // Add buffer
  addRawTextBuffer({
    sourceId: bufferId,
    sourceName: file.filename,
    text: rawText,
    expiredTime: addMinutes(new Date(), 20)
  });

  return {
    rawText,

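Note that the new cache write is fire-and-forget: addRawTextBuffer is not awaited, so a slow or failed GridFS write never blocks the response (errors are logged and swallowed inside the helper). A condensed, hypothetical view of the resulting read-through flow, with parseFile standing in for the real readRawContentByFileBuffer call:

import { addMinutes } from 'date-fns';
import { addRawTextBuffer, getRawTextBuffer } from '../../buffer/rawText/controller';

const readWithCache = async (
  bufferId: string,
  filename: string,
  parseFile: () => Promise<string> // stand-in for the real parser
) => {
  const hit = await getRawTextBuffer(bufferId);
  if (hit) return { rawText: hit.text, filename: hit.sourceName };

  const rawText = await parseFile();
  // Fire-and-forget: cache-write errors are handled inside addRawTextBuffer itself
  void addRawTextBuffer({
    sourceId: bufferId,
    sourceName: filename,
    text: rawText,
    expiredTime: addMinutes(new Date(), 20)
  });
  return { rawText, filename };
};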
@@ -1,16 +1,16 @@
import { Schema, getMongoModel } from '../../mongo';

const DatasetFileSchema = new Schema({});
const ChatFileSchema = new Schema({});
const DatasetFileSchema = new Schema({
  metadata: Object
});
const ChatFileSchema = new Schema({
  metadata: Object
});

try {
  DatasetFileSchema.index({ uploadDate: -1 });
  DatasetFileSchema.index({ uploadDate: -1 });

  ChatFileSchema.index({ uploadDate: -1 });
  ChatFileSchema.index({ 'metadata.chatId': 1 });
} catch (error) {
  console.log(error);
}
ChatFileSchema.index({ uploadDate: -1 });
ChatFileSchema.index({ 'metadata.chatId': 1 });

export const MongoDatasetFileSchema = getMongoModel('dataset.files', DatasetFileSchema);
export const MongoChatFileSchema = getMongoModel('chat.files', ChatFileSchema);

@@ -1,5 +1,57 @@
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { PassThrough } from 'stream';
import { getGridBucket } from './controller';
import { type BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { retryFn } from '@fastgpt/global/common/system/utils';

export const createFileFromText = async ({
  bucket,
  filename,
  text,
  metadata
}: {
  bucket: `${BucketNameEnum}`;
  filename: string;
  text: string;
  metadata: Record<string, any>;
}) => {
  const gridBucket = getGridBucket(bucket);

  const buffer = Buffer.from(text);

  const fileSize = buffer.length;
  // Chunk size: as large as possible, but at most 14MB and at least 128KB
  const chunkSizeBytes = (() => {
    // Ideal chunk size: file size ÷ target chunk count (10); each chunk must also stay under 14MB
    const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);

    // Ensure the chunk size is at least 128KB
    const minChunkSize = 128 * 1024; // 128KB

    // Take the larger of the ideal and minimum chunk sizes
    let chunkSize = Math.max(idealChunkSize, minChunkSize);

    // Round the chunk size up to the nearest multiple of 64KB to keep it tidy
    chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);

    return chunkSize;
  })();

  const uploadStream = gridBucket.openUploadStream(filename, {
    metadata,
    chunkSizeBytes
  });

  return retryFn(async () => {
    return new Promise<{ fileId: string }>((resolve, reject) => {
      uploadStream.end(buffer);
      uploadStream.on('finish', () => {
        resolve({ fileId: String(uploadStream.id) });
      });
      uploadStream.on('error', reject);
    });
  });
};

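The chunk-size IIFE above (duplicated from the rawText buffer controller) is pure arithmetic; a few worked values illustrate its three regimes. The 14MB ceiling presumably keeps each GridFS chunk comfortably under MongoDB's 16MB document limit:

// fileSize = 50MB  -> ideal = min(ceil(50MB / 10), 14MB) = 5MB -> max(5MB, 128KB) = 5MB (already a 64KB multiple)
// fileSize = 200MB -> ideal = min(20MB, 14MB) = 14MB           -> 14MB (the safety cap)
// fileSize = 300KB -> ideal = 30KB                             -> max(30KB, 128KB) = 128KB (the floor)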
export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
  return new Promise<Buffer>((resolve, reject) => {

@@ -16,6 +16,7 @@ export type readRawTextByLocalFileParams = {
  path: string;
  encoding: string;
  customPdfParse?: boolean;
  getFormatText?: boolean;
  metadata?: Record<string, any>;
};
export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams) => {
@@ -27,8 +28,8 @@ export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParam

  return readRawContentByFileBuffer({
    extension,
    isQAImport: false,
    customPdfParse: params.customPdfParse,
    getFormatText: params.getFormatText,
    teamId: params.teamId,
    tmbId: params.tmbId,
    encoding: params.encoding,
@@ -46,7 +47,7 @@ export const readRawContentByFileBuffer = async ({
  encoding,
  metadata,
  customPdfParse = false,
  isQAImport = false
  getFormatText = true
}: {
  teamId: string;
  tmbId: string;
@@ -57,8 +58,10 @@ export const readRawContentByFileBuffer = async ({
  metadata?: Record<string, any>;

  customPdfParse?: boolean;
  isQAImport: boolean;
}): Promise<ReadFileResponse> => {
  getFormatText?: boolean;
}): Promise<{
  rawText: string;
}> => {
  const systemParse = () =>
    runWorker<ReadFileResponse>(WorkerNameEnum.readFile, {
      extension,
@@ -107,7 +110,7 @@ export const readRawContentByFileBuffer = async ({

    return {
      rawText: text,
      formatText: rawText,
      formatText: text,
      imageList
    };
  };
@@ -176,16 +179,7 @@ export const readRawContentByFileBuffer = async ({
  });
}

if (['csv', 'xlsx'].includes(extension)) {
  // qa data
  if (isQAImport) {
    rawText = rawText || '';
  } else {
    rawText = formatText || rawText;
  }
}

addLog.debug(`Upload file success, time: ${Date.now() - start}ms`);

return { rawText, formatText, imageList };
return { rawText: getFormatText ? formatText || rawText : rawText };
};

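Net effect of swapping isQAImport for getFormatText, as read from the diff: the per-extension csv/xlsx branch is gone and callers now choose the returned form explicitly.

// For a file where the worker produces both a raw and a formatted form:
// readRawContentByFileBuffer({ ..., getFormatText: true })  -> { rawText: formatText || rawText }
// readRawContentByFileBuffer({ ..., getFormatText: false }) -> { rawText } // the untouched raw content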
@@ -10,6 +10,7 @@ let jieba: Jieba | undefined;
})();

const stopWords = new Set([
  '\n',
  '--',
  '?',
  '“',
@@ -1519,8 +1520,7 @@
]);

export async function jiebaSplit({ text }: { text: string }) {
  text = text.replace(/[#*`_~>[\](){}|]/g, '').replace(/\S*https?\S*/gi, '');

  text = text.replace(/[#*`_~>[\](){}|]|\S*https?\S*/g, '').trim();
  const tokens = (await jieba!.cutAsync(text, true)) as string[];

  return (

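The two replace calls are merged into a single alternation and a trim is added; note the new pattern drops the old case-insensitive flag, so upper-case "HTTP" URLs are no longer stripped (presumably acceptable in practice). An illustrative input:

// const text = 'See **docs** at https://example.com [here]';
// text.replace(/[#*`_~>[\](){}|]|\S*https?\S*/g, '').trim()
//   -> 'See docs at  here'  (markdown symbols and the URL removed in one pass)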
@@ -5,7 +5,8 @@ export enum TimerIdEnum {
  clearExpiredSubPlan = 'clearExpiredSubPlan',
  updateStandardPlan = 'updateStandardPlan',
  scheduleTriggerApp = 'scheduleTriggerApp',
  notification = 'notification'
  notification = 'notification',
  clearExpiredRawTextBuffer = 'clearExpiredRawTextBuffer'
}

export enum LockNotificationEnum {

@@ -188,6 +188,7 @@ export class PgVectorCtrl {
  const results: any = await PgClient.query(
    `BEGIN;
    SET LOCAL hnsw.ef_search = ${global.systemEnv?.hnswEfSearch || 100};
    SET LOCAL hnsw.max_scan_tuples = ${global.systemEnv?.hnswMaxScanTuples || 100000};
    SET LOCAL hnsw.iterative_scan = relaxed_order;
    WITH relaxed_results AS MATERIALIZED (
      select id, collection_id, vector <#> '[${vector}]' AS score
@@ -199,7 +200,7 @@ export class PgVectorCtrl {
    ) SELECT id, collection_id, score FROM relaxed_results ORDER BY score;
    COMMIT;`
  );
  const rows = results?.[3]?.rows as PgSearchRawType[];
  const rows = results?.[results.length - 2]?.rows as PgSearchRawType[];

  if (!Array.isArray(rows)) {
    return {

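Why the index changed: a multi-statement batch yields one result entry per statement, so the fixed `results[3]` broke the moment the extra SET LOCAL was added. A sketch of the assumed result layout:

// One entry per statement in the batch:
// [ BEGIN, SET ef_search, SET max_scan_tuples, SET iterative_scan, SELECT, COMMIT ]
//     0          1                 2                   3               4      5
// results.length - 2 === 4 -> always the SELECT's rows, however many SETs precede it.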
@@ -78,7 +78,7 @@ export const createChatCompletion = async ({
  }
  body.model = modelConstantsData.model;

  const formatTimeout = timeout ? timeout : body.stream ? 60000 : 600000;
  const formatTimeout = timeout ? timeout : 600000;
  const ai = getAIApi({
    userKey,
    timeout: formatTimeout

@@ -1,6 +1,54 @@
{
  "provider": "Claude",
  "list": [
    {
      "model": "claude-sonnet-4-20250514",
      "name": "claude-sonnet-4-20250514",
      "maxContext": 200000,
      "maxResponse": 8000,
      "quoteMaxToken": 100000,
      "maxTemperature": 1,
      "showTopP": true,
      "showStopSign": true,
      "vision": true,
      "toolChoice": true,
      "functionCall": false,
      "defaultSystemChatPrompt": "",
      "datasetProcess": true,
      "usedInClassify": true,
      "customCQPrompt": "",
      "usedInExtractFields": true,
      "usedInQueryExtension": true,
      "customExtractPrompt": "",
      "usedInToolCall": true,
      "defaultConfig": {},
      "fieldMap": {},
      "type": "llm"
    },
    {
      "model": "claude-opus-4-20250514",
      "name": "claude-opus-4-20250514",
      "maxContext": 200000,
      "maxResponse": 4096,
      "quoteMaxToken": 100000,
      "maxTemperature": 1,
      "showTopP": true,
      "showStopSign": true,
      "vision": true,
      "toolChoice": true,
      "functionCall": false,
      "defaultSystemChatPrompt": "",
      "datasetProcess": true,
      "usedInClassify": true,
      "customCQPrompt": "",
      "usedInExtractFields": true,
      "usedInQueryExtension": true,
      "customExtractPrompt": "",
      "usedInToolCall": true,
      "defaultConfig": {},
      "fieldMap": {},
      "type": "llm"
    },
    {
      "model": "claude-3-7-sonnet-20250219",
      "name": "claude-3-7-sonnet-20250219",

@@ -25,6 +25,30 @@
      "showTopP": true,
      "showStopSign": true
    },
    {
      "model": "gemini-2.5-flash-preview-04-17",
      "name": "gemini-2.5-flash-preview-04-17",
      "maxContext": 1000000,
      "maxResponse": 8000,
      "quoteMaxToken": 60000,
      "maxTemperature": 1,
      "vision": true,
      "toolChoice": true,
      "functionCall": false,
      "defaultSystemChatPrompt": "",
      "datasetProcess": true,
      "usedInClassify": true,
      "customCQPrompt": "",
      "usedInExtractFields": true,
      "usedInQueryExtension": true,
      "customExtractPrompt": "",
      "usedInToolCall": true,
      "defaultConfig": {},
      "fieldMap": {},
      "type": "llm",
      "showTopP": true,
      "showStopSign": true
    },
    {
      "model": "gemini-2.0-flash",
      "name": "gemini-2.0-flash",

@@ -18,15 +18,17 @@ import json5 from 'json5';
 */
export const computedMaxToken = ({
  maxToken,
  model
  model,
  min
}: {
  maxToken?: number;
  model: LLMModelItemType;
  min?: number;
}) => {
  if (maxToken === undefined) return;

  maxToken = Math.min(maxToken, model.maxResponse);
  return maxToken;
  return Math.max(maxToken, min || 0);
};

// FastGPT temperature range: [0,10], ai temperature: [0,2], (0,1] ...
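Worked values for the new min clamp, assuming a model with maxResponse of 4096:

// computedMaxToken({ maxToken: 8000, model, min: 100 }) -> min(8000, 4096) = 4096 -> max(4096, 100) = 4096
// computedMaxToken({ maxToken: 50,   model, min: 100 }) -> min(50, 4096)   = 50   -> max(50, 100)   = 100
// computedMaxToken({ model })                           -> undefined (maxToken was never set)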
@@ -178,7 +180,7 @@ export const llmStreamResponseToAnswerText = async (
    }
  }
  return {
    text: parseReasoningContent(answer)[1],
    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
    usage,
    toolCalls
  };
@@ -192,8 +194,9 @@ export const llmUnStreamResponseToAnswerText = async (
}> => {
  const answer = response.choices?.[0]?.message?.content || '';
  const toolCalls = response.choices?.[0]?.message?.tool_calls;

  return {
    text: answer,
    text: removeDatasetCiteText(parseReasoningContent(answer)[1], false),
    usage: response.usage,
    toolCalls
  };
@@ -240,6 +243,12 @@ export const parseLLMStreamResponse = () => {
  let citeBuffer = '';
  const maxCiteBufferLength = 32; // total length of [Object](CITE) is 32

  // Buffer
  let buffer_finishReason: CompletionFinishReason = null;
  let buffer_usage: CompletionUsage = getLLMDefaultUsage();
  let buffer_reasoningContent = '';
  let buffer_content = '';

  /*
    parseThinkTag - only controls whether <think></think> is actively parsed; if the API has already parsed it, it is not parsed again.
    retainDatasetCite -
@@ -257,6 +266,7 @@ export const parseLLMStreamResponse = () => {
      };
      finish_reason?: CompletionFinishReason;
    }[];
    usage?: CompletionUsage;
  };
  parseThinkTag?: boolean;
  retainDatasetCite?: boolean;
@@ -266,72 +276,71 @@ export const parseLLMStreamResponse = () => {
    responseContent: string;
    finishReason: CompletionFinishReason;
  } => {
    const finishReason = part.choices?.[0]?.finish_reason || null;
    const content = part.choices?.[0]?.delta?.content || '';
    // @ts-ignore
    const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
    const isStreamEnd = !!finishReason;
    const data = (() => {
      buffer_usage = part.usage || buffer_usage;

    // Parse think
    const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } = (() => {
      if (reasoningContent || !parseThinkTag) {
        isInThinkTag = false;
        return { reasoningContent, content };
      }
      const finishReason = part.choices?.[0]?.finish_reason || null;
      buffer_finishReason = finishReason || buffer_finishReason;

      if (!content) {
        return {
          reasoningContent: '',
          content: ''
        };
      }
      const content = part.choices?.[0]?.delta?.content || '';
      // @ts-ignore
      const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
      const isStreamEnd = !!buffer_finishReason;

      // If not in a think tag, or reasoningContent exists (already parsed by the API), return reasoningContent and content
      if (isInThinkTag === false) {
        return {
          reasoningContent: '',
          content
        };
      }
      // Parse think
      const { reasoningContent: parsedThinkReasoningContent, content: parsedThinkContent } =
        (() => {
          if (reasoningContent || !parseThinkTag) {
            isInThinkTag = false;
            return { reasoningContent, content };
          }

      // Detect whether the data starts with a think tag
      if (isInThinkTag === undefined) {
        // Parse content think and answer
        startTagBuffer += content;
        // Too little content; defer parsing for now
        if (startTagBuffer.length < thinkStartChars.length) {
          if (isStreamEnd) {
            const tmpContent = startTagBuffer;
            startTagBuffer = '';
          // If not in a think tag, or reasoningContent exists (already parsed by the API), return reasoningContent and content
          if (isInThinkTag === false) {
            return {
              reasoningContent: '',
              content: tmpContent
              content
            };
          }
          return {
            reasoningContent: '',
            content: ''
          };
        }

        if (startTagBuffer.startsWith(thinkStartChars)) {
          isInThinkTag = true;
          return {
            reasoningContent: startTagBuffer.slice(thinkStartChars.length),
            content: ''
          };
        }
          // Detect whether the data starts with a think tag
          if (isInThinkTag === undefined) {
            // Parse content think and answer
            startTagBuffer += content;
            // Too little content; defer parsing for now
            if (startTagBuffer.length < thinkStartChars.length) {
              if (isStreamEnd) {
                const tmpContent = startTagBuffer;
                startTagBuffer = '';
                return {
                  reasoningContent: '',
                  content: tmpContent
                };
              }
              return {
                reasoningContent: '',
                content: ''
              };
            }

        // If the think tag is never matched, treat it as outside a think tag and return the buffered content as content
        isInThinkTag = false;
        return {
          reasoningContent: '',
          content: startTagBuffer
        };
      }
            if (startTagBuffer.startsWith(thinkStartChars)) {
              isInThinkTag = true;
              return {
                reasoningContent: startTagBuffer.slice(thinkStartChars.length),
                content: ''
              };
            }

      // Confirmed think-tag content: start returning think content while watching for </think> in real time
      /*
            // If the think tag is never matched, treat it as outside a think tag and return the buffered content as content
            isInThinkTag = false;
            return {
              reasoningContent: '',
              content: startTagBuffer
            };
          }

          // Confirmed think-tag content: start returning think content while watching for </think> in real time
          /*
            Strategy for detecting </think>.
            Buffer everything that might be part of </think> until the full tag is seen or the buffer exceeds the tag length.
            The returned content covers the following cases:
@@ -342,124 +351,145 @@ export const parseLLMStreamResponse = () => {
            </think>abc - full match of the end tag
            k>abc - partial match of the end tag
          */
      // endTagBuffer records content suspected to be the end tag
      if (endTagBuffer) {
        endTagBuffer += content;
        if (endTagBuffer.includes(thinkEndChars)) {
          isInThinkTag = false;
          const answer = endTagBuffer.slice(thinkEndChars.length);
          return {
            reasoningContent: '',
            content: answer
          };
        } else if (endTagBuffer.length >= thinkEndChars.length) {
          // Buffered content exceeds the end-tag length without matching </think>; the guess failed, still in the think phase.
          const tmp = endTagBuffer;
          endTagBuffer = '';
          return {
            reasoningContent: tmp,
            content: ''
          };
        }
        return {
          reasoningContent: '',
          content: ''
        };
      } else if (content.includes(thinkEndChars)) {
        // Full match of </think> in the content; end immediately
        isInThinkTag = false;
        const [think, answer] = content.split(thinkEndChars);
        return {
          reasoningContent: think,
          content: answer
        };
      } else {
        // No buffer and no </think> match; start probing for a possible </think>.
        for (let i = 1; i < thinkEndChars.length; i++) {
          const partialEndTag = thinkEndChars.slice(0, i);
          // Partial match of the end tag
          if (content.endsWith(partialEndTag)) {
            const think = content.slice(0, -partialEndTag.length);
            endTagBuffer += partialEndTag;
          // endTagBuffer records content suspected to be the end tag
          if (endTagBuffer) {
            endTagBuffer += content;
            if (endTagBuffer.includes(thinkEndChars)) {
              isInThinkTag = false;
              const answer = endTagBuffer.slice(thinkEndChars.length);
              return {
                reasoningContent: '',
                content: answer
              };
            } else if (endTagBuffer.length >= thinkEndChars.length) {
              // Buffered content exceeds the end-tag length without matching </think>; the guess failed, still in the think phase.
              const tmp = endTagBuffer;
              endTagBuffer = '';
              return {
                reasoningContent: tmp,
                content: ''
              };
            }
            return {
              reasoningContent: think,
              reasoningContent: '',
              content: ''
            };
          } else if (content.includes(thinkEndChars)) {
            // Full match of </think> in the content; end immediately
            isInThinkTag = false;
            const [think, answer] = content.split(thinkEndChars);
            return {
              reasoningContent: think,
              content: answer
            };
          } else {
            // No buffer and no </think> match; start probing for a possible </think>.
            for (let i = 1; i < thinkEndChars.length; i++) {
              const partialEndTag = thinkEndChars.slice(0, i);
              // Partial match of the end tag
              if (content.endsWith(partialEndTag)) {
                const think = content.slice(0, -partialEndTag.length);
                endTagBuffer += partialEndTag;
                return {
                  reasoningContent: think,
                  content: ''
                };
              }
            }
          }
        }
      }

      // No end-tag match at all; still in the think phase.
      return {
        reasoningContent: content,
        content: ''
      };
    })();

    // Parse dataset cite
    if (retainDatasetCite) {
      return {
        reasoningContent: parsedThinkReasoningContent,
        content: parsedThinkContent,
        responseContent: parsedThinkContent,
        finishReason: buffer_finishReason
      };
    }

          // No end-tag match at all; still in the think phase.
          return {
            reasoningContent: content,
            content: ''
          };
        })();
    // Buffer strings containing [ and flush them once maxCiteBufferLength is exceeded
    const parseCite = (text: string) => {
      // On stream end, return all remaining content
      if (isStreamEnd) {
        const content = citeBuffer + text;
        return {
          content: removeDatasetCiteText(content, false)
        };
      }

      // New content contains [; initialize the buffer
      if (text.includes('[')) {
        const index = text.indexOf('[');
        const beforeContent = citeBuffer + text.slice(0, index);
        citeBuffer = text.slice(index);

        // beforeContent may be a plain string or a string containing [
        return {
          content: removeDatasetCiteText(beforeContent, false)
        };
      }
      // Inside the cite buffer; check whether the flush condition is met
      else if (citeBuffer) {
        citeBuffer += text;

        // Check whether the buffer has reached the full quote length or the stream has ended
        if (citeBuffer.length >= maxCiteBufferLength) {
          const content = removeDatasetCiteText(citeBuffer, false);
          citeBuffer = '';

          return {
            content
          };
        } else {
          // Return nothing for now
          return { content: '' };
        }
      }

      return {
        content: text
      };
    };
    const { content: pasedCiteContent } = parseCite(parsedThinkContent);

    // Parse dataset cite
    if (retainDatasetCite) {
      return {
        reasoningContent: parsedThinkReasoningContent,
        content: parsedThinkContent,
        responseContent: parsedThinkContent,
        finishReason
        responseContent: pasedCiteContent,
        finishReason: buffer_finishReason
      };
    }
      })();

      // Buffer strings containing [ and flush them once maxCiteBufferLength is exceeded
      const parseCite = (text: string) => {
        // On stream end, return all remaining content
        if (isStreamEnd) {
          const content = citeBuffer + text;
          return {
            content: removeDatasetCiteText(content, false)
          };
        }
      buffer_reasoningContent += data.reasoningContent;
      buffer_content += data.content;

        // New content contains [; initialize the buffer
        if (text.includes('[')) {
          const index = text.indexOf('[');
          const beforeContent = citeBuffer + text.slice(0, index);
          citeBuffer = text.slice(index);

          // beforeContent may be a plain string or a string containing [
          return {
            content: removeDatasetCiteText(beforeContent, false)
          };
        }
        // Inside the cite buffer; check whether the flush condition is met
        else if (citeBuffer) {
          citeBuffer += text;

          // Check whether the buffer has reached the full quote length or the stream has ended
          if (citeBuffer.length >= maxCiteBufferLength) {
            const content = removeDatasetCiteText(citeBuffer, false);
            citeBuffer = '';

            return {
              content
            };
          } else {
            // Return nothing for now
            return { content: '' };
          }
        }

        return {
          content: text
        };
      };
      const { content: pasedCiteContent } = parseCite(parsedThinkContent);
      return data;
    };

    const getResponseData = () => {
      return {
        reasoningContent: parsedThinkReasoningContent,
        content: parsedThinkContent,
        responseContent: pasedCiteContent,
        finishReason
        finish_reason: buffer_finishReason,
        usage: buffer_usage,
        reasoningContent: buffer_reasoningContent,
        content: buffer_content
      };
    };

    const updateFinishReason = (finishReason: CompletionFinishReason) => {
      buffer_finishReason = finishReason;
    };

    return {
      parsePart
      parsePart,
      getResponseData,
      updateFinishReason
    };
  };

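A minimal usage sketch of the reworked parser, assuming the shapes visible in the diff (the SSE loop, the stream variable, and the 'stop' value are illustrative, not the exact caller):

const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

for await (const part of stream) { // stream: AsyncIterable of OpenAI-style chunks (assumed)
  const { reasoningContent, responseContent } = parsePart({
    part,
    parseThinkTag: true,      // extract <think>...</think> unless the API already split it out
    retainDatasetCite: false  // buffer and strip [Object](CITE) markers from the streamed text
  });
  // forward reasoningContent / responseContent to the client here
}

// The buffers accumulated across parsePart calls are read back in one place:
const { content, reasoningContent, finish_reason, usage } = getResponseData();
updateFinishReason('stop'); // manual override, e.g. after an abort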
@@ -11,40 +11,6 @@ export const beforeUpdateAppFormat = <T extends AppSchema['modules'] | undefined
  nodes: T;
  isPlugin: boolean;
}) => {
  if (nodes) {
    // Check dataset maxTokens
    if (isPlugin) {
      let maxTokens = 16000;

      nodes.forEach((item) => {
        if (
          item.flowNodeType === FlowNodeTypeEnum.chatNode ||
          item.flowNodeType === FlowNodeTypeEnum.tools
        ) {
          const model =
            item.inputs.find((item) => item.key === NodeInputKeyEnum.aiModel)?.value || '';
          const chatModel = getLLMModel(model);
          const quoteMaxToken = chatModel.quoteMaxToken || 16000;

          maxTokens = Math.max(maxTokens, quoteMaxToken);
        }
      });

      nodes.forEach((item) => {
        if (item.flowNodeType === FlowNodeTypeEnum.datasetSearchNode) {
          item.inputs.forEach((input) => {
            if (input.key === NodeInputKeyEnum.datasetMaxTokens) {
              const val = input.value as number;
              if (val > maxTokens) {
                input.value = maxTokens;
              }
            }
          });
        }
      });
    }
  }

  return {
    nodes
  };

@@ -30,8 +30,7 @@ import { Types } from 'mongoose';
  community: community-id
  commercial: commercial-id
*/

export async function splitCombinePluginId(id: string) {
export function splitCombineToolId(id: string) {
  const splitRes = id.split('-');
  if (splitRes.length === 1) {
    // app id
@@ -42,7 +41,7 @@ export async function splitCombinePluginId(id: string) {
  }

  const [source, pluginId] = id.split('-') as [PluginSourceEnum, string];
  if (!source || !pluginId) return Promise.reject('pluginId not found');
  if (!source || !pluginId) throw new Error('pluginId not found');

  return { source, pluginId: id };
}
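Behaviour of the renamed helper, as implied by the code above; the first case is inferred from the elided `// app id` branch and is an assumption:

// splitCombineToolId('6649...abc')           -> { source: 'personal', pluginId: '6649...abc' } (plain app id, inferred)
// splitCombineToolId('community-duckduckgo') -> { source: 'community', pluginId: 'community-duckduckgo' }
// splitCombineToolId('-')                    -> throws 'pluginId not found' (a synchronous throw now,
//                                               not a rejected promise, since the function is no longer async)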
@@ -54,7 +53,7 @@ const getSystemPluginTemplateById = async (
  versionId?: string
): Promise<ChildAppType> => {
  const item = getSystemPluginTemplates().find((plugin) => plugin.id === pluginId);
  if (!item) return Promise.reject(PluginErrEnum.unAuth);
  if (!item) return Promise.reject(PluginErrEnum.unExist);

  const plugin = cloneDeep(item);

@@ -64,10 +63,10 @@ const getSystemPluginTemplateById = async (
    { pluginId: plugin.id, 'customConfig.associatedPluginId': plugin.associatedPluginId },
    'associatedPluginId'
  ).lean();
  if (!systemPlugin) return Promise.reject(PluginErrEnum.unAuth);
  if (!systemPlugin) return Promise.reject(PluginErrEnum.unExist);

  const app = await MongoApp.findById(plugin.associatedPluginId).lean();
  if (!app) return Promise.reject(PluginErrEnum.unAuth);
  if (!app) return Promise.reject(PluginErrEnum.unExist);

  const version = versionId
    ? await getAppVersionById({
@@ -77,6 +76,12 @@ const getSystemPluginTemplateById = async (
      })
    : await getAppLatestVersion(plugin.associatedPluginId, app);
  if (!version.versionId) return Promise.reject('App version not found');
  const isLatest = version.versionId
    ? await checkIsLatestVersion({
        appId: plugin.associatedPluginId,
        versionId: version.versionId
      })
    : true;

  return {
    ...plugin,
@@ -85,12 +90,19 @@ const getSystemPluginTemplateById = async (
      edges: version.edges,
      chatConfig: version.chatConfig
    },
    version: versionId || String(version.versionId),
    version: versionId ? version?.versionId : '',
    versionLabel: version?.versionName,
    isLatestVersion: isLatest,
    teamId: String(app.teamId),
    tmbId: String(app.tmbId)
  };
}
return plugin;

return {
  ...plugin,
  version: undefined,
  isLatestVersion: true
};
};

/* Format plugin to workflow preview node data */
@@ -102,11 +114,11 @@ export async function getChildAppPreviewNode({
  versionId?: string;
}): Promise<FlowNodeTemplateType> {
  const app: ChildAppType = await (async () => {
    const { source, pluginId } = await splitCombinePluginId(appId);
    const { source, pluginId } = splitCombineToolId(appId);

    if (source === PluginSourceEnum.personal) {
      const item = await MongoApp.findById(appId).lean();
      if (!item) return Promise.reject('plugin not found');
      if (!item) return Promise.reject(PluginErrEnum.unExist);

      const version = await getAppVersionById({ appId, versionId, app: item });

@@ -132,8 +144,8 @@ export async function getChildAppPreviewNode({
      },
      templateType: FlowNodeTemplateTypeEnum.teamApp,

      version: version.versionId,
      versionLabel: version?.versionName || '',
      version: versionId ? version?.versionId : '',
      versionLabel: version?.versionName,
      isLatestVersion: isLatest,

      originCost: 0,
@@ -142,7 +154,7 @@ export async function getChildAppPreviewNode({
      pluginOrder: 0
    };
  } else {
    return getSystemPluginTemplateById(pluginId);
    return getSystemPluginTemplateById(pluginId, versionId);
  }
})();

@@ -216,12 +228,12 @@ export async function getChildAppRuntimeById(
  id: string,
  versionId?: string
): Promise<PluginRuntimeType> {
  const app: ChildAppType = await (async () => {
    const { source, pluginId } = await splitCombinePluginId(id);
  const app = await (async () => {
    const { source, pluginId } = splitCombineToolId(id);

    if (source === PluginSourceEnum.personal) {
      const item = await MongoApp.findById(id).lean();
      if (!item) return Promise.reject('plugin not found');
      if (!item) return Promise.reject(PluginErrEnum.unExist);

      const version = await getAppVersionById({
        appId: id,
@@ -244,8 +256,6 @@ export async function getChildAppRuntimeById(
      },
      templateType: FlowNodeTemplateTypeEnum.teamApp,

      // Not used
      version: item?.pluginData?.nodeVersion,
      originCost: 0,
      currentCost: 0,
      hasTokenFee: false,

@@ -1,6 +1,6 @@
import { type ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
import { type PluginRuntimeType } from '@fastgpt/global/core/plugin/type';
import { splitCombinePluginId } from './controller';
import { splitCombineToolId } from './controller';
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';

/*
@@ -20,7 +20,7 @@ export const computedPluginUsage = async ({
  childrenUsage: ChatNodeUsageType[];
  error?: boolean;
}) => {
  const { source } = await splitCombinePluginId(plugin.id);
  const { source } = splitCombineToolId(plugin.id);
  const childrenUsages = childrenUsage.reduce((sum, item) => sum + (item.totalPoints || 0), 0);

  if (source !== PluginSourceEnum.personal) {

@@ -64,7 +64,12 @@ const AppSchema = new Schema({
    type: Date,
    default: () => new Date()
  },

  tags: [
    {
      type: Schema.Types.ObjectId,
      ref: 'app_tags'
    }
  ],
  // role and auth
  teamTags: {
    type: [String]

242
packages/service/core/app/tags/controller.ts
Normal file
@@ -0,0 +1,242 @@
import { MongoTag } from './schema';
import { MongoApp } from '../schema';
import { Types } from '../../../common/mongo';

/**
 * Create a new tag
 */
export const createTag = async ({
  teamId,
  name,
  color
}: {
  teamId: string;
  name: string;
  color?: string;
}) => {
  const tag = await MongoTag.create({
    teamId,
    name,
    color
  });

  return tag.toObject();
};

/**
 * Get all tags of a team
 */
export const getTeamTags = async (teamId: string) => {
  const tags = await MongoTag.find({ teamId }).lean();
  return tags;
};

/**
 * Get tag usage statistics
 */
export const getTagsWithCount = async (teamId: string) => {
  return MongoTag.aggregate([
    { $match: { teamId: new Types.ObjectId(teamId) } },
    {
      $lookup: {
        from: 'apps',
        localField: '_id',
        foreignField: 'tags',
        as: 'apps'
      }
    },
    {
      $addFields: {
        count: { $size: '$apps' }
      }
    },
    {
      $project: {
        apps: 0
      }
    }
  ]);
};

/**
 * Update a tag
 */
export const updateTag = async ({
  tagId,
  teamId,
  name,
  color
}: {
  tagId: string;
  teamId: string;
  name?: string;
  color?: string;
}) => {
  const updateData: Record<string, any> = {};
  if (name !== undefined) updateData.name = name;
  if (color !== undefined) updateData.color = color;

  await MongoTag.updateOne({ _id: tagId, teamId }, { $set: updateData });

  return MongoTag.findById(tagId).lean();
};

/**
 * Delete a tag
 */
export const deleteTag = async ({ tagId, teamId }: { tagId: string; teamId: string }) => {
  // First remove the tag from all apps
  await MongoApp.updateMany({ teamId, tags: tagId }, { $pull: { tags: tagId } });

  // Then delete the tag itself
  await MongoTag.deleteOne({ _id: tagId, teamId });

  return true;
};

/**
 * Add a tag to an app
 */
export const addTagToApp = async ({
  appId,
  tagId,
  teamId
}: {
  appId: string;
  tagId: string;
  teamId: string;
}) => {
  // Confirm the tag exists and belongs to the team
  const tag = await MongoTag.findOne({ _id: tagId, teamId });
  if (!tag) {
    throw new Error('Tag not found or not authorized');
  }

  await MongoApp.updateOne({ _id: appId, teamId }, { $addToSet: { tags: tagId } });

  return true;
};

/**
 * Remove a tag from an app
 */
export const removeTagFromApp = async ({
  appId,
  tagId,
  teamId
}: {
  appId: string;
  tagId: string;
  teamId: string;
}) => {
  await MongoApp.updateOne({ _id: appId, teamId }, { $pull: { tags: tagId } });

  return true;
};

/**
 * Batch delete tags
 */
export const batchDeleteTags = async ({ tagIds, teamId }: { tagIds: string[]; teamId: string }) => {
  if (!tagIds || tagIds.length === 0) {
    return true;
  }

  // First remove these tags from all apps
  await MongoApp.updateMany(
    { teamId, tags: { $in: tagIds } },
    { $pull: { tags: { $in: tagIds } } }
  );

  // Then delete the tags themselves
  const result = await MongoTag.deleteMany({ _id: { $in: tagIds }, teamId });

  return { deletedCount: result.deletedCount };
};

/**
 * Batch add tags to an app
 */
export const batchAddTagsToApp = async ({
  appId,
  tagIds,
  teamId
}: {
  appId: string;
  tagIds: string[];
  teamId: string;
}) => {
  if (!tagIds || tagIds.length === 0) {
    return true;
  }

  // Confirm the tags exist and belong to the team
  const tags = await MongoTag.find({ _id: { $in: tagIds }, teamId });
  if (tags.length !== tagIds.length) {
    throw new Error('Some tags not found or not authorized');
  }

  await MongoApp.updateOne({ _id: appId, teamId }, { $addToSet: { tags: { $each: tagIds } } });

  return true;
};

/**
 * Batch remove tags from an app
 */
export const batchRemoveTagsFromApp = async ({
  appId,
  tagIds,
  teamId
}: {
  appId: string;
  tagIds: string[];
  teamId: string;
}) => {
  if (!tagIds || tagIds.length === 0) {
    return true;
  }

  await MongoApp.updateOne({ _id: appId, teamId }, { $pull: { tags: { $in: tagIds } } });

  return true;
};

/**
 * Batch assign apps to a tag (full replacement)
 */
export const batchAddAppsToTag = async ({
  tagId,
  appIds,
  teamId
}: {
  tagId: string;
  appIds: string[];
  teamId: string;
}) => {
  // Confirm the tag exists and belongs to the team
  const tag = await MongoTag.findOne({ _id: tagId, teamId });
  if (!tag) {
    throw new Error('Tag not found or not authorized');
  }

  // If appIds is an empty array, remove the tag from all apps
  if (!appIds || appIds.length === 0) {
    await MongoApp.updateMany({ teamId, tags: tagId }, { $pull: { tags: tagId } });
    return true;
  }

  // Confirm all apps exist and belong to the team
  const apps = await MongoApp.find({ _id: { $in: appIds }, teamId });
  if (apps.length !== appIds.length) {
    throw new Error('Some apps not found or not authorized');
  }

  // First remove the tag from all apps
  await MongoApp.updateMany({ teamId, tags: tagId }, { $pull: { tags: tagId } });

  // Then add the tag to the specified apps
  await MongoApp.updateMany({ _id: { $in: appIds }, teamId }, { $addToSet: { tags: tagId } });

  return true;
};
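An end-to-end sketch of the new tag helpers; the ids are hypothetical and the `count` field comes from the $lookup/$size aggregation above:

const teamId = '6649...team'; // hypothetical
const appId = '6649...app';   // hypothetical

const tag = await createTag({ teamId, name: 'customer-support' }); // color falls back to '#3370ff'
await batchAddTagsToApp({ appId, tagIds: [String(tag._id)], teamId });

const stats = await getTagsWithCount(teamId); // each tag now carries a computed `count` of tagged apps

// Full-replacement semantics: after this call, exactly the listed apps carry the tag.
await batchAddAppsToTag({ tagId: String(tag._id), appIds: [appId], teamId });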
37
packages/service/core/app/tags/schema.ts
Normal file
@@ -0,0 +1,37 @@
import { TeamCollectionName } from '@fastgpt/global/support/user/team/constant';
import { getMongoModel, Schema } from '../../../common/mongo';

export const TagCollectionName = 'app_tags';

export type TagSchemaType = {
  _id: string;
  teamId: string;
  name: string;
  color: string;
  createTime: Date;
};

const TagSchema = new Schema({
  teamId: {
    type: Schema.Types.ObjectId,
    ref: TeamCollectionName,
    required: true
  },
  name: {
    type: String,
    required: true
  },
  color: {
    type: String,
    default: '#3370ff'
  },
  createTime: {
    type: Date,
    default: () => new Date()
  }
});

// Compound index: enforce name uniqueness per team
TagSchema.index({ teamId: 1, name: 1 }, { unique: true });

export const MongoTag = getMongoModel<TagSchemaType>(TagCollectionName, TagSchema);
@@ -1,14 +1,13 @@
import { MongoDataset } from '../dataset/schema';
import { getEmbeddingModel } from '../ai/model';
import {
  AppNodeFlowNodeTypeMap,
  FlowNodeTypeEnum
} from '@fastgpt/global/core/workflow/node/constant';
import { FlowNodeTypeEnum } from '@fastgpt/global/core/workflow/node/constant';
import { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
import type { StoreNodeItemType } from '@fastgpt/global/core/workflow/type/node';
import { MongoAppVersion } from './version/schema';
import { checkIsLatestVersion } from './version/controller';
import { Types } from '../../common/mongo';
import { getChildAppPreviewNode, splitCombineToolId } from './plugin/controller';
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
import { authAppByTmbId } from '../../support/permission/app/auth';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { getErrText } from '@fastgpt/global/common/error/utils';

export async function listAppDatasetDataByTeamIdAndDatasetIds({
  teamId,
@@ -33,53 +32,58 @@ export async function listAppDatasetDataByTeamIdAndDatasetIds({
export async function rewriteAppWorkflowToDetail({
  nodes,
  teamId,
  isRoot
  isRoot,
  ownerTmbId
}: {
  nodes: StoreNodeItemType[];
  teamId: string;
  isRoot: boolean;
  ownerTmbId: string;
}) {
  const datasetIdSet = new Set<string>();

  // Add node(App Type) versionlabel and latest sign
  const appNodes = nodes.filter((node) => AppNodeFlowNodeTypeMap[node.flowNodeType]);
  const versionIds = appNodes
    .filter((node) => node.version && Types.ObjectId.isValid(node.version))
    .map((node) => node.version);
  /* Add node(App Type) versionlabel and latest sign ==== */
  await Promise.all(
    nodes.map(async (node) => {
      if (!node.pluginId) return;
      const { source } = splitCombineToolId(node.pluginId);

  if (versionIds.length > 0) {
    const versionDataList = await MongoAppVersion.find(
      {
        _id: { $in: versionIds }
      },
      '_id versionName appId time'
    ).lean();
      try {
        const [preview] = await Promise.all([
          getChildAppPreviewNode({
            appId: node.pluginId,
            versionId: node.version
          }),
          ...(source === PluginSourceEnum.personal
            ? [
                authAppByTmbId({
                  tmbId: ownerTmbId,
                  appId: node.pluginId,
                  per: ReadPermissionVal
                })
              ]
            : [])
        ]);

    const versionMap: Record<string, any> = {};

    const isLatestChecks = await Promise.all(
      versionDataList.map(async (version) => {
        const isLatest = await checkIsLatestVersion({
          appId: version.appId,
          versionId: version._id
        });

        return { versionId: String(version._id), isLatest };
      })
    );
    const isLatestMap = new Map(isLatestChecks.map((item) => [item.versionId, item.isLatest]));
    versionDataList.forEach((version) => {
      versionMap[String(version._id)] = version;
    });
    appNodes.forEach((node) => {
      if (!node.version) return;
      const versionData = versionMap[String(node.version)];
      if (versionData) {
        node.versionLabel = versionData.versionName;
        node.isLatestVersion = isLatestMap.get(String(node.version)) || false;
        node.pluginData = {
          diagram: preview.diagram,
          userGuide: preview.userGuide,
          courseUrl: preview.courseUrl,
          name: preview.name,
          avatar: preview.avatar
        };
        node.versionLabel = preview.versionLabel;
        node.isLatestVersion = preview.isLatestVersion;
        node.version = preview.version;
      } catch (error) {
        node.pluginData = {
          error: getErrText(error)
        };
      }
    });
  }
    })
  );

  /* Add node(App Type) versionlabel and latest sign ==== */

  // Get all dataset ids from nodes
  nodes.forEach((node) => {

@@ -68,6 +68,9 @@ export const checkIsLatestVersion = async ({
  appId: string;
  versionId: string;
}) => {
  if (!Types.ObjectId.isValid(versionId)) {
    return false;
  }
  const version = await MongoAppVersion.findOne(
    {
      appId,

@@ -34,6 +34,10 @@ const ChatSchema = new Schema({
    ref: AppCollectionName,
    required: true
  },
  createTime: {
    type: Date,
    default: () => new Date()
  },
  updateTime: {
    type: Date,
    default: () => new Date()

@@ -65,8 +65,8 @@ export const filterGPTMessageByMaxContext = async ({
  if (lastMessage.role === ChatCompletionRequestMessageRoleEnum.User) {
    const tokens = await countGptMessagesTokens([lastMessage, ...tmpChats]);
    maxContext -= tokens;
    // This round's total tokens exceed the limit; drop the data
    if (maxContext < 0) {
    // This round's total tokens exceed the limit; drop the data, but keep at least one round.
    if (maxContext < 0 && chats.length > 0) {
      break;
    }

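Effect of the added chats.length guard, as read from the diff:

// Before: a round that alone exceeded maxContext was dropped, so the model could receive an empty history.
// After: the overflow round is dropped only if at least one earlier round was already kept (chats.length > 0),
//        so the model always gets at least the latest message group.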
@@ -146,7 +146,8 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
  tmbId,
  url: previewUrl,
  relatedId: apiFileId,
  customPdfParse
  customPdfParse,
  getFormatText: true
});
return {
  title,

27
packages/service/core/dataset/apiDataset/index.ts
Normal file
@@ -0,0 +1,27 @@
import type {
  APIFileServer,
  YuqueServer,
  FeishuServer
} from '@fastgpt/global/core/dataset/apiDataset';
import { useApiDatasetRequest } from './api';
import { useYuqueDatasetRequest } from '../yuqueDataset/api';
import { useFeishuDatasetRequest } from '../feishuDataset/api';

export const getApiDatasetRequest = async (data: {
  apiServer?: APIFileServer;
  yuqueServer?: YuqueServer;
  feishuServer?: FeishuServer;
}) => {
  const { apiServer, yuqueServer, feishuServer } = data;

  if (apiServer) {
    return useApiDatasetRequest({ apiServer });
  }
  if (yuqueServer) {
    return useYuqueDatasetRequest({ yuqueServer });
  }
  if (feishuServer) {
    return useFeishuDatasetRequest({ feishuServer });
  }
  return Promise.reject('Can not find api dataset server');
};
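A hedged usage sketch of the new dispatcher: callers stop branching on the server type themselves and just hand over whatever config the dataset carries. The import path and the listFiles signature are taken from elsewhere in this diff; the dataset shape here is illustrative:

import { getApiDatasetRequest } from '@fastgpt/service/core/dataset/apiDataset';

// Illustrative only: exactly one of the three server configs is usually set on a dataset.
const listDatasetRoot = async (dataset: {
  apiServer?: any;
  yuqueServer?: any;
  feishuServer?: any;
}) => {
  const client = await getApiDatasetRequest(dataset);
  // Every client returned by the dispatcher exposes the same listFiles surface.
  return client.listFiles({ parentId: null });
};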
@@ -1,30 +0,0 @@
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import { type FeishuServer, type YuqueServer } from '@fastgpt/global/core/dataset/apiDataset';

export enum ProApiDatasetOperationTypeEnum {
  LIST = 'list',
  READ = 'read',
  CONTENT = 'content',
  DETAIL = 'detail'
}

export type ProApiDatasetCommonParams = {
  feishuServer?: FeishuServer;
  yuqueServer?: YuqueServer;
};

export type GetProApiDatasetFileListParams = ProApiDatasetCommonParams & {
  parentId?: ParentIdType;
};

export type GetProApiDatasetFileContentParams = ProApiDatasetCommonParams & {
  apiFileId: string;
};

export type GetProApiDatasetFilePreviewUrlParams = ProApiDatasetCommonParams & {
  apiFileId: string;
};

export type GetProApiDatasetFileDetailParams = ProApiDatasetCommonParams & {
  apiFileId: string;
};
@@ -34,15 +34,17 @@ import { getTrainingModeByCollection } from './utils';
import {
  computeChunkSize,
  computeChunkSplitter,
  computeParagraphChunkDeep,
  getLLMMaxChunkSize
} from '@fastgpt/global/core/dataset/training/utils';
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';

export const createCollectionAndInsertData = async ({
  dataset,
  rawText,
  relatedId,
  createCollectionParams,
  isQAImport = false,
  backupParse = false,
  billId,
  session
}: {
@@ -50,8 +52,8 @@ export const createCollectionAndInsertData = async ({
  rawText: string;
  relatedId?: string;
  createCollectionParams: CreateOneCollectionParams;
  backupParse?: boolean;

  isQAImport?: boolean;
  billId?: string;
  session?: ClientSession;
}) => {
@@ -73,15 +75,33 @@ export const createCollectionAndInsertData = async ({
    llmModel: getLLMModel(dataset.agentModel)
  });
  const chunkSplitter = computeChunkSplitter(createCollectionParams);
  const paragraphChunkDeep = computeParagraphChunkDeep(createCollectionParams);

  if (
    trainingType === DatasetCollectionDataProcessModeEnum.qa ||
    trainingType === DatasetCollectionDataProcessModeEnum.backup
  ) {
    delete createCollectionParams.chunkTriggerType;
    delete createCollectionParams.chunkTriggerMinSize;
    delete createCollectionParams.dataEnhanceCollectionName;
    delete createCollectionParams.imageIndex;
    delete createCollectionParams.autoIndexes;
    delete createCollectionParams.indexSize;
    delete createCollectionParams.qaPrompt;
  }

  // 1. split chunks
  const chunks = rawText2Chunks({
    rawText,
    chunkTriggerType: createCollectionParams.chunkTriggerType,
    chunkTriggerMinSize: createCollectionParams.chunkTriggerMinSize,
    chunkSize,
    paragraphChunkDeep,
    paragraphChunkMinSize: createCollectionParams.paragraphChunkMinSize,
    maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)),
    overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0,
    customReg: chunkSplitter ? [chunkSplitter] : [],
    isQAImport
    backupParse
  });

  // 2. auth limit
@@ -102,6 +122,7 @@ export const createCollectionAndInsertData = async ({
    const { _id: collectionId } = await createOneCollection({
      ...createCollectionParams,
      trainingType,
      paragraphChunkDeep,
      chunkSize,
      chunkSplitter,

@@ -157,6 +178,10 @@ export const createCollectionAndInsertData = async ({
      billId: traingBillId,
      data: chunks.map((item, index) => ({
        ...item,
        indexes: item.indexes?.map((text) => ({
          type: DatasetDataIndexTypeEnum.custom,
          text
        })),
        chunkIndex: index
      })),
      session
@@ -198,46 +223,19 @@ export type CreateOneCollectionParams = CreateDatasetCollectionParams & {
  tmbId: string;
  session?: ClientSession;
};
export async function createOneCollection({
  teamId,
  tmbId,
  name,
  parentId,
  datasetId,
  type,
export async function createOneCollection({ session, ...props }: CreateOneCollectionParams) {
  const {
    teamId,
    parentId,
    datasetId,
    tags,

    createTime,
    updateTime,

    hashRawText,
    rawTextLength,
    metadata = {},
    tags,

    nextSyncTime,

    fileId,
    rawLink,
    externalFileId,
    externalFileUrl,
    apiFileId,

    // Parse settings
    customPdfParse,
    imageIndex,
    autoIndexes,

    // Chunk settings
    trainingType,
    chunkSettingMode,
    chunkSplitMode,
    chunkSize,
    indexSize,
    chunkSplitter,
    qaPrompt,

    session
  }: CreateOneCollectionParams) {
    fileId,
    rawLink,
    externalFileId,
    externalFileUrl,
    apiFileId
  } = props;
  // Create collection tags
  const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session });

@@ -245,41 +243,18 @@ export async function createOneCollection({
  const [collection] = await MongoDatasetCollection.create(
    [
      {
        ...props,
        teamId,
        tmbId,
        parentId: parentId || null,
        datasetId,
        name,
        type,

        rawTextLength,
        hashRawText,
        tags: collectionTags,
        metadata,

        createTime,
        updateTime,
        nextSyncTime,

        ...(fileId ? { fileId } : {}),
        ...(rawLink ? { rawLink } : {}),
        ...(externalFileId ? { externalFileId } : {}),
        ...(externalFileUrl ? { externalFileUrl } : {}),
        ...(apiFileId ? { apiFileId } : {}),

        // Parse settings
        customPdfParse,
        imageIndex,
        autoIndexes,

        // Chunk settings
        trainingType,
        chunkSettingMode,
        chunkSplitMode,
        chunkSize,
        indexSize,
        chunkSplitter,
        qaPrompt
        ...(apiFileId ? { apiFileId } : {})
      }
    ],
    { session, ordered: true }
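The refactor above swaps a long explicit parameter list for { session, ...props } plus ...props at the create site, so newly added optional collection fields flow through without this function being edited again. The pattern in isolation, with a hypothetical optional field to show the point:

type CreateParams = {
  teamId: string;
  name: string;
  // New optional fields can be added here without editing createOne below.
  chunkSize?: number;
  session?: unknown;
};

const persist = (doc: Record<string, unknown>, session?: unknown) => ({ ...doc, session });

// Pull out only what needs special handling; forward everything else untouched.
function createOne({ session, ...props }: CreateParams) {
  const { teamId } = props; // still available for logic that needs it
  return persist({ ...props, teamId }, session);
}

console.log(createOne({ teamId: 't1', name: 'docs', chunkSize: 512 }));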
@@ -34,9 +34,9 @@ const DatasetDataTextSchema = new Schema({

try {
  DatasetDataTextSchema.index(
    { teamId: 1, datasetId: 1, fullTextToken: 'text' },
    { teamId: 1, fullTextToken: 'text' },
    {
      name: 'teamId_1_datasetId_1_fullTextToken_text',
      name: 'teamId_1_fullTextToken_text',
      default_language: 'none'
    }
  );
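Dropping datasetId from the compound text index matters because full-text recall now has to span several datasets in one query (see the search refactor later in this diff): teamId still narrows the index, while datasetId becomes an ordinary $in filter. A sketch of the kind of query the new index serves, against a stand-in model:

import mongoose, { Schema, Types } from 'mongoose';

// Stand-in model for the sketch; the real schema and index live in this file.
const DataText = mongoose.model(
  'dataset_data_texts_sketch',
  new Schema({
    teamId: Schema.Types.ObjectId,
    datasetId: Schema.Types.ObjectId,
    fullTextToken: String
  })
);

// teamId narrows the index scan; datasetId is a plain filter, so one $text
// query can cover many datasets via $in.
const fullTextSearch = (teamId: string, datasetIds: string[], tokens: string) =>
  DataText.find({
    teamId: new Types.ObjectId(teamId),
    datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) },
    $text: { $search: tokens }
  })
    .select({ score: { $meta: 'textScore' } })
    .sort({ score: { $meta: 'textScore' } })
    .limit(50);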
208
packages/service/core/dataset/feishuDataset/api.ts
Normal file
@@ -0,0 +1,208 @@
import type {
  APIFileItem,
  ApiFileReadContentResponse,
  ApiDatasetDetailResponse,
  FeishuServer
} from '@fastgpt/global/core/dataset/apiDataset';
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import axios, { type Method } from 'axios';
import { addLog } from '../../../common/system/log';

type ResponseDataType = {
  success: boolean;
  message: string;
  data: any;
};

type FeishuFileListResponse = {
  files: {
    token: string;
    parent_token: string;
    name: string;
    type: string;
    modified_time: number;
    created_time: number;
    url: string;
    owner_id: string;
  }[];
  has_more: boolean;
  next_page_token: string;
};

const feishuBaseUrl = process.env.FEISHU_BASE_URL || 'https://open.feishu.cn';

export const useFeishuDatasetRequest = ({ feishuServer }: { feishuServer: FeishuServer }) => {
  const instance = axios.create({
    baseURL: feishuBaseUrl,
    timeout: 60000
  });

  // Add a request interceptor
  instance.interceptors.request.use(async (config) => {
    if (!config.headers.Authorization) {
      const { data } = await axios.post<{ tenant_access_token: string }>(
        `${feishuBaseUrl}/open-apis/auth/v3/tenant_access_token/internal`,
        {
          app_id: feishuServer.appId,
          app_secret: feishuServer.appSecret
        }
      );

      config.headers['Authorization'] = `Bearer ${data.tenant_access_token}`;
      config.headers['Content-Type'] = 'application/json; charset=utf-8';
    }
    return config;
  });

  /**
   * Check the response data
   */
  const checkRes = (data: ResponseDataType) => {
    if (data === undefined) {
      addLog.info('feishu dataset data is empty');
      return Promise.reject('Server error');
    }
    return data.data;
  };
  const responseError = (err: any) => {
    console.log('error->', 'request error', err);

    if (!err) {
      return Promise.reject({ message: 'Unknown error' });
    }
    if (typeof err === 'string') {
      return Promise.reject({ message: err });
    }
    if (typeof err.message === 'string') {
      return Promise.reject({ message: err.message });
    }
    if (typeof err.data === 'string') {
      return Promise.reject({ message: err.data });
    }
    if (err?.response?.data) {
      return Promise.reject(err?.response?.data);
    }
    return Promise.reject(err);
  };

  const request = <T>(url: string, data: any, method: Method): Promise<T> => {
    /* Strip undefined values */
    for (const key in data) {
      if (data[key] === undefined) {
        delete data[key];
      }
    }

    return instance
      .request({
        url,
        method,
        data: ['POST', 'PUT'].includes(method) ? data : undefined,
        params: !['POST', 'PUT'].includes(method) ? data : undefined
      })
      .then((res) => checkRes(res.data))
      .catch((err) => responseError(err));
  };

  const listFiles = async ({ parentId }: { parentId?: ParentIdType }): Promise<APIFileItem[]> => {
    const fetchFiles = async (pageToken?: string): Promise<FeishuFileListResponse['files']> => {
      const data = await request<FeishuFileListResponse>(
        `/open-apis/drive/v1/files`,
        {
          folder_token: parentId || feishuServer.folderToken,
          page_size: 200,
          page_token: pageToken
        },
        'GET'
      );

      if (data.has_more) {
        const nextFiles = await fetchFiles(data.next_page_token);
        return [...data.files, ...nextFiles];
      }

      return data.files;
    };

    const allFiles = await fetchFiles();

    return allFiles
      .filter((file) => ['folder', 'docx'].includes(file.type))
      .map((file) => ({
        id: file.token,
        parentId: file.parent_token,
        name: file.name,
        type: file.type === 'folder' ? ('folder' as const) : ('file' as const),
        hasChild: file.type === 'folder',
        updateTime: new Date(file.modified_time * 1000),
        createTime: new Date(file.created_time * 1000)
      }));
  };

  const getFileContent = async ({
    apiFileId
  }: {
    apiFileId: string;
  }): Promise<ApiFileReadContentResponse> => {
    const [{ content }, { document }] = await Promise.all([
      request<{ content: string }>(
        `/open-apis/docx/v1/documents/${apiFileId}/raw_content`,
        {},
        'GET'
      ),
      request<{ document: { title: string } }>(
        `/open-apis/docx/v1/documents/${apiFileId}`,
        {},
        'GET'
      )
    ]);

    return {
      title: document?.title,
      rawText: content
    };
  };

  const getFilePreviewUrl = async ({ apiFileId }: { apiFileId: string }): Promise<string> => {
    const { metas } = await request<{ metas: { url: string }[] }>(
      `/open-apis/drive/v1/metas/batch_query`,
      {
        request_docs: [
          {
            doc_token: apiFileId,
            doc_type: 'docx'
          }
        ],
        with_url: true
      },
      'POST'
    );

    return metas[0].url;
  };

  const getFileDetail = async ({
    apiFileId
  }: {
    apiFileId: string;
  }): Promise<ApiDatasetDetailResponse> => {
    const { document } = await request<{ document: { title: string } }>(
      `/open-apis/docx/v1/documents/${apiFileId}`,
      {},
      'GET'
    );

    return {
      name: document?.title,
      parentId: null,
      id: apiFileId
    };
  };

  return {
    getFileContent,
    listFiles,
    getFilePreviewUrl,
    getFileDetail
  };
};
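A short usage sketch of the new Feishu client: with an app id/secret and a root folder token it lists docx files and pulls raw text. The FeishuServer field names (appId, appSecret, folderToken) are the ones this file reads; the import path and values are illustrative:

import { useFeishuDatasetRequest } from '@fastgpt/service/core/dataset/feishuDataset/api';

const main = async () => {
  const client = useFeishuDatasetRequest({
    feishuServer: { appId: 'cli_xxx', appSecret: '***', folderToken: 'fldr_xxx' } as any
  });

  // listFiles pages through /open-apis/drive/v1/files 200 items at a time.
  const files = await client.listFiles({ parentId: null });
  const firstDoc = files.find((f) => f.type === 'file');
  if (firstDoc) {
    const { title, rawText } = await client.getFileContent({ apiFileId: firstDoc.id });
    console.log(title, rawText.length);
  }
};

main().catch(console.error);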
@@ -1,8 +1,10 @@
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import {
  ChunkTriggerConfigTypeEnum,
  DatasetSourceReadTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { readFileContentFromMongo } from '../../common/file/gridfs/controller';
import { urlsFetch } from '../../common/string/cheerio';
import { parseCsvTable2Chunks } from './training/utils';
import { type TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import axios from 'axios';
import { readRawContentByFileBuffer } from '../../common/file/read/utils';
@@ -12,19 +14,22 @@ import {
  type FeishuServer,
  type YuqueServer
} from '@fastgpt/global/core/dataset/apiDataset';
import { useApiDatasetRequest } from './apiDataset/api';
import { getApiDatasetRequest } from './apiDataset';
import Papa from 'papaparse';

export const readFileRawTextByUrl = async ({
  teamId,
  tmbId,
  url,
  customPdfParse,
  getFormatText,
  relatedId
}: {
  teamId: string;
  tmbId: string;
  url: string;
  customPdfParse?: boolean;
  getFormatText?: boolean;
  relatedId: string; // externalFileId / apiFileId
}) => {
  const response = await axios({
@@ -38,7 +43,7 @@ export const readFileRawTextByUrl = async ({

  const { rawText } = await readRawContentByFileBuffer({
    customPdfParse,
    isQAImport: false,
    getFormatText,
    extension,
    teamId,
    tmbId,
@@ -62,21 +67,21 @@ export const readDatasetSourceRawText = async ({
  tmbId,
  type,
  sourceId,
  isQAImport,
  selector,
  externalFileId,
  apiServer,
  feishuServer,
  yuqueServer,
  customPdfParse
  customPdfParse,
  getFormatText
}: {
  teamId: string;
  tmbId: string;
  type: DatasetSourceReadTypeEnum;
  sourceId: string;
  customPdfParse?: boolean;
  getFormatText?: boolean;

  isQAImport?: boolean; // csv data
  selector?: string; // link selector
  externalFileId?: string; // external file dataset
  apiServer?: APIFileServer; // api dataset
@@ -92,8 +97,8 @@ export const readDatasetSourceRawText = async ({
    tmbId,
    bucketName: BucketNameEnum.dataset,
    fileId: sourceId,
    isQAImport,
    customPdfParse
    customPdfParse,
    getFormatText
  });
  return {
    title: filename,
@@ -161,38 +166,82 @@ export const readApiServerFileContent = async ({
  title?: string;
  rawText: string;
}> => {
  if (apiServer) {
    return useApiDatasetRequest({ apiServer }).getFileContent({
      teamId,
      tmbId,
      apiFileId,
      customPdfParse
    });
  }

  if (feishuServer || yuqueServer) {
    return global.getProApiDatasetFileContent({
      feishuServer,
  return (
    await getApiDatasetRequest({
      apiServer,
      yuqueServer,
      apiFileId
    });
  }

  return Promise.reject('No apiServer or feishuServer or yuqueServer');
      feishuServer
    })
  ).getFileContent({
    teamId,
    tmbId,
    apiFileId,
    customPdfParse
  });
};

export const rawText2Chunks = ({
  rawText,
  isQAImport,
  chunkTriggerType = ChunkTriggerConfigTypeEnum.minSize,
  chunkTriggerMinSize = 1000,
  backupParse,
  chunkSize = 512,
  ...splitProps
}: {
  rawText: string;
  isQAImport?: boolean;
} & TextSplitProps) => {
  if (isQAImport) {
    const { chunks } = parseCsvTable2Chunks(rawText);
    return chunks;

  chunkTriggerType?: ChunkTriggerConfigTypeEnum;
  chunkTriggerMinSize?: number; // maxSize from agent model, not store

  backupParse?: boolean;
  tableParse?: boolean;
} & TextSplitProps): {
  q: string;
  a: string;
  indexes?: string[];
}[] => {
  const parseDatasetBackup2Chunks = (rawText: string) => {
    const csvArr = Papa.parse(rawText).data as string[][];
    console.log(rawText, csvArr);

    const chunks = csvArr
      .slice(1)
      .map((item) => ({
        q: item[0] || '',
        a: item[1] || '',
        indexes: item.slice(2)
      }))
      .filter((item) => item.q || item.a);

    return {
      chunks
    };
  };

  if (backupParse) {
    return parseDatasetBackup2Chunks(rawText).chunks;
  }

  // Chunk condition
  // 1. Max-size trigger: only split once the text exceeds the max size (default 0.7 × the model's max)
  if (chunkTriggerType === ChunkTriggerConfigTypeEnum.maxSize) {
    const textLength = rawText.trim().length;
    const maxSize = splitProps.maxSize ? splitProps.maxSize * 0.7 : 16000;
    if (textLength < maxSize) {
      return [
        {
          q: rawText,
          a: ''
        }
      ];
    }
  }
  // 2. Min-size trigger: only split once the text exceeds the manually configured minimum
  if (chunkTriggerType !== ChunkTriggerConfigTypeEnum.forceChunk) {
    const textLength = rawText.trim().length;
    if (textLength < chunkTriggerMinSize) {
      return [{ q: rawText, a: '' }];
    }
  }

  const { chunks } = splitText2Chunks({
@@ -203,6 +252,7 @@ export const rawText2Chunks = ({

  return chunks.map((item) => ({
    q: item,
    a: ''
    a: '',
    indexes: []
  }));
};
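A worked example of the two trigger modes above, restated as standalone arithmetic with this file's defaults (chunkTriggerMinSize = 1000, maxSize fallback 16000 and the 0.7 factor); this is a re-statement for illustration, not code from the diff:

// Hypothetical standalone re-statement of the trigger rules; numbers mirror the defaults above.
const shouldSplit = (
  textLength: number,
  mode: 'maxSize' | 'minSize' | 'forceChunk',
  opts: { maxSize?: number; minSize?: number } = {}
) => {
  if (mode === 'maxSize') {
    // Split only past 0.7 × the model's max chunk size (fallback 16000 chars).
    const threshold = opts.maxSize ? opts.maxSize * 0.7 : 16000;
    if (textLength < threshold) return false;
  }
  if (mode !== 'forceChunk') {
    // Split only past the manually chosen minimum (default 1000 chars).
    if (textLength < (opts.minSize ?? 1000)) return false;
  }
  return true;
};

console.log(shouldSplit(900, 'minSize')); // false → whole text becomes one { q, a }
console.log(shouldSplit(900, 'forceChunk')); // true → always chunked
console.log(shouldSplit(20000, 'maxSize', { maxSize: 16000 })); // true (20000 ≥ 11200)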
@@ -1,10 +1,12 @@
import { getMongoModel, Schema } from '../../common/mongo';
import {
  ChunkSettingModeEnum,
  ChunkTriggerConfigTypeEnum,
  DataChunkSplitModeEnum,
  DatasetCollectionDataProcessModeEnum,
  DatasetTypeEnum,
  DatasetTypeMap
  DatasetTypeMap,
  ParagraphChunkAIModeEnum
} from '@fastgpt/global/core/dataset/constants';
import {
  TeamCollectionName,
@@ -15,12 +17,22 @@ import type { DatasetSchemaType } from '@fastgpt/global/core/dataset/type.d';
export const DatasetCollectionName = 'datasets';

export const ChunkSettings = {
  imageIndex: Boolean,
  autoIndexes: Boolean,
  trainingType: {
    type: String,
    enum: Object.values(DatasetCollectionDataProcessModeEnum)
  },

  chunkTriggerType: {
    type: String,
    enum: Object.values(ChunkTriggerConfigTypeEnum)
  },
  chunkTriggerMinSize: Number,

  dataEnhanceCollectionName: Boolean,

  imageIndex: Boolean,
  autoIndexes: Boolean,

  chunkSettingMode: {
    type: String,
    enum: Object.values(ChunkSettingModeEnum)
@@ -29,6 +41,12 @@ export const ChunkSettings = {
    type: String,
    enum: Object.values(DataChunkSplitModeEnum)
  },
  paragraphChunkAIMode: {
    type: String,
    enum: Object.values(ParagraphChunkAIModeEnum)
  },
  paragraphChunkDeep: Number,
  paragraphChunkMinSize: Number,
  chunkSize: Number,
  chunkSplitter: String,

@@ -115,9 +133,7 @@ const DatasetSchema = new Schema({

  // abandoned
  autoSync: Boolean,
  externalReadUrl: {
    type: String
  },
  externalReadUrl: String,
  defaultPermission: Number
});
@@ -27,6 +27,7 @@ import { type ChatItemType } from '@fastgpt/global/core/chat/type';
import type { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
import { datasetSearchQueryExtension } from './utils';
import type { RerankModelItemType } from '@fastgpt/global/core/ai/model.d';
import { addLog } from '../../../common/system/log';

export type SearchDatasetDataProps = {
  histories: ChatItemType[];
@@ -544,123 +545,125 @@ export async function searchDatasetData(
    };
  }

  const searchResults = (
    await Promise.all(
      datasetIds.map(async (id) => {
        return MongoDatasetDataText.aggregate(
          [
            {
              $match: {
                teamId: new Types.ObjectId(teamId),
                datasetId: new Types.ObjectId(id),
                $text: { $search: await jiebaSplit({ text: query }) },
                ...(filterCollectionIdList
                  ? {
                      collectionId: {
                        $in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
                      }
                    }
                  : {}),
                ...(forbidCollectionIdList && forbidCollectionIdList.length > 0
                  ? {
                      collectionId: {
                        $nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
                      }
                    }
                  : {})
              }
            },
            {
              $sort: {
                score: { $meta: 'textScore' }
              }
            },
            {
              $limit: limit
            },
            {
              $project: {
                _id: 1,
                collectionId: 1,
                dataId: 1,
                score: { $meta: 'textScore' }
              }
            }
          ],
          {
            ...readFromSecondary
  try {
    const searchResults = (await MongoDatasetDataText.aggregate(
      [
        {
          $match: {
            teamId: new Types.ObjectId(teamId),
            $text: { $search: await jiebaSplit({ text: query }) },
            datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) },
            ...(filterCollectionIdList
              ? {
                  collectionId: {
                    $in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
                  }
                }
              : {}),
            ...(forbidCollectionIdList && forbidCollectionIdList.length > 0
              ? {
                  collectionId: {
                    $nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
                  }
                }
              : {})
          }
        },
        {
          $sort: {
            score: { $meta: 'textScore' }
          }
        },
        {
          $limit: limit
        },
        {
          $project: {
            _id: 1,
            collectionId: 1,
            dataId: 1,
            score: { $meta: 'textScore' }
          }
        }
        );
      })
    )
  ).flat() as (DatasetDataTextSchemaType & { score: number })[];

  // Get data and collections
  const [dataList, collections] = await Promise.all([
    MongoDatasetData.find(
      {
        _id: { $in: searchResults.map((item) => item.dataId) }
      },
      '_id datasetId collectionId updateTime q a chunkIndex indexes',
      { ...readFromSecondary }
    ).lean(),
    MongoDatasetCollection.find(
      {
        _id: { $in: searchResults.map((item) => item.collectionId) }
      },
      '_id name fileId rawLink apiFileId externalFileId externalFileUrl',
      { ...readFromSecondary }
    ).lean()
  ]);

  return {
    fullTextRecallResults: searchResults
      .map((item, index) => {
        const collection = collections.find(
          (col) => String(col._id) === String(item.collectionId)
        );
        if (!collection) {
          console.log('Collection is not found', item);
          return;
        }
        const data = dataList.find((data) => String(data._id) === String(item.dataId));
        if (!data) {
          console.log('Data is not found', item);
          return;
        }
      ],
      {
        ...readFromSecondary
      }
    )) as (DatasetDataTextSchemaType & { score: number })[];

        return {
          id: String(data._id),
          datasetId: String(data.datasetId),
          collectionId: String(data.collectionId),
          updateTime: data.updateTime,
          q: data.q,
          a: data.a,
          chunkIndex: data.chunkIndex,
          indexes: data.indexes,
          ...getCollectionSourceData(collection),
          score: [
            {
              type: SearchScoreTypeEnum.fullText,
              value: item.score || 0,
              index
            }
          ]
        };
      })
      .filter((item) => {
        if (!item) return false;
        return true;
      })
      .map((item, index) => {
        if (!item) return;
        return {
          ...item,
          score: item.score.map((item) => ({ ...item, index }))
        };
      }) as SearchDataResponseItemType[],
    tokenLen: 0
  };
    // Get data and collections
    const [dataList, collections] = await Promise.all([
      MongoDatasetData.find(
        {
          _id: { $in: searchResults.map((item) => item.dataId) }
        },
        '_id datasetId collectionId updateTime q a chunkIndex indexes',
        { ...readFromSecondary }
      ).lean(),
      MongoDatasetCollection.find(
        {
          _id: { $in: searchResults.map((item) => item.collectionId) }
        },
        '_id name fileId rawLink apiFileId externalFileId externalFileUrl',
        { ...readFromSecondary }
      ).lean()
    ]);

    return {
      fullTextRecallResults: searchResults
        .map((item, index) => {
          const collection = collections.find(
            (col) => String(col._id) === String(item.collectionId)
          );
          if (!collection) {
            console.log('Collection is not found', item);
            return;
          }
          const data = dataList.find((data) => String(data._id) === String(item.dataId));
          if (!data) {
            console.log('Data is not found', item);
            return;
          }

          return {
            id: String(data._id),
            datasetId: String(data.datasetId),
            collectionId: String(data.collectionId),
            updateTime: data.updateTime,
            q: data.q,
            a: data.a,
            chunkIndex: data.chunkIndex,
            indexes: data.indexes,
            ...getCollectionSourceData(collection),
            score: [
              {
                type: SearchScoreTypeEnum.fullText,
                value: item.score || 0,
                index
              }
            ]
          };
        })
        .filter((item) => {
          if (!item) return false;
          return true;
        })
        .map((item, index) => {
          if (!item) return;
          return {
            ...item,
            score: item.score.map((item) => ({ ...item, index }))
          };
        }) as SearchDataResponseItemType[],
      tokenLen: 0
    };
  } catch (error) {
    addLog.error('Full text search error', error);
    return {
      fullTextRecallResults: [],
      tokenLen: 0
    };
  }
};
const multiQueryRecall = async ({
  embeddingLimit,
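Two things appear to drive this refactor: the text index no longer contains datasetId, and a $text predicate may appear only once, in the first $match of a pipeline, so recalling across datasets is cheaper as one aggregate with datasetId: { $in: [...] } than as one aggregate per dataset. The new try/catch also degrades full-text recall to an empty result instead of failing the whole hybrid search. The skeleton of that shape, with a stubbed aggregate runner:

import { Types } from 'mongoose';

// Stand-in for MongoDatasetDataText.aggregate; returns scored rows.
const aggregate = async (pipeline: object[]): Promise<{ dataId: string; score: number }[]> => {
  void pipeline;
  return [];
};

const fullTextRecall = async (
  teamId: string,
  datasetIds: string[],
  tokens: string,
  limit: number
) => {
  try {
    // One pipeline for all datasets: $text must appear in the first $match,
    // and datasetId is a plain $in filter now that it left the text index.
    return await aggregate([
      {
        $match: {
          teamId: new Types.ObjectId(teamId),
          $text: { $search: tokens },
          datasetId: { $in: datasetIds.map((id) => new Types.ObjectId(id)) }
        }
      },
      { $sort: { score: { $meta: 'textScore' } } },
      { $limit: limit },
      { $project: { _id: 1, dataId: 1, score: { $meta: 'textScore' } } }
    ]);
  } catch (error) {
    // Full-text recall is one leg of a hybrid search; fail soft with no results.
    return [];
  }
};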
@@ -1,6 +1,5 @@
export enum ImportDataSourceEnum {
  fileLocal = 'fileLocal',
  fileLink = 'fileLink',
  fileCustom = 'fileCustom',
  tableLocal = 'tableLocal'
  fileCustom = 'fileCustom'
}
@@ -1,16 +0,0 @@
import Papa from 'papaparse';

export const parseCsvTable2Chunks = (rawText: string) => {
  const csvArr = Papa.parse(rawText).data as string[][];

  const chunks = csvArr
    .map((item) => ({
      q: item[0] || '',
      a: item[1] || ''
    }))
    .filter((item) => item.q || item.a);

  return {
    chunks
  };
};
304
packages/service/core/dataset/yuqueDataset/api.ts
Normal file
@@ -0,0 +1,304 @@
import type {
  APIFileItem,
  ApiFileReadContentResponse,
  YuqueServer,
  ApiDatasetDetailResponse
} from '@fastgpt/global/core/dataset/apiDataset';
import axios, { type Method } from 'axios';
import { addLog } from '../../../common/system/log';
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';

type ResponseDataType = {
  success: boolean;
  message: string;
  data: any;
};

type YuqueRepoListResponse = {
  id: string;
  name: string;
  title: string;
  book_id: string | null;
  type: string;
  updated_at: Date;
  created_at: Date;
  slug?: string;
}[];

type YuqueTocListResponse = {
  uuid: string;
  type: string;
  title: string;
  url: string;
  slug: string;
  id: string;
  doc_id: string;
  prev_uuid: string;
  sibling_uuid: string;
  child_uuid: string;
  parent_uuid: string;
}[];

const yuqueBaseUrl = process.env.YUQUE_DATASET_BASE_URL || 'https://www.yuque.com';

export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServer }) => {
  const instance = axios.create({
    baseURL: yuqueBaseUrl,
    timeout: 60000, // request timeout
    headers: {
      'X-Auth-Token': yuqueServer.token
    }
  });

  /**
   * Check the response data
   */
  const checkRes = (data: ResponseDataType) => {
    if (data === undefined) {
      addLog.info('yuque dataset data is empty');
      return Promise.reject('Server error');
    }
    return data.data;
  };
  const responseError = (err: any) => {
    console.log('error->', 'request error', err);

    if (!err) {
      return Promise.reject({ message: 'Unknown error' });
    }
    if (typeof err === 'string') {
      return Promise.reject({ message: err });
    }
    if (typeof err.message === 'string') {
      return Promise.reject({ message: err.message });
    }
    if (typeof err.data === 'string') {
      return Promise.reject({ message: err.data });
    }
    if (err?.response?.data) {
      return Promise.reject(err?.response?.data);
    }
    return Promise.reject(err);
  };

  const request = <T>(url: string, data: any, method: Method): Promise<T> => {
    /* Strip undefined values */
    for (const key in data) {
      if (data[key] === undefined) {
        delete data[key];
      }
    }

    return instance
      .request({
        url,
        method,
        data: ['POST', 'PUT'].includes(method) ? data : undefined,
        params: !['POST', 'PUT'].includes(method) ? data : undefined
      })
      .then((res) => checkRes(res.data))
      .catch((err) => responseError(err));
  };

  const listFiles = async ({ parentId }: { parentId?: ParentIdType }) => {
    // Auto set baseurl to parentId
    if (!parentId) {
      if (yuqueServer.basePath) parentId = yuqueServer.basePath;
    }

    let files: APIFileItem[] = [];

    if (!parentId) {
      const limit = 100;
      let offset = 0;
      let allData: YuqueRepoListResponse = [];

      while (true) {
        const data = await request<YuqueRepoListResponse>(
          `/api/v2/groups/${yuqueServer.userId}/repos`,
          {
            offset,
            limit
          },
          'GET'
        );

        if (!data || data.length === 0) break;

        allData = [...allData, ...data];
        if (data.length < limit) break;

        offset += limit;
      }

      files = allData.map((item) => {
        return {
          id: item.id,
          name: item.name,
          parentId: null,
          type: 'folder',
          updateTime: item.updated_at,
          createTime: item.created_at,
          hasChild: true,
          slug: item.slug
        };
      });
    } else {
      if (typeof parentId === 'number') {
        const data = await request<YuqueTocListResponse>(
          `/api/v2/repos/${parentId}/toc`,
          {},
          'GET'
        );

        return data
          .filter((item) => !item.parent_uuid && item.type !== 'LINK')
          .map((item) => ({
            id: `${parentId}-${item.id}-${item.uuid}`,
            name: item.title,
            parentId: item.parent_uuid,
            type: item.type === 'TITLE' ? ('folder' as const) : ('file' as const),
            updateTime: new Date(),
            createTime: new Date(),
            uuid: item.uuid,
            slug: item.slug,
            hasChild: !!item.child_uuid
          }));
      } else {
        const [repoId, uuid, parentUuid] = parentId.split(/-(.*?)-(.*)/);
        const data = await request<YuqueTocListResponse>(`/api/v2/repos/${repoId}/toc`, {}, 'GET');

        return data
          .filter((item) => item.parent_uuid === parentUuid)
          .map((item) => ({
            id: `${repoId}-${item.id}-${item.uuid}`,
            name: item.title,
            parentId: item.parent_uuid,
            type: item.type === 'TITLE' ? ('folder' as const) : ('file' as const),
            updateTime: new Date(),
            createTime: new Date(),
            uuid: item.uuid,
            slug: item.slug,
            hasChild: !!item.child_uuid
          }));
      }
    }

    if (!Array.isArray(files)) {
      return Promise.reject('Invalid file list format');
    }
    if (files.some((file) => !file.id || !file.name || typeof file.type === 'undefined')) {
      return Promise.reject('Invalid file data format');
    }
    return files;
  };

  const getFileContent = async ({
    apiFileId
  }: {
    apiFileId: string;
  }): Promise<ApiFileReadContentResponse> => {
    const [parentId, fileId] = apiFileId.split(/-(.*?)-(.*)/);

    const data = await request<{ title: string; body: string }>(
      `/api/v2/repos/${parentId}/docs/${fileId}`,
      {},
      'GET'
    );

    return {
      title: data.title,
      rawText: data.body
    };
  };

  const getFilePreviewUrl = async ({ apiFileId }: { apiFileId: string }) => {
    const [parentId, fileId] = apiFileId.split(/-(.*?)-(.*)/);

    const { slug: parentSlug } = await request<{ slug: string }>(
      `/api/v2/repos/${parentId}`,
      { id: apiFileId },
      'GET'
    );

    const { slug: fileSlug } = await request<{ slug: string }>(
      `/api/v2/repos/${parentId}/docs/${fileId}`,
      {},
      'GET'
    );

    return `${yuqueBaseUrl}/${yuqueServer.userId}/${parentSlug}/${fileSlug}`;
  };

  const getFileDetail = async ({
    apiFileId
  }: {
    apiFileId: string;
  }): Promise<ApiDatasetDetailResponse> => {
    // If the id is numeric, treat it as a repo and look it up in the repo list
    if (typeof apiFileId === 'number' || !isNaN(Number(apiFileId))) {
      const limit = 100;
      let offset = 0;
      let allData: YuqueRepoListResponse = [];

      while (true) {
        const data = await request<YuqueRepoListResponse>(
          `/api/v2/groups/${yuqueServer.userId}/repos`,
          {
            offset,
            limit
          },
          'GET'
        );

        if (!data || data.length === 0) break;

        allData = [...allData, ...data];
        if (data.length < limit) break;

        offset += limit;
      }

      const file = allData.find((item) => Number(item.id) === Number(apiFileId));
      if (!file) {
        return Promise.reject('File not found');
      }
      return {
        id: file.id,
        name: file.name,
        parentId: null
      };
    } else {
      const [repoId, parentUuid, fileId] = apiFileId.split(/-(.*?)-(.*)/);
      const data = await request<YuqueTocListResponse>(`/api/v2/repos/${repoId}/toc`, {}, 'GET');
      const file = data.find((item) => item.uuid === fileId);
      if (!file) {
        return Promise.reject('File not found');
      }
      const parentfile = data.find((item) => item.uuid === file.parent_uuid);
      const parentId = `${repoId}-${parentfile?.id}-${parentfile?.uuid}`;

      // If parent_uuid is empty, the doc sits at the repo root, so return the repo as its parent
      if (file.parent_uuid) {
        return {
          id: file.id,
          name: file.title,
          parentId: parentId
        };
      } else {
        return {
          id: file.id,
          name: file.title,
          parentId: repoId
        };
      }
    }
  };

  return {
    getFileContent,
    listFiles,
    getFilePreviewUrl,
    getFileDetail
  };
};
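The Yuque client round-trips composite ids of the form `${repoId}-${tocId}-${uuid}` through split(/-(.*?)-(.*)/). Because a regex with capture groups keeps the captured pieces in the split result, the decode works even when the uuid itself contains dashes; a small standalone demonstration (not part of the diff):

// Composite ids look like `${repoId}-${tocId}-${uuid}`, where uuid may contain dashes.
const encodeId = (repoId: string, tocId: string, uuid: string) => `${repoId}-${tocId}-${uuid}`;

// split(/-(.*?)-(.*)/) keeps the two capture groups, so the pieces come back as
// [before-first-dash, lazy-middle, greedy-rest, ''].
const decodeId = (apiFileId: string) => {
  const [repoId, tocId, uuid] = apiFileId.split(/-(.*?)-(.*)/);
  return { repoId, tocId, uuid };
};

console.log(decodeId(encodeId('41325871', '605', 'tgkxn2-a1b2')));
// → { repoId: '41325871', tocId: '605', uuid: 'tgkxn2-a1b2' }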
@@ -223,28 +223,29 @@ const toolChoice = async (props: ActionProps) => {
    }
  ];

  const body = llmCompletionsBodyFormat(
    {
      stream: true,
      model: extractModel.model,
      temperature: 0.01,
      messages: filterMessages,
      tools,
      tool_choice: { type: 'function', function: { name: agentFunName } }
    },
    extractModel
  );
  const { response } = await createChatCompletion({
    body: llmCompletionsBodyFormat(
      {
        stream: true,
        model: extractModel.model,
        temperature: 0.01,
        messages: filterMessages,
        tools,
        tool_choice: { type: 'function', function: { name: agentFunName } }
      },
      extractModel
    ),
    body,
    userKey: externalProvider.openaiAccount
  });
  const { toolCalls, usage } = await formatLLMResponse(response);
  const { text, toolCalls, usage } = await formatLLMResponse(response);

  const arg: Record<string, any> = (() => {
    try {
      return json5.parse(toolCalls?.[0]?.function?.arguments || '');
    } catch (error) {
      console.log(agentFunction.parameters);
      console.log(toolCalls?.[0]?.function);
      console.log('body', body);
      console.log('AI response', text, toolCalls?.[0]?.function);
      console.log('Your model may not support tool_call', error);
      return {};
    }
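The extra logging above lands in the catch around json5.parse. json5 is used because models often emit almost-JSON arguments (single quotes, trailing commas) that strict JSON.parse rejects; a minimal reproduction of the fallback, using the real json5 package:

import json5 from 'json5';

// Tool-call arguments as an LLM might emit them: valid JSON5, invalid strict JSON.
const raw = "{ city: 'Paris', days: 3, }";

const args: Record<string, any> = (() => {
  try {
    return json5.parse(raw);
  } catch (error) {
    // Mirrors the diff: log what the model sent, then fall back to an empty object.
    console.log('AI response', raw);
    console.log('Your model may not support tool_call', error);
    return {};
  }
})();

console.log(args); // { city: 'Paris', days: 3 }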
@@ -1,13 +1,14 @@
import { createChatCompletion } from '../../../../ai/config';
import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../../chat/utils';
import {
  type ChatCompletion,
  type StreamChatType,
  type ChatCompletionMessageParam,
  type ChatCompletionCreateParams,
  type ChatCompletionMessageFunctionCall,
  type ChatCompletionFunctionMessageParam,
  type ChatCompletionAssistantMessageParam
import type {
  ChatCompletion,
  StreamChatType,
  ChatCompletionMessageParam,
  ChatCompletionCreateParams,
  ChatCompletionMessageFunctionCall,
  ChatCompletionFunctionMessageParam,
  ChatCompletionAssistantMessageParam,
  CompletionFinishReason
} from '@fastgpt/global/core/ai/type.d';
import { type NextApiResponse } from 'next';
import { responseWriteController } from '../../../../../common/response';
@@ -259,14 +260,15 @@ export const runToolWithFunctionCall = async (
    }
  });

  let { answer, functionCalls, inputTokens, outputTokens } = await (async () => {
  let { answer, functionCalls, inputTokens, outputTokens, finish_reason } = await (async () => {
    if (isStreamResponse) {
      if (!res || res.closed) {
        return {
          answer: '',
          functionCalls: [],
          inputTokens: 0,
          outputTokens: 0
          outputTokens: 0,
          finish_reason: 'close' as const
        };
      }
      const result = await streamResponse({
@@ -281,10 +283,12 @@ export const runToolWithFunctionCall = async (
        answer: result.answer,
        functionCalls: result.functionCalls,
        inputTokens: result.usage.prompt_tokens,
        outputTokens: result.usage.completion_tokens
        outputTokens: result.usage.completion_tokens,
        finish_reason: result.finish_reason
      };
    } else {
      const result = aiResponse as ChatCompletion;
      const finish_reason = result.choices?.[0]?.finish_reason as CompletionFinishReason;
      const function_call = result.choices?.[0]?.message?.function_call;
      const usage = result.usage;

@@ -315,7 +319,8 @@ export const runToolWithFunctionCall = async (
        answer,
        functionCalls: toolCalls,
        inputTokens: usage?.prompt_tokens,
        outputTokens: usage?.completion_tokens
        outputTokens: usage?.completion_tokens,
        finish_reason
      };
    }
  })();
@@ -481,7 +486,8 @@ export const runToolWithFunctionCall = async (
      completeMessages,
      assistantResponses: toolNodeAssistants,
      runTimes,
      toolWorkflowInteractiveResponse
      toolWorkflowInteractiveResponse,
      finish_reason
    };
  }

@@ -495,7 +501,8 @@ export const runToolWithFunctionCall = async (
      toolNodeInputTokens,
      toolNodeOutputTokens,
      assistantResponses: toolNodeAssistants,
      runTimes
      runTimes,
      finish_reason
    }
  );
} else {
@@ -523,7 +530,8 @@ export const runToolWithFunctionCall = async (
      : outputTokens,
    completeMessages,
    assistantResponses: [...assistantResponses, ...toolNodeAssistant.value],
    runTimes: (response?.runTimes || 0) + 1
    runTimes: (response?.runTimes || 0) + 1,
    finish_reason
  };
}
};
@@ -546,28 +554,25 @@ async function streamResponse({
    readStream: stream
  });

  let textAnswer = '';
  let functionCalls: ChatCompletionMessageFunctionCall[] = [];
  let functionId = getNanoid();
  let usage = getLLMDefaultUsage();

  const { parsePart } = parseLLMStreamResponse();
  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

  for await (const part of stream) {
    usage = part.usage || usage;
    if (res.closed) {
      stream.controller?.abort();
      updateFinishReason('close');
      break;
    }

    const { content: toolChoiceContent, responseContent } = parsePart({
    const { responseContent } = parsePart({
      part,
      parseThinkTag: false,
      retainDatasetCite
    });

    const responseChoice = part.choices?.[0]?.delta;
    textAnswer += toolChoiceContent;

    if (responseContent) {
      workflowStreamResponse?.({
@@ -577,7 +582,7 @@ async function streamResponse({
        text: responseContent
      })
    });
    } else if (responseChoice.function_call) {
    } else if (responseChoice?.function_call) {
      const functionCall: {
        arguments?: string;
        name?: string;
@@ -640,5 +645,7 @@ async function streamResponse({
    }
  }

  return { answer: textAnswer, functionCalls, usage };
  const { content, finish_reason, usage } = getResponseData();

  return { answer: content, functionCalls, finish_reason, usage };
}
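Both streamResponse rewrites in this diff move accumulation (answer text, usage, finish reason) out of the loop and into parseLLMStreamResponse. A hedged sketch of the contract those call sites rely on — parsePart folds each chunk into internal state, updateFinishReason lets the caller force 'close' when the client disconnects, getResponseData returns the totals — not the actual implementation:

type Part = {
  choices?: { delta?: { content?: string }; finish_reason?: string | null }[];
  usage?: any;
};

// Minimal stand-in with the same surface used by the call sites in this diff.
const makeStreamAccumulator = () => {
  let content = '';
  let finish_reason: string | null = null;
  let usage: any = { prompt_tokens: 0, completion_tokens: 0 };

  return {
    parsePart: ({ part }: { part: Part }) => {
      const delta = part.choices?.[0]?.delta?.content || '';
      content += delta;
      finish_reason = finish_reason || part.choices?.[0]?.finish_reason || null;
      usage = part.usage || usage;
      return { content: delta, responseContent: delta };
    },
    // Called instead of mutating a local variable when res.closed aborts the stream.
    updateFinishReason: (reason: string) => {
      finish_reason = reason;
    },
    getResponseData: () => ({ content, finish_reason, usage })
  };
};

const parts: Part[] = [
  { choices: [{ delta: { content: 'Hel' } }] },
  {
    choices: [{ delta: { content: 'lo' }, finish_reason: 'stop' }],
    usage: { prompt_tokens: 5, completion_tokens: 2 }
  }
];

const { parsePart, getResponseData } = makeStreamAccumulator();
for (const part of parts) parsePart({ part });
console.log(getResponseData()); // { content: 'Hello', finish_reason: 'stop', usage: {...} }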
@@ -86,7 +86,6 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
  });

  // Check interactive entry
  const interactiveResponse = lastInteractive;
  props.node.isEntry = false;
  const hasReadFilesTool = toolNodes.some(
    (item) => item.flowNodeType === FlowNodeTypeEnum.readFiles
@@ -143,7 +142,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
      })
    }
  ];
  if (interactiveResponse) {
  if (lastInteractive && isEntry) {
    return value.slice(0, -2);
  }
  return value;
@@ -183,7 +182,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
      toolModel,
      maxRunToolTimes: 30,
      messages: adaptMessages,
      interactiveEntryToolParams: interactiveResponse?.toolParams
      interactiveEntryToolParams: lastInteractive?.toolParams
    });
  }
  if (toolModel.functionCall) {
@@ -194,7 +193,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
      toolNodes,
      toolModel,
      messages: adaptMessages,
      interactiveEntryToolParams: interactiveResponse?.toolParams
      interactiveEntryToolParams: lastInteractive?.toolParams
    });
  }

@@ -224,7 +223,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
      toolNodes,
      toolModel,
      messages: adaptMessages,
      interactiveEntryToolParams: interactiveResponse?.toolParams
      interactiveEntryToolParams: lastInteractive?.toolParams
    });
  })();
@@ -220,7 +220,8 @@ export const runToolWithPromptCall = async (

  const max_tokens = computedMaxToken({
    model: toolModel,
    maxToken
    maxToken,
    min: 100
  });
  const filterMessages = await filterGPTMessageByMaxContext({
    messages,
@@ -592,28 +593,22 @@ async function streamResponse({

  let startResponseWrite = false;
  let answer = '';
  let reasoning = '';
  let finish_reason: CompletionFinishReason = null;
  let usage = getLLMDefaultUsage();

  const { parsePart } = parseLLMStreamResponse();
  const { parsePart, getResponseData, updateFinishReason } = parseLLMStreamResponse();

  for await (const part of stream) {
    usage = part.usage || usage;
    if (res.closed) {
      stream.controller?.abort();
      finish_reason = 'close';
      updateFinishReason('close');
      break;
    }

    const { reasoningContent, content, responseContent, finishReason } = parsePart({
    const { reasoningContent, content, responseContent } = parsePart({
      part,
      parseThinkTag: aiChatReasoning,
      retainDatasetCite
    });
    finish_reason = finish_reason || finishReason;
    answer += content;
    reasoning += reasoningContent;

    // Reasoning response
    if (aiChatReasoning && reasoningContent) {
@@ -658,7 +653,9 @@ async function streamResponse({
    }
  }

  return { answer, reasoning, finish_reason, usage };
  const { reasoningContent, content, finish_reason, usage } = getResponseData();

  return { answer: content, reasoning: reasoningContent, finish_reason, usage };
}

const parseAnswer = (
Some files were not shown because too many files have changed in this diff.