Compare commits

26 Commits: gru/projec ... v4.9.11-de
| SHA1 |
| --- |
| 4b8dfeef12 |
| 98b00ae86d |
| c1f8d5b032 |
| 4adb8b7e6f |
| e32ca8a3e9 |
| 2507997d20 |
| 86f5a68d8c |
| 92c38d9d2f |
| 9fb5d05865 |
| b974574157 |
| 5a5367d30b |
| 8ed35ffe7e |
| 0f866fc552 |
| 05c7ba4483 |
| fa80ce3a77 |
| 830358aa72 |
| 02b214b3ec |
| a171c7b11c |
| 802de11363 |
| b4ecfb0b79 |
| 331b851a78 |
| 50d235c42a |
| 9838593451 |
| c25cd48e72 |
| 874300a56a |
| 1dea2b71b4 |
@@ -132,15 +132,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # Alibaba Cloud
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # Alibaba Cloud
     ports:
       - 3005:3000
     networks:
@@ -150,8 +150,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # Alibaba Cloud
     ports:
       - 3000:3000
     networks:
@@ -109,15 +109,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # Alibaba Cloud
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # Alibaba Cloud
     ports:
       - 3005:3000
     networks:
@@ -127,8 +127,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # Alibaba Cloud
     ports:
       - 3000:3000
     networks:
@@ -96,15 +96,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # Alibaba Cloud
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # Alibaba Cloud
     ports:
       - 3005:3000
     networks:
@@ -114,8 +114,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # Alibaba Cloud
     ports:
       - 3000:3000
     networks:
@@ -72,15 +72,15 @@ services:

   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # Alibaba Cloud
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # Alibaba Cloud
     ports:
       - 3005:3000
     networks:
@@ -90,8 +90,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # Alibaba Cloud
     ports:
       - 3000:3000
     networks:
232  dev.md
@@ -1,114 +1,118 @@

## Premise

Since FastGPT is managed as a monorepo, it is recommended to install `make` first for development.

Monorepo project names:

- app: main project
- ......

## Dev

```sh
# Give the automatic scripts execute permission (on non-Linux systems, you can run the contents of postinstall.sh manually)
chmod -R +x ./scripts/
# Run in the repository root to install all dependencies for the root package, projects, and packages
pnpm i

# Without make
cd projects/app
pnpm dev

# With make
make dev name=app
```

Note: If the Node version is >= 20, you need to pass the `--no-node-snapshot` flag to Node when running `pnpm i`:

```sh
NODE_OPTIONS=--no-node-snapshot pnpm i
```

### Jest

https://fael3z0zfze.feishu.cn/docx/ZOI1dABpxoGhS7xzhkXcKPxZnDL

## I18N

### Install the i18n-ally Plugin

1. Open the Extensions Marketplace in VSCode, then search for and install the `i18n Ally` plugin.

### Code Optimization Examples

#### Fetch Specific Namespace Translations in `getServerSideProps`

```typescript
// pages/yourPage.tsx
export async function getServerSideProps(context: any) {
  return {
    props: {
      currentTab: context?.query?.currentTab || TabEnum.info,
      ...(await serverSideTranslations(context.locale, ['publish', 'user']))
    }
  };
}
```

#### Use the useTranslation Hook in a Page

```typescript
// pages/yourPage.tsx
import { useTranslation } from 'next-i18next';

const YourComponent = () => {
  const { t } = useTranslation();

  return (
    <Button
      variant="outline"
      size="sm"
      mr={2}
      onClick={() => setShowSelected(false)}
    >
      {t('common:close')}
    </Button>
  );
};

export default YourComponent;
```

#### Handle Static File Translations

```typescript
// utils/i18n.ts
import { i18nT } from '@fastgpt/web/i18n/utils';

const staticContent = {
  id: 'simpleChat',
  avatar: 'core/workflow/template/aiChat',
  name: i18nT('app:template.simple_robot'),
};

export default staticContent;
```

### Standardize Translation Format

- Use the t(namespace:key) format to ensure consistent naming.
- Translation keys should use lowercase letters and underscores, e.g., common.close.

## audit

To add an audit log: add the event to `OperationLogEventEnum` and to the operationLog/audit TS configuration, fill in the corresponding i18n entries, and then call the `addOperationLog` function at the location where the log should be recorded, as sketched below.
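A minimal TypeScript sketch of that flow, assuming the call-site shape; the real `addOperationLog` lives in the service package and its exact signature is an assumption here:

```typescript
// Sketch only: the enum member and the addOperationLog signature are assumptions.
enum OperationLogEventEnum {
  SEARCH_TEST = 'SEARCH_TEST' // 1. declare the new audit event
}

// Assumed helper shape standing in for the real service-package function.
function addOperationLog(props: {
  tmbId: string; // team member performing the action
  teamId: string;
  event: OperationLogEventEnum;
  params?: Record<string, string>; // interpolated into the i18n log template
}) {
  console.log('audit', props.event, props.params);
}

// 2. call it where the audited action actually happens:
addOperationLog({
  tmbId: 'tmb_1',
  teamId: 'team_1',
  event: OperationLogEventEnum.SEARCH_TEST,
  params: { datasetName: 'demo dataset' }
});
```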
## Build

```sh
# Docker cmd: build the image without a proxy
docker build -f ./projects/app/Dockerfile -t registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.1 . --build-arg name=app
# Make cmd: build the image without a proxy
make build name=app image=registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.1

# Docker cmd: build the image with a proxy
docker build -f ./projects/app/Dockerfile -t registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.1 . --build-arg name=app --build-arg proxy=taobao
# Make cmd: build the image with a proxy
make build name=app image=registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.1 proxy=taobao
```
BIN  docSite/assets/imgs/thirddataset-1.png   Normal file  (42 KiB)
BIN  docSite/assets/imgs/thirddataset-10.png  Normal file  (6.0 KiB)
BIN  docSite/assets/imgs/thirddataset-12.png  Normal file  (64 KiB)
BIN  docSite/assets/imgs/thirddataset-13.png  Normal file  (73 KiB)
BIN  docSite/assets/imgs/thirddataset-14.png  Normal file  (62 KiB)
BIN  docSite/assets/imgs/thirddataset-15.png  Normal file  (26 KiB)
BIN  docSite/assets/imgs/thirddataset-16.png  Normal file  (29 KiB)
BIN  docSite/assets/imgs/thirddataset-17.png  Normal file  (33 KiB)
BIN  docSite/assets/imgs/thirddataset-18.png  Normal file  (49 KiB)
BIN  docSite/assets/imgs/thirddataset-19.png  Normal file  (69 KiB)
BIN  docSite/assets/imgs/thirddataset-2.png   Normal file  (40 KiB)
BIN  docSite/assets/imgs/thirddataset-20.png  Normal file  (40 KiB)
BIN  docSite/assets/imgs/thirddataset-21.png  Normal file  (66 KiB)
BIN  docSite/assets/imgs/thirddataset-22.png  Normal file  (57 KiB)
BIN  docSite/assets/imgs/thirddataset-23.png  Normal file  (78 KiB)
BIN  docSite/assets/imgs/thirddataset-24.png  Normal file  (103 KiB)
BIN  docSite/assets/imgs/thirddataset-3.png   Normal file  (43 KiB)
BIN  docSite/assets/imgs/thirddataset-4.png   Normal file  (41 KiB)
BIN  docSite/assets/imgs/thirddataset-5.png   Normal file  (35 KiB)
BIN  docSite/assets/imgs/thirddataset-6.png   Normal file  (38 KiB)
BIN  docSite/assets/imgs/thirddataset-7.png   Normal file  (28 KiB)
BIN  docSite/assets/imgs/thirddataset-8.png   Normal file  (64 KiB)
BIN  docSite/assets/imgs/thirddataset-9.png   Normal file  (110 KiB)
@@ -645,7 +645,7 @@ data is the collection ID.
 {{< /tab >}}
 {{< /tabs >}}

-### Create an external file library collection (Commercial edition)
+### Create an external file library collection (Deprecated)

 {{< tabs tabTotal="3" >}}
 {{< tab tabName="Request example" >}}
@@ -1,5 +1,5 @@
 ---
-title: 'V4.9.1'
+title: 'V4.9.1 (includes upgrade script)'
 description: 'FastGPT V4.9.1 release notes'
 icon: 'upgrade'
 draft: false
@@ -1,5 +1,5 @@
 ---
-title: 'V4.9.10 (in progress)'
+title: 'V4.9.10'
 description: 'FastGPT V4.9.10 release notes'
 icon: 'upgrade'
 draft: false
@@ -7,13 +7,28 @@ toc: true
 weight: 790
 ---

+## Upgrade guide
+
+Important: this update rebuilds the full-text index. While the rebuild runs, full-text search returns empty results; on a 4c16g machine, rebuilding 7 million full-text index entries takes roughly 25 minutes. For a seamless upgrade you need to set up table synchronization yourself.
+
+### 1. Back up your data
+
+### 2. Update the image tags
+
+- Update the FastGPT image tag: v4.9.10-fix2
+- Update the FastGPT commercial-edition image tag: v4.9.10-fix2
+- mcp_server: no update required
+- Sandbox: no update required
+- AIProxy: no update required
+
 ## 🚀 New features

 1. Support the PG `systemEnv.hnswMaxScanTuples` parameter to raise the total amount of data covered by iterative search.
-2. Workflows now use single-direction in/out connections, with quick adding of the next node.
-3. Feishu and Yuque knowledge bases opened to the open-source edition.
-4. Presets for the latest gemini and claude models.
+2. Knowledge-base preprocessing adds a "chunk condition" option to skip chunking in certain cases.
+3. Knowledge-base preprocessing adds a "paragraph-first" mode with a configurable maximum paragraph depth; the original "length-first" mode no longer embeds paragraph-first logic.
+4. Workflows now use single-direction in/out connections, with quick adding of the next node.
+5. Feishu and Yuque knowledge bases opened to the open-source edition.
+6. Presets for the latest gemini and claude models.

 ## ⚙️ Optimizations

@@ -22,10 +37,14 @@ weight: 790
 3. Renamed the knowledge base's "table dataset" to "backup import", and added export/import of knowledge-base indexes.
 4. Workflow knowledge-base citation limit: if the workflow has no related AI node, the interaction switches to pure manual input with an upper limit of 10 million.
 5. Voice input: mobile detection now accurately checks for phones rather than small screens.
+6. Improved the context-truncation algorithm to always keep at least one set of Human messages.

 ## 🐛 Fixes

 1. Incorrect score ordering when full-text searching across multiple knowledge bases.
 2. finish_reason captured from streamed responses could be incorrect.
 3. Tool-call mode did not save reasoning output.
 4. The knowledge-base indexSize parameter did not take effect.
+5. With workflows nested two levels deep, preview citations and context were incorrect.
+6. Converting xlsx to Markdown produced a leading extra space.
+7. When reading Markdown files, Base64 images were not converted and saved.
42  docSite/content/zh-cn/docs/development/upgrading/4911.md  Normal file
@@ -0,0 +1,42 @@
---
title: 'V4.9.11 (in progress)'
description: 'FastGPT V4.9.11 release notes'
icon: 'upgrade'
draft: false
toc: true
weight: 789
---

## Run the upgrade script

This script only needs to be run by commercial-edition users.

From any terminal, send one HTTP request, replacing {{rootkey}} with the `rootkey` from your environment variables and {{host}} with the **FastGPT domain**.

```bash
curl --location --request POST 'https://{{host}}/api/admin/initv4911' \
--header 'rootkey: {{rootkey}}' \
--header 'Content-Type: application/json'
```

**What the script does**

1. Migrates third-party knowledge-base API configurations.

## 🚀 New features

1. The commercial edition supports image knowledge bases.
2. Node search within workflows.
3. Sub-workflow version control in workflows: "keep latest version" can be selected so manual updates are unnecessary.
4. More audit operation logs.

## ⚙️ Optimizations

1. Raw-text cache switched to GridFS storage, raising the size limit.

## 🐛 Fixes

1. Globally declared system tools (declared by the administrator) could not be version-managed in workflows.
2. Context errors when an interactive node preceded a tool-call node.
3. Fixed backup imports of less than 1,000 characters failing to chunk.
4. Custom PDF parsing could not save base64 images.
@@ -1,5 +1,5 @@
 ---
-title: 'V4.9.4'
+title: 'V4.9.4 (includes upgrade script)'
 description: 'FastGPT V4.9.4 release notes'
 icon: 'upgrade'
 draft: false
161  docSite/content/zh-cn/docs/guide/knowledge_base/third_dataset.md  Normal file
@@ -0,0 +1,161 @@
---
title: 'Third-party knowledge base development'
description: 'This section explains in detail how to integrate your own third-party knowledge base into FastGPT'
icon: 'language'
draft: false
toc: true
weight: 410
---

The internet hosts a wide variety of document libraries, such as Feishu and Yuque, and different FastGPT users may rely on different ones. FastGPT currently has built-in support for Feishu and Yuque; to integrate another document library, refer to this section.

## A unified interface specification

To integrate different document libraries uniformly, FastGPT standardizes the third-party document library interface into 4 endpoints; see the [API file library interface](/docs/guide/knowledge_base/api_datase).

All built-in document libraries are extensions of the standard API file library. You can refer to the code in `FastGPT/packages/service/core/dataset/apiDataset/yuqueDataset/api.ts` when extending to another document library. Four interfaces need to be implemented:

1. List files
2. Get file content / file link
3. Get the original-document preview URL
4. Get file details

## Starting a third-party file library

For illustration, this walkthrough adds a Feishu knowledge base.

### 1. Add the third-party document library parameters

First, open `FastGPT\packages\global\core\dataset\apiDataset.d.ts` in the FastGPT project and add the Server type for the third-party document library. For example, Yuque documents require two fields, `userId` and `token`, as authentication information.

```ts
export type YuqueServer = {
  userId: string;
  token?: string;
  basePath?: string;
};
```

{{% alert icon="🤖 " context="success" %}}
If the document library supports selecting a root directory, add a `basePath` field.
{{% /alert %}}

### 2. Create the hook file

Each third-party document library maintains its API integration through a hook that implements 4 functions (a minimal sketch of the hook shape follows this list).

- Create a folder for the document library under `FastGPT\packages\service\core\dataset\apiDataset\`, then create an `api.ts` file inside it.
- In `api.ts`, define 4 functions:
  - `listFiles`: list files
  - `getFileContent`: get file content / file link
  - `getFileDetail`: get file details
  - `getFilePreviewUrl`: get the original-document preview URL
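A minimal TypeScript sketch of that hook shape; the parameter and return types are assumptions inferred from the 4 endpoints above, since the real signatures live in the apiDataset package:

```typescript
// Sketch only: shapes are assumptions; see yuqueDataset/api.ts for the real contract.
type APIFileItem = { id: string; name: string; parentId: string | null; type: 'file' | 'folder' };

export const useMyDocLibDataset = (server: { userId: string; token?: string; basePath?: string }) => {
  // 1. list files under a directory of the vendor document library
  const listFiles = async ({ parentId }: { parentId?: string }): Promise<APIFileItem[]> => {
    return []; // call the vendor API and map its items into APIFileItem
  };
  // 2. fetch file content (raw text) or a download link
  const getFileContent = async ({ apiFileId }: { apiFileId: string }) => {
    return { title: 'doc', rawText: '' };
  };
  // 3. fetch file details
  const getFileDetail = async ({ apiFileId }: { apiFileId: string }): Promise<APIFileItem | null> => {
    return null;
  };
  // 4. build the original-document preview URL (hypothetical URL shape)
  const getFilePreviewUrl = async ({ apiFileId }: { apiFileId: string }): Promise<string> => {
    return `https://example.com/docs/${apiFileId}`;
  };
  return { listFiles, getFileContent, getFileDetail, getFilePreviewUrl };
};
```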
### 3. Add the configuration field to the database

- In `packages/service/core/dataset/schema.ts`, add the configuration field for the third-party document library, with the type uniformly set to `Object` (a hedged sketch follows).
- In `FastGPT/packages/global/core/dataset/type.d.ts`, add the data type of the configuration field, set to the parameter type created in step 1.



{{% alert icon="🤖 " context="success" %}}
After modifying `schema.ts`, restart the FastGPT project for the change to take effect.
{{% /alert %}}
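A minimal sketch of the schema addition, assuming a Mongoose-style schema as used elsewhere in the service package; the `myDocLibServer` field name is hypothetical:

```typescript
import { Schema, model } from 'mongoose';

// Sketch only: 'myDocLibServer' is a hypothetical field name for your document library.
const DatasetSchema = new Schema({
  name: String,
  // Third-party document library config, stored as a plain Object per step 3:
  myDocLibServer: Object
});

export const MongoDatasetSketch = model('dataset_sketch', DatasetSchema);
```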
### 4. Add the knowledge-base type

In `projects/app/src/web/core/dataset/constants.ts`, add your knowledge-base type:

```TS
export const datasetTypeCourseMap: Record<`${DatasetTypeEnum}`, string> = {
  [DatasetTypeEnum.folder]: '',
  [DatasetTypeEnum.dataset]: '',
  [DatasetTypeEnum.apiDataset]: '/docs/guide/knowledge_base/api_dataset/',
  [DatasetTypeEnum.websiteDataset]: '/docs/guide/knowledge_base/websync/',
  [DatasetTypeEnum.feishuShare]: '/docs/guide/knowledge_base/lark_share_dataset/',
  [DatasetTypeEnum.feishuKnowledge]: '/docs/guide/knowledge_base/lark_knowledge_dataset/',
  [DatasetTypeEnum.yuque]: '/docs/guide/knowledge_base/yuque_dataset/',
  [DatasetTypeEnum.externalFile]: ''
};
```

{{% alert icon="🤖 " context="success" %}}
Add your knowledge-base type to datasetTypeCourseMap; the string value is the corresponding documentation page, which you can fill in if one exists.
Documentation lives under `FastGPT\docSite\content\zh-cn\docs\guide\knowledge_base\`.
{{% /alert %}}
## Add the frontend pieces

Add your i18n translations in `FastGPT\packages\web\i18n\zh-CN\dataset.json`, `FastGPT\packages\web\i18n\en\dataset.json`, and `FastGPT\packages\web\i18n\zh-Hant\dataset.json`. Taking the Chinese translation as an example, roughly the following entries are needed:

![i18n translations](/imgs/thirddataset-2.png)

Add your knowledge-base icons under `FastGPT\packages\web\components\common\Icon\icons\core\dataset\` — two in total, `Outline` and `Color`, i.e. the uncolored and colored variants, as shown in the images below.

![Icon](/imgs/thirddataset-3.png)

In `FastGPT\packages\web\components\common\Icon\constants.ts`, register your icons. The `import` is the icon's file path.

![constants icons](/imgs/thirddataset-4.png)

In `FastGPT\packages\global\core\dataset\constants.ts`, add your knowledge-base type.

![dataset type](/imgs/thirddataset-5.png)

{{% alert icon="🤖 " context="success" %}}
`label` is the knowledge-base name you previously added via i18n.
`icon` is the icon you previously added; see the final checklist for the i18n additions.
{{% /alert %}}

In `FastGPT\projects\app\src\pages\dataset\list\index.tsx`, add the following. This file drives the menu that appears after clicking `Create` on the knowledge-base list page; a knowledge base can only be created after it is added here.

![dataset list](/imgs/thirddataset-6.png)

In `FastGPT\projects\app\src\pageComponents\dataset\detail\Info\index.tsx`, add the following.

![Info](/imgs/thirddataset-7.png)

In `FastGPT\projects\app\src\pageComponents\dataset\list\CreateModal.tsx`, add the following.

| | |
| --- | --- |
| ![CreateModal-1](/imgs/thirddataset-8.png) | ![CreateModal-2](/imgs/thirddataset-9.png) |

In `FastGPT\projects\app\src\pageComponents\dataset\list\SideTag.tsx`, add the following.

![SideTag](/imgs/thirddataset-10.png)

In `FastGPT\projects\app\src\web\core\dataset\context\datasetPageContext.tsx`, add the following.

![datasetPageContext](/imgs/thirddataset-12.png)

## Add the configuration form

In `FastGPT\projects\app\src\pageComponents\dataset\ApiDatasetForm.tsx`, add the following. This file drives the field inputs on the knowledge-base creation page.

| | | |
| --- | --- | --- |
| ![ApiDatasetForm-1](/imgs/thirddataset-13.png) | ![ApiDatasetForm-2](/imgs/thirddataset-14.png) | ![ApiDatasetForm-3](/imgs/thirddataset-15.png) |

The two components added in the code render the root-directory selection, matching the designed getFileDetail API method; if your files do not support it, you can omit them.

```
{renderBaseUrlSelector()} // renders the `Base URL` field
{renderDirectoryModal()} // the `Select root directory` window that opens after clicking `Select`; see the image
```

| | |
| --- | --- |
| ![Select root directory-1](/imgs/thirddataset-16.png) | ![Select root directory-2](/imgs/thirddataset-17.png) |

If the knowledge base needs root-directory support, the related content also has to be added in the `ApiDatasetForm` file.

## Miscellaneous additions

Finally, the `server` type must be added in many files; they are too numerous and too small to list one by one, so here is the method instead: use your editor's global search for `YuqueServer` and `yuqueServer`, and add your own knowledge-base type in each file found.

## Tips

After creating the knowledge base, it is recommended to test all of its features end to end to confirm nothing is missing. If your knowledge base misbehaves and the docs have no matching fix, the miscellaneous additions are almost certainly incomplete: repeat the global search for `YuqueServer` and `yuqueServer` and check whether any location is still missing your type.
@@ -28,7 +28,6 @@ The FastGPT commercial edition is an enhanced version of the open-source edition with some exclusive features:
 | Publish-channel security configuration | ❌ | ✅ | ✅ |
 | Content moderation | ❌ | ✅ | ✅ |
 | Website sync | ❌ | ✅ | ✅ |
 | Mainstream document library integration (currently: Yuque, Feishu) | ❌ | ✅ | ✅ |
 | Enhanced training mode | ❌ | ✅ | ✅ |
 | Quick integration with third-party apps (Feishu, Official Accounts) | ❌ | ✅ | ✅ |
 | Admin console | ❌ | ✅ | Not needed |
@@ -6,7 +6,8 @@ export const fileImgs = [
   { suffix: '(doc|docs)', src: 'file/fill/doc' },
   { suffix: 'txt', src: 'file/fill/txt' },
   { suffix: 'md', src: 'file/fill/markdown' },
-  { suffix: 'html', src: 'file/fill/html' }
+  { suffix: 'html', src: 'file/fill/html' },
+  { suffix: '(jpg|jpeg|png|gif|bmp|webp|svg|ico|tiff|tif)', src: 'image' }

   // { suffix: '.', src: '/imgs/files/file.svg' }
 ];
@@ -2,4 +2,5 @@ export type AuthFrequencyLimitProps = {
   eventId: string;
   maxAmount: number;
   expiredTime: Date;
+  num?: number;
 };
@@ -7,6 +7,10 @@ export const CUSTOM_SPLIT_SIGN = '-----CUSTOM_SPLIT_SIGN-----';
 type SplitProps = {
   text: string;
   chunkSize: number;

+  paragraphChunkDeep?: number; // Paragraph deep
+  paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
+
   maxSize?: number;
   overlapRatio?: number;
   customReg?: string[];
@@ -108,6 +112,8 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   let {
     text = '',
     chunkSize,
+    paragraphChunkDeep = 5,
+    paragraphChunkMinSize = 100,
     maxSize = defaultMaxChunkSize,
     overlapRatio = 0.15,
     customReg = []
@@ -123,7 +129,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   text = text.replace(/(```[\s\S]*?```|~~~[\s\S]*?~~~)/g, function (match) {
     return match.replace(/\n/g, codeBlockMarker);
   });
-  // 2. Table handling - extract tables separately and merge their headers
+  // 2. Markdown table handling - extract tables separately and merge their headers
   const tableReg =
     /(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n?)*)(?:\n|$)/g;
   const tableDataList = text.match(tableReg);
@@ -143,25 +149,40 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   text = text.replace(/(\r?\n|\r){3,}/g, '\n\n\n');

   // The larger maxLen is, the next sentence is less likely to trigger splitting
-  const markdownIndex = 4;
-  const forbidOverlapIndex = 8;
+  const customRegLen = customReg.length;
+  const markdownIndex = paragraphChunkDeep - 1;
+  const forbidOverlapIndex = customRegLen + markdownIndex + 4;
+
+  const markdownHeaderRules = ((deep?: number): { reg: RegExp; maxLen: number }[] => {
+    if (!deep || deep === 0) return [];
+
+    const maxDeep = Math.min(deep, 8); // Maximum 8 levels
+    const rules: { reg: RegExp; maxLen: number }[] = [];
+
+    for (let i = 1; i <= maxDeep; i++) {
+      const hashSymbols = '#'.repeat(i);
+      rules.push({
+        reg: new RegExp(`^(${hashSymbols}\\s[^\\n]+\\n)`, 'gm'),
+        maxLen: chunkSize
+      });
+    }
+
+    return rules;
+  })(paragraphChunkDeep);

   const stepReges: { reg: RegExp | string; maxLen: number }[] = [
     ...customReg.map((text) => ({
       reg: text.replaceAll('\\n', '\n'),
       maxLen: chunkSize
     })),
-    { reg: /^(#\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(##\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(###\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(####\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(#####\s[^\n]+\n)/gm, maxLen: chunkSize },
+    ...markdownHeaderRules,

     { reg: /([\n](```[\s\S]*?```|~~~[\s\S]*?~~~))/g, maxLen: maxSize }, // code block
     // Keep HTML table tags as complete as possible
     {
       reg: /(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n)*)/g,
-      maxLen: Math.min(chunkSize * 1.5, maxSize)
-    }, // keep tables as complete as possible
+      maxLen: chunkSize
+    }, // keep Markdown tables as complete as possible
     { reg: /(\n{2,})/g, maxLen: chunkSize },
     { reg: /([\n])/g, maxLen: chunkSize },
     // ------ There's no overlap on the top
@@ -172,12 +193,10 @@ const commonSplit = (props: SplitProps): SplitResponse => {
     { reg: /([,]|,\s)/g, maxLen: chunkSize }
   ];

-  const customRegLen = customReg.length;
   const checkIsCustomStep = (step: number) => step < customRegLen;
   const checkIsMarkdownSplit = (step: number) =>
     step >= customRegLen && step <= markdownIndex + customRegLen;

-  const checkForbidOverlap = (step: number) => step <= forbidOverlapIndex + customRegLen;
+  const checkForbidOverlap = (step: number) => step <= forbidOverlapIndex;

   // if use markdown title split, Separate record title
   const getSplitTexts = ({ text, step }: { text: string; step: number }) => {
@@ -301,6 +320,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   const splitTexts = getSplitTexts({ text, step });

   const chunks: string[] = [];

   for (let i = 0; i < splitTexts.length; i++) {
     const item = splitTexts[i];

@@ -443,7 +463,6 @@ const commonSplit = (props: SplitProps): SplitResponse => {
  */
 export const splitText2Chunks = (props: SplitProps): SplitResponse => {
   let { text = '' } = props;
   const start = Date.now();
   const splitWithCustomSign = text.split(CUSTOM_SPLIT_SIGN);

   const splitResult = splitWithCustomSign.map((item) => {
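A standalone sketch of the header-rule generation introduced above, runnable outside the project; the `chunkSize` value is an arbitrary stand-in:

```typescript
// Mirrors the markdownHeaderRules IIFE from the diff above.
const chunkSize = 512; // arbitrary stand-in value

const buildHeaderRules = (deep?: number): { reg: RegExp; maxLen: number }[] => {
  if (!deep || deep === 0) return [];
  const maxDeep = Math.min(deep, 8); // cap at 8 heading levels, as in the source
  const rules: { reg: RegExp; maxLen: number }[] = [];
  for (let i = 1; i <= maxDeep; i++) {
    const hashSymbols = '#'.repeat(i);
    rules.push({ reg: new RegExp(`^(${hashSymbols}\\s[^\\n]+\\n)`, 'gm'), maxLen: chunkSize });
  }
  return rules;
};

// paragraphChunkDeep = 3 yields split rules for '#', '##' and '###' headings:
console.log(buildHeaderRules(3).map((r) => r.reg.source));
```

This is what makes the paragraph depth configurable: the previous version hardcoded five heading-level rules, while the new code derives the rule list from `paragraphChunkDeep`.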
@@ -34,7 +34,7 @@ export const valToStr = (val: any) => {
 };

 // replace {{variable}} to value
-export function replaceVariable(text: any, obj: Record<string, string | number>) {
+export function replaceVariable(text: any, obj: Record<string, string | number | undefined>) {
   if (typeof text !== 'string') return text;

   for (const key in obj) {
@@ -10,6 +10,8 @@ import { AppTypeEnum } from './constants';
 import { AppErrEnum } from '../../common/error/code/app';
 import { PluginErrEnum } from '../../common/error/code/plugin';
 import { i18nT } from '../../../web/i18n/utils';
+import appErrList from '../../common/error/code/app';
+import pluginErrList from '../../common/error/code/plugin';

 export const getDefaultAppForm = (): AppSimpleEditFormType => {
   return {
@@ -190,17 +192,10 @@ export const getAppType = (config?: WorkflowTemplateBasicType | AppSimpleEditFor
   return '';
 };

-export const formatToolError = (error?: string) => {
-  const unExistError: Array<string> = [
-    AppErrEnum.unAuthApp,
-    AppErrEnum.unExist,
-    PluginErrEnum.unAuth,
-    PluginErrEnum.unExist
-  ];
-
-  if (error && unExistError.includes(error)) {
-    return i18nT('app:un_auth');
-  } else {
-    return error;
-  }
-};
+export const formatToolError = (error?: any) => {
+  if (!error || typeof error !== 'string') return;
+
+  const errorText = appErrList[error]?.message || pluginErrList[error]?.message;
+
+  return errorText || error;
+};
24  packages/global/core/dataset/api.d.ts  vendored
@@ -1,4 +1,9 @@
-import type { ChunkSettingsType, DatasetDataIndexItemType, DatasetSchemaType } from './type';
+import type {
+  ChunkSettingsType,
+  DatasetDataIndexItemType,
+  DatasetDataFieldType,
+  DatasetSchemaType
+} from './type';
 import type {
   DatasetCollectionTypeEnum,
   DatasetCollectionDataProcessModeEnum,
@@ -7,12 +12,14 @@ import type {
   ChunkTriggerConfigTypeEnum,
   ParagraphChunkAIModeEnum
 } from './constants';
 import type { LLMModelItemType } from '../ai/model.d';
-import type { ParentIdType } from 'common/parentFolder/type';
+import type { ParentIdType } from '../../common/parentFolder/type';

 /* ================= dataset ===================== */
 export type DatasetUpdateBody = {
   id: string;
+
+  apiDatasetServer?: DatasetSchemaType['apiDatasetServer'];
+
   parentId?: ParentIdType;
   name?: string;
   avatar?: string;
@@ -24,9 +31,6 @@ export type DatasetUpdateBody = {
   websiteConfig?: DatasetSchemaType['websiteConfig'];
   externalReadUrl?: DatasetSchemaType['externalReadUrl'];
   defaultPermission?: DatasetSchemaType['defaultPermission'];
-  apiServer?: DatasetSchemaType['apiServer'];
-  yuqueServer?: DatasetSchemaType['yuqueServer'];
-  feishuServer?: DatasetSchemaType['feishuServer'];
   chunkSettings?: DatasetSchemaType['chunkSettings'];

   // sync schedule
@@ -100,6 +104,9 @@ export type ExternalFileCreateDatasetCollectionParams = ApiCreateDatasetCollecti
   externalFileUrl: string;
   filename?: string;
 };
+export type ImageCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
+  collectionName: string;
+};

 /* ================= tag ===================== */
 export type CreateDatasetCollectionTagParams = {
@@ -125,8 +132,9 @@ export type PgSearchRawType = {
   score: number;
 };
 export type PushDatasetDataChunkProps = {
-  q: string; // embedding content
-  a?: string; // bonus content
+  q?: string;
+  a?: string;
+  imageId?: string;
   chunkIndex?: number;
   indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
 };
@@ -1,5 +1,5 @@
-import { RequireOnlyOne } from '../../common/type/utils';
-import type { ParentIdType } from '../../common/parentFolder/type.d';
+import { RequireOnlyOne } from '../../../common/type/utils';
+import type { ParentIdType } from '../../../common/parentFolder/type';

 export type APIFileItem = {
   id: string;
@@ -28,6 +28,12 @@ export type YuqueServer = {
   basePath?: string;
 };

+export type ApiDatasetServerType = {
+  apiServer?: APIFileServer;
+  feishuServer?: FeishuServer;
+  yuqueServer?: YuqueServer;
+};
+
 // Api dataset api

 export type APIFileListResponse = APIFileItem[];
31  packages/global/core/dataset/apiDataset/utils.ts  Normal file
@@ -0,0 +1,31 @@
import type { ApiDatasetServerType } from './type';

export const filterApiDatasetServerPublicData = (apiDatasetServer?: ApiDatasetServerType) => {
  if (!apiDatasetServer) return undefined;

  const { apiServer, yuqueServer, feishuServer } = apiDatasetServer;

  return {
    apiServer: apiServer
      ? {
          baseUrl: apiServer.baseUrl,
          authorization: '',
          basePath: apiServer.basePath
        }
      : undefined,
    yuqueServer: yuqueServer
      ? {
          userId: yuqueServer.userId,
          token: '',
          basePath: yuqueServer.basePath
        }
      : undefined,
    feishuServer: feishuServer
      ? {
          appId: feishuServer.appId,
          appSecret: '',
          folderToken: feishuServer.folderToken
        }
      : undefined
  };
};
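A short usage sketch of this new helper, showing how credentials are blanked before a stored config is exposed; the input values are illustrative:

```typescript
import { filterApiDatasetServerPublicData } from './utils'; // the file added above

// Illustrative input: a stored server config that still contains a live token.
const publicView = filterApiDatasetServerPublicData({
  yuqueServer: { userId: 'u_123', token: 'secret-token', basePath: '/kb' }
});

console.log(publicView);
// => { apiServer: undefined,
//      yuqueServer: { userId: 'u_123', token: '', basePath: '/kb' },
//      feishuServer: undefined }
// The token is emptied, so responses built from this view never leak stored secrets.
```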
@@ -6,45 +6,80 @@ export enum DatasetTypeEnum {
   dataset = 'dataset',
   websiteDataset = 'websiteDataset', // deep link
   externalFile = 'externalFile',
+
   apiDataset = 'apiDataset',
   feishu = 'feishu',
   yuque = 'yuque'
 }
-export const DatasetTypeMap = {
+
+// @ts-ignore
+export const ApiDatasetTypeMap: Record<
+  `${DatasetTypeEnum}`,
+  {
+    icon: string;
+    avatar: string;
+    label: any;
+    collectionLabel: string;
+    courseUrl?: string;
+  }
+> = {
+  [DatasetTypeEnum.apiDataset]: {
+    icon: 'core/dataset/externalDatasetOutline',
+    avatar: 'core/dataset/externalDatasetColor',
+    label: i18nT('dataset:api_file'),
+    collectionLabel: i18nT('common:File'),
+    courseUrl: '/docs/guide/knowledge_base/api_dataset/'
+  },
+  [DatasetTypeEnum.feishu]: {
+    icon: 'core/dataset/feishuDatasetOutline',
+    avatar: 'core/dataset/feishuDatasetColor',
+    label: i18nT('dataset:feishu_dataset'),
+    collectionLabel: i18nT('common:File'),
+    courseUrl: '/docs/guide/knowledge_base/lark_dataset/'
+  },
+  [DatasetTypeEnum.yuque]: {
+    icon: 'core/dataset/yuqueDatasetOutline',
+    avatar: 'core/dataset/yuqueDatasetColor',
+    label: i18nT('dataset:yuque_dataset'),
+    collectionLabel: i18nT('common:File'),
+    courseUrl: '/docs/guide/knowledge_base/yuque_dataset/'
+  }
+};
+export const DatasetTypeMap: Record<
+  `${DatasetTypeEnum}`,
+  {
+    icon: string;
+    avatar: string;
+    label: any;
+    collectionLabel: string;
+    courseUrl?: string;
+  }
+> = {
+  ...ApiDatasetTypeMap,
   [DatasetTypeEnum.folder]: {
     icon: 'common/folderFill',
     avatar: 'common/folderFill',
     label: i18nT('dataset:folder_dataset'),
     collectionLabel: i18nT('common:Folder')
   },
   [DatasetTypeEnum.dataset]: {
     icon: 'core/dataset/commonDatasetOutline',
     avatar: 'core/dataset/commonDatasetColor',
     label: i18nT('dataset:common_dataset'),
     collectionLabel: i18nT('common:File')
   },
   [DatasetTypeEnum.websiteDataset]: {
     icon: 'core/dataset/websiteDatasetOutline',
     avatar: 'core/dataset/websiteDatasetColor',
     label: i18nT('dataset:website_dataset'),
-    collectionLabel: i18nT('common:Website')
+    collectionLabel: i18nT('common:Website'),
+    courseUrl: '/docs/guide/knowledge_base/websync/'
   },
   [DatasetTypeEnum.externalFile]: {
     icon: 'core/dataset/externalDatasetOutline',
     avatar: 'core/dataset/externalDatasetColor',
     label: i18nT('dataset:external_file'),
     collectionLabel: i18nT('common:File')
-  },
-  [DatasetTypeEnum.apiDataset]: {
-    icon: 'core/dataset/externalDatasetOutline',
-    label: i18nT('dataset:api_file'),
-    collectionLabel: i18nT('common:File')
-  },
-  [DatasetTypeEnum.feishu]: {
-    icon: 'core/dataset/feishuDatasetOutline',
-    label: i18nT('dataset:feishu_dataset'),
-    collectionLabel: i18nT('common:File')
-  },
-  [DatasetTypeEnum.yuque]: {
-    icon: 'core/dataset/yuqueDatasetOutline',
-    label: i18nT('dataset:yuque_dataset'),
-    collectionLabel: i18nT('common:File')
   }
 };
@@ -77,7 +112,8 @@ export enum DatasetCollectionTypeEnum {
   file = 'file',
   link = 'link', // one link
   externalFile = 'externalFile',
-  apiFile = 'apiFile'
+  apiFile = 'apiFile',
+  images = 'images'
 }
 export const DatasetCollectionTypeMap = {
   [DatasetCollectionTypeEnum.folder]: {
@@ -97,6 +133,9 @@ export const DatasetCollectionTypeMap = {
   },
   [DatasetCollectionTypeEnum.apiFile]: {
     name: i18nT('common:core.dataset.apiFile')
+  },
+  [DatasetCollectionTypeEnum.images]: {
+    name: i18nT('dataset:core.dataset.Image collection')
   }
 };
@@ -120,6 +159,7 @@ export const DatasetCollectionSyncResultMap = {
 export enum DatasetCollectionDataProcessModeEnum {
   chunk = 'chunk',
   qa = 'qa',
+  imageParse = 'imageParse',
   backup = 'backup',

   auto = 'auto' // abandon
@@ -133,6 +173,10 @@ export const DatasetCollectionDataProcessModeMap = {
     label: i18nT('common:core.dataset.training.QA mode'),
     tooltip: i18nT('common:core.dataset.import.QA Import Tip')
   },
+  [DatasetCollectionDataProcessModeEnum.imageParse]: {
+    label: i18nT('dataset:training.Image mode'),
+    tooltip: i18nT('common:core.dataset.import.Chunk Split Tip')
+  },
   [DatasetCollectionDataProcessModeEnum.backup]: {
     label: i18nT('dataset:backup_mode'),
     tooltip: i18nT('dataset:backup_mode')
@@ -172,14 +216,16 @@ export enum ImportDataSourceEnum {
   fileCustom = 'fileCustom',
   externalFile = 'externalFile',
   apiDataset = 'apiDataset',
-  reTraining = 'reTraining'
+  reTraining = 'reTraining',
+  imageDataset = 'imageDataset'
 }

 export enum TrainingModeEnum {
   chunk = 'chunk',
   qa = 'qa',
   auto = 'auto',
-  image = 'image'
+  image = 'image',
+  imageParse = 'imageParse'
 }

 /* ------------ search -------------- */
4  packages/global/core/dataset/controller.d.ts  vendored
@@ -8,17 +8,19 @@ export type CreateDatasetDataProps = {
   chunkIndex?: number;
   q: string;
   a?: string;
+  imageId?: string;
   indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
 };

 export type UpdateDatasetDataProps = {
   dataId: string;

-  q?: string;
+  q: string;
   a?: string;
   indexes?: (Omit<DatasetDataIndexItemType, 'dataId'> & {
     dataId?: string; // pg data id
   })[];
+  imageId?: string;
 };

 export type PatchIndexesProps =
13  packages/global/core/dataset/image/type.d.ts  vendored  Normal file
@@ -0,0 +1,13 @@
export type DatasetImageSchema = {
  _id: string;
  teamId: string;
  datasetId: string;
  collectionId?: string;
  name: string;
  contentType: string;
  size: number;
  metadata?: Record<string, any>;
  expiredTime?: Date;
  createdAt: Date;
  updatedAt: Date;
};
@@ -120,7 +120,6 @@ export const computeChunkSize = (params: {

   return Math.min(params.chunkSize ?? chunkAutoChunkSize, getLLMMaxChunkSize(params.llmModel));
 };

 export const computeChunkSplitter = (params: {
   chunkSettingMode?: ChunkSettingModeEnum;
   chunkSplitMode?: DataChunkSplitModeEnum;
@@ -129,8 +128,21 @@ export const computeChunkSplitter = (params: {
   if (params.chunkSettingMode === ChunkSettingModeEnum.auto) {
     return undefined;
   }
-  if (params.chunkSplitMode === DataChunkSplitModeEnum.size) {
+  if (params.chunkSplitMode !== DataChunkSplitModeEnum.char) {
     return undefined;
   }
   return params.chunkSplitter;
 };
+export const computeParagraphChunkDeep = (params: {
+  chunkSettingMode?: ChunkSettingModeEnum;
+  chunkSplitMode?: DataChunkSplitModeEnum;
+  paragraphChunkDeep?: number;
+}) => {
+  if (params.chunkSettingMode === ChunkSettingModeEnum.auto) {
+    return 5;
+  }
+  if (params.chunkSplitMode === DataChunkSplitModeEnum.paragraph) {
+    return params.paragraphChunkDeep;
+  }
+  return 0;
+};
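A quick usage sketch of the new `computeParagraphChunkDeep` helper above; the stand-in enums mirror the members referenced in the diff (`custom` is an assumed member name, the others appear in the source):

```typescript
// Stand-in enums: the real ones live in ./constants.
enum ChunkSettingModeEnum { auto = 'auto', custom = 'custom' }
enum DataChunkSplitModeEnum { paragraph = 'paragraph', size = 'size', char = 'char' }

const computeParagraphChunkDeep = (params: {
  chunkSettingMode?: ChunkSettingModeEnum;
  chunkSplitMode?: DataChunkSplitModeEnum;
  paragraphChunkDeep?: number;
}) => {
  if (params.chunkSettingMode === ChunkSettingModeEnum.auto) return 5; // auto mode: fixed depth
  if (params.chunkSplitMode === DataChunkSplitModeEnum.paragraph) return params.paragraphChunkDeep;
  return 0; // other split modes disable paragraph-first splitting
};

console.log(computeParagraphChunkDeep({ chunkSettingMode: ChunkSettingModeEnum.auto })); // 5
console.log(computeParagraphChunkDeep({
  chunkSettingMode: ChunkSettingModeEnum.custom,
  chunkSplitMode: DataChunkSplitModeEnum.paragraph,
  paragraphChunkDeep: 3
})); // 3
console.log(computeParagraphChunkDeep({
  chunkSettingMode: ChunkSettingModeEnum.custom,
  chunkSplitMode: DataChunkSplitModeEnum.size
})); // 0
```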
60  packages/global/core/dataset/type.d.ts  vendored
@@ -9,12 +9,19 @@ import type {
   DatasetTypeEnum,
   SearchScoreTypeEnum,
   TrainingModeEnum,
-  ChunkSettingModeEnum
+  ChunkSettingModeEnum,
+  ChunkTriggerConfigTypeEnum
 } from './constants';
 import type { DatasetPermission } from '../../support/permission/dataset/controller';
-import type { APIFileServer, FeishuServer, YuqueServer } from './apiDataset';
+import type {
+  ApiDatasetServerType,
+  APIFileServer,
+  FeishuServer,
+  YuqueServer
+} from './apiDataset/type';
 import type { SourceMemberType } from 'support/user/type';
 import type { DatasetDataIndexTypeEnum } from './data/constants';
+import type { ParentIdType } from 'common/parentFolder/type';

 export type ChunkSettingsType = {
   trainingType?: DatasetCollectionDataProcessModeEnum;
@@ -37,11 +44,10 @@ export type ChunkSettingsType = {
   paragraphChunkAIMode?: ParagraphChunkAIModeEnum;
   paragraphChunkDeep?: number; // Paragraph deep
   paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
-  paragraphChunkMaxSize?: number; // Paragraph max size, if too large, it will split
   // Size split
-  chunkSize?: number;
+  chunkSize?: number; // chunk/qa chunk size, paragraph max chunk size.
   // Char split
-  chunkSplitter?: string;
+  chunkSplitter?: string; // chunk/qa chunk splitter
   indexSize?: number;

   qaPrompt?: string;
@@ -49,7 +55,7 @@ export type ChunkSettingsType = {

 export type DatasetSchemaType = {
   _id: string;
-  parentId?: string;
+  parentId: ParentIdType;
   userId: string;
   teamId: string;
   tmbId: string;
@@ -72,14 +78,16 @@ export type DatasetSchemaType = {
   chunkSettings?: ChunkSettingsType;

   inheritPermission: boolean;
-  apiServer?: APIFileServer;
-  feishuServer?: FeishuServer;
-  yuqueServer?: YuqueServer;
+
+  apiDatasetServer?: ApiDatasetServerType;

   // abandon
   autoSync?: boolean;
   externalReadUrl?: string;
   defaultPermission?: number;
+  apiServer?: APIFileServer;
+  feishuServer?: FeishuServer;
+  yuqueServer?: YuqueServer;
 };

 export type DatasetCollectionSchemaType = ChunkSettingsType & {
@@ -132,7 +140,13 @@ export type DatasetDataIndexItemType = {
   dataId: string; // pg data id
   text: string;
 };
-export type DatasetDataSchemaType = {
+
+export type DatasetDataFieldType = {
+  q: string; // large chunks or question
+  a?: string; // answer or custom content
+  imageId?: string;
+};
+export type DatasetDataSchemaType = DatasetDataFieldType & {
   _id: string;
   userId: string;
   teamId: string;
@@ -141,13 +155,9 @@ export type DatasetDataSchemaType = {
   collectionId: string;
   chunkIndex: number;
   updateTime: Date;
-  q: string; // large chunks or question
-  a: string; // answer or custom content
-  history?: {
-    q: string;
-    a: string;
+  history?: (DatasetDataFieldType & {
     updateTime: Date;
-  }[];
+  })[];
   forbid?: boolean;
   fullTextToken: string;
   indexes: DatasetDataIndexItemType[];
@@ -179,6 +189,7 @@ export type DatasetTrainingSchemaType = {
   dataId?: string;
   q: string;
   a: string;
+  imageId?: string;
   chunkIndex: number;
   indexSize?: number;
   weight: number;
@@ -244,20 +255,18 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & {
 };

 /* ================= data ===================== */
-export type DatasetDataItemType = {
+export type DatasetDataItemType = DatasetDataFieldType & {
   id: string;
   teamId: string;
   datasetId: string;
+  imagePreivewUrl?: string;
   updateTime: Date;
   collectionId: string;
   sourceName: string;
   sourceId?: string;
-  q: string;
-  a: string;
   chunkIndex: number;
   indexes: DatasetDataIndexItemType[];
   isOwner: boolean;
   // permission: DatasetPermission;
 };

 /* --------------- file ---------------------- */
@@ -284,3 +293,14 @@ export type SearchDataResponseItemType = Omit<
   score: { type: `${SearchScoreTypeEnum}`; value: number; index: number }[];
   // score: number;
 };
+
+export type DatasetCiteItemType = {
+  _id: string;
+  q: string;
+  a?: string;
+  imagePreivewUrl?: string;
+  history?: DatasetDataSchemaType['history'];
+  updateTime: DatasetDataSchemaType['updateTime'];
+  index: DatasetDataSchemaType['chunkIndex'];
+  updated?: boolean;
+};
@@ -2,10 +2,15 @@ import { TrainingModeEnum, DatasetCollectionTypeEnum } from './constants';
 import { getFileIcon } from '../../common/file/icon';
 import { strIsLink } from '../../common/string/tools';

-export function getCollectionIcon(
-  type: DatasetCollectionTypeEnum = DatasetCollectionTypeEnum.file,
-  name = ''
-) {
+export function getCollectionIcon({
+  type = DatasetCollectionTypeEnum.file,
+  name = '',
+  sourceId
+}: {
+  type?: DatasetCollectionTypeEnum;
+  name?: string;
+  sourceId?: string;
+}) {
   if (type === DatasetCollectionTypeEnum.folder) {
     return 'common/folderFill';
   }
@@ -15,7 +20,10 @@ export function getCollectionIcon(
   if (type === DatasetCollectionTypeEnum.virtual) {
     return 'file/fill/manual';
   }
-  return getFileIcon(name);
+  if (type === DatasetCollectionTypeEnum.images) {
+    return 'core/dataset/imageFill';
+  }
+  return getSourceNameIcon({ sourceName: name, sourceId });
 }
 export function getSourceNameIcon({
   sourceName,
@@ -40,5 +48,6 @@ export function getSourceNameIcon({
 export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {
   if (mode === TrainingModeEnum.qa) return data.length * 20;
   if (mode === TrainingModeEnum.auto) return data.length * 5;
+  if (mode === TrainingModeEnum.image) return data.length * 2;
   return data.length;
 };
2  packages/global/core/workflow/type/node.d.ts  vendored
@@ -59,7 +59,6 @@ export type FlowNodeCommonType = {
 };

 export type PluginDataType = {
   version?: string;
   diagram?: string;
   userGuide?: string;
   courseUrl?: string;
@@ -126,6 +125,7 @@ export type FlowNodeItemType = FlowNodeTemplateType & {
   nodeId: string;
   parentNodeId?: string;
   isError?: boolean;
+  searchedText?: string;
   debugResult?: {
     status: 'running' | 'success' | 'skipped' | 'failed';
     message?: string;
@@ -1,4 +1,5 @@
 export enum OperationLogEventEnum {
+  //Team
   LOGIN = 'LOGIN',
   CREATE_INVITATION_LINK = 'CREATE_INVITATION_LINK',
   JOIN_TEAM = 'JOIN_TEAM',
@@ -11,5 +12,52 @@ export enum OperationLogEventEnum {
   RELOCATE_DEPARTMENT = 'RELOCATE_DEPARTMENT',
   CREATE_GROUP = 'CREATE_GROUP',
   DELETE_GROUP = 'DELETE_GROUP',
-  ASSIGN_PERMISSION = 'ASSIGN_PERMISSION'
+  ASSIGN_PERMISSION = 'ASSIGN_PERMISSION',
+  //APP
+  CREATE_APP = 'CREATE_APP',
+  UPDATE_APP_INFO = 'UPDATE_APP_INFO',
+  MOVE_APP = 'MOVE_APP',
+  DELETE_APP = 'DELETE_APP',
+  UPDATE_APP_COLLABORATOR = 'UPDATE_APP_COLLABORATOR',
+  DELETE_APP_COLLABORATOR = 'DELETE_APP_COLLABORATOR',
+  TRANSFER_APP_OWNERSHIP = 'TRANSFER_APP_OWNERSHIP',
+  CREATE_APP_COPY = 'CREATE_APP_COPY',
+  CREATE_APP_FOLDER = 'CREATE_APP_FOLDER',
+  UPDATE_PUBLISH_APP = 'UPDATE_PUBLISH_APP',
+  CREATE_APP_PUBLISH_CHANNEL = 'CREATE_APP_PUBLISH_CHANNEL',
+  UPDATE_APP_PUBLISH_CHANNEL = 'UPDATE_APP_PUBLISH_CHANNEL',
+  DELETE_APP_PUBLISH_CHANNEL = 'DELETE_APP_PUBLISH_CHANNEL',
+  EXPORT_APP_CHAT_LOG = 'EXPORT_APP_CHAT_LOG',
+  //Dataset
+  CREATE_DATASET = 'CREATE_DATASET',
+  UPDATE_DATASET = 'UPDATE_DATASET',
+  DELETE_DATASET = 'DELETE_DATASET',
+  MOVE_DATASET = 'MOVE_DATASET',
+  UPDATE_DATASET_COLLABORATOR = 'UPDATE_DATASET_COLLABORATOR',
+  DELETE_DATASET_COLLABORATOR = 'DELETE_DATASET_COLLABORATOR',
+  TRANSFER_DATASET_OWNERSHIP = 'TRANSFER_DATASET_OWNERSHIP',
+  EXPORT_DATASET = 'EXPORT_DATASET',
+  CREATE_DATASET_FOLDER = 'CREATE_DATASET_FOLDER',
+  //Collection
+  CREATE_COLLECTION = 'CREATE_COLLECTION',
+  UPDATE_COLLECTION = 'UPDATE_COLLECTION',
+  DELETE_COLLECTION = 'DELETE_COLLECTION',
+  RETRAIN_COLLECTION = 'RETRAIN_COLLECTION',
+  //Data
+  CREATE_DATA = 'CREATE_DATA',
+  UPDATE_DATA = 'UPDATE_DATA',
+  DELETE_DATA = 'DELETE_DATA',
+  //SearchTest
+  SEARCH_TEST = 'SEARCH_TEST',
+  //Account
+  CHANGE_PASSWORD = 'CHANGE_PASSWORD',
+  CHANGE_NOTIFICATION_SETTINGS = 'CHANGE_NOTIFICATION_SETTINGS',
+  CHANGE_MEMBER_NAME_ACCOUNT = 'CHANGE_MEMBER_NAME_ACCOUNT',
+  PURCHASE_PLAN = 'PURCHASE_PLAN',
+  EXPORT_BILL_RECORDS = 'EXPORT_BILL_RECORDS',
+  CREATE_INVOICE = 'CREATE_INVOICE',
+  SET_INVOICE_HEADER = 'SET_INVOICE_HEADER',
+  CREATE_API_KEY = 'CREATE_API_KEY',
+  UPDATE_API_KEY = 'UPDATE_API_KEY',
+  DELETE_API_KEY = 'DELETE_API_KEY'
 }
@@ -13,6 +13,7 @@ const staticPluginList = [
   'WeWorkWebhook',
   'google',
   'bing',
+  'bocha',
   'delay'
 ];
 // Run in worker thread (Have npm packages)
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4816",
   "name": "钉钉 webhook",
   "avatar": "plugins/dingding",
   "intro": "向钉钉机器人发起 webhook 请求。",

@@ -1,6 +1,5 @@
 {
   "author": "Menghuan1918",
-  "version": "488",
   "name": "PDF识别",
   "avatar": "plugins/doc2x",
   "intro": "将PDF文件发送至Doc2X进行解析,返回结构化的LaTeX公式的文本(markdown),支持传入String类型的URL或者流程输出中的文件链接变量",

@@ -1,6 +1,5 @@
 {
   "author": "Menghuan1918",
-  "version": "488",
   "name": "Doc2X服务",
   "avatar": "plugins/doc2x",
   "intro": "将传入的图片或PDF文件发送至Doc2X进行解析,返回带LaTeX公式的markdown格式的文本。",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4816",
   "name": "企业微信 webhook",
   "avatar": "plugins/qiwei",
   "intro": "向企业微信机器人发起 webhook 请求。只能内部群使用。",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4811",
   "name": "Bing搜索",
   "avatar": "core/workflow/template/bing",
   "intro": "在Bing中搜索。",
677  packages/plugins/src/bocha/template.json  Normal file
@@ -0,0 +1,677 @@
{
  "author": "",
  "name": "博查搜索",
  "avatar": "core/workflow/template/bocha",
  "intro": "使用博查AI搜索引擎进行网络搜索。",
  "showStatus": true,
  "weight": 10,
  "courseUrl": "",
  "isTool": true,
  "templateType": "search",
  "workflow": {
    "nodes": [
      {
        "nodeId": "pluginInput",
        "name": "workflow:template.plugin_start",
        "intro": "workflow:intro_plugin_input",
        "avatar": "core/workflow/template/workflowStart",
        "flowNodeType": "pluginInput",
        "showStatus": false,
        "position": { "x": 636.3048409085379, "y": -238.61714728578016 },
        "version": "481",
        "inputs": [
          {
            "renderTypeList": ["input"],
            "selectedTypeIndex": 0,
            "valueType": "string",
            "canEdit": true,
            "key": "apiKey",
            "label": "apiKey",
            "description": "博查API密钥",
            "defaultValue": "",
            "required": true
          },
          {
            "renderTypeList": ["input", "reference"],
            "selectedTypeIndex": 0,
            "valueType": "string",
            "canEdit": true,
            "key": "query",
            "label": "query",
            "description": "搜索查询词",
            "defaultValue": "",
            "required": true,
            "toolDescription": "搜索查询词"
          },
          {
            "renderTypeList": ["input", "reference"],
            "selectedTypeIndex": 0,
            "valueType": "string",
            "canEdit": true,
            "key": "freshness",
            "label": "freshness",
            "description": "搜索指定时间范围内的网页。可填值:oneDay(一天内)、oneWeek(一周内)、oneMonth(一个月内)、oneYear(一年内)、noLimit(不限,默认)、YYYY-MM-DD..YYYY-MM-DD(日期范围)、YYYY-MM-DD(指定日期)",
            "defaultValue": "noLimit",
            "required": false,
            "toolDescription": "搜索时间范围"
          },
          {
            "renderTypeList": ["input", "reference"],
            "selectedTypeIndex": 0,
            "valueType": "boolean",
            "canEdit": true,
            "key": "summary",
            "label": "summary",
            "description": "是否显示文本摘要。true显示,false不显示(默认)",
            "defaultValue": false,
            "required": false,
            "toolDescription": "是否显示文本摘要"
          },
          {
            "renderTypeList": ["input", "reference"],
            "selectedTypeIndex": 0,
            "valueType": "string",
            "canEdit": true,
            "key": "include",
            "label": "include",
            "description": "指定搜索的site范围。多个域名使用|或,分隔,最多20个。例如:qq.com|m.163.com",
            "defaultValue": "",
            "required": false,
            "toolDescription": "指定搜索的site范围"
          },
          {
            "renderTypeList": ["input", "reference"],
            "selectedTypeIndex": 0,
            "valueType": "string",
            "canEdit": true,
            "key": "exclude",
            "label": "exclude",
            "description": "排除搜索的网站范围。多个域名使用|或,分隔,最多20个。例如:qq.com|m.163.com",
            "defaultValue": "",
            "required": false,
            "toolDescription": "排除搜索的网站范围"
          },
          {
            "renderTypeList": ["input", "reference"],
            "selectedTypeIndex": 0,
            "valueType": "number",
            "canEdit": true,
            "key": "count",
            "label": "count",
            "description": "返回结果的条数。可填范围:1-50,默认为10",
            "defaultValue": 10,
            "required": false,
            "min": 1,
            "max": 50,
            "toolDescription": "返回结果条数"
          }
        ],
        "outputs": [
          { "id": "apiKey", "valueType": "string", "key": "apiKey", "label": "apiKey", "type": "hidden" },
          { "id": "query", "valueType": "string", "key": "query", "label": "query", "type": "hidden" },
          { "id": "freshness", "valueType": "string", "key": "freshness", "label": "freshness", "type": "hidden" },
          { "id": "summary", "valueType": "boolean", "key": "summary", "label": "summary", "type": "hidden" },
          { "id": "include", "valueType": "string", "key": "include", "label": "include", "type": "hidden" },
          { "id": "exclude", "valueType": "string", "key": "exclude", "label": "exclude", "type": "hidden" },
          { "id": "count", "valueType": "number", "key": "count", "label": "count", "type": "hidden" }
        ]
      },
      {
        "nodeId": "pluginOutput",
        "name": "common:core.module.template.self_output",
        "intro": "workflow:intro_custom_plugin_output",
        "avatar": "core/workflow/template/pluginOutput",
        "flowNodeType": "pluginOutput",
        "showStatus": false,
        "position": { "x": 2764.1105686698083, "y": -30.617147285780163 },
        "version": "481",
        "inputs": [
          {
            "renderTypeList": ["reference"],
            "valueType": "object",
            "canEdit": true,
            "key": "result",
            "label": "result",
            "isToolOutput": true,
            "description": "",
            "value": ["nyA6oA8mF1iW", "httpRawResponse"]
          }
        ],
        "outputs": []
      },
      {
        "nodeId": "pluginConfig",
        "name": "common:core.module.template.system_config",
        "intro": "",
        "avatar": "core/workflow/template/systemConfig",
        "flowNodeType": "pluginConfig",
        "position": { "x": 184.66337662472682, "y": -216.05298493910115 },
        "version": "4811",
        "inputs": [],
        "outputs": []
      },
      {
        "nodeId": "nyA6oA8mF1iW",
        "name": "HTTP 请求",
        "intro": "调用博查搜索API",
        "avatar": "core/workflow/template/httpRequest",
        "flowNodeType": "httpRequest468",
        "showStatus": true,
        "position": { "x": 1335.0647252518884, "y": -455.9043948565971 },
        "version": "481",
        "inputs": [
          {
            "key": "system_addInputParam",
            "renderTypeList": ["addInputParam"],
            "valueType": "dynamic",
            "label": "",
            "required": false,
            "description": "common:core.module.input.description.HTTP Dynamic Input",
            "customInputConfig": {
              "selectValueTypeList": ["string", "number", "boolean", "object", "arrayString", "arrayNumber", "arrayBoolean", "arrayObject", "arrayAny", "any", "chatHistory", "datasetQuote", "dynamic", "selectDataset", "selectApp"],
              "showDescription": false,
              "showDefaultValue": true
            },
            "debugLabel": "",
            "toolDescription": ""
          },
          { "key": "system_httpMethod", "renderTypeList": ["custom"], "valueType": "string", "label": "", "value": "POST", "required": true, "debugLabel": "", "toolDescription": "" },
          { "key": "system_httpTimeout", "renderTypeList": ["custom"], "valueType": "number", "label": "", "value": 30, "min": 5, "max": 600, "required": true, "debugLabel": "", "toolDescription": "" },
          {
            "key": "system_httpReqUrl",
            "renderTypeList": ["hidden"],
            "valueType": "string",
            "label": "",
            "description": "common:core.module.input.description.Http Request Url",
            "placeholder": "https://api.ai.com/getInventory",
            "required": false,
            "value": "https://api.bochaai.com/v1/web-search",
            "debugLabel": "",
            "toolDescription": ""
          },
          {
            "key": "system_httpHeader",
            "renderTypeList": ["custom"],
            "valueType": "any",
            "value": [
              { "key": "Authorization", "type": "string", "value": "Bearer {{$pluginInput.apiKey$}}" },
              { "key": "Content-Type", "type": "string", "value": "application/json" }
            ],
            "label": "",
            "description": "common:core.module.input.description.Http Request Header",
            "placeholder": "common:core.module.input.description.Http Request Header",
            "required": false,
            "debugLabel": "",
            "toolDescription": ""
          },
          { "key": "system_httpParams", "renderTypeList": ["hidden"], "valueType": "any", "value": [], "label": "", "required": false, "debugLabel": "", "toolDescription": "" },
          {
            "key": "system_httpJsonBody",
            "renderTypeList": ["hidden"],
            "valueType": "any",
            "value": "{\n  \"query\": \"{{query}}\",\n  \"freshness\": \"{{freshness}}\",\n  \"summary\": {{summary}},\n  \"include\": \"{{include}}\",\n  \"exclude\": \"{{exclude}}\",\n  \"count\": {{count}}\n}",
            "label": "",
            "required": false,
            "debugLabel": "",
            "toolDescription": ""
          },
          { "key": "system_httpFormBody", "renderTypeList": ["hidden"], "valueType": "any", "value": [], "label": "", "required": false, "debugLabel": "", "toolDescription": "" },
          { "key": "system_httpContentType", "renderTypeList": ["hidden"], "valueType": "string", "value": "json", "label": "", "required": false, "debugLabel": "", "toolDescription": "" },
          {
            "valueType": "string",
            "renderTypeList": ["reference"],
            "key": "query",
            "label": "query",
            "toolDescription": "博查搜索检索词",
            "required": true,
            "canEdit": true,
            "editField": { "key": true, "description": true },
            "customInputConfig": {
              "selectValueTypeList": ["string", "number", "boolean", "object", "arrayString", "arrayNumber", "arrayBoolean", "arrayObject", "arrayAny", "any", "chatHistory", "datasetQuote", "dynamic", "selectApp", "selectDataset"],
              "showDescription": false,
              "showDefaultValue": true
            },
            "value": ["pluginInput", "query"]
          },
          {
            "valueType": "string",
            "renderTypeList": ["reference"],
            "key": "freshness",
            "label": "freshness",
            "toolDescription": "搜索时间范围",
            "required": false,
            "canEdit": true,
            "editField": { "key": true, "description": true },
            "customInputConfig": {
              "selectValueTypeList": ["string", "number", "boolean", "object", "arrayString", "arrayNumber", "arrayBoolean", "arrayObject", "arrayAny", "any", "chatHistory", "datasetQuote", "dynamic", "selectApp", "selectDataset"],
              "showDescription": false,
              "showDefaultValue": true
            },
            "value": ["pluginInput", "freshness"]
          },
          {
            "valueType": "boolean",
            "renderTypeList": ["reference"],
            "key": "summary",
            "label": "summary",
            "toolDescription": "是否显示文本摘要",
            "required": false,
            "canEdit": true,
            "editField": { "key": true, "description": true },
            "customInputConfig": {
              "selectValueTypeList": ["string", "number", "boolean", "object", "arrayString", "arrayNumber", "arrayBoolean", "arrayObject", "arrayAny", "any", "chatHistory", "datasetQuote", "dynamic", "selectApp", "selectDataset"],
              "showDescription": false,
              "showDefaultValue": true
            },
            "value": ["pluginInput", "summary"]
          },
          {
            "valueType": "string",
            "renderTypeList": ["reference"],
            "key": "include",
            "label": "include",
            "toolDescription": "指定搜索的site范围",
            "required": false,
            "canEdit": true,
            "editField": { "key": true, "description": true },
            "customInputConfig": {
              "selectValueTypeList": ["string", "number", "boolean", "object", "arrayString", "arrayNumber", "arrayBoolean", "arrayObject", "arrayAny", "any", "chatHistory", "datasetQuote", "dynamic", "selectApp", "selectDataset"],
              "showDescription": false,
              "showDefaultValue": true
            },
            "value": ["pluginInput", "include"]
          },
          {
            "valueType": "string",
            "renderTypeList": ["reference"],
            "key": "exclude",
            "label": "exclude",
            "toolDescription": "排除搜索的网站范围",
            "required": false,
            "canEdit": true,
            "editField": { "key": true, "description": true },
            "customInputConfig": {
              "selectValueTypeList": ["string", "number", "boolean", "object", "arrayString", "arrayNumber", "arrayBoolean", "arrayObject", "arrayAny", "any", "chatHistory", "datasetQuote", "dynamic", "selectApp", "selectDataset"],
              "showDescription": false,
              "showDefaultValue": true
            },
            "value": ["pluginInput", "exclude"]
          },
          {
            "valueType": "number",
            "renderTypeList": ["reference"],
            "key": "count",
            "label": "count",
            "toolDescription": "返回结果条数",
            "required": false,
            "canEdit": true,
            "editField": { "key": true, "description": true },
            "customInputConfig": {
              "selectValueTypeList": ["string", "number", "boolean", "object", "arrayString", "arrayNumber", "arrayBoolean", "arrayObject", "arrayAny", "any", "chatHistory", "datasetQuote", "dynamic", "selectApp", "selectDataset"],
              "showDescription": false,
              "showDefaultValue": true
            },
            "value": ["pluginInput", "count"]
          }
        ],
        "outputs": [
          { "id": "error", "key": "error", "label": "workflow:request_error", "description": "HTTP请求错误信息,成功时返回空", "valueType": "object", "type": "static" },
          { "id": "httpRawResponse", "key": "httpRawResponse", "required": true, "label": "workflow:raw_response", "description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。", "valueType": "any", "type": "static" },
          { "id": "system_addOutputParam", "key": "system_addOutputParam", "type": "dynamic", "valueType": "dynamic", "label": "", "editField": { "key": true, "valueType": true } }
        ]
      }
    ],
    "edges": [
      { "source": "pluginInput", "target": "nyA6oA8mF1iW", "sourceHandle": "pluginInput-source-right", "targetHandle": "nyA6oA8mF1iW-target-left" },
      { "source": "nyA6oA8mF1iW", "target": "pluginOutput", "sourceHandle": "nyA6oA8mF1iW-source-right", "targetHandle": "pluginOutput-target-left" }
    ]
  },
  "chatConfig": {}
}
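For reference, the HTTP node in this template amounts to the following standalone call — a minimal sketch in which BOCHA_API_KEY is a placeholder; the endpoint, headers, and body fields are taken directly from the template above:

// Sketch of the request the "博查搜索" HTTP node performs (Node 18+, global fetch).
const searchBocha = async (query: string) => {
  const res = await fetch('https://api.bochaai.com/v1/web-search', {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${process.env.BOCHA_API_KEY}`, // placeholder key source
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      query,
      freshness: 'noLimit', // oneDay | oneWeek | oneMonth | oneYear | noLimit | a date range
      summary: false,
      include: '',
      exclude: '',
      count: 10 // 1-50
    })
  });
  return res.json(); // surfaced to the workflow as httpRawResponse
};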
@@ -1,6 +1,5 @@
 {
   "author": "silencezhang",
-  "version": "4811",
   "name": "数据库连接",
   "avatar": "core/workflow/template/datasource",
   "intro": "可连接常用数据库,并执行sql",
@@ -1,6 +1,5 @@
 {
   "author": "collin",
-  "version": "4817",
   "name": "流程等待",
   "avatar": "core/workflow/template/sleep",
   "intro": "让工作流等待指定时间后运行",
@@ -1,6 +1,5 @@
 {
   "author": "silencezhang",
-  "version": "4817",
   "name": "基础图表",
   "avatar": "core/workflow/template/baseChart",
   "intro": "根据数据生成图表,可根据chartType生成柱状图,折线图,饼图",
@@ -1,6 +1,5 @@
 {
   "author": "silencezhang",
-  "version": "486",
   "name": "BI图表功能",
   "avatar": "core/workflow/template/BI",
   "intro": "BI图表功能,可以生成一些常用的图表,如饼图,柱状图,折线图等",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 网络搜索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行网络搜索",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 图片搜索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行图片搜索",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 新闻检索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行新闻检索",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo 视频搜索",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "使用 DuckDuckGo 进行视频搜索",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo服务",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "DuckDuckGo 服务,包含网络搜索、图片搜索、新闻搜索等。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "488",
   "name": "飞书 webhook",
   "avatar": "core/app/templates/plugin-feishu",
   "intro": "向飞书机器人发起 webhook 请求。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "网页内容抓取",
   "avatar": "core/workflow/template/fetchUrl",
   "intro": "可获取一个网页链接内容,并以 Markdown 格式输出,仅支持获取静态网站。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "481",
   "templateType": "tools",
   "name": "获取当前时间",
   "avatar": "core/workflow/template/getTime",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4811",
   "name": "Google搜索",
   "avatar": "core/workflow/template/google",
   "intro": "在google中搜索。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "数学公式执行",
   "avatar": "core/workflow/template/mathCall",
   "intro": "用于执行数学表达式的工具,通过 js 的 expr-eval 库运行表达式并返回结果。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4816",
   "name": "Search XNG 搜索",
   "avatar": "core/workflow/template/searxng",
   "intro": "使用 Search XNG 服务进行搜索。",
@@ -1,6 +1,5 @@
 {
   "author": "cloudpense",
-  "version": "1.0.0",
   "name": "Email 邮件发送",
   "avatar": "plugins/email",
   "intro": "通过SMTP协议发送电子邮件(nodemailer)",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "489",
   "name": "文本加工",
   "avatar": "/imgs/workflow/textEditor.svg",
   "intro": "可对固定或传入的文本进行加工后输出,非字符串类型数据最终会转成字符串类型。",
@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4811",
   "name": "Wiki搜索",
   "avatar": "core/workflow/template/wiki",
   "intro": "在Wiki中查询释义。",
7  packages/service/common/api/type.d.ts  vendored
@@ -1,5 +1,8 @@
-import type { ApiDatasetDetailResponse } from '@fastgpt/global/core/dataset/apiDataset';
-import { FeishuServer, YuqueServer } from '@fastgpt/global/core/dataset/apiDataset';
+import type {
+  ApiDatasetDetailResponse,
+  FeishuServer,
+  YuqueServer
+} from '@fastgpt/global/core/dataset/apiDataset/type';
 import type {
   DeepRagSearchProps,
   SearchDatasetDataResponse
181  packages/service/common/buffer/rawText/controller.ts  Normal file
@@ -0,0 +1,181 @@
import { retryFn } from '@fastgpt/global/common/system/utils';
import { connectionMongo } from '../../mongo';
import { MongoRawTextBufferSchema, bucketName } from './schema';
import { addLog } from '../../system/log';
import { setCron } from '../../system/cron';
import { checkTimerLock } from '../../system/timerLock/utils';
import { TimerIdEnum } from '../../system/timerLock/constants';

const getGridBucket = () => {
  return new connectionMongo.mongo.GridFSBucket(connectionMongo.connection.db!, {
    bucketName: bucketName
  });
};

export const addRawTextBuffer = async ({
  sourceId,
  sourceName,
  text,
  expiredTime
}: {
  sourceId: string;
  sourceName: string;
  text: string;
  expiredTime: Date;
}) => {
  const gridBucket = getGridBucket();
  const metadata = {
    sourceId,
    sourceName,
    expiredTime
  };

  const buffer = Buffer.from(text);

  const fileSize = buffer.length;
  // 单块大小:尽可能大,但不超过 14MB,不小于128KB
  const chunkSizeBytes = (() => {
    // 计算理想块大小:文件大小 ÷ 目标块数(10)。并且每个块需要小于 14MB
    const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);

    // 确保块大小至少为128KB
    const minChunkSize = 128 * 1024; // 128KB

    // 取理想块大小和最小块大小中的较大值
    let chunkSize = Math.max(idealChunkSize, minChunkSize);

    // 将块大小向上取整到最接近的64KB的倍数,使其更整齐
    chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);

    return chunkSize;
  })();

  const uploadStream = gridBucket.openUploadStream(sourceId, {
    metadata,
    chunkSizeBytes
  });

  return retryFn(async () => {
    return new Promise((resolve, reject) => {
      uploadStream.end(buffer);
      uploadStream.on('finish', () => {
        resolve(uploadStream.id);
      });
      uploadStream.on('error', (error) => {
        addLog.error('addRawTextBuffer error', error);
        resolve('');
      });
    });
  });
};

export const getRawTextBuffer = async (sourceId: string) => {
  const gridBucket = getGridBucket();

  return retryFn(async () => {
    const bufferData = await MongoRawTextBufferSchema.findOne(
      {
        'metadata.sourceId': sourceId
      },
      '_id metadata'
    ).lean();
    if (!bufferData) {
      return null;
    }

    // Read file content
    const downloadStream = gridBucket.openDownloadStream(bufferData._id);
    const chunks: Buffer[] = [];

    return new Promise<{
      text: string;
      sourceName: string;
    } | null>((resolve, reject) => {
      downloadStream.on('data', (chunk) => {
        chunks.push(chunk);
      });

      downloadStream.on('end', () => {
        const buffer = Buffer.concat(chunks);
        const text = buffer.toString('utf8');
        resolve({
          text,
          sourceName: bufferData.metadata?.sourceName || ''
        });
      });

      downloadStream.on('error', (error) => {
        addLog.error('getRawTextBuffer error', error);
        resolve(null);
      });
    });
  });
};

export const deleteRawTextBuffer = async (sourceId: string): Promise<boolean> => {
  const gridBucket = getGridBucket();

  return retryFn(async () => {
    const buffer = await MongoRawTextBufferSchema.findOne({ 'metadata.sourceId': sourceId });
    if (!buffer) {
      return false;
    }

    await gridBucket.delete(buffer._id);
    return true;
  });
};

export const updateRawTextBufferExpiredTime = async ({
  sourceId,
  expiredTime
}: {
  sourceId: string;
  expiredTime: Date;
}) => {
  return retryFn(async () => {
    return MongoRawTextBufferSchema.updateOne(
      { 'metadata.sourceId': sourceId },
      { $set: { 'metadata.expiredTime': expiredTime } }
    );
  });
};

export const clearExpiredRawTextBufferCron = async () => {
  const gridBucket = getGridBucket();

  const clearExpiredRawTextBuffer = async () => {
    addLog.debug('Clear expired raw text buffer start');

    const data = await MongoRawTextBufferSchema.find(
      {
        'metadata.expiredTime': { $lt: new Date() }
      },
      '_id'
    ).lean();

    for (const item of data) {
      try {
        await gridBucket.delete(item._id);
      } catch (error) {
        addLog.error('Delete expired raw text buffer error', error);
      }
    }
    addLog.debug('Clear expired raw text buffer end');
  };

  setCron('*/10 * * * *', async () => {
    if (
      await checkTimerLock({
        timerId: TimerIdEnum.clearExpiredRawTextBuffer,
        lockMinuted: 9
      })
    ) {
      try {
        await clearExpiredRawTextBuffer();
      } catch (error) {
        addLog.error('clearExpiredRawTextBufferCron error', error);
      }
    }
  });
};
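A usage sketch of the new buffer API — the sourceId mirrors the `${fileId}-${customPdfParse}` key used by readFileContentFromMongo further down; the id and file name here are illustrative:

import { addMinutes } from 'date-fns';
import { addRawTextBuffer, getRawTextBuffer } from './controller';

// Cache parsed text for 20 minutes, then read it back.
await addRawTextBuffer({
  sourceId: '665a0f3c1d2e-false', // hypothetical `${fileId}-${customPdfParse}` key
  sourceName: 'demo.pdf',
  text: 'parsed file content...',
  expiredTime: addMinutes(new Date(), 20)
});

const hit = await getRawTextBuffer('665a0f3c1d2e-false');
if (hit) {
  console.log(hit.sourceName, hit.text.length); // null on a miss or stream error
}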
@@ -1,33 +1,22 @@
-import { getMongoModel, Schema } from '../../mongo';
-import { type RawTextBufferSchemaType } from './type';
+import { getMongoModel, type Types, Schema } from '../../mongo';

-export const collectionName = 'buffer_rawtexts';
+export const bucketName = 'buffer_rawtext';

 const RawTextBufferSchema = new Schema({
-  sourceId: {
-    type: String,
-    required: true
-  },
-  rawText: {
-    type: String,
-    default: ''
-  },
-  createTime: {
-    type: Date,
-    default: () => new Date()
-  },
-  metadata: Object
+  metadata: {
+    sourceId: { type: String, required: true },
+    sourceName: { type: String, required: true },
+    expiredTime: { type: Date, required: true }
+  }
 });
+RawTextBufferSchema.index({ 'metadata.sourceId': 'hashed' });
+RawTextBufferSchema.index({ 'metadata.expiredTime': -1 });

-try {
-  RawTextBufferSchema.index({ sourceId: 1 });
-  // 20 minutes
-  RawTextBufferSchema.index({ createTime: 1 }, { expireAfterSeconds: 20 * 60 });
-} catch (error) {
-  console.log(error);
-}

-export const MongoRawTextBuffer = getMongoModel<RawTextBufferSchemaType>(
-  collectionName,
-  RawTextBufferSchema
-);
+export const MongoRawTextBufferSchema = getMongoModel<{
+  _id: Types.ObjectId;
+  metadata: {
+    sourceId: string;
+    sourceName: string;
+    expiredTime: Date;
+  };
+}>(`${bucketName}.files`, RawTextBufferSchema);
@@ -1,8 +0,0 @@
-export type RawTextBufferSchemaType = {
-  sourceId: string;
-  rawText: string;
-  createTime: Date;
-  metadata?: {
-    filename: string;
-  };
-};
@@ -6,13 +6,14 @@ import { type DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
 import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema';
 import { detectFileEncoding, detectFileEncodingByPath } from '@fastgpt/global/common/file/tools';
 import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
-import { MongoRawTextBuffer } from '../../buffer/rawText/schema';
 import { readRawContentByFileBuffer } from '../read/utils';
-import { gridFsStream2Buffer, stream2Encoding } from './utils';
+import { computeGridFsChunSize, gridFsStream2Buffer, stream2Encoding } from './utils';
 import { addLog } from '../../system/log';
-import { readFromSecondary } from '../../mongo/utils';
 import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
 import { Readable } from 'stream';
+import { addRawTextBuffer, getRawTextBuffer } from '../../buffer/rawText/controller';
+import { addMinutes } from 'date-fns';
+import { retryFn } from '@fastgpt/global/common/system/utils';

 export function getGFSCollection(bucket: `${BucketNameEnum}`) {
   MongoDatasetFileSchema;
@@ -64,23 +65,7 @@ export async function uploadFile({
   // create a gridfs bucket
   const bucket = getGridBucket(bucketName);

-  const fileSize = stats.size;
-  // 单块大小:尽可能大,但不超过 14MB,不小于512KB
-  const chunkSizeBytes = (() => {
-    // 计算理想块大小:文件大小 ÷ 目标块数(10)。并且每个块需要小于 14MB
-    const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);
-
-    // 确保块大小至少为512KB
-    const minChunkSize = 512 * 1024; // 512KB
-
-    // 取理想块大小和最小块大小中的较大值
-    let chunkSize = Math.max(idealChunkSize, minChunkSize);
-
-    // 将块大小向上取整到最接近的64KB的倍数,使其更整齐
-    chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
-
-    return chunkSize;
-  })();
+  const chunkSizeBytes = computeGridFsChunSize(stats.size);

   const stream = bucket.openUploadStream(filename, {
     metadata,
@@ -173,24 +158,18 @@ export async function getFileById({

 export async function delFileByFileIdList({
   bucketName,
-  fileIdList,
-  retry = 3
+  fileIdList
 }: {
   bucketName: `${BucketNameEnum}`;
   fileIdList: string[];
-  retry?: number;
 }): Promise<any> {
-  try {
+  return retryFn(async () => {
     const bucket = getGridBucket(bucketName);

     for await (const fileId of fileIdList) {
      await bucket.delete(new Types.ObjectId(fileId));
    }
-  } catch (error) {
-    if (retry > 0) {
-      return delFileByFileIdList({ bucketName, fileIdList, retry: retry - 1 });
-    }
-  }
+  });
 }

 export async function getDownloadStream({
@@ -223,15 +202,13 @@ export const readFileContentFromMongo = async ({
   rawText: string;
   filename: string;
 }> => {
-  const bufferId = `${fileId}-${customPdfParse}`;
+  const bufferId = `${String(fileId)}-${customPdfParse}`;
   // read buffer
-  const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: bufferId }, undefined, {
-    ...readFromSecondary
-  }).lean();
+  const fileBuffer = await getRawTextBuffer(bufferId);
   if (fileBuffer) {
     return {
-      rawText: fileBuffer.rawText,
-      filename: fileBuffer.metadata?.filename || ''
+      rawText: fileBuffer.text,
+      filename: fileBuffer?.sourceName
     };
   }

@@ -265,16 +242,13 @@ export const readFileContentFromMongo = async ({
     }
   });

-  // < 14M
-  if (fileBuffers.length < 14 * 1024 * 1024 && rawText.trim()) {
-    MongoRawTextBuffer.create({
-      sourceId: bufferId,
-      rawText,
-      metadata: {
-        filename: file.filename
-      }
-    });
-  }
+  // Add buffer
+  addRawTextBuffer({
+    sourceId: bufferId,
+    sourceName: file.filename,
+    text: rawText,
+    expiredTime: addMinutes(new Date(), 20)
+  });

   return {
     rawText,
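Net effect of these hunks: readFileContentFromMongo now uses a cache-aside flow against the GridFS-backed buffer — look up the `${String(fileId)}-${customPdfParse}` key, return the cached text on a hit, otherwise parse the file and write the result back with a 20-minute expiry via addRawTextBuffer. The old `< 14M` size guard disappears, presumably because GridFS chunking is not bound by the 16MB BSON document limit that constrained the previous single-document buffer.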
@@ -1,16 +1,16 @@
 import { Schema, getMongoModel } from '../../mongo';

-const DatasetFileSchema = new Schema({});
-const ChatFileSchema = new Schema({});
+const DatasetFileSchema = new Schema({
+  metadata: Object
+});
+const ChatFileSchema = new Schema({
+  metadata: Object
+});

-try {
-  DatasetFileSchema.index({ uploadDate: -1 });
-
-  ChatFileSchema.index({ uploadDate: -1 });
-  ChatFileSchema.index({ 'metadata.chatId': 1 });
-} catch (error) {
-  console.log(error);
-}
+DatasetFileSchema.index({ uploadDate: -1 });
+
+ChatFileSchema.index({ uploadDate: -1 });
+ChatFileSchema.index({ 'metadata.chatId': 1 });

 export const MongoDatasetFileSchema = getMongoModel('dataset.files', DatasetFileSchema);
 export const MongoChatFileSchema = getMongoModel('chat.files', ChatFileSchema);
@@ -1,5 +1,57 @@
 import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
 import { PassThrough } from 'stream';
+import { getGridBucket } from './controller';
+import { type BucketNameEnum } from '@fastgpt/global/common/file/constants';
+import { retryFn } from '@fastgpt/global/common/system/utils';
+
+export const createFileFromText = async ({
+  bucket,
+  filename,
+  text,
+  metadata
+}: {
+  bucket: `${BucketNameEnum}`;
+  filename: string;
+  text: string;
+  metadata: Record<string, any>;
+}) => {
+  const gridBucket = getGridBucket(bucket);
+
+  const buffer = Buffer.from(text);
+
+  const fileSize = buffer.length;
+  // 单块大小:尽可能大,但不超过 14MB,不小于128KB
+  const chunkSizeBytes = (() => {
+    // 计算理想块大小:文件大小 ÷ 目标块数(10)。并且每个块需要小于 14MB
+    const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);
+
+    // 确保块大小至少为128KB
+    const minChunkSize = 128 * 1024; // 128KB
+
+    // 取理想块大小和最小块大小中的较大值
+    let chunkSize = Math.max(idealChunkSize, minChunkSize);
+
+    // 将块大小向上取整到最接近的64KB的倍数,使其更整齐
+    chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
+
+    return chunkSize;
+  })();
+
+  const uploadStream = gridBucket.openUploadStream(filename, {
+    metadata,
+    chunkSizeBytes
+  });
+
+  return retryFn(async () => {
+    return new Promise<{ fileId: string }>((resolve, reject) => {
+      uploadStream.end(buffer);
+      uploadStream.on('finish', () => {
+        resolve({ fileId: String(uploadStream.id) });
+      });
+      uploadStream.on('error', reject);
+    });
+  });
+};

 export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
   return new Promise<Buffer>((resolve, reject) => {
@@ -53,3 +105,20 @@ export const stream2Encoding = async (stream: NodeJS.ReadableStream) => {
     stream: copyStream
   };
 };
+
+// 单块大小:尽可能大,但不超过 14MB,不小于512KB
+export const computeGridFsChunSize = (fileSize: number) => {
+  // 计算理想块大小:文件大小 ÷ 目标块数(10)。并且每个块需要小于 14MB
+  const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);
+
+  // 确保块大小至少为512KB
+  const minChunkSize = 512 * 1024; // 512KB
+
+  // 取理想块大小和最小块大小中的较大值
+  let chunkSize = Math.max(idealChunkSize, minChunkSize);
+
+  // 将块大小向上取整到最接近的64KB的倍数,使其更整齐
+  chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
+
+  return chunkSize;
+};
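A quick check of computeGridFsChunSize's arithmetic; the expected values follow directly from the function above:

import { computeGridFsChunSize } from './utils';

computeGridFsChunSize(1 * 1024 * 1024);    // 524288   (512KB floor wins for small files)
computeGridFsChunSize(100 * 1024 * 1024);  // 10485760 (fileSize / 10, already a 64KB multiple)
computeGridFsChunSize(1024 * 1024 * 1024); // 14680064 (capped at 14MB, keeping headroom under MongoDB's 16MB chunk-document limit)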
@@ -22,7 +22,7 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
   maxSize *= 1024 * 1024;

   class UploadModel {
-    uploader = multer({
+    uploaderSingle = multer({
       limits: {
         fieldSize: maxSize
       },
@@ -41,8 +41,7 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
         }
       })
-    })
+    }).single('file');

-    async doUpload<T = any>(
+    async getUploadFile<T = any>(
       req: NextApiRequest,
       res: NextApiResponse,
       originBucketName?: `${BucketNameEnum}`
@@ -54,7 +53,7 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
       bucketName?: `${BucketNameEnum}`;
     }>((resolve, reject) => {
       // @ts-ignore
-      this.uploader(req, res, (error) => {
+      this.uploaderSingle(req, res, (error) => {
         if (error) {
           return reject(error);
         }
@@ -94,6 +93,58 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
         });
       });
     }
+
+    uploaderMultiple = multer({
+      limits: {
+        fieldSize: maxSize
+      },
+      preservePath: true,
+      storage: multer.diskStorage({
+        // destination: (_req, _file, cb) => {
+        //   cb(null, tmpFileDirPath);
+        // },
+        filename: (req, file, cb) => {
+          if (!file?.originalname) {
+            cb(new Error('File not found'), '');
+          } else {
+            const { ext } = path.parse(decodeURIComponent(file.originalname));
+            cb(null, `${getNanoid()}${ext}`);
+          }
+        }
+      })
+    }).array('file', global.feConfigs?.uploadFileMaxSize);
+
+    async getUploadFiles<T = any>(req: NextApiRequest, res: NextApiResponse) {
+      return new Promise<{
+        files: FileType[];
+        data: T;
+      }>((resolve, reject) => {
+        // @ts-ignore
+        this.uploaderMultiple(req, res, (error) => {
+          if (error) {
+            console.log(error);
+            return reject(error);
+          }
+
+          // @ts-ignore
+          const files = req.files as FileType[];
+
+          resolve({
+            files: files.map((file) => ({
+              ...file,
+              originalname: decodeURIComponent(file.originalname)
+            })),
+            data: (() => {
+              if (!req.body?.data) return {};
+              try {
+                return JSON.parse(req.body.data);
+              } catch (error) {
+                return {};
+              }
+            })()
+          });
+        });
+      });
+    }
   }

   return new UploadModel();
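A sketch of calling the new multi-file path from a Next.js API route; the method signature comes from the diff, while the import path and the generic payload type are assumptions:

import type { NextApiRequest, NextApiResponse } from 'next';
import { getUploadModel } from '@fastgpt/service/common/file/multer'; // hypothetical import path

const upload = getUploadModel({ maxSize: 20 }); // 20 MB per field

export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  // Parses multipart/form-data: every part named "file", plus an optional JSON "data" field.
  const { files, data } = await upload.getUploadFiles<{ datasetId?: string }>(req, res);
  res.json({ count: files.length, data });
}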
@@ -110,7 +110,7 @@ export const readRawContentByFileBuffer = async ({

   return {
     rawText: text,
-    formatText: rawText,
+    formatText: text,
     imageList
   };
 };
@@ -4,7 +4,8 @@ import { MongoFrequencyLimit } from './schema';
 export const authFrequencyLimit = async ({
   eventId,
   maxAmount,
-  expiredTime
+  expiredTime,
+  num = 1
 }: AuthFrequencyLimitProps) => {
   try {
     // 对应 eventId 的 account+1, 不存在的话,则创建一个
@@ -14,7 +15,7 @@ export const authFrequencyLimit = async ({
       expiredTime: { $gte: new Date() }
     },
     {
-      $inc: { amount: 1 },
+      $inc: { amount: num },
       // If not exist, set the expiredTime
       $setOnInsert: { expiredTime }
     },
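With the new num parameter a caller can consume several units of a rate window in one upsert. A sketch with illustrative values — the props match AuthFrequencyLimitProps as extended here:

import { addSeconds } from 'date-fns';

// Consume 5 units of a per-minute quota in a single call; the $inc above
// increments `amount` by `num`, and $setOnInsert stamps the window on first use.
await authFrequencyLimit({
  eventId: 'dataset-parse:team-123', // illustrative event key
  maxAmount: 100,
  expiredTime: addSeconds(new Date(), 60),
  num: 5
});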
@@ -5,7 +5,10 @@ export enum TimerIdEnum {
   clearExpiredSubPlan = 'clearExpiredSubPlan',
   updateStandardPlan = 'updateStandardPlan',
   scheduleTriggerApp = 'scheduleTriggerApp',
-  notification = 'notification'
+  notification = 'notification',
+
+  clearExpiredRawTextBuffer = 'clearExpiredRawTextBuffer',
+  clearExpiredDatasetImage = 'clearExpiredDatasetImage'
 }

 export enum LockNotificationEnum {
@@ -20,6 +20,10 @@ export const getVlmModel = (model?: string) => {
     ?.find((item) => item.model === model || item.name === model);
 };

+export const getVlmModelList = () => {
+  return Array.from(global.llmModelMap.values())?.filter((item) => item.vision) || [];
+};
+
 export const getDefaultEmbeddingModel = () => global?.systemDefaultModel.embedding!;
 export const getEmbeddingModel = (model?: string) => {
   if (!model) return getDefaultEmbeddingModel();
@@ -30,8 +30,7 @@ import { Types } from 'mongoose';
   community: community-id
   commercial: commercial-id
 */
-
-export async function splitCombinePluginId(id: string) {
+export function splitCombineToolId(id: string) {
   const splitRes = id.split('-');
   if (splitRes.length === 1) {
     // app id
@@ -42,7 +41,7 @@ export function splitCombineToolId(id: string) {
   }

   const [source, pluginId] = id.split('-') as [PluginSourceEnum, string];
-  if (!source || !pluginId) return Promise.reject('pluginId not found');
+  if (!source || !pluginId) throw new Error('pluginId not found');

   return { source, pluginId: id };
 }
@@ -54,7 +53,7 @@ const getSystemPluginTemplateById = async (
   versionId?: string
 ): Promise<ChildAppType> => {
   const item = getSystemPluginTemplates().find((plugin) => plugin.id === pluginId);
-  if (!item) return Promise.reject(PluginErrEnum.unAuth);
+  if (!item) return Promise.reject(PluginErrEnum.unExist);

   const plugin = cloneDeep(item);

@@ -64,10 +63,10 @@ const getSystemPluginTemplateById = async (
     { pluginId: plugin.id, 'customConfig.associatedPluginId': plugin.associatedPluginId },
     'associatedPluginId'
   ).lean();
-  if (!systemPlugin) return Promise.reject(PluginErrEnum.unAuth);
+  if (!systemPlugin) return Promise.reject(PluginErrEnum.unExist);

   const app = await MongoApp.findById(plugin.associatedPluginId).lean();
-  if (!app) return Promise.reject(PluginErrEnum.unAuth);
+  if (!app) return Promise.reject(PluginErrEnum.unExist);

   const version = versionId
     ? await getAppVersionById({
@@ -77,6 +76,12 @@ const getSystemPluginTemplateById = async (
       })
     : await getAppLatestVersion(plugin.associatedPluginId, app);
   if (!version.versionId) return Promise.reject('App version not found');
+  const isLatest = version.versionId
+    ? await checkIsLatestVersion({
+        appId: plugin.associatedPluginId,
+        versionId: version.versionId
+      })
+    : true;

   return {
     ...plugin,
@@ -85,12 +90,19 @@ const getSystemPluginTemplateById = async (
       edges: version.edges,
       chatConfig: version.chatConfig
     },
-    version: versionId || String(version.versionId),
+    version: versionId ? version?.versionId : '',
+    versionLabel: version?.versionName,
+    isLatestVersion: isLatest,
     teamId: String(app.teamId),
     tmbId: String(app.tmbId)
   };
 }
-  return plugin;
+
+  return {
+    ...plugin,
+    version: undefined,
+    isLatestVersion: true
+  };
 };

 /* Format plugin to workflow preview node data */
@@ -102,11 +114,11 @@ export async function getChildAppPreviewNode({
   versionId?: string;
 }): Promise<FlowNodeTemplateType> {
   const app: ChildAppType = await (async () => {
-    const { source, pluginId } = await splitCombinePluginId(appId);
+    const { source, pluginId } = splitCombineToolId(appId);

     if (source === PluginSourceEnum.personal) {
       const item = await MongoApp.findById(appId).lean();
-      if (!item) return Promise.reject('plugin not found');
+      if (!item) return Promise.reject(PluginErrEnum.unExist);

       const version = await getAppVersionById({ appId, versionId, app: item });

@@ -132,8 +144,8 @@ export async function getChildAppPreviewNode({
       },
       templateType: FlowNodeTemplateTypeEnum.teamApp,

-      version: version.versionId,
-      versionLabel: version?.versionName || '',
+      version: versionId ? version?.versionId : '',
+      versionLabel: version?.versionName,
       isLatestVersion: isLatest,

       originCost: 0,
@@ -142,7 +154,7 @@ export async function getChildAppPreviewNode({
       pluginOrder: 0
     };
   } else {
-    return getSystemPluginTemplateById(pluginId);
+    return getSystemPluginTemplateById(pluginId, versionId);
   }
 })();

@@ -216,12 +228,12 @@ export async function getChildAppRuntimeById(
   id: string,
   versionId?: string
 ): Promise<PluginRuntimeType> {
-  const app: ChildAppType = await (async () => {
-    const { source, pluginId } = await splitCombinePluginId(id);
+  const app = await (async () => {
+    const { source, pluginId } = splitCombineToolId(id);

     if (source === PluginSourceEnum.personal) {
       const item = await MongoApp.findById(id).lean();
-      if (!item) return Promise.reject('plugin not found');
+      if (!item) return Promise.reject(PluginErrEnum.unExist);

       const version = await getAppVersionById({
         appId: id,
@@ -244,8 +256,6 @@ export async function getChildAppRuntimeById(
       },
       templateType: FlowNodeTemplateTypeEnum.teamApp,

-      // 用不到
-      version: item?.pluginData?.nodeVersion,
       originCost: 0,
       currentCost: 0,
       hasTokenFee: false,
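Behavior of the renamed helper, traced from the code above; the ids are illustrative:

splitCombineToolId('community-searchXNG'); // { source: 'community', pluginId: 'community-searchXNG' }
splitCombineToolId('6650b2d01d2e3f');      // bare Mongo id -> treated as a personal app id (per the `// app id` branch)
splitCombineToolId('community-');          // now throws Error('pluginId not found') instead of returning a rejected promise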
@@ -1,6 +1,6 @@
 import { type ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
 import { type PluginRuntimeType } from '@fastgpt/global/core/plugin/type';
-import { splitCombinePluginId } from './controller';
+import { splitCombineToolId } from './controller';
 import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';

 /*
@@ -20,7 +20,7 @@ export const computedPluginUsage = async ({
   childrenUsage: ChatNodeUsageType[];
   error?: boolean;
 }) => {
-  const { source } = await splitCombinePluginId(plugin.id);
+  const { source } = splitCombineToolId(plugin.id);
   const childrenUsages = childrenUsage.reduce((sum, item) => sum + (item.totalPoints || 0), 0);

   if (source !== PluginSourceEnum.personal) {
@@ -1,14 +1,13 @@
 import { MongoDataset } from '../dataset/schema';
 import { getEmbeddingModel } from '../ai/model';
-import {
-  AppNodeFlowNodeTypeMap,
-  FlowNodeTypeEnum
-} from '@fastgpt/global/core/workflow/node/constant';
+import { FlowNodeTypeEnum } from '@fastgpt/global/core/workflow/node/constant';
 import { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
 import type { StoreNodeItemType } from '@fastgpt/global/core/workflow/type/node';
-import { MongoAppVersion } from './version/schema';
-import { checkIsLatestVersion } from './version/controller';
-import { Types } from '../../common/mongo';
+import { getChildAppPreviewNode, splitCombineToolId } from './plugin/controller';
+import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
+import { authAppByTmbId } from '../../support/permission/app/auth';
+import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
+import { getErrText } from '@fastgpt/global/common/error/utils';

 export async function listAppDatasetDataByTeamIdAndDatasetIds({
   teamId,
@@ -33,53 +32,58 @@ export async function listAppDatasetDataByTeamIdAndDatasetIds({
 export async function rewriteAppWorkflowToDetail({
   nodes,
   teamId,
-  isRoot
+  isRoot,
+  ownerTmbId
 }: {
   nodes: StoreNodeItemType[];
   teamId: string;
   isRoot: boolean;
+  ownerTmbId: string;
 }) {
   const datasetIdSet = new Set<string>();

-  // Add node(App Type) versionlabel and latest sign
-  const appNodes = nodes.filter((node) => AppNodeFlowNodeTypeMap[node.flowNodeType]);
-  const versionIds = appNodes
-    .filter((node) => node.version && Types.ObjectId.isValid(node.version))
-    .map((node) => node.version);
-
-  if (versionIds.length > 0) {
-    const versionDataList = await MongoAppVersion.find(
-      {
-        _id: { $in: versionIds }
-      },
-      '_id versionName appId time'
-    ).lean();
-
-    const versionMap: Record<string, any> = {};
-
-    const isLatestChecks = await Promise.all(
-      versionDataList.map(async (version) => {
-        const isLatest = await checkIsLatestVersion({
-          appId: version.appId,
-          versionId: version._id
-        });
-
-        return { versionId: String(version._id), isLatest };
-      })
-    );
-    const isLatestMap = new Map(isLatestChecks.map((item) => [item.versionId, item.isLatest]));
-    versionDataList.forEach((version) => {
-      versionMap[String(version._id)] = version;
-    });
-    appNodes.forEach((node) => {
-      if (!node.version) return;
-      const versionData = versionMap[String(node.version)];
-      if (versionData) {
-        node.versionLabel = versionData.versionName;
-        node.isLatestVersion = isLatestMap.get(String(node.version)) || false;
-      }
-    });
-  }
+  /* Add node(App Type) versionlabel and latest sign ==== */
+  await Promise.all(
+    nodes.map(async (node) => {
+      if (!node.pluginId) return;
+      const { source } = splitCombineToolId(node.pluginId);
+
+      try {
+        const [preview] = await Promise.all([
+          getChildAppPreviewNode({
+            appId: node.pluginId,
+            versionId: node.version
+          }),
+          ...(source === PluginSourceEnum.personal
+            ? [
+                authAppByTmbId({
+                  tmbId: ownerTmbId,
+                  appId: node.pluginId,
+                  per: ReadPermissionVal
+                })
+              ]
+            : [])
+        ]);
+
+        node.pluginData = {
+          diagram: preview.diagram,
+          userGuide: preview.userGuide,
+          courseUrl: preview.courseUrl,
+          name: preview.name,
+          avatar: preview.avatar
+        };
+        node.versionLabel = preview.versionLabel;
+        node.isLatestVersion = preview.isLatestVersion;
+        node.version = preview.version;
+      } catch (error) {
+        node.pluginData = {
+          error: getErrText(error)
+        };
+      }
+    })
+  );
+  /* Add node(App Type) versionlabel and latest sign ==== */

   // Get all dataset ids from nodes
   nodes.forEach((node) => {
@@ -68,6 +68,9 @@ export const checkIsLatestVersion = async ({
   appId: string;
   versionId: string;
 }) => {
+  if (!Types.ObjectId.isValid(versionId)) {
+    return false;
+  }
   const version = await MongoAppVersion.findOne(
     {
       appId,
@@ -65,8 +65,8 @@ export const filterGPTMessageByMaxContext = async ({
     if (lastMessage.role === ChatCompletionRequestMessageRoleEnum.User) {
       const tokens = await countGptMessagesTokens([lastMessage, ...tmpChats]);
       maxContext -= tokens;
-      // 该轮信息整体 tokens 超出范围,这段数据不要了
-      if (maxContext < 0) {
+      // 该轮信息整体 tokens 超出范围,这段数据不要了。但是至少保证一组。
+      if (maxContext < 0 && chats.length > 0) {
         break;
       }
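In effect, the added chats.length > 0 guard implements the updated comment ("至少保证一组" — keep at least one group): even when the newest user turn alone exceeds maxContext, the loop no longer breaks before anything has been collected, so the most recent exchange always survives the trim.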
@@ -3,12 +3,11 @@ import type {
   ApiFileReadContentResponse,
   APIFileReadResponse,
   ApiDatasetDetailResponse,
-  APIFileServer,
-  APIFileItem
-} from '@fastgpt/global/core/dataset/apiDataset';
+  APIFileServer
+} from '@fastgpt/global/core/dataset/apiDataset/type';
 import axios, { type Method } from 'axios';
-import { addLog } from '../../../common/system/log';
-import { readFileRawTextByUrl } from '../read';
+import { addLog } from '../../../../common/system/log';
+import { readFileRawTextByUrl } from '../../read';
 import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
 import { type RequireOnlyOne } from '@fastgpt/global/common/type/utils';
@@ -3,10 +3,10 @@ import type {
   ApiFileReadContentResponse,
   ApiDatasetDetailResponse,
   FeishuServer
-} from '@fastgpt/global/core/dataset/apiDataset';
+} from '@fastgpt/global/core/dataset/apiDataset/type';
 import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
 import axios, { type Method } from 'axios';
-import { addLog } from '../../../common/system/log';
+import { addLog } from '../../../../common/system/log';

 type ResponseDataType = {
   success: boolean;
@@ -1,18 +1,10 @@
-import type {
-  APIFileServer,
-  YuqueServer,
-  FeishuServer
-} from '@fastgpt/global/core/dataset/apiDataset';
-import { useApiDatasetRequest } from './api';
-import { useYuqueDatasetRequest } from '../yuqueDataset/api';
-import { useFeishuDatasetRequest } from '../feishuDataset/api';
+import { useApiDatasetRequest } from './custom/api';
+import { useYuqueDatasetRequest } from './yuqueDataset/api';
+import { useFeishuDatasetRequest } from './feishuDataset/api';
+import type { ApiDatasetServerType } from '@fastgpt/global/core/dataset/apiDataset/type';

-export const getApiDatasetRequest = async (data: {
-  apiServer?: APIFileServer;
-  yuqueServer?: YuqueServer;
-  feishuServer?: FeishuServer;
-}) => {
-  const { apiServer, yuqueServer, feishuServer } = data;
+export const getApiDatasetRequest = async (apiDatasetServer?: ApiDatasetServerType) => {
+  const { apiServer, yuqueServer, feishuServer } = apiDatasetServer || {};

   if (apiServer) {
     return useApiDatasetRequest({ apiServer });
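Call-site sketch for the consolidated signature; the single optional object is taken from the diff, while the YuqueServer field names are placeholders:

import type { ApiDatasetServerType } from '@fastgpt/global/core/dataset/apiDataset/type';

// One optional object replaces the three separate server params.
const apiDatasetServer = {
  yuqueServer: { userId: 'demo', token: 'xxx' } // placeholder fields
} as ApiDatasetServerType;

const request = await getApiDatasetRequest(apiDatasetServer); // picks useYuqueDatasetRequest here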
@@ -3,9 +3,9 @@ import type {
   ApiFileReadContentResponse,
   YuqueServer,
   ApiDatasetDetailResponse
-} from '@fastgpt/global/core/dataset/apiDataset';
+} from '@fastgpt/global/core/dataset/apiDataset/type';
 import axios, { type Method } from 'axios';
-import { addLog } from '../../../common/system/log';
+import { addLog } from '../../../../common/system/log';
 import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';

 type ResponseDataType = {
@@ -105,7 +105,6 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
     if (!parentId) {
       if (yuqueServer.basePath) parentId = yuqueServer.basePath;
     }
-    let files: APIFileItem[] = [];

     if (!parentId) {
@@ -5,9 +5,10 @@
 } from '@fastgpt/global/core/dataset/constants';
 import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
 import { MongoDatasetCollection } from './schema';
-import {
-  type DatasetCollectionSchemaType,
-  type DatasetSchemaType
+import type {
+  DatasetCollectionSchemaType,
+  DatasetDataFieldType,
+  DatasetSchemaType
 } from '@fastgpt/global/core/dataset/type';
 import { MongoDatasetTraining } from '../training/schema';
 import { MongoDatasetData } from '../data/schema';
@@ -15,7 +16,7 @@ import { delImgByRelatedId } from '../../../common/file/image/controller';
 import { deleteDatasetDataVector } from '../../../common/vectorDB/controller';
 import { delFileByFileIdList } from '../../../common/file/gridfs/controller';
 import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
-import { type ClientSession } from '../../../common/mongo';
+import type { ClientSession } from '../../../common/mongo';
 import { createOrGetCollectionTags } from './utils';
 import { rawText2Chunks } from '../read';
 import { checkDatasetLimit } from '../../../support/permission/teamLimit';
@@ -34,23 +35,29 @@ import { getTrainingModeByCollection } from './utils';
 import {
   computeChunkSize,
   computeChunkSplitter,
+  computeParagraphChunkDeep,
   getLLMMaxChunkSize
 } from '@fastgpt/global/core/dataset/training/utils';
 import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';
+import { deleteDatasetImage } from '../image/controller';
+import { clearCollectionImages, removeDatasetImageExpiredTime } from '../image/utils';

 export const createCollectionAndInsertData = async ({
   dataset,
   rawText,
   relatedId,
+  imageIds,
   createCollectionParams,
   backupParse = false,
   billId,
   session
 }: {
   dataset: DatasetSchemaType;
-  rawText: string;
+  rawText?: string;
   relatedId?: string;
+  imageIds?: string[];
   createCollectionParams: CreateOneCollectionParams;

   backupParse?: boolean;

   billId?: string;
@@ -68,13 +75,18 @@ export const createCollectionAndInsertData = async ({
   // Set default params
   const trainingType =
     createCollectionParams.trainingType || DatasetCollectionDataProcessModeEnum.chunk;
-  const chunkSize = computeChunkSize({
-    ...createCollectionParams,
-    trainingType,
-    llmModel: getLLMModel(dataset.agentModel)
-  });
   const chunkSplitter = computeChunkSplitter(createCollectionParams);
-  if (trainingType === DatasetCollectionDataProcessModeEnum.qa) {
+  const paragraphChunkDeep = computeParagraphChunkDeep(createCollectionParams);
+  const trainingMode = getTrainingModeByCollection({
+    trainingType: trainingType,
+    autoIndexes: createCollectionParams.autoIndexes,
+    imageIndex: createCollectionParams.imageIndex
+  });
+
+  if (
+    trainingType === DatasetCollectionDataProcessModeEnum.qa ||
+    trainingType === DatasetCollectionDataProcessModeEnum.backup
+  ) {
     delete createCollectionParams.chunkTriggerType;
     delete createCollectionParams.chunkTriggerMinSize;
     delete createCollectionParams.dataEnhanceCollectionName;
@@ -84,39 +96,69 @@ export const createCollectionAndInsertData = async ({
     delete createCollectionParams.qaPrompt;
   }

-  // 1. split chunks
-  const chunks = rawText2Chunks({
-    rawText,
-    chunkSize,
-    maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)),
-    overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0,
-    customReg: chunkSplitter ? [chunkSplitter] : [],
-    backupParse
-  });
+  // 1. split chunks or create image chunks
+  const {
+    chunks,
+    chunkSize
+  }: {
+    chunks: Array<{
+      q?: string;
+      a?: string; // answer or custom content
+      imageId?: string;
+      indexes?: string[];
+    }>;
+    chunkSize?: number;
+  } = (() => {
+    if (rawText) {
+      const chunkSize = computeChunkSize({
+        ...createCollectionParams,
+        trainingType,
+        llmModel: getLLMModel(dataset.agentModel)
+      });
+      // Process text chunks
+      const chunks = rawText2Chunks({
+        rawText,
+        chunkTriggerType: createCollectionParams.chunkTriggerType,
+        chunkTriggerMinSize: createCollectionParams.chunkTriggerMinSize,
+        chunkSize,
+        paragraphChunkDeep,
+        paragraphChunkMinSize: createCollectionParams.paragraphChunkMinSize,
+        maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)),
+        overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0,
+        customReg: chunkSplitter ? [chunkSplitter] : [],
+        backupParse
+      });
+      return { chunks, chunkSize };
+    }
+
+    if (imageIds) {
+      // Process image chunks
+      const chunks = imageIds.map((imageId: string) => ({
+        imageId,
+        indexes: []
+      }));
+      return { chunks };
+    }
+    throw new Error('Either rawText or imageIdList must be provided');
+  })();

   // 2. auth limit
   await checkDatasetLimit({
     teamId,
-    insertLen: predictDataLimitLength(
-      getTrainingModeByCollection({
-        trainingType: trainingType,
-        autoIndexes: createCollectionParams.autoIndexes,
-        imageIndex: createCollectionParams.imageIndex
-      }),
-      chunks
-    )
+    insertLen: predictDataLimitLength(trainingMode, chunks)
   });

   const fn = async (session: ClientSession) => {
-    // 3. create collection
+    // 3. Create collection
     const { _id: collectionId } = await createOneCollection({
       ...createCollectionParams,
       trainingType,
+      paragraphChunkDeep,
       chunkSize,
       chunkSplitter,

-      hashRawText: hashStr(rawText),
-      rawTextLength: rawText.length,
+      hashRawText: rawText ? hashStr(rawText) : undefined,
+      rawTextLength: rawText?.length,
       nextSyncTime: (() => {
         // ignore auto collections sync for website datasets
         if (!dataset.autoSync && dataset.type === DatasetTypeEnum.websiteDataset) return undefined;
@@ -158,11 +200,7 @@ export const createCollectionAndInsertData = async ({
|
||||
vectorModel: dataset.vectorModel,
|
||||
vlmModel: dataset.vlmModel,
|
||||
indexSize: createCollectionParams.indexSize,
|
||||
mode: getTrainingModeByCollection({
|
||||
trainingType: trainingType,
|
||||
autoIndexes: createCollectionParams.autoIndexes,
|
||||
imageIndex: createCollectionParams.imageIndex
|
||||
}),
|
||||
mode: trainingMode,
|
||||
prompt: createCollectionParams.qaPrompt,
|
||||
billId: traingBillId,
|
||||
data: chunks.map((item, index) => ({
|
||||
@@ -176,7 +214,12 @@ export const createCollectionAndInsertData = async ({
|
||||
session
|
||||
});
|
||||
|
||||
// 6. remove related image ttl
|
||||
// 6. Remove images ttl index
|
||||
await removeDatasetImageExpiredTime({
|
||||
ids: imageIds,
|
||||
collectionId,
|
||||
session
|
||||
});
     if (relatedId) {
       await MongoImage.updateMany(
         {
@@ -196,7 +239,7 @@ export const createCollectionAndInsertData = async ({
     }

     return {
-      collectionId,
+      collectionId: String(collectionId),
       insertResults
     };
   };
@@ -212,46 +255,19 @@ export type CreateOneCollectionParams = CreateDatasetCollectionParams & {
   tmbId: string;
   session?: ClientSession;
 };
-export async function createOneCollection({
-  teamId,
-  tmbId,
-  name,
-  parentId,
-  datasetId,
-  type,
-
-  createTime,
-  updateTime,
-
-  hashRawText,
-  rawTextLength,
-  metadata = {},
-  tags,
-
-  nextSyncTime,
-
-  fileId,
-  rawLink,
-  externalFileId,
-  externalFileUrl,
-  apiFileId,
-
-  // Parse settings
-  customPdfParse,
-  imageIndex,
-  autoIndexes,
-
-  // Chunk settings
-  trainingType,
-  chunkSettingMode,
-  chunkSplitMode,
-  chunkSize,
-  indexSize,
-  chunkSplitter,
-  qaPrompt,
-
-  session
-}: CreateOneCollectionParams) {
+export async function createOneCollection({ session, ...props }: CreateOneCollectionParams) {
+  const {
+    teamId,
+    parentId,
+    datasetId,
+    tags,
+
+    fileId,
+    rawLink,
+    externalFileId,
+    externalFileUrl,
+    apiFileId
+  } = props;
   // Create collection tags
   const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session });

@@ -259,41 +275,18 @@ export async function createOneCollection({
   const [collection] = await MongoDatasetCollection.create(
     [
       {
+        ...props,
         teamId,
         tmbId,
         parentId: parentId || null,
         datasetId,
-        name,
-        type,
-
-        rawTextLength,
-        hashRawText,
         tags: collectionTags,
-        metadata,
-
-        createTime,
-        updateTime,
-        nextSyncTime,

         ...(fileId ? { fileId } : {}),
         ...(rawLink ? { rawLink } : {}),
         ...(externalFileId ? { externalFileId } : {}),
         ...(externalFileUrl ? { externalFileUrl } : {}),
-        ...(apiFileId ? { apiFileId } : {}),
-
-        // Parse settings
-        customPdfParse,
-        imageIndex,
-        autoIndexes,
-
-        // Chunk settings
-        trainingType,
-        chunkSettingMode,
-        chunkSplitMode,
-        chunkSize,
-        indexSize,
-        chunkSplitter,
-        qaPrompt
+        ...(apiFileId ? { apiFileId } : {})
       }
     ],
     { session, ordered: true }
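
The signature change above swaps a thirty-odd-field destructuring for `{ session, ...props }`, then spreads props straight into the Mongo document and overrides only the fields that need normalization. A minimal sketch of the pattern, with hypothetical field names (the Doc type is illustrative, not the schema):

type Doc = { parentId?: string | null; tags?: string[]; [key: string]: unknown };

// Keys written after the spread win, so only normalized fields need to be
// listed explicitly; everything else passes through untouched.
const buildDoc = (props: Doc, collectionTags: string[]): Doc => ({
  ...props,
  parentId: props.parentId || null, // normalize falsy parent ids to null
  tags: collectionTags // swap raw tag names for the created tag ids
});

One consequence of the design: new fields added to CreateOneCollectionParams now flow into the document automatically, so filtering unknown keys becomes the schema's job rather than the call site's.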
@@ -327,17 +320,20 @@ export const delCollectionRelatedSource = async ({
     .map((item) => item?.metadata?.relatedImgId || '')
     .filter(Boolean);

-  // Delete files
-  await delFileByFileIdList({
-    bucketName: BucketNameEnum.dataset,
-    fileIdList
-  });
-  // Delete images
-  await delImgByRelatedId({
-    teamId,
-    relateIds: relatedImageIds,
-    session
-  });
+  // Delete files and images in parallel
+  await Promise.all([
+    // Delete files
+    delFileByFileIdList({
+      bucketName: BucketNameEnum.dataset,
+      fileIdList
+    }),
+    // Delete images
+    delImgByRelatedId({
+      teamId,
+      relateIds: relatedImageIds,
+      session
+    })
+  ]);
 };
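
The file and image deletions are independent, so running them through Promise.all replaces the old sequential ordering with the latency of the slower call. Promise.all is fail-fast, though: one rejection rejects the whole cleanup while the sibling call keeps running. A sketch of that trade-off, with hypothetical helpers:

declare const deleteFiles: () => Promise<void>;
declare const deleteImages: () => Promise<void>;

// Fail-fast, as in the refactored code above.
await Promise.all([deleteFiles(), deleteImages()]);

// Alternative when partial failures should be collected instead of aborting:
const results = await Promise.allSettled([deleteFiles(), deleteImages()]);
const failed = results.filter((r) => r.status === 'rejected');
if (failed.length > 0) throw new Error(`cleanup failed for ${failed.length} source(s)`);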
 /**
  * Delete a collection and its related data
@@ -382,16 +378,16 @@ export async function delCollection({
       datasetId: { $in: datasetIds },
       collectionId: { $in: collectionIds }
     }),
+    // Delete dataset_images
+    clearCollectionImages(collectionIds),
     // Delete images if needed
     ...(delImg
-      ? [
-          delImgByRelatedId({
-            teamId,
-            relateIds: collections
-              .map((item) => item?.metadata?.relatedImgId || '')
-              .filter(Boolean)
-          })
-        ]
+      ? collections
+          .map((item) => item?.metadata?.relatedImgId || '')
+          .filter(Boolean)
+          .map((imageId) => deleteDatasetImage(imageId))
       : []),
     // Delete files if needed
     ...(delFile
       ? [
           delFileByFileIdList({

@@ -1,11 +1,9 @@
 import { MongoDatasetCollection } from './schema';
-import { type ClientSession } from '../../../common/mongo';
+import type { ClientSession } from '../../../common/mongo';
 import { MongoDatasetCollectionTags } from '../tag/schema';
 import { readFromSecondary } from '../../../common/mongo/utils';
-import {
-  type CollectionWithDatasetType,
-  type DatasetCollectionSchemaType
-} from '@fastgpt/global/core/dataset/type';
+import type { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type';
+import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
 import {
   DatasetCollectionDataProcessModeEnum,
   DatasetCollectionSyncResultEnum,
@@ -159,9 +157,7 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => {
     return {
       type: DatasetSourceReadTypeEnum.apiFile,
       sourceId,
-      apiServer: dataset.apiServer,
-      feishuServer: dataset.feishuServer,
-      yuqueServer: dataset.yuqueServer
+      apiDatasetServer: dataset.apiDatasetServer
     };
   })();
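
The apiServer / feishuServer / yuqueServer trio collapses into a single apiDatasetServer field here, so the sync path forwards one provider config instead of three. The outline below is a plausible shape for illustration only; the authoritative type lives in @fastgpt/global and may differ:

// Hypothetical outline of the consolidated config, not the real type.
type ApiDatasetServer = {
  apiServer?: { baseUrl: string; authorization: string };
  feishuServer?: Record<string, unknown>;
  yuqueServer?: Record<string, unknown>;
};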

@@ -233,18 +229,37 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => {
   QA: independent process
   Chunk: Image Index -> Auto index -> chunk index
 */
-export const getTrainingModeByCollection = (collection: {
-  trainingType: DatasetCollectionSchemaType['trainingType'];
-  autoIndexes?: DatasetCollectionSchemaType['autoIndexes'];
-  imageIndex?: DatasetCollectionSchemaType['imageIndex'];
+export const getTrainingModeByCollection = ({
+  trainingType,
+  autoIndexes,
+  imageIndex
+}: {
+  trainingType: DatasetCollectionDataProcessModeEnum;
+  autoIndexes?: boolean;
+  imageIndex?: boolean;
 }) => {
-  if (collection.trainingType === DatasetCollectionDataProcessModeEnum.qa) {
+  if (
+    trainingType === DatasetCollectionDataProcessModeEnum.imageParse &&
+    global.feConfigs?.isPlus
+  ) {
+    return TrainingModeEnum.imageParse;
+  }
+
+  if (trainingType === DatasetCollectionDataProcessModeEnum.qa) {
     return TrainingModeEnum.qa;
   }
-  if (collection.imageIndex && global.feConfigs?.isPlus) {
+  if (
+    trainingType === DatasetCollectionDataProcessModeEnum.chunk &&
+    imageIndex &&
+    global.feConfigs?.isPlus
+  ) {
     return TrainingModeEnum.image;
   }
-  if (collection.autoIndexes && global.feConfigs?.isPlus) {
+  if (
+    trainingType === DatasetCollectionDataProcessModeEnum.chunk &&
+    autoIndexes &&
+    global.feConfigs?.isPlus
+  ) {
     return TrainingModeEnum.auto;
   }
   return TrainingModeEnum.chunk;
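
Read top to bottom, the refactored function is a priority ladder: imageParse (plus builds only) beats qa, which beats chunk-mode image indexing, then auto indexing, then plain chunking; the added trainingType checks keep the index flags from promoting backup or other modes. Two worked examples, assuming global.feConfigs.isPlus is true:

// imageIndex outranks autoIndexes when both are set on a chunk collection:
getTrainingModeByCollection({
  trainingType: DatasetCollectionDataProcessModeEnum.chunk,
  autoIndexes: true,
  imageIndex: true
}); // -> TrainingModeEnum.image

// qa ignores the index flags entirely:
getTrainingModeByCollection({
  trainingType: DatasetCollectionDataProcessModeEnum.qa,
  imageIndex: true
}); // -> TrainingModeEnum.qa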

@@ -9,6 +9,7 @@ import { deleteDatasetDataVector } from '../../common/vectorDB/controller';
 import { MongoDatasetDataText } from './data/dataTextSchema';
 import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
 import { retryFn } from '@fastgpt/global/common/system/utils';
+import { clearDatasetImages } from './image/utils';

 /* ============= dataset ========== */
 /* find all datasetId by top datasetId */
@@ -102,8 +103,10 @@ export async function delDatasetRelevantData({
     }),
     // delete dataset_datas
     MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } }),
-    // Delete Image and file
+    // Delete collection image and file
     delCollectionRelatedSource({ collections }),
+    // Delete dataset Image
+    clearDatasetImages(datasetIds),
     // Delete vector data
     deleteDatasetDataVector({ teamId, datasetIds })
   ]);
||||