Compare commits

..

4 Commits

Author SHA1 Message Date
Wenhao Zhu
6a1c47296c fix: 修复知识库问题优化无效的BUG (#4033) 2025-03-07 16:01:01 +08:00
archer
6d4776b3aa action 2025-03-06 22:56:00 +08:00
archer
2d351c3654 perf: http body check 2025-03-06 18:22:42 +08:00
Finley Ge
662a4a4671 fix: remove defaultTeam (#3989) 2025-03-06 00:26:32 +08:00
173 changed files with 2092 additions and 3451 deletions

View File

@@ -6,8 +6,6 @@ on:
- 'docSite/**'
branches:
- 'main'
tags:
- 'v*.*.*'
jobs:
build-fastgpt-docs-images:

View File

@@ -7,8 +7,6 @@ on:
- 'docSite/**'
branches:
- 'main'
tags:
- 'v*.*.*'
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:

View File

@@ -4,8 +4,6 @@ on:
pull_request_target:
paths:
- 'docSite/**'
branches:
- 'main'
workflow_dispatch:
# A workflow run is made up of one or more jobs that can run sequentially or in parallel

View File

@@ -27,5 +27,7 @@
},
"markdown.copyFiles.destination": {
"/docSite/content/**/*": "${documentWorkspaceFolder}/docSite/assets/imgs/"
}
},
"markdown.copyFiles.overwriteBehavior": "nameIncrementally",
"markdown.copyFiles.transformPath": "const filename = uri.path.split('/').pop(); return `/imgs/${filename}`;"
}

View File

@@ -114,6 +114,16 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">
</a>
## 🏘️ 社区交流群
扫码加入飞书话题群:
![](https://oss.laf.run/otnvvf-imgs/fastgpt-feishu1.png)
<a href="#readme">
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">
</a>
## 🏘️ 加入我们
我们正在寻找志同道合的小伙伴,加速 FastGPT 的发展。你可以通过 [FastGPT 2025 招聘](https://fael3z0zfze.feishu.cn/wiki/P7FOwEmPziVcaYkvVaacnVX1nvg)了解 FastGPT 的招聘信息。
@@ -123,25 +133,17 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
- [Laf3 分钟快速接入三方应用](https://github.com/labring/laf)
- [Sealos快速部署集群应用](https://github.com/labring/sealos)
- [One API多模型管理支持 Azure、文心一言等](https://github.com/songquanpeng/one-api)
- [TuShan5 分钟搭建后台管理系统](https://github.com/msgbyte/tushan)
<a href="#readme">
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">
</a>
## 🌿 第三方生态
- [SiliconCloud (硅基流动) —— 开源模型在线体验平台](https://cloud.siliconflow.cn/i/TR9Ym0c4)
- [COW 个人微信/企微机器人](https://doc.tryfastgpt.ai/docs/use-cases/external-integration/onwechat/)
<a href="#readme">
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">
</a>
## 🏘️ 社区交流群
扫码加入飞书话题群:
![](https://oss.laf.run/otnvvf-imgs/fastgpt-feishu1.png)
- [SiliconCloud (硅基流动) —— 开源模型在线体验平台](https://cloud.siliconflow.cn/i/TR9Ym0c4)
<a href="#readme">
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">

View File

@@ -137,13 +137,10 @@ services:
- FE_DOMAIN=
# root 密码,用户名为: root。如果需要修改 root 密码,直接修改这个环境变量,并重启即可。
- DEFAULT_ROOT_PSW=1234
# AI Proxy 的地址,如果配了该地址,优先使用
- AIPROXY_API_ENDPOINT=http://aiproxy:3000
# AI Proxy 的 Admin Token与 AI Proxy 中的环境变量 ADMIN_KEY
- AIPROXY_API_TOKEN=aiproxy
# 模型中转地址(如果用了 AI Proxy下面 2 个就不需要了,旧版 OneAPI 用户,使用下面的变量)
# - OPENAI_BASE_URL=http://oneapi:3000/v1
# - CHAT_API_KEY=sk-fastgpt
# AI模型的API地址哦。务必加 /v1。这里默认填写了OneApi的访问地址。
- OPENAI_BASE_URL=http://oneapi:3000/v1
# AI模型的API Key。这里默认填写了OneAPI的快速默认key测试通后务必及时修改
- CHAT_API_KEY=sk-fastgpt
# 数据库最大连接数
- DB_MAX_LINK=30
# 登录凭证密钥
@@ -173,54 +170,48 @@ services:
volumes:
- ./config.json:/app/data/config.json
# AI Proxy
aiproxy:
image: 'ghcr.io/labring/sealos-aiproxy-service:latest'
container_name: aiproxy
restart: unless-stopped
depends_on:
aiproxy_pg:
condition: service_healthy
# oneapi
mysql:
image: registry.cn-hangzhou.aliyuncs.com/fastgpt/mysql:8.0.36 # 阿里云
# image: mysql:8.0.36
container_name: mysql
restart: always
ports:
- '3002:3000'
- 3306:3306
networks:
- fastgpt
command: --default-authentication-plugin=mysql_native_password
environment:
# 对应 fastgpt 里的AIPROXY_API_TOKEN
- ADMIN_KEY=aiproxy
# 错误日志详情保存时间(小时)
- LOG_DETAIL_STORAGE_HOURS=1
# 数据库连接地址
- SQL_DSN=postgres://postgres:aiproxy@aiproxy_pg:5432/aiproxy
# 最大重试次数
- RetryTimes=3
# 不需要计费
- BILLING_ENABLED=false
# 不需要严格检测模型
- DISABLE_MODEL_CONFIG=true
healthcheck:
test: ['CMD', 'curl', '-f', 'http://localhost:3000/api/status']
interval: 5s
timeout: 5s
retries: 10
aiproxy_pg:
# image: pgvector/pgvector:0.8.0-pg15 # docker hub
image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云
restart: unless-stopped
container_name: aiproxy_pg
# 默认root密码仅首次运行有效
MYSQL_ROOT_PASSWORD: oneapimmysql
MYSQL_DATABASE: oneapi
volumes:
- ./aiproxy_pg:/var/lib/postgresql/data
- ./mysql:/var/lib/mysql
oneapi:
container_name: oneapi
image: ghcr.io/songquanpeng/one-api:v0.6.7
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/one-api:v0.6.6 # 阿里云
ports:
- 3001:3000
depends_on:
- mysql
networks:
- fastgpt
restart: always
environment:
TZ: Asia/Shanghai
POSTGRES_USER: postgres
POSTGRES_DB: aiproxy
POSTGRES_PASSWORD: aiproxy
healthcheck:
test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy']
interval: 5s
timeout: 5s
retries: 10
# mysql 连接参数
- SQL_DSN=root:oneapimmysql@tcp(mysql:3306)/oneapi
# 登录凭证加密密钥
- SESSION_SECRET=oneapikey
# 内存缓存
- MEMORY_CACHE_ENABLED=true
# 启动聚合更新,减少数据交互频率
- BATCH_UPDATE_ENABLED=true
# 聚合更新时长
- BATCH_UPDATE_INTERVAL=10
# 初始化的 root 密钥(建议部署完后更改,否则容易泄露)
- INITIAL_ROOT_TOKEN=fastgpt
volumes:
- ./oneapi:/data
networks:
fastgpt:

View File

@@ -7,12 +7,12 @@ version: '3.3'
services:
# db
pg:
image: pgvector/pgvector:0.8.0-pg15 # docker hub
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云
image: pgvector/pgvector:0.7.0-pg15 # docker hub
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.7.0 # 阿里云
container_name: pg
restart: always
# ports: # 生产环境建议不要暴露
# - 5432:5432
ports: # 生产环境建议不要暴露
- 5432:5432
networks:
- fastgpt
environment:
@@ -95,13 +95,10 @@ services:
- FE_DOMAIN=
# root 密码,用户名为: root。如果需要修改 root 密码,直接修改这个环境变量,并重启即可。
- DEFAULT_ROOT_PSW=1234
# AI Proxy 的地址,如果配了该地址,优先使用
- AIPROXY_API_ENDPOINT=http://aiproxy:3000
# AI Proxy 的 Admin Token与 AI Proxy 中的环境变量 ADMIN_KEY
- AIPROXY_API_TOKEN=aiproxy
# 模型中转地址(如果用了 AI Proxy下面 2 个就不需要了,旧版 OneAPI 用户,使用下面的变量)
# - OPENAI_BASE_URL=http://oneapi:3000/v1
# - CHAT_API_KEY=sk-fastgpt
# AI模型的API地址哦。务必加 /v1。这里默认填写了OneApi的访问地址。
- OPENAI_BASE_URL=http://oneapi:3000/v1
# AI模型的API Key。这里默认填写了OneAPI的快速默认key测试通后务必及时修改
- CHAT_API_KEY=sk-fastgpt
# 数据库最大连接数
- DB_MAX_LINK=30
# 登录凭证密钥
@@ -130,54 +127,48 @@ services:
volumes:
- ./config.json:/app/data/config.json
# AI Proxy
aiproxy:
image: 'ghcr.io/labring/sealos-aiproxy-service:latest'
container_name: aiproxy
restart: unless-stopped
depends_on:
aiproxy_pg:
condition: service_healthy
# oneapi
mysql:
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/mysql:8.0.36 # 阿里云
image: mysql:8.0.36
container_name: mysql
restart: always
ports:
- '3002:3000'
- 3306:3306
networks:
- fastgpt
command: --default-authentication-plugin=mysql_native_password
environment:
# 对应 fastgpt 里的AIPROXY_API_TOKEN
- ADMIN_KEY=aiproxy
# 错误日志详情保存时间(小时)
- LOG_DETAIL_STORAGE_HOURS=1
# 数据库连接地址
- SQL_DSN=postgres://postgres:aiproxy@aiproxy_pg:5432/aiproxy
# 最大重试次数
- RetryTimes=3
# 不需要计费
- BILLING_ENABLED=false
# 不需要严格检测模型
- DISABLE_MODEL_CONFIG=true
healthcheck:
test: ['CMD', 'curl', '-f', 'http://localhost:3000/api/status']
interval: 5s
timeout: 5s
retries: 10
aiproxy_pg:
# image: pgvector/pgvector:0.8.0-pg15 # docker hub
image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云
restart: unless-stopped
container_name: aiproxy_pg
# 默认root密码仅首次运行有效
MYSQL_ROOT_PASSWORD: oneapimmysql
MYSQL_DATABASE: oneapi
volumes:
- ./aiproxy_pg:/var/lib/postgresql/data
- ./mysql:/var/lib/mysql
oneapi:
container_name: oneapi
image: ghcr.io/songquanpeng/one-api:v0.6.7
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/one-api:v0.6.6 # 阿里云
ports:
- 3001:3000
depends_on:
- mysql
networks:
- fastgpt
restart: always
environment:
TZ: Asia/Shanghai
POSTGRES_USER: postgres
POSTGRES_DB: aiproxy
POSTGRES_PASSWORD: aiproxy
healthcheck:
test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy']
interval: 5s
timeout: 5s
retries: 10
# mysql 连接参数
- SQL_DSN=root:oneapimmysql@tcp(mysql:3306)/oneapi
# 登录凭证加密密钥
- SESSION_SECRET=oneapikey
# 内存缓存
- MEMORY_CACHE_ENABLED=true
# 启动聚合更新,减少数据交互频率
- BATCH_UPDATE_ENABLED=true
# 聚合更新时长
- BATCH_UPDATE_INTERVAL=10
# 初始化的 root 密钥(建议部署完后更改,否则容易泄露)
- INITIAL_ROOT_TOKEN=fastgpt
volumes:
- ./oneapi:/data
networks:
fastgpt:

View File

@@ -75,13 +75,10 @@ services:
- FE_DOMAIN=
# root 密码,用户名为: root。如果需要修改 root 密码,直接修改这个环境变量,并重启即可。
- DEFAULT_ROOT_PSW=1234
# AI Proxy 的地址,如果配了该地址,优先使用
- AIPROXY_API_ENDPOINT=http://aiproxy:3000
# AI Proxy 的 Admin Token与 AI Proxy 中的环境变量 ADMIN_KEY
- AIPROXY_API_TOKEN=aiproxy
# 模型中转地址(如果用了 AI Proxy下面 2 个就不需要了,旧版 OneAPI 用户,使用下面的变量)
# - OPENAI_BASE_URL=http://oneapi:3000/v1
# - CHAT_API_KEY=sk-fastgpt
# AI模型的API地址哦。务必加 /v1。这里默认填写了OneApi的访问地址。
- OPENAI_BASE_URL=http://oneapi:3000/v1
# AI模型的API Key。这里默认填写了OneAPI的快速默认key测试通后务必及时修改
- CHAT_API_KEY=sk-fastgpt
# 数据库最大连接数
- DB_MAX_LINK=30
# 登录凭证密钥
@@ -111,54 +108,48 @@ services:
volumes:
- ./config.json:/app/data/config.json
# AI Proxy
aiproxy:
image: 'ghcr.io/labring/sealos-aiproxy-service:latest'
container_name: aiproxy
restart: unless-stopped
depends_on:
aiproxy_pg:
condition: service_healthy
# oneapi
mysql:
image: registry.cn-hangzhou.aliyuncs.com/fastgpt/mysql:8.0.36 # 阿里云
# image: mysql:8.0.36
container_name: mysql
restart: always
ports:
- '3002:3000'
- 3306:3306
networks:
- fastgpt
command: --default-authentication-plugin=mysql_native_password
environment:
# 对应 fastgpt 里的AIPROXY_API_TOKEN
- ADMIN_KEY=aiproxy
# 错误日志详情保存时间(小时)
- LOG_DETAIL_STORAGE_HOURS=1
# 数据库连接地址
- SQL_DSN=postgres://postgres:aiproxy@aiproxy_pg:5432/aiproxy
# 最大重试次数
- RetryTimes=3
# 不需要计费
- BILLING_ENABLED=false
# 不需要严格检测模型
- DISABLE_MODEL_CONFIG=true
healthcheck:
test: ['CMD', 'curl', '-f', 'http://localhost:3000/api/status']
interval: 5s
timeout: 5s
retries: 10
aiproxy_pg:
# image: pgvector/pgvector:0.8.0-pg15 # docker hub
image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云
restart: unless-stopped
container_name: aiproxy_pg
# 默认root密码仅首次运行有效
MYSQL_ROOT_PASSWORD: oneapimmysql
MYSQL_DATABASE: oneapi
volumes:
- ./aiproxy_pg:/var/lib/postgresql/data
- ./mysql:/var/lib/mysql
oneapi:
container_name: oneapi
image: ghcr.io/songquanpeng/one-api:v0.6.7
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/one-api:v0.6.6 # 阿里云
ports:
- 3001:3000
depends_on:
- mysql
networks:
- fastgpt
restart: always
environment:
TZ: Asia/Shanghai
POSTGRES_USER: postgres
POSTGRES_DB: aiproxy
POSTGRES_PASSWORD: aiproxy
healthcheck:
test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy']
interval: 5s
timeout: 5s
retries: 10
# mysql 连接参数
- SQL_DSN=root:oneapimmysql@tcp(mysql:3306)/oneapi
# 登录凭证加密密钥
- SESSION_SECRET=oneapikey
# 内存缓存
- MEMORY_CACHE_ENABLED=true
# 启动聚合更新,减少数据交互频率
- BATCH_UPDATE_ENABLED=true
# 聚合更新时长
- BATCH_UPDATE_INTERVAL=10
# 初始化的 root 密钥(建议部署完后更改,否则容易泄露)
- INITIAL_ROOT_TOKEN=fastgpt
volumes:
- ./oneapi:/data
networks:
fastgpt:

View File

@@ -6,7 +6,6 @@ data:
"openapiPrefix": "fastgpt",
"vectorMaxProcess": 15,
"qaMaxProcess": 15,
"vlmMaxProcess": 15,
"pgHNSWEfSearch": 100
},
"llmModels": [

Binary file not shown.

Before

Width:  |  Height:  |  Size: 198 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 198 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 229 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 422 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 235 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 341 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 212 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 240 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 342 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 363 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 348 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 222 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 135 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 329 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 216 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 85 KiB

View File

@@ -23,54 +23,8 @@ weight: 707
"systemEnv": {
"vectorMaxProcess": 15, // 向量处理线程数量
"qaMaxProcess": 15, // 问答拆分线程数量
"vlmMaxProcess": 15, // 图片理解模型最大处理进程
"tokenWorkers": 50, // Token 计算线程保持数,会持续占用内存,不能设置太大。
"pgHNSWEfSearch": 100, // 向量搜索参数。越大搜索越精确但是速度越慢。设置为100有99%+精度。
"customPdfParse": { // 4.9.0 新增配置
"url": "", // 自定义 PDF 解析服务地址
"key": "", // 自定义 PDF 解析服务密钥
"doc2xKey": "", // doc2x 服务密钥
"price": 0 // PDF 解析服务价格
}
"pgHNSWEfSearch": 100 // 向量搜索参数。越大搜索越精确但是速度越慢。设置为100有99%+精度。
}
}
```
## 自定义 PDF 解析配置
自定义 PDF 服务解析的优先级高于 Doc2x 服务,所以如果使用 Doc2x 服务,请勿配置自定义 PDF 服务。
### 使用 Sealos PDF 解析服务
#### 1. 申请 Sealos AI proxy API Key
[点击打开 Sealos Pdf parser 官网](https://cloud.sealos.run/?uid=fnWRt09fZP&openapp=system-aiproxy),并进行对应 API Key 的申请。
#### 2. 修改 FastGPT 配置文件
`systemEnv.customPdfParse.url`填写成`https://aiproxy.hzh.sealos.run/v1/parse/pdf?model=parse-pdf`
`systemEnv.customPdfParse.key`填写成在 Sealos AI proxy 中申请的 API Key。
![](/imgs/deployconfig-aiproxy.png)
### 使用 Doc2x 解析 PDF 文件
`Doc2x` 是一个国内的专业 PDF 解析服务。
#### 1. 申请 Doc2x 服务
[点击打开 Doc2x 官网](https://doc2x.noedgeai.com?inviteCode=9EACN2),并进行对应 API Key 的申请。
#### 2. 修改 FastGPT 配置文件
开源版用户在 `config.json` 文件中添加 `systemEnv.customPdfParse.doc2xKey` 配置,并填写上申请到的 API Key。并重启服务。
商业版用户在 Admin 后台根据表单指引填写 Doc2x 服务密钥。
#### 3. 开始使用
在知识库导入数据或应用文件上传配置中,可以勾选`PDF 增强解析`,则在对 PDF 解析时候,会使用 Doc2x 服务进行解析。
### 使用 Marker 解析 PDF 文件
[点击查看 Marker 接入教程](/docs/development/custom-models/marker)

View File

@@ -11,13 +11,13 @@ weight: 909
PDF 是一个相对复杂的文件格式,在 FastGPT 内置的 pdf 解析器中,依赖的是 pdfjs 库解析,该库基于逻辑解析,无法有效的理解复杂的 pdf 文件。所以我们在解析 pdf 时候,如果遇到图片、表格、公式等非简单文本内容,会发现解析效果不佳。
市面上目前有多种解析 PDF 的方法,比如使用 [Marker](https://github.com/VikParuchuri/marker),该项目使用了 Surya 模型,基于视觉解析,可以有效提取图片、表格、公式等复杂内容。
市面上目前有多种解析 PDF 的方法,比如使用 [Marker](https://github.com/VikParuchuri/marker),该项目使用了 Surya 模型,基于视觉解析,可以有效提取图片、表格、公式等复杂内容。为了可以让 Marker 快速接入 FastGPT我们做了一个自定义解析的拓展 Demo。
`FastGPT v4.9.0` 版本中,开源版用户可以在`config.json`文件中添加`systemEnv.customPdfParse`配置,来使用 Marker 解析 PDF 文件。商业版用户直接在 Admin 后台根据表单指引填写即可。需重新拉取 Marker 镜像,接口格式已变动。
在 FastGPT 4.8.15 版本中,你可以通过增加一个环境变量,来替换掉 FastGPT 系统内置解析器,实现自定义的文档解析服务。该功能只是 Demo 阶段,后期配置模式和交互规则会发生改动。
## 使用教程
### 1. 安装 Marker
### 1. 安装 Marker
参考文档 [Marker 安装教程](https://github.com/labring/FastGPT/tree/main/plugins/model/pdf-marker),安装 Marker 模型。封装的 API 已经适配了 FastGPT 自定义解析服务。
@@ -28,35 +28,22 @@ docker pull crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/
docker run --gpus all -itd -p 7231:7231 --name model_pdf_v1 crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:latest
```
### 2. 添加 FastGPT 文件配置
### 2. 添加 FastGPT 环境变量
```json
{
xxx
"systemEnv": {
xxx
"customPdfParse": {
"url": "http://xxxx.com/v1/parse/file", // 自定义 PDF 解析服务地址
"key": "", // 自定义 PDF 解析服务密钥
"doc2xKey": "", // doc2x 服务密钥
"price": 0 // PDF 解析服务价格
}
}
}
```
CUSTOM_READ_FILE_URL=http://xxxx.com/v1/parse/file
CUSTOM_READ_FILE_EXTENSION=pdf
```
需要重启服务
* CUSTOM_READ_FILE_URL - 自定义解析服务的地址, host改成解析服务的访问地址path 不能变动
* CUSTOM_READ_FILE_EXTENSION - 支持的文件后缀,多个文件类型,可用逗号隔开。
### 3. 测试效果
通过知识库上传一个 pdf 文件,并勾选上 `PDF 增强解析`
![alt text](/imgs/marker2.png)
确认上传后,可以在日志中看到 LOG LOG_LEVEL需要设置 info 或者 debug
通过知识库上传一个 pdf 文件,并确认上传,可以在日志中看到 LOG LOG_LEVEL需要设置 info 或者 debug
```
[Info] 2024-12-05 15:04:42 Parsing files from an external service
[Info] 2024-12-05 15:04:42 Parsing files from an external service
[Info] 2024-12-05 15:07:08 Custom file parsing is complete, time: 1316ms
```
@@ -64,10 +51,6 @@ docker run --gpus all -itd -p 7231:7231 --name model_pdf_v1 crpi-h3snc261q1dosro
![alt text](/imgs/image-10.png)
同样的,在应用中,你可以在文件上传配置里,勾选上 `PDF 增强解析`
![alt text](/imgs/marker3.png)
## 效果展示

View File

@@ -30,7 +30,7 @@ weight: 707
### PgVector版本
非常轻量,适合知识库索引量在 5000 万以下。
非常轻量,适合数据量在 5000 万以下。
{{< table "table-hover table-striped-columns" >}}
| 环境 | 最低配置(单节点) | 推荐配置 |
@@ -149,14 +149,18 @@ curl -o docker-compose.yml https://raw.githubusercontent.com/labring/FastGPT/mai
{{< tab tabName="PgVector版本" >}}
{{< markdownify >}}
无需操作
```
FE_DOMAIN=你的前端访问地址,例如 http://192.168.0.1:3000;https://cloud.fastgpt.cn
```
{{< /markdownify >}}
{{< /tab >}}
{{< tab tabName="Milvus版本" >}}
{{< markdownify >}}
无需操作
```
FE_DOMAIN=你的前端访问地址,例如 http://192.168.0.1:3000;https://cloud.fastgpt.cn
```
{{< /markdownify >}}
{{< /tab >}}
@@ -170,6 +174,7 @@ curl -o docker-compose.yml https://raw.githubusercontent.com/labring/FastGPT/mai
{{% alert icon="🤖" context="success" %}}
1. 修改 `MILVUS_ADDRESS` 和 `MILVUS_TOKEN` 链接参数,分别对应 `zilliz` 的 `Public Endpoint` 和 `Api key`,记得把自己 ip 加入白名单。
2. 修改 FE_DOMAIN=你的前端访问地址,例如 http://192.168.0.1:3000;https://cloud.fastgpt.cn
{{% /alert %}}
@@ -184,28 +189,36 @@ curl -o docker-compose.yml https://raw.githubusercontent.com/labring/FastGPT/mai
```bash
# 启动容器
docker-compose up -d
# 等待10sOneAPI第一次总是要重启几次才能连上Mysql
sleep 10
# 重启一次oneapi(由于OneAPI的默认Key有点问题不重启的话会提示找不到渠道临时手动重启一次解决等待作者修复)
docker restart oneapi
```
### 4. 访问 FastGPT
### 4. 打开 OneAPI 添加模型
目前可以通过 `ip:3000` 直接访问(注意开放防火墙)。登录用户名为 `root`密码为`docker-compose.yml`环境变量里设置的 `DEFAULT_ROOT_PSW`
可以通过 `ip:3001` 访问 OneAPI,默认账号为 `root`,密码为 `123456`。
在OneApi中添加合适的AI模型渠道。[点击查看相关教程](/docs/development/modelconfig/one-api/)
只需要添加模型即可模板已经配置好了oneapi的连接地址和令牌无需变更。
### 5. 访问 FastGPT
目前可以通过 `ip:3000` 直接访问(注意防火墙)。登录用户名为 `root`,密码为`docker-compose.yml`环境变量里设置的 `DEFAULT_ROOT_PSW`
如果需要域名访问,请自行安装并配置 Nginx。
首次运行,会自动初始化 root 用户,密码为 `1234`(与环境变量中的`DEFAULT_ROOT_PSW`一致),日志可能会提示一次`MongoServerError: Unable to read from a snapshot due to pending collection catalog changes;`可忽略。
首次运行,会自动初始化 root 用户,密码为 `1234`(与环境变量中的`DEFAULT_ROOT_PSW`一致),日志会提示一次`MongoServerError: Unable to read from a snapshot due to pending collection catalog changes;`可忽略。
### 5. 配置模型
### 6. 配置模型
- 首次登录 FastGPT 后,系统会提示未配置`语言模型`和`索引模型`,并自动跳转模型配置页面。系统必须至少有这两类模型才能正常使用。
- 如果系统未正常跳转,可以在`账号-模型提供商`页面,进行模型配置。[点击查看相关教程](/docs/development/modelconfig/ai-proxy)
- 目前已知可能问题:首次进入系统后,整个浏览器 tab 无法响应。此时需要删除该tab重新打开一次即可。
登录FastGPT后进入模型配置页面,务必先配置至少一个语言模型和一个向量模型,否则系统无法正常使用。
[点击查看模型配置教程](/docs/development/modelconfig/intro/)
## FAQ
### 登录系统后,浏览器无法响应
无法点击任何内容刷新也无效。此时需要删除该tab重新打开一次即可。
### Mongo 副本集自动初始化失败
最新的 docker-compose 示例优化 Mongo 副本集初始化,实现了全自动。目前在 ubuntu20,22 centos7, wsl2, mac, windows 均通过测试。仍无法正常启动,大部分是因为 cpu 不支持 AVX 指令集,可以切换 Mongo4.x 版本。

View File

@@ -70,7 +70,6 @@ Mongo 数据库需要注意,需要注意在连接地址中增加 `directConnec
- `vectorMaxProcess`: 向量生成最大进程,根据数据库和 key 的并发数来决定,通常单个 120 号2c4g 服务器设置 10~15。
- `qaMaxProcess`: QA 生成最大进程
- `vlmMaxProcess`: 图片理解模型最大进程
- `pgHNSWEfSearch`: PostgreSQL vector 索引参数,越大搜索精度越高但是速度越慢,具体可看 pgvector 官方说明。
### 5. 运行

View File

@@ -7,18 +7,9 @@ draft: false
images: []
---
## 1. 停止服务
```bash
docker-compose down
```
## 2. Copy文件夹
## Copy文件
Docker 部署数据库都会通过 volume 挂载本地的目录进入容器,如果要迁移,直接复制这些目录即可。
`PG 数据`: pg/data
`Mongo 数据`: mongo/data
直接把pg 和 mongo目录全部复制走即可。
`Mongo 数据`: mongo/data

View File

@@ -1,129 +0,0 @@
---
title: '通过 AI Proxy 接入模型'
description: '通过 AI Proxy 接入模型'
icon: 'api'
draft: false
toc: true
weight: 744
---
`FastGPT 4.8.23` 版本开始,引入 AI Proxy 来进一步方便模型的配置。
AI Proxy 与 One API 类似,也是作为一个 OpenAI 接口管理 & 分发系统,可以通过标准的 OpenAI API 格式访问所有的大模型,开箱即用。
## 部署
### Docker 版本
`docker-compose.yml` 文件已加入了 AI Proxy 配置,可直接使用。[点击查看最新的 yml 配置](https://raw.githubusercontent.com/labring/FastGPT/main/deploy/docker/docker-compose-pgvector.yml)
从旧版升级的用户,可以复制 yml 里ai proxy 的配置,加入到旧的 yml 文件中。
## 运行原理
AI proxy 核心模块:
1. 渠道管理:管理各家模型提供商的 API Key 和可用模型列表。
2. 模型调用:根据请求的模型,选中对应的渠道;根据渠道的 API 格式,构造请求体,发送请求;格式化响应体成标准格式返回。
3. 调用日志:详细记录模型调用的日志,并在错误时候可以记录其入参和报错信息,方便排查。
运行流程:
![aiproxy12](/imgs/aiproxy1.png)
## 在 FastGPT 中使用
AI proxy 相关功能,可以在`账号-模型提供商`页面找到。
### 1. 创建渠道
`模型提供商`的配置页面,点击`模型渠道`,进入渠道配置页面
![aiproxy1](/imgs/aiproxy-1.png)
点击右上角的“新增渠道”,即可进入渠道配置页面
![aiproxy2](/imgs/aiproxy-2.png)
以阿里云的模型为例,进行如下配置
![aiproxy3](/imgs/aiproxy-3.png)
1. 渠道名:展示在外部的渠道名称,仅作标识;
2. 厂商:模型对应的厂商,不同厂商对应不同的默认地址和 API 密钥格式;
3. 模型:当前渠道具体可以使用的模型,系统内置了主流的一些模型,如果下拉框中没有想要的选项,可以点击“新增模型”,[增加自定义模型](/docs/development/modelconfig/intro/#新增自定义模型);
4. 模型映射:将 FastGPT 请求的模型,映射到具体提供的模型上。例如:
```json
{
"gpt-4o-test": "gpt-4o"
}
```
FastGPT 中的模型为 `gpt-4o-test`,向 AI Proxy 发起请求时也是 `gpt-4o-test`。AI proxy 在向上游发送请求时,实际的 `model` 为 `gpt-4o`。
5. 代理地址:具体请求的地址,系统给每个主流渠道配置了默认的地址,如果无需改动则不用填。
6. API 密钥:从模型厂商处获取的 API 凭证。注意部分厂商需要提供多个密钥组合,可以根据提示进行输入。
最后点击“新增”,就能在“模型渠道”下看到刚刚配置的渠道
![aiproxy4](/imgs/aiproxy-4.png)
### 2. 渠道测试
然后可以对渠道进行测试,确保配置的模型有效
![aiproxy5](/imgs/aiproxy-5.png)
点击“模型测试”,可以看到配置的模型列表,点击“开始测试”
![aiproxy6](/imgs/aiproxy-6.png)
等待模型测试完成后,会输出每个模型的测试结果以及请求时长
![aiproxy7](/imgs/aiproxy-7.png)
### 3. 启用模型
最后在`模型配置`中,可以选择启用对应的模型,这样就能在平台中使用了,更多模型配置可以参考[模型配置](/docs/development/modelconfig/intro)
![aiproxy8](/imgs/aiproxy-8.png)
## 其他功能介绍
### 优先级
范围 1~100。数值越大,越容易被优先选中。
![aiproxy9](/imgs/aiproxy-9.png)
### 启用/禁用
在渠道右侧的控制菜单中,还可以控制渠道的启用或禁用,被禁用的渠道将无法再提供模型服务
![aiproxy10](/imgs/aiproxy-10.png)
### 调用日志
在`调用日志`页面,会展示发送到模型处的请求记录,包括具体的输入输出 tokens、请求时间、请求耗时、请求地址等等。错误的请求则会详细记录入参和错误信息,方便排查,但仅会保留 1 小时(环境变量里可配置)。
![aiproxy11](/imgs/aiproxy-11.png)
## 从 OneAPI 迁移到 AI Proxy
可以从任意终端,发起 1 个 HTTP 请求。其中 `{{host}}` 替换成 AI Proxy 地址,`{{admin_key}}` 替换成 AI Proxy 中 `ADMIN_KEY` 的值。
Body 参数 `dsn` 为 OneAPI 的 mysql 连接串。
```bash
curl --location --request POST '{{host}}/api/channels/import/oneapi' \
--header 'Authorization: Bearer {{admin_key}}' \
--header 'Content-Type: application/json' \
--data-raw '{
"dsn": "mysql://root:s5mfkwst@tcp(dbconn.sealoshzh.site:33123)/mydb"
}'
```
执行成功的情况下会返回 "success": true
脚本目前不是完全准确,仅是简单地做数据映射,主要是迁移`代理地址`、`模型`和`API 密钥`,建议迁移后再进行手动检查。

View File

@@ -13,15 +13,9 @@ weight: 744
## 配置模型
### 1. 对接模型提供商
### 1. 使用 OneAPI 对接模型提供商
#### AI Proxy
从 4.8.23 版本开始, FastGPT 支持在页面上配置模型提供商,即使用 [AI Proxy 接入教程](/docs/development/modelconfig/ai-proxy) 来进行模型聚合,从而可以对接更多模型提供商。
#### One API
也可以使用 [OneAPI 接入教程](/docs/development/modelconfig/one-api)。你需要先在各服务商申请好 API 接入 OneAPI 后,才能在 FastGPT 中使用这些模型。示例流程如下:
可以使用 [OneAPI 接入教程](/docs/development/modelconfig/one-api) 来进行模型聚合,从而可以对接更多模型提供商。你需要先在各服务商申请好 API 接入 OneAPI 后,才能在 FastGPT 中使用这些模型。示例流程如下:
![alt text](/imgs/image-95.png)
@@ -34,7 +28,17 @@ weight: 744
在 OneAPI 配置好模型后,你就可以打开 FastGPT 页面,启用对应模型了。
### 2. 配置介绍
### 2. 登录 root 用户
仅 root 用户可以进行模型配置。
### 3. 进入模型配置页面
登录 root 用户后,在`账号-模型提供商-模型配置`中,你可以看到所有内置的模型和自定义模型,以及哪些模型启用了。
![alt text](/image-90.png)
### 4. 配置介绍
{{% alert icon="🤖 " context="success" %}}
注意:

View File

@@ -20,6 +20,10 @@ FastGPT 目前采用模型分离的部署方案FastGPT 中只兼容 OpenAI
## 部署
### Docker 版本
`docker-compose.yml` 文件已加入了 OneAPI 配置,可直接使用。默认暴露在 3001 端口。
### Sealos 版本
* 北京区: [点击部署 OneAPI](https://hzh.sealos.run/?openapp=system-template%3FtemplateName%3Done-api)

View File

@@ -35,7 +35,7 @@ CHAT_API_KEY=sk-xxxxxx
![alt text](/imgs/image-104.png)
## 4. 体验测试
## 5. 体验测试
### 测试对话和图片识别

View File

@@ -297,9 +297,7 @@ curl --location --request DELETE 'http://localhost:3000/api/core/dataset/delete?
| --- | --- | --- |
| datasetId | 知识库ID | ✅ |
| parentId | 父级ID不填则默认为根目录 | |
| trainingType | 数据处理方式。chunk: 按文本长度进行分割;qa: 问答对提取 | ✅ |
| autoIndexes | 是否自动生成索引(仅商业版支持) | |
| imageIndex | 是否自动生成图片索引(仅商业版支持) | |
| trainingType | 训练模式。chunk: 按文本长度进行分割;qa: QA拆分;auto: 增强训练 | ✅ |
| chunkSize | 预估块大小 | |
| chunkSplitter | 自定义最高优先分割符号 | |
| qaPrompt | qa拆分提示词 | |
@@ -1081,7 +1079,7 @@ curl --location --request POST 'https://api.fastgpt.in/api/core/dataset/data/pus
--header 'Content-Type: application/json' \
--data-raw '{
    "collectionId": "64663f451ba1676dbdef0499",
"trainingType": "chunk",
"trainingMode": "chunk",
"prompt": "可选。qa 拆分引导词chunk 模式下忽略",
"billId": "可选。如果有这个值,本次的数据会被聚合到一个订单中,这个值可以重复使用。可以参考 [创建训练订单] 获取该值。",
    "data": [

View File

@@ -1,56 +0,0 @@
---
title: 'V4.9.0(进行中)'
description: 'FastGPT V4.9.0 更新说明'
icon: 'upgrade'
draft: false
toc: true
weight: 801
---
## 更新指南
### 1. 做好数据库备份
### 2. 更新镜像
### 3. 运行升级脚本
从任意终端,发起 1 个 HTTP 请求。其中 {{rootkey}} 替换成环境变量里的 `rootkey`;{{host}} 替换成**FastGPT 域名**。
```bash
curl --location --request POST 'https://{{host}}/api/admin/initv490' \
--header 'rootkey: {{rootkey}}' \
--header 'Content-Type: application/json'
```
**脚本功能**
1. 升级 PG Vector 插件版本
2. 全量更新知识库集合字段。
3. 全量更新知识库数据中index 的 type 类型。(时间较长)
## 兼容 & 弃用
1. 弃用 - 之前私有化部署的自定义文件解析方案,请同步更新到最新的配置方案。[点击查看 PDF 增强解析配置](/docs/development/configuration/#使用-doc2x-解析-pdf-文件)
2. 弃用 - 弃用旧版本地文件上传 API:/api/core/dataset/collection/create/file(以前仅商业版可用的 API),该接口已切换成:/api/core/dataset/collection/create/localFile。
3. 停止维护,即将弃用 - 外部文件库相关 API可通过 API 文件库替代。
4. API更新 - 上传文件至知识库、创建连接集合、API 文件库、推送分块数据等带有 `trainingType` 字段的接口,`trainingType`字段未来仅支持`chunk`和`QA`两种模式。增强索引模式将设置单独字段:`autoIndexes`,目前仍有适配旧版`trainingType=auto`代码,但请尽快变更成新接口类型。具体可见:[知识库 OpenAPI 文档](/docs/development/openapi/dataset.md)
## 🚀 新增内容
1. PDF增强解析交互添加到页面上。同时内嵌 Doc2x 服务,可直接使用 Doc2x 服务解析 PDF 文件。
2. 图片自动标注,同时修改知识库文件上传部分数据逻辑和交互。
3. pg vector 插件升级 0.8.0 版本,引入迭代搜索,减少部分数据无法被检索的情况。
## ⚙️ 优化
1. 知识库数据不再限制索引数量,可无限自定义。同时可自动更新输入文本的索引,不影响自定义索引。
2. Markdown 解析,增加链接后中文标点符号检测,增加空格。
3. Prompt 模式工具调用,支持思考模型。同时优化其格式检测,减少空输出的概率。
4. Mongo 文件读取流合并,减少计算量。同时优化存储 chunks极大提高大文件读取速度。50M PDF 读取时间提高 3 倍。
## 🐛 修复
1. 增加网页抓取安全链接校验。
2. 批量运行时,全局变量未进一步传递到下一次运行中,导致最终变量更新错误。

View File

@@ -0,0 +1,31 @@
/**
 * Synchronously invoke `fn`, invoking it again whenever it throws.
 *
 * @param fn    Synchronous callback to execute.
 * @param retry How many additional attempts are allowed after a failure (default 2).
 * @returns The value returned by the first call of `fn` that does not throw.
 * @throws The error raised by the last attempt once no retries remain.
 */
export const retryRun = <T>(fn: () => T, retry = 2): T => {
  let attemptsLeft = retry;
  for (;;) {
    try {
      return fn();
    } catch (error) {
      // Give up only when the retry budget is no longer positive.
      if (!(attemptsLeft > 0)) {
        throw error;
      }
      attemptsLeft -= 1;
    }
  }
};
/**
 * Run `fn` over every element of `arr` with at most `batchSize` calls in flight.
 *
 * Changes from the previous implementation:
 * - Falsy elements (`0`, `''`, `false`, `null`, `undefined`) are processed like any
 *   other element. Previously a worker stopped as soon as it dequeued one, which
 *   dropped that element and could leave later elements unprocessed.
 * - The input array is no longer emptied; elements are read by index.
 * - `result[i]` corresponds to `arr[i]`, regardless of which call finished first.
 *
 * @param arr       Items to process.
 * @param fn        Callback invoked once per item; its return value is awaited.
 * @param batchSize Maximum number of concurrent workers (default 10). Values that
 *                  are not a positive number fall back to a single worker.
 * @returns The awaited results, in the same order as `arr`.
 */
export const batchRun = async <T>(arr: T[], fn: (arr: T) => any, batchSize = 10) => {
  const result: any[] = new Array(arr.length);

  // Shared cursor: each worker claims the next unprocessed index.
  let cursor = 0;
  const worker = async () => {
    while (cursor < arr.length) {
      const index = cursor++;
      result[index] = await fn(arr[index]);
    }
  };

  // Never start more workers than there are items, and always at least one
  // when there is work to do.
  const workerCount = Math.min(Math.max(1, Math.floor(batchSize) || 1), arr.length);

  await Promise.all(Array.from({ length: workerCount }, () => worker()));

  return result;
};

View File

@@ -1,4 +1,4 @@
import { batchRun } from '../system/utils';
import { batchRun } from '../fn/utils';
import { getNanoid, simpleText } from './tools';
import type { ImageType } from '../../../service/worker/readFile/type';
@@ -37,80 +37,6 @@ export const simpleMarkdownText = (rawText: string) => {
return rawText.trim();
};
/**
 * Convert every `<table>…</table>` fragment found in `content` into a Markdown table.
 *
 * Each matched table is parsed row by row into a 2-D grid, honouring `colspan` and
 * `rowspan` attributes. The first row becomes the Markdown header, followed by a
 * `---` separator row and the remaining rows.
 *
 * A table is returned unchanged when no `<tr>…</tr>` rows are found or when any
 * error is thrown during conversion.
 *
 * NOTE(review): the patterns below only match `<table>` and `<tr>` tags written
 * without attributes, and only `<td>` cells (not `<th>`) — confirm this matches the
 * HTML produced by the upstream parser.
 *
 * @param content Text that may contain HTML tables.
 * @returns `content` with each convertible table replaced by Markdown.
 */
export const htmlTable2Md = (content: string): string => {
return content.replace(/<table>[\s\S]*?<\/table>/g, (htmlTable) => {
try {
// Remove every newline together with the whitespace that follows it
const cleanHtml = htmlTable.replace(/\n\s*/g, '');
const rows = cleanHtml.match(/<tr>(.*?)<\/tr>/g);
// No rows matched: leave the original HTML in place
if (!rows) return htmlTable;
// Parse table data
// tableData[row][col] holds the cell text, or '^^' for a slot covered by a span
let tableData: string[][] = [];
let maxColumns = 0;
// Try to convert to markdown table
rows.forEach((row, rowIndex) => {
if (!tableData[rowIndex]) {
tableData[rowIndex] = [];
}
let colIndex = 0;
const cells = row.match(/<td.*?>(.*?)<\/td>/g) || [];
cells.forEach((cell) => {
// Skip slots that already hold a value (e.g. filled by a rowspan from a previous row)
while (tableData[rowIndex][colIndex]) {
colIndex++;
}
// Span attributes default to 1 when absent
const colspan = parseInt(cell.match(/colspan="(\d+)"/)?.[1] || '1');
const rowspan = parseInt(cell.match(/rowspan="(\d+)"/)?.[1] || '1');
// Strip the opening/closing <td> tags and surrounding whitespace
const content = cell.replace(/<td.*?>|<\/td>/g, '').trim();
// Fill the whole spanned rectangle: top-left slot gets the text, the rest get '^^'
for (let i = 0; i < rowspan; i++) {
for (let j = 0; j < colspan; j++) {
if (!tableData[rowIndex + i]) {
tableData[rowIndex + i] = [];
}
tableData[rowIndex + i][colIndex + j] = i === 0 && j === 0 ? content : '^^';
}
}
colIndex += colspan;
maxColumns = Math.max(maxColumns, colIndex);
});
// Pad empty slots of this row with a space, up to the widest row seen so far
for (let i = 0; i < maxColumns; i++) {
if (!tableData[rowIndex][i]) {
tableData[rowIndex][i] = ' ';
}
}
});
const chunks: string[] = [];
// Header row: the first parsed row, with span placeholders rendered as blanks
const headerCells = tableData[0]
.slice(0, maxColumns)
.map((cell) => (cell === '^^' ? ' ' : cell || ' '));
const headerRow = '| ' + headerCells.join(' | ') + ' |';
chunks.push(headerRow);
// Markdown separator row, one '---' per header cell
const separator = '| ' + Array(headerCells.length).fill('---').join(' | ') + ' |';
chunks.push(separator);
// Body rows: blank out placeholders and pad short rows to maxColumns
tableData.slice(1).forEach((row) => {
const paddedRow = row
.slice(0, maxColumns)
.map((cell) => (cell === '^^' ? ' ' : cell || ' '));
while (paddedRow.length < maxColumns) {
paddedRow.push(' ');
}
chunks.push('| ' + paddedRow.join(' | ') + ' |');
});
return chunks.join('\n');
} catch (error) {
// Any failure falls back to the untouched HTML table
return htmlTable;
}
});
};
/**
* format markdown
* 1. upload base64

View File

@@ -43,14 +43,10 @@ export type FastGPTConfigFileType = {
export type FastGPTFeConfigsType = {
show_workorder?: boolean;
show_emptyChat?: boolean;
isPlus?: boolean;
register_method?: ['email' | 'phone' | 'sync'];
login_method?: ['email' | 'phone']; // Attention: login method is diffrent with oauth
find_password_method?: ['email' | 'phone'];
bind_notification_method?: ['email' | 'phone'];
googleClientVerKey?: string;
show_emptyChat?: boolean;
show_appStore?: boolean;
show_git?: boolean;
show_pay?: boolean;
@@ -61,19 +57,15 @@ export type FastGPTFeConfigsType = {
show_aiproxy?: boolean;
concatMd?: string;
concatMd?: string;
docUrl?: string;
openAPIDocUrl?: string;
systemPluginCourseUrl?: string;
appTemplateCourse?: string;
customApiDomain?: string;
customSharePageDomain?: string;
systemTitle?: string;
systemDescription?: string;
scripts?: { [key: string]: string }[];
favicon?: string;
googleClientVerKey?: string;
isPlus?: boolean;
sso?: {
icon?: string;
title?: string;
@@ -99,14 +91,13 @@ export type FastGPTFeConfigsType = {
exportDatasetLimitMinutes?: number;
websiteSyncLimitMinuted?: number;
};
scripts?: { [key: string]: string }[];
favicon?: string;
customApiDomain?: string;
customSharePageDomain?: string;
uploadFileMaxAmount?: number;
uploadFileMaxSize?: number;
// Compute by systemEnv.customPdfParse
showCustomPdfParse?: boolean;
customPdfParsePrice?: number;
lafEnv?: string;
navbarItems?: NavbarItemType[];
externalProviderWorkflowVariables?: ExternalProviderWorkflowVarType[];
@@ -116,18 +107,9 @@ export type SystemEnvType = {
openapiPrefix?: string;
vectorMaxProcess: number;
qaMaxProcess: number;
vlmMaxProcess: number;
pgHNSWEfSearch: number;
tokenWorkers: number; // token count max worker
oneapiUrl?: string;
chatApiKey?: string;
customPdfParse?: {
url?: string;
key?: string;
doc2xKey?: string;
price?: number; // n points/1 page
};
};

View File

@@ -16,24 +16,3 @@ export const retryFn = async <T>(fn: () => Promise<T>, retryTimes = 3): Promise<
return Promise.reject(error);
}
};
/**
 * Run `fn` over every element of `arr` with at most `batchSize` calls in flight.
 *
 * Changes from the previous implementation:
 * - Falsy elements (`0`, `''`, `false`, `null`, `undefined`) are processed like any
 *   other element. Previously a worker stopped as soon as it dequeued one, which
 *   dropped that element and could leave later elements unprocessed.
 * - The input array is no longer emptied; elements are read by index.
 * - `result[i]` corresponds to `arr[i]`, regardless of which call finished first.
 *
 * @param arr       Items to process.
 * @param fn        Callback invoked once per item; its return value is awaited.
 * @param batchSize Maximum number of concurrent workers (default 10). Values that
 *                  are not a positive number fall back to a single worker.
 * @returns The awaited results, in the same order as `arr`.
 */
export const batchRun = async <T>(arr: T[], fn: (arr: T) => any, batchSize = 10) => {
  const result: any[] = new Array(arr.length);

  // Shared cursor: each worker claims the next unprocessed index.
  let cursor = 0;
  const worker = async () => {
    while (cursor < arr.length) {
      const index = cursor++;
      result[index] = await fn(arr[index]);
    }
  };

  // Never start more workers than there are items, and always at least one
  // when there is work to do.
  const workerCount = Math.min(Math.max(1, Math.floor(batchSize) || 1), arr.length);

  await Promise.all(Array.from({ length: workerCount }, () => worker()));

  return result;
};

View File

@@ -22,7 +22,7 @@ export const defaultQAModels: LLMModelItemType[] = [
maxTemperature: 1.2,
charsPointsPrice: 0,
censor: false,
vision: true,
vision: false,
datasetProcess: true,
toolChoice: true,
functionCall: false,
@@ -59,17 +59,10 @@ export const defaultSTTModels: STTModelType[] = [
export const getModelFromList = (
modelList: { provider: ModelProviderIdType; name: string; model: string }[],
model: string
):
| {
avatar: string;
provider: ModelProviderIdType;
name: string;
model: string;
}
| undefined => {
) => {
const modelData = modelList.find((item) => item.model === model) ?? modelList[0];
if (!modelData) {
return;
throw new Error('No Key model is configured');
}
const provider = getModelProvider(modelData.provider);
return {

View File

@@ -188,7 +188,6 @@ export type AppAutoExecuteConfigType = {
// File
export type AppFileSelectConfigType = {
canSelectFile: boolean;
customPdfParse?: boolean;
canSelectImg: boolean;
maxFiles: number;
};

View File

@@ -1,11 +1,8 @@
import type {
AIChatItemValueItemType,
ChatItemType,
ChatItemValueItemType,
RuntimeUserPromptType,
SystemChatItemValueItemType,
UserChatItemType,
UserChatItemValueItemType
UserChatItemType
} from '../../core/chat/type.d';
import { ChatFileTypeEnum, ChatItemValueTypeEnum, ChatRoleEnum } from '../../core/chat/constants';
import type {
@@ -177,24 +174,137 @@ export const GPTMessages2Chats = (
): ChatItemType[] => {
const chatMessages = messages
.map((item) => {
const value: ChatItemType['value'] = [];
const obj = GPT2Chat[item.role];
const value = (() => {
if (
obj === ChatRoleEnum.System &&
item.role === ChatCompletionRequestMessageRoleEnum.System
) {
const value: SystemChatItemValueItemType[] = [];
if (Array.isArray(item.content)) {
item.content.forEach((item) => [
if (
obj === ChatRoleEnum.System &&
item.role === ChatCompletionRequestMessageRoleEnum.System
) {
if (Array.isArray(item.content)) {
item.content.forEach((item) => [
value.push({
type: ChatItemValueTypeEnum.text,
text: {
content: item.text
}
})
]);
} else {
value.push({
type: ChatItemValueTypeEnum.text,
text: {
content: item.content
}
});
}
} else if (
obj === ChatRoleEnum.Human &&
item.role === ChatCompletionRequestMessageRoleEnum.User
) {
if (typeof item.content === 'string') {
value.push({
type: ChatItemValueTypeEnum.text,
text: {
content: item.content
}
});
} else if (Array.isArray(item.content)) {
item.content.forEach((item) => {
if (item.type === 'text') {
value.push({
type: ChatItemValueTypeEnum.text,
text: {
content: item.text
}
})
]);
});
} else if (item.type === 'image_url') {
value.push({
//@ts-ignore
type: ChatItemValueTypeEnum.file,
file: {
type: ChatFileTypeEnum.image,
name: '',
url: item.image_url.url
}
});
} else if (item.type === 'file_url') {
value.push({
// @ts-ignore
type: ChatItemValueTypeEnum.file,
file: {
type: ChatFileTypeEnum.file,
name: item.name,
url: item.url
}
});
}
});
}
} else if (
obj === ChatRoleEnum.AI &&
item.role === ChatCompletionRequestMessageRoleEnum.Assistant
) {
if (item.tool_calls && reserveTool) {
// save tool calls
const toolCalls = item.tool_calls as ChatCompletionMessageToolCall[];
value.push({
//@ts-ignore
type: ChatItemValueTypeEnum.tool,
tools: toolCalls.map((tool) => {
let toolResponse =
messages.find(
(msg) =>
msg.role === ChatCompletionRequestMessageRoleEnum.Tool &&
msg.tool_call_id === tool.id
)?.content || '';
toolResponse =
typeof toolResponse === 'string' ? toolResponse : JSON.stringify(toolResponse);
return {
id: tool.id,
toolName: tool.toolName || '',
toolAvatar: tool.toolAvatar || '',
functionName: tool.function.name,
params: tool.function.arguments,
response: toolResponse as string
};
})
});
} else if (item.function_call && reserveTool) {
const functionCall = item.function_call as ChatCompletionMessageFunctionCall;
const functionResponse = messages.find(
(msg) =>
msg.role === ChatCompletionRequestMessageRoleEnum.Function &&
msg.name === item.function_call?.name
) as ChatCompletionFunctionMessageParam;
if (functionResponse) {
value.push({
//@ts-ignore
type: ChatItemValueTypeEnum.tool,
tools: [
{
id: functionCall.id || '',
toolName: functionCall.toolName || '',
toolAvatar: functionCall.toolAvatar || '',
functionName: functionCall.name,
params: functionCall.arguments,
response: functionResponse.content || ''
}
]
});
}
} else if (item.interactive) {
value.push({
//@ts-ignore
type: ChatItemValueTypeEnum.interactive,
interactive: item.interactive
});
} else if (typeof item.content === 'string') {
const lastValue = value[value.length - 1];
if (lastValue && lastValue.type === ChatItemValueTypeEnum.text && lastValue.text) {
lastValue.text.content += item.content;
} else {
value.push({
type: ChatItemValueTypeEnum.text,
@@ -203,145 +313,8 @@ export const GPTMessages2Chats = (
}
});
}
return value;
} else if (
obj === ChatRoleEnum.Human &&
item.role === ChatCompletionRequestMessageRoleEnum.User
) {
const value: UserChatItemValueItemType[] = [];
if (typeof item.content === 'string') {
value.push({
type: ChatItemValueTypeEnum.text,
text: {
content: item.content
}
});
} else if (Array.isArray(item.content)) {
item.content.forEach((item) => {
if (item.type === 'text') {
value.push({
type: ChatItemValueTypeEnum.text,
text: {
content: item.text
}
});
} else if (item.type === 'image_url') {
value.push({
//@ts-ignore
type: ChatItemValueTypeEnum.file,
file: {
type: ChatFileTypeEnum.image,
name: '',
url: item.image_url.url
}
});
} else if (item.type === 'file_url') {
value.push({
// @ts-ignore
type: ChatItemValueTypeEnum.file,
file: {
type: ChatFileTypeEnum.file,
name: item.name,
url: item.url
}
});
}
});
}
return value;
} else if (
obj === ChatRoleEnum.AI &&
item.role === ChatCompletionRequestMessageRoleEnum.Assistant
) {
const value: AIChatItemValueItemType[] = [];
if (typeof item.reasoning_text === 'string') {
value.push({
type: ChatItemValueTypeEnum.reasoning,
reasoning: {
content: item.reasoning_text
}
});
}
if (item.tool_calls && reserveTool) {
// save tool calls
const toolCalls = item.tool_calls as ChatCompletionMessageToolCall[];
value.push({
//@ts-ignore
type: ChatItemValueTypeEnum.tool,
tools: toolCalls.map((tool) => {
let toolResponse =
messages.find(
(msg) =>
msg.role === ChatCompletionRequestMessageRoleEnum.Tool &&
msg.tool_call_id === tool.id
)?.content || '';
toolResponse =
typeof toolResponse === 'string' ? toolResponse : JSON.stringify(toolResponse);
return {
id: tool.id,
toolName: tool.toolName || '',
toolAvatar: tool.toolAvatar || '',
functionName: tool.function.name,
params: tool.function.arguments,
response: toolResponse as string
};
})
});
}
if (item.function_call && reserveTool) {
const functionCall = item.function_call as ChatCompletionMessageFunctionCall;
const functionResponse = messages.find(
(msg) =>
msg.role === ChatCompletionRequestMessageRoleEnum.Function &&
msg.name === item.function_call?.name
) as ChatCompletionFunctionMessageParam;
if (functionResponse) {
value.push({
//@ts-ignore
type: ChatItemValueTypeEnum.tool,
tools: [
{
id: functionCall.id || '',
toolName: functionCall.toolName || '',
toolAvatar: functionCall.toolAvatar || '',
functionName: functionCall.name,
params: functionCall.arguments,
response: functionResponse.content || ''
}
]
});
}
}
if (item.interactive) {
value.push({
//@ts-ignore
type: ChatItemValueTypeEnum.interactive,
interactive: item.interactive
});
}
if (typeof item.content === 'string') {
const lastValue = value[value.length - 1];
if (lastValue && lastValue.type === ChatItemValueTypeEnum.text && lastValue.text) {
lastValue.text.content += item.content;
} else {
value.push({
type: ChatItemValueTypeEnum.text,
text: {
content: item.content
}
});
}
}
return value;
}
return [];
})();
}
return {
dataId: item.dataId,

View File

@@ -77,7 +77,6 @@ export type AIChatItemValueItemType = {
| ChatItemValueTypeEnum.reasoning
| ChatItemValueTypeEnum.tool
| ChatItemValueTypeEnum.interactive;
text?: {
content: string;
};

View File

@@ -1,5 +1,5 @@
import { DatasetDataIndexItemType, DatasetSchemaType } from './type';
import { DatasetCollectionTypeEnum, DatasetCollectionDataProcessModeEnum } from './constants';
import { TrainingModeEnum, DatasetCollectionTypeEnum } from './constants';
import type { LLMModelItemType } from '../ai/model.d';
import { ParentIdType } from 'common/parentFolder/type';
@@ -10,11 +10,9 @@ export type DatasetUpdateBody = {
name?: string;
avatar?: string;
intro?: string;
agentModel?: LLMModelItemType;
status?: DatasetSchemaType['status'];
agentModel?: string;
vlmModel?: string;
websiteConfig?: DatasetSchemaType['websiteConfig'];
externalReadUrl?: DatasetSchemaType['externalReadUrl'];
defaultPermission?: DatasetSchemaType['defaultPermission'];
@@ -29,10 +27,7 @@ export type DatasetUpdateBody = {
/* ================= collection ===================== */
export type DatasetCollectionChunkMetadataType = {
parentId?: string;
customPdfParse?: boolean;
trainingType?: DatasetCollectionDataProcessModeEnum;
imageIndex?: boolean;
autoIndexes?: boolean;
trainingType?: TrainingModeEnum;
chunkSize?: number;
chunkSplitter?: string;
qaPrompt?: string;
@@ -136,15 +131,9 @@ export type PostWebsiteSyncParams = {
export type PushDatasetDataProps = {
collectionId: string;
data: PushDatasetDataChunkProps[];
trainingType?: DatasetCollectionDataProcessModeEnum;
autoIndexes?: boolean;
imageIndex?: boolean;
trainingMode: TrainingModeEnum;
prompt?: string;
billId?: string;
// Abandon
trainingMode?: DatasetCollectionDataProcessModeEnum;
};
export type PushDatasetDataResponse = {
insertLen: number;

View File

@@ -1,4 +1,4 @@
import { DatasetCollectionTypeEnum } from '../constants';
import { DatasetCollectionTypeEnum, TrainingModeEnum, TrainingTypeMap } from '../constants';
import { DatasetCollectionSchemaType } from '../type';
export const getCollectionSourceData = (collection?: DatasetCollectionSchemaType) => {
@@ -16,3 +16,9 @@ export const getCollectionSourceData = (collection?: DatasetCollectionSchemaType
/**
 * Report whether a collection type is treated as a folder.
 * Returns true for the `folder` and `virtual` types, false for every other type.
 */
export const checkCollectionIsFolder = (type: DatasetCollectionTypeEnum) => {
  switch (type) {
    case DatasetCollectionTypeEnum.folder:
    case DatasetCollectionTypeEnum.virtual:
      return true;
    default:
      return false;
  }
};
/**
 * Resolve the display label for a training mode.
 * Returns an empty string when `type` is missing or has no entry in `TrainingTypeMap`.
 */
export const getTrainingTypeLabel = (type?: TrainingModeEnum) => {
  const entry = type ? TrainingTypeMap[type] : undefined;
  return entry ? entry.label : '';
};

View File

@@ -109,26 +109,6 @@ export const DatasetCollectionSyncResultMap = {
}
};
export enum DatasetCollectionDataProcessModeEnum {
chunk = 'chunk',
qa = 'qa',
auto = 'auto' // abandon
}
export const DatasetCollectionDataProcessModeMap = {
[DatasetCollectionDataProcessModeEnum.chunk]: {
label: i18nT('common:core.dataset.training.Chunk mode'),
tooltip: i18nT('common:core.dataset.import.Chunk Split Tip')
},
[DatasetCollectionDataProcessModeEnum.qa]: {
label: i18nT('common:core.dataset.training.QA mode'),
tooltip: i18nT('common:core.dataset.import.QA Import Tip')
},
[DatasetCollectionDataProcessModeEnum.auto]: {
label: i18nT('common:core.dataset.training.Auto mode'),
tooltip: i18nT('common:core.dataset.training.Auto mode Tip')
}
};
/* ------------ data -------------- */
/* ------------ training -------------- */
@@ -144,11 +124,28 @@ export enum ImportDataSourceEnum {
export enum TrainingModeEnum {
chunk = 'chunk',
qa = 'qa',
auto = 'auto',
image = 'image'
qa = 'qa'
}
export const TrainingTypeMap = {
[TrainingModeEnum.chunk]: {
label: i18nT('common:core.dataset.training.Chunk mode'),
tooltip: i18nT('common:core.dataset.import.Chunk Split Tip'),
openSource: true
},
[TrainingModeEnum.auto]: {
label: i18nT('common:core.dataset.training.Auto mode'),
tooltip: i18nT('common:core.dataset.training.Auto mode Tip'),
openSource: false
},
[TrainingModeEnum.qa]: {
label: i18nT('common:core.dataset.training.QA mode'),
tooltip: i18nT('common:core.dataset.import.QA Import Tip'),
openSource: true
}
};
/* ------------ search -------------- */
export enum DatasetSearchModeEnum {
embedding = 'embedding',

View File

@@ -20,22 +20,9 @@ export type UpdateDatasetDataProps = {
})[];
};
export type PatchIndexesProps =
| {
type: 'create';
index: Omit<DatasetDataIndexItemType, 'dataId'> & {
dataId?: string;
};
}
| {
type: 'update';
index: DatasetDataIndexItemType;
}
| {
type: 'delete';
index: DatasetDataIndexItemType;
}
| {
type: 'unChange';
index: DatasetDataIndexItemType;
};
export type PatchIndexesProps = {
type: 'create' | 'update' | 'delete' | 'unChange';
index: Omit<DatasetDataIndexItemType, 'dataId'> & {
dataId?: string;
};
};

View File

@@ -1,42 +0,0 @@
import { i18nT } from '../../../../web/i18n/utils';
/**
 * String enum of the index types a dataset data index can carry.
 * Every member's value is identical to its name; the values are also the
 * keys of `DatasetDataIndexMap`.
 */
export enum DatasetDataIndexTypeEnum {
default = 'default',
custom = 'custom',
summary = 'summary',
question = 'question',
image = 'image'
}
/**
 * Display metadata for each index type, keyed by the enum's string values.
 * Each entry holds a `label` built with `i18nT` from a `dataset:` key and a
 * `color` name string.
 */
export const DatasetDataIndexMap: Record<
`${DatasetDataIndexTypeEnum}`,
{
label: any;
color: string;
}
> = {
[DatasetDataIndexTypeEnum.default]: {
label: i18nT('dataset:data_index_default'),
color: 'gray'
},
[DatasetDataIndexTypeEnum.custom]: {
label: i18nT('dataset:data_index_custom'),
color: 'blue'
},
[DatasetDataIndexTypeEnum.summary]: {
label: i18nT('dataset:data_index_summary'),
color: 'green'
},
[DatasetDataIndexTypeEnum.question]: {
label: i18nT('dataset:data_index_question'),
color: 'red'
},
[DatasetDataIndexTypeEnum.image]: {
label: i18nT('dataset:data_index_image'),
color: 'purple'
}
};
// Fallback display metadata: the map entry for the `custom` index type.
export const defaultDatasetIndexData = DatasetDataIndexMap[DatasetDataIndexTypeEnum.custom];

/**
 * Look up the display metadata for an index type, falling back to
 * `defaultDatasetIndexData` when the map has no entry for `type`.
 */
export const getDatasetIndexMapData = (type: `${DatasetDataIndexTypeEnum}`) => {
  const matched = DatasetDataIndexMap[type];
  if (matched) {
    return matched;
  }
  return defaultDatasetIndexData;
};

View File

@@ -1,20 +0,0 @@
import { PushDatasetDataChunkProps } from '../api';
import { TrainingModeEnum } from '../constants';
/**
 * Argument shape for pushing dataset chunks to the training queue (per the type name).
 * The ID and model fields are plain strings; `mode`, `prompt`, `vlmModel`,
 * `billId` and `session` are optional.
 * NOTE(review): `ClientSession` is not imported in the visible lines of this file;
 * confirm it resolves (e.g. from a global/ambient declaration).
 */
export type PushDataToTrainingQueueProps = {
teamId: string;
tmbId: string;
datasetId: string;
collectionId: string;
mode?: TrainingModeEnum;
// Chunks to enqueue.
data: PushDatasetDataChunkProps[];
prompt?: string;
agentModel: string;
vectorModel: string;
vlmModel?: string;
billId?: string;
// presumably a database session used to run the insert transactionally; verify against caller
session?: ClientSession;
};

View File

@@ -2,7 +2,6 @@ import type { LLMModelItemType, EmbeddingModelItemType } from '../../core/ai/mod
import { PermissionTypeEnum } from '../../support/permission/constant';
import { PushDatasetDataChunkProps } from './api';
import {
DatasetCollectionDataProcessModeEnum,
DatasetCollectionTypeEnum,
DatasetStatusEnum,
DatasetTypeEnum,
@@ -13,7 +12,6 @@ import { DatasetPermission } from '../../support/permission/dataset/controller';
import { Permission } from '../../support/permission/controller';
import { APIFileServer, FeishuServer, YuqueServer } from './apiDataset';
import { SourceMemberType } from 'support/user/type';
import { DatasetDataIndexTypeEnum } from './data/constants';
export type DatasetSchemaType = {
_id: string;
@@ -25,14 +23,11 @@ export type DatasetSchemaType = {
avatar: string;
name: string;
vectorModel: string;
agentModel: string;
intro: string;
type: `${DatasetTypeEnum}`;
status: `${DatasetStatusEnum}`;
vectorModel: string;
agentModel: string;
vlmModel?: string;
websiteConfig?: {
url: string;
selector: string;
@@ -57,22 +52,26 @@ export type DatasetCollectionSchemaType = {
parentId?: string;
name: string;
type: DatasetCollectionTypeEnum;
tags?: string[];
createTime: Date;
updateTime: Date;
// Status
forbid?: boolean;
nextSyncTime?: Date;
// Collection metadata
trainingType: TrainingModeEnum;
chunkSize: number;
chunkSplitter?: string;
qaPrompt?: string;
ocrParse?: boolean;
tags?: string[];
fileId?: string; // local file id
rawLink?: string; // link url
externalFileId?: string; //external file id
apiFileId?: string; // api file id
externalFileUrl?: string; // external import url
nextSyncTime?: Date;
rawTextLength?: number;
hashRawText?: string;
metadata?: {
@@ -81,16 +80,6 @@ export type DatasetCollectionSchemaType = {
[key: string]: any;
};
// Parse settings
customPdfParse?: boolean;
// Chunk settings
autoIndexes?: boolean;
imageIndex?: boolean;
trainingType: DatasetCollectionDataProcessModeEnum;
chunkSize: number;
chunkSplitter?: string;
qaPrompt?: string;
};
export type DatasetCollectionTagsSchemaType = {
@@ -101,7 +90,7 @@ export type DatasetCollectionTagsSchemaType = {
};
export type DatasetDataIndexItemType = {
type: `${DatasetDataIndexTypeEnum}`;
defaultIndex: boolean;
dataId: string; // pg data id
text: string;
};
@@ -152,7 +141,6 @@ export type DatasetTrainingSchemaType = {
chunkIndex: number;
weight: number;
indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
retryCount: number;
};
export type CollectionWithDatasetType = DatasetCollectionSchemaType & {
@@ -181,10 +169,9 @@ export type DatasetListItemType = {
sourceMember?: SourceMemberType;
};
export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel' | 'agentModel' | 'vlmModel'> & {
export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel' | 'agentModel'> & {
vectorModel: EmbeddingModelItemType;
agentModel: LLMModelItemType;
vlmModel?: LLMModelItemType;
permission: DatasetPermission;
};

View File

@@ -1,7 +1,6 @@
import { TrainingModeEnum, DatasetCollectionTypeEnum } from './constants';
import { getFileIcon } from '../../common/file/icon';
import { strIsLink } from '../../common/string/tools';
import { DatasetDataIndexTypeEnum } from './data/constants';
export function getCollectionIcon(
type: DatasetCollectionTypeEnum = DatasetCollectionTypeEnum.file,
@@ -39,23 +38,14 @@ export function getSourceNameIcon({
}
/* get dataset data default index */
export function getDefaultIndex(props?: { q?: string; a?: string }) {
const { q = '', a } = props || {};
return [
{
text: q,
type: DatasetDataIndexTypeEnum.default
},
...(a
? [
{
text: a,
type: DatasetDataIndexTypeEnum.default
}
]
: [])
];
export function getDefaultIndex(props?: { q?: string; a?: string; dataId?: string }) {
const { q = '', a, dataId } = props || {};
const qaStr = `${q}\n${a}`.trim();
return {
defaultIndex: true,
text: a ? qaStr : q,
dataId
};
}
export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {

View File

@@ -55,7 +55,7 @@ export const AiChatModule: FlowNodeTemplateType = {
showStatus: true,
isTool: true,
courseUrl: '/docs/guide/workbench/workflow/ai_chat/',
version: '490',
version: '4813',
inputs: [
Input_Template_SettingAiModel,
// --- settings modal

View File

@@ -58,13 +58,6 @@ export const ToolModule: FlowNodeTemplateType = {
valueType: WorkflowIOValueTypeEnum.boolean,
value: true
},
{
key: NodeInputKeyEnum.aiChatReasoning,
renderTypeList: [FlowNodeInputTypeEnum.hidden],
label: '',
valueType: WorkflowIOValueTypeEnum.boolean,
value: true
},
{
key: NodeInputKeyEnum.aiChatTopP,
renderTypeList: [FlowNodeInputTypeEnum.hidden],

View File

@@ -10,7 +10,6 @@ export type AuthTeamRoleProps = {
export type CreateTeamProps = {
name: string;
avatar?: string;
defaultTeam?: boolean;
memberName?: string;
memberAvatar?: string;
notificationAccount?: string;

View File

@@ -47,7 +47,6 @@ export type TeamMemberSchema = {
role: `${TeamMemberRoleEnum}`;
status: `${TeamMemberStatusEnum}`;
avatar: string;
defaultTeam: boolean;
};
export type TeamMemberWithTeamAndUserSchema = TeamMemberSchema & {
@@ -65,7 +64,6 @@ export type TeamTmbItemType = {
balance?: number;
tmbId: string;
teamDomain: string;
defaultTeam: boolean;
role: `${TeamMemberRoleEnum}`;
status: `${TeamMemberStatusEnum}`;
notificationAccount?: string;

View File

@@ -10,8 +10,7 @@ export enum UsageSourceEnum {
wecom = 'wecom',
feishu = 'feishu',
dingtalk = 'dingtalk',
official_account = 'official_account',
pdfParse = 'pdfParse'
official_account = 'official_account'
}
export const UsageSourceMap = {
@@ -44,8 +43,5 @@ export const UsageSourceMap = {
},
[UsageSourceEnum.dingtalk]: {
label: i18nT('account_usage:dingtalk')
},
[UsageSourceEnum.pdfParse]: {
label: i18nT('account_usage:pdf_parse')
}
};

View File

@@ -7,7 +7,6 @@ export type UsageListItemCountType = {
outputTokens?: number;
charsLength?: number;
duration?: number;
pages?: number;
// deprecated
tokens?: number;

View File

@@ -52,9 +52,7 @@ export async function uploadFile({
const stats = await fsp.stat(path);
if (!stats.isFile()) return Promise.reject(`${path} is not a file`);
const readStream = fs.createReadStream(path, {
highWaterMark: 256 * 1024
});
const readStream = fs.createReadStream(path);
// Add default metadata
metadata.teamId = teamId;
@@ -64,27 +62,9 @@ export async function uploadFile({
// create a gridfs bucket
const bucket = getGridBucket(bucketName);
const fileSize = stats.size;
const chunkSizeBytes = (() => {
// 计算理想块大小:文件大小 ÷ 目标块数(10)
const idealChunkSize = Math.ceil(fileSize / 10);
// 确保块大小至少为512KB
const minChunkSize = 512 * 1024; // 512KB
// 取理想块大小和最小块大小中的较大值
let chunkSize = Math.max(idealChunkSize, minChunkSize);
// 将块大小向上取整到最接近的64KB的倍数使其更整齐
chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
return chunkSize;
})();
const stream = bucket.openUploadStream(filename, {
metadata,
contentType,
chunkSizeBytes
contentType
});
// save to gridfs
@@ -206,25 +186,20 @@ export async function getDownloadStream({
export const readFileContentFromMongo = async ({
teamId,
tmbId,
bucketName,
fileId,
isQAImport = false,
customPdfParse = false
isQAImport = false
}: {
teamId: string;
tmbId: string;
bucketName: `${BucketNameEnum}`;
fileId: string;
isQAImport?: boolean;
customPdfParse?: boolean;
}): Promise<{
rawText: string;
filename: string;
}> => {
const bufferId = `${fileId}-${customPdfParse}`;
// read buffer
const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: bufferId }, undefined, {
const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: fileId }, undefined, {
...readFromSecondary
}).lean();
if (fileBuffer) {
@@ -252,11 +227,9 @@ export const readFileContentFromMongo = async ({
// Get raw text
const { rawText } = await readRawContentByFileBuffer({
customPdfParse,
extension,
isQAImport,
teamId,
tmbId,
buffer: fileBuffers,
encoding,
metadata: {
@@ -267,7 +240,7 @@ export const readFileContentFromMongo = async ({
// < 14M
if (fileBuffers.length < 14 * 1024 * 1024 && rawText.trim()) {
MongoRawTextBuffer.create({
sourceId: bufferId,
sourceId: fileId,
rawText,
metadata: {
filename: file.filename

View File

@@ -3,13 +3,15 @@ import { PassThrough } from 'stream';
export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
return new Promise<Buffer>((resolve, reject) => {
const chunks: Uint8Array[] = [];
const chunks: Buffer[] = [];
let totalLength = 0;
stream.on('data', (chunk) => {
chunks.push(chunk);
totalLength += chunk.length;
});
stream.on('end', () => {
const resultBuffer = Buffer.concat(chunks); // 一次性拼接
const resultBuffer = Buffer.concat(chunks, totalLength); // 一次性拼接
resolve(resultBuffer);
});
stream.on('error', (err) => {
@@ -19,26 +21,25 @@ export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
};
export const stream2Encoding = async (stream: NodeJS.ReadableStream) => {
const start = Date.now();
const copyStream = stream.pipe(new PassThrough());
/* get encoding */
const buffer = await (() => {
return new Promise<Buffer>((resolve, reject) => {
const chunks: Uint8Array[] = [];
let totalLength = 0;
let tmpBuffer: Buffer = Buffer.from([]);
stream.on('data', (chunk) => {
if (totalLength < 200) {
chunks.push(chunk);
totalLength += chunk.length;
if (tmpBuffer.length < 200) {
tmpBuffer = Buffer.concat([tmpBuffer, chunk]);
if (totalLength >= 200) {
resolve(Buffer.concat(chunks));
if (tmpBuffer.length >= 200) {
resolve(tmpBuffer);
}
}
});
stream.on('end', () => {
resolve(Buffer.concat(chunks));
resolve(tmpBuffer);
});
stream.on('error', (err) => {
reject(err);

View File

@@ -1,27 +0,0 @@
import axios from 'axios';
import { addLog } from '../../system/log';
import { serverRequestBaseUrl } from '../../api/serverRequest';
import { getFileContentTypeFromHeader, guessBase64ImageType } from '../utils';
/**
 * Download the image at `url` and return it as a `data:<type>;base64,<payload>` string.
 *
 * The type comes from the response's `content-type` header when
 * `getFileContentTypeFromHeader` yields a value; otherwise it is guessed from
 * the base64 payload. On failure the error is logged and re-thrown to the caller.
 */
export const getImageBase64 = async (url: string) => {
  addLog.debug(`Load image to base64: ${url}`);

  try {
    const { data, headers } = await axios.get(url, {
      baseURL: serverRequestBaseUrl,
      responseType: 'arraybuffer',
      proxy: false
    });

    const encoded = Buffer.from(data, 'binary').toString('base64');
    const headerType = getFileContentTypeFromHeader(headers['content-type']);
    // Only fall back to sniffing the payload when the header gave no usable type.
    const mimeType = headerType || guessBase64ImageType(encoded);

    return `data:${mimeType};base64,${encoded}`;
  } catch (err) {
    addLog.debug(`Load image to base64 failed: ${url}`);
    console.log(err);
    throw err;
  }
};

View File

@@ -1,23 +1,18 @@
import { uploadMongoImg } from '../image/controller';
import FormData from 'form-data';
import { WorkerNameEnum, runWorker } from '../../../worker/utils';
import fs from 'fs';
import type { ImageType, ReadFileResponse } from '../../../worker/readFile/type';
import type { ReadFileResponse } from '../../../worker/readFile/type';
import axios from 'axios';
import { addLog } from '../../system/log';
import { batchRun } from '@fastgpt/global/common/system/utils';
import { htmlTable2Md, matchMdImgTextAndUpload } from '@fastgpt/global/common/string/markdown';
import { createPdfParseUsage } from '../../../support/wallet/usage/controller';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { delay } from '@fastgpt/global/common/system/utils';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { batchRun } from '@fastgpt/global/common/fn/utils';
import { matchMdImgTextAndUpload } from '@fastgpt/global/common/string/markdown';
export type readRawTextByLocalFileParams = {
teamId: string;
tmbId: string;
path: string;
encoding: string;
customPdfParse?: boolean;
metadata?: Record<string, any>;
};
export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams) => {
@@ -27,51 +22,46 @@ export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParam
const buffer = await fs.promises.readFile(path);
return readRawContentByFileBuffer({
const { rawText } = await readRawContentByFileBuffer({
extension,
isQAImport: false,
customPdfParse: params.customPdfParse,
teamId: params.teamId,
tmbId: params.tmbId,
encoding: params.encoding,
buffer,
metadata: params.metadata
});
return {
rawText
};
};
export const readRawContentByFileBuffer = async ({
teamId,
tmbId,
extension,
isQAImport,
teamId,
buffer,
encoding,
metadata,
customPdfParse = false,
isQAImport = false
metadata
}: {
teamId: string;
tmbId: string;
isQAImport?: boolean;
extension: string;
teamId: string;
buffer: Buffer;
encoding: string;
metadata?: Record<string, any>;
customPdfParse?: boolean;
isQAImport: boolean;
}): Promise<ReadFileResponse> => {
const systemParse = () =>
runWorker<ReadFileResponse>(WorkerNameEnum.readFile, {
extension,
encoding,
buffer,
teamId
});
const parsePdfFromCustomService = async (): Promise<ReadFileResponse> => {
const url = global.systemEnv.customPdfParse?.url;
const token = global.systemEnv.customPdfParse?.key;
if (!url) return systemParse();
}) => {
// Custom read file service
const customReadfileUrl = process.env.CUSTOM_READ_FILE_URL;
const customReadFileExtension = process.env.CUSTOM_READ_FILE_EXTENSION || '';
const ocrParse = process.env.CUSTOM_READ_FILE_OCR || 'false';
const readFileFromCustomService = async (): Promise<ReadFileResponse | undefined> => {
if (
!customReadfileUrl ||
!customReadFileExtension ||
!customReadFileExtension.includes(extension)
)
return;
const start = Date.now();
addLog.info('Parsing files from an external service');
@@ -80,229 +70,43 @@ export const readRawContentByFileBuffer = async ({
data.append('file', buffer, {
filename: `file.${extension}`
});
data.append('extension', extension);
data.append('ocr', ocrParse);
const { data: response } = await axios.post<{
pages: number;
markdown: string;
error?: Object | string;
}>(url, data, {
success: boolean;
message: string;
data: {
page: number;
markdown: string;
duration: number;
};
}>(customReadfileUrl, data, {
timeout: 600000,
headers: {
...data.getHeaders(),
Authorization: token ? `Bearer ${token}` : undefined
...data.getHeaders()
}
});
if (response.error) {
return Promise.reject(response.error);
}
addLog.info(`Custom file parsing is complete, time: ${Date.now() - start}ms`);
const rawText = response.markdown;
const rawText = response.data.markdown;
const { text, imageList } = matchMdImgTextAndUpload(rawText);
createPdfParseUsage({
teamId,
tmbId,
pages: response.pages
});
return {
rawText: text,
formatText: rawText,
imageList
};
};
const parsePdfFromDoc2x = async (): Promise<ReadFileResponse> => {
const doc2xKey = global.systemEnv.customPdfParse?.doc2xKey;
if (!doc2xKey) return systemParse();
const parseTextImage = async (text: string) => {
// Extract image links and convert to base64
const imageList: { id: string; url: string }[] = [];
const processedText = text.replace(/!\[.*?\]\((http[^)]+)\)/g, (match, url) => {
const id = getNanoid();
imageList.push({
id,
url
});
return `![](${id})`;
});
let resultImageList: ImageType[] = [];
await Promise.all(
imageList.map(async (item) => {
try {
const response = await axios.get(item.url, { responseType: 'arraybuffer' });
const mime = response.headers['content-type'] || 'image/jpeg';
const base64 = response.data.toString('base64');
resultImageList.push({
uuid: item.id,
mime,
base64
});
} catch (error) {
addLog.warn(`Failed to get image from ${item.url}: ${getErrText(error)}`);
}
})
);
return {
text: processedText,
imageList: resultImageList
};
};
let startTime = Date.now();
// 1. Get pre-upload URL first
const { data: preupload_data } = await axios
.post<{ code: string; data: { uid: string; url: string } }>(
'https://v2.doc2x.noedgeai.com/api/v2/parse/preupload',
null,
{
headers: {
Authorization: `Bearer ${doc2xKey}`
}
}
)
.catch((error) => {
return Promise.reject(
`[Pre-upload Error] Failed to get pre-upload URL: ${getErrText(error)}`
);
});
if (preupload_data?.code !== 'success') {
return Promise.reject(`Failed to get pre-upload URL: ${JSON.stringify(preupload_data)}`);
}
const upload_url = preupload_data.data.url;
const uid = preupload_data.data.uid;
// 2. Upload file to pre-signed URL with binary stream
const blob = new Blob([buffer], { type: 'application/pdf' });
const response = await axios
.put(upload_url, blob, {
headers: {
'Content-Type': 'application/pdf'
}
})
.catch((error) => {
return Promise.reject(`[Upload Error] Failed to upload file: ${getErrText(error)}`);
});
if (response.status !== 200) {
return Promise.reject(`Upload failed with status ${response.status}: ${response.statusText}`);
}
await delay(5000);
addLog.debug(`Uploaded file to Doc2x, uid: ${uid}`);
// 3. Get the result by uid
const checkResult = async (retry = 30) => {
if (retry <= 0) {
return Promise.reject(
`[Parse Timeout Error] Failed to get result (uid: ${uid}): Process timeout`
);
}
try {
const { data: result_data } = await axios
.get<{
code: string;
data: {
progress: number;
status: 'processing' | 'failed' | 'success';
result: {
pages: {
md: string;
}[];
};
};
}>(`https://v2.doc2x.noedgeai.com/api/v2/parse/status?uid=${uid}`, {
headers: {
Authorization: `Bearer ${doc2xKey}`
}
})
.catch((error) => {
return Promise.reject(
`[Parse Status Error] Failed to get parse status: ${getErrText(error)}`
);
});
// Error
if (!['ok', 'success'].includes(result_data.code)) {
return Promise.reject(
`Failed to get result (uid: ${uid}): ${JSON.stringify(result_data)}`
);
}
// Process
if (['ready', 'processing'].includes(result_data.data.status)) {
addLog.debug(`Waiting for the result, uid: ${uid}`);
await delay(5000);
return checkResult(retry - 1);
}
// Finifsh
if (result_data.data.status === 'success') {
const result = result_data.data.result.pages
.map((page) => page.md)
.join('')
// Do some post-processing
.replace(/\\[\(\)]/g, '$')
.replace(/\\[\[\]]/g, '$$')
.replace(/<img\s+src="([^"]+)"(?:\s*\?[^>]*)?(?:\s*\/>|>)/g, '![img]($1)')
.replace(/<!-- Media -->/g, '')
.replace(/<!-- Footnote -->/g, '')
.replace(/\$(.+?)\s+\\tag\{(.+?)\}\$/g, '$$$1 \\qquad \\qquad ($2)$$')
.replace(/\\text\{([^}]*?)(\b\w+)_(\w+\b)([^}]*?)\}/g, '\\text{$1$2\\_$3$4}');
const { text, imageList } = await parseTextImage(htmlTable2Md(result));
return {
pages: result_data.data.result.pages.length,
text,
imageList
};
}
return checkResult(retry - 1);
} catch (error) {
if (retry > 1) {
await delay(100);
return checkResult(retry - 1);
}
return Promise.reject(error);
}
};
const { pages, text, imageList } = await checkResult();
createPdfParseUsage({
teamId,
tmbId,
pages
});
addLog.info(`Doc2x parse success, time: ${Date.now() - startTime}ms`);
return {
rawText: text,
formatText: text,
imageList
};
};
// Custom read file service
const pdfParseFn = async (): Promise<ReadFileResponse> => {
if (!customPdfParse) return systemParse();
if (global.systemEnv.customPdfParse?.url) return parsePdfFromCustomService();
if (global.systemEnv.customPdfParse?.doc2xKey) return parsePdfFromDoc2x();
return systemParse();
};
let { rawText, formatText, imageList } = await (async () => {
if (extension === 'pdf') {
return await pdfParseFn();
}
return await systemParse();
})();
let { rawText, formatText, imageList } =
(await readFileFromCustomService()) ||
(await runWorker<ReadFileResponse>(WorkerNameEnum.readFile, {
extension,
encoding,
buffer,
teamId
}));
// markdown data format
if (imageList) {
@@ -338,5 +142,5 @@ export const readRawContentByFileBuffer = async ({
}
}
return { rawText, formatText, imageList };
return { rawText };
};

View File

@@ -10,11 +10,6 @@ export const SERVICE_LOCAL_HOST =
export const initFastGPTConfig = (config?: FastGPTConfigFileType) => {
if (!config) return;
// Special config computed
config.feConfigs.showCustomPdfParse =
!!config.systemEnv.customPdfParse?.url || !!config.systemEnv.customPdfParse?.doc2xKey;
config.feConfigs.customPdfParsePrice = config.systemEnv.customPdfParse?.price || 0;
global.feConfigs = config.feConfigs;
global.systemEnv = config.systemEnv;
global.subPlans = config.subPlans;

View File

@@ -30,10 +30,10 @@ export const isInternalAddress = (url: string): boolean => {
return true;
}
// For IP addresses, check if they are internal
// For non-metadata URLs, check if it's a domain name
const ipv4Pattern = /^(\d{1,3}\.){3}\d{1,3}$/;
if (!ipv4Pattern.test(hostname)) {
return false; // Not an IP address, so it's a domain name - consider it external by default
return true;
}
// ... existing IP validation code ...

View File

@@ -164,22 +164,34 @@ export class PgVectorCtrl {
}
try {
// const explan: any = await PgClient.query(
// `BEGIN;
// SET LOCAL hnsw.ef_search = ${global.systemEnv?.pgHNSWEfSearch || 100};
// EXPLAIN ANALYZE select id, collection_id, vector <#> '[${vector}]' AS score
// from ${DatasetVectorTableName}
// where team_id='${teamId}'
// AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
// ${forbidCollectionSql}
// order by score limit ${limit};
// COMMIT;`
// );
// console.log(explan[2].rows);
const results: any = await PgClient.query(
`BEGIN;
`
BEGIN;
SET LOCAL hnsw.ef_search = ${global.systemEnv?.pgHNSWEfSearch || 100};
SET LOCAL hnsw.iterative_scan = relaxed_order;
WITH relaxed_results AS MATERIALIZED (
select id, collection_id, vector <#> '[${vector}]' AS score
from ${DatasetVectorTableName}
where team_id='${teamId}'
AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
${filterCollectionIdSql}
${forbidCollectionSql}
order by score limit ${limit}
) SELECT id, collection_id, score FROM relaxed_results ORDER BY score;
select id, collection_id, vector <#> '[${vector}]' AS score
from ${DatasetVectorTableName}
where team_id='${teamId}'
AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
${filterCollectionIdSql}
${forbidCollectionSql}
order by score limit ${limit};
COMMIT;`
);
const rows = results?.[3]?.rows as PgSearchRawType[];
const rows = results?.[2]?.rows as PgSearchRawType[];
return {
results: rows.map((item) => ({

View File

@@ -43,13 +43,13 @@ export async function text2Speech({
const readableStream = response.body as unknown as NodeJS.ReadableStream;
readableStream.pipe(res);
const chunks: Uint8Array[] = [];
let bufferStore = Buffer.from([]);
readableStream.on('data', (chunk) => {
chunks.push(chunk);
bufferStore = Buffer.concat([bufferStore, chunk]);
});
readableStream.on('end', () => {
onSuccess({ model, buffer: Buffer.concat(chunks) });
onSuccess({ model, buffer: bufferStore });
});
readableStream.on('error', (e) => {
onError(e);

View File

@@ -46,8 +46,8 @@
"defaultConfig": {},
"fieldMap": {},
"type": "llm",
"showTopP": false,
"showStopSign": false
"showTopP": true,
"showStopSign": true
}
]
}

View File

@@ -75,81 +75,6 @@
"showTopP": true,
"showStopSign": true,
"responseFormatList": ["text", "json_object"]
},
{
"model": "moonshot-v1-8k-vision-preview",
"name": "moonshot-v1-8k-vision-preview",
"maxContext": 8000,
"maxResponse": 4000,
"quoteMaxToken": 6000,
"maxTemperature": 1,
"vision": true,
"toolChoice": true,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": true,
"usedInClassify": true,
"customCQPrompt": "",
"usedInExtractFields": true,
"usedInQueryExtension": true,
"customExtractPrompt": "",
"usedInToolCall": true,
"defaultConfig": {},
"fieldMap": {},
"type": "llm",
"showTopP": true,
"showStopSign": true,
"responseFormatList": ["text", "json_object"]
},
{
"model": "moonshot-v1-32k-vision-preview",
"name": "moonshot-v1-32k-vision-preview",
"maxContext": 32000,
"maxResponse": 4000,
"quoteMaxToken": 32000,
"maxTemperature": 1,
"vision": true,
"toolChoice": true,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": true,
"usedInClassify": true,
"customCQPrompt": "",
"usedInExtractFields": true,
"usedInQueryExtension": true,
"customExtractPrompt": "",
"usedInToolCall": true,
"defaultConfig": {},
"fieldMap": {},
"type": "llm",
"showTopP": true,
"showStopSign": true,
"responseFormatList": ["text", "json_object"]
},
{
"model": "moonshot-v1-128k-vision-preview",
"name": "moonshot-v1-128k-vision-preview",
"maxContext": 128000,
"maxResponse": 4000,
"quoteMaxToken": 60000,
"maxTemperature": 1,
"vision": true,
"toolChoice": true,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": true,
"usedInClassify": true,
"customCQPrompt": "",
"usedInExtractFields": true,
"usedInQueryExtension": true,
"customExtractPrompt": "",
"usedInToolCall": true,
"defaultConfig": {},
"fieldMap": {},
"type": "llm",
"showTopP": true,
"showStopSign": true,
"responseFormatList": ["text", "json_object"]
}
]
}

View File

@@ -163,13 +163,6 @@ export const loadSystemModels = async (init = false) => {
global.systemDefaultModel.rerank = Array.from(global.reRankModelMap.values())[0];
}
// Sort model list
global.systemActiveModelList.sort((a, b) => {
const providerA = getModelProvider(a.provider);
const providerB = getModelProvider(b.provider);
return providerA.order - providerB.order;
});
console.log('Load models success', JSON.stringify(global.systemActiveModelList, null, 2));
} catch (error) {
console.error('Load models error', error);

View File

@@ -13,11 +13,6 @@ export const getDatasetModel = (model?: string) => {
?.find((item) => item.model === model || item.name === model) ?? getDefaultLLMModel()
);
};
export const getVlmModel = (model?: string) => {
return Array.from(global.llmModelMap.values())
?.filter((item) => item.vision)
?.find((item) => item.model === model || item.name === model);
};
export const getDefaultEmbeddingModel = () => global?.systemDefaultModel.embedding!;
export const getEmbeddingModel = (model?: string) => {

View File

@@ -9,23 +9,41 @@ const AppTemplateSchema = new Schema({
type: String,
required: true
},
name: String,
intro: String,
avatar: String,
author: String,
name: {
type: String
},
intro: {
type: String
},
avatar: {
type: String
},
author: {
type: String
},
tags: {
type: [String],
default: undefined
},
type: String,
isActive: Boolean,
userGuide: Object,
isQuickTemplate: Boolean,
type: {
type: String
},
isActive: {
type: Boolean
},
userGuide: {
type: Object
},
isQuickTemplate: {
type: Boolean
},
order: {
type: Number,
default: -1
},
workflow: Object
workflow: {
type: Object
}
});
AppTemplateSchema.index({ templateId: 1 });

View File

@@ -9,9 +9,10 @@ import type {
} from '@fastgpt/global/core/ai/type.d';
import axios from 'axios';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import { getFileContentTypeFromHeader, guessBase64ImageType } from '../../common/file/utils';
import { serverRequestBaseUrl } from '../../common/api/serverRequest';
import { i18nT } from '../../../web/i18n/utils';
import { addLog } from '../../common/system/log';
import { getImageBase64 } from '../../common/file/image/utils';
export const filterGPTMessageByMaxContext = async ({
messages = [],
@@ -165,13 +166,25 @@ export const loadRequestMessages = async ({
try {
// If imgUrl is a local path, load image from local, and set url to base64
if (imgUrl.startsWith('/') || process.env.MULTIPLE_DATA_TO_BASE64 === 'true') {
const base64 = await getImageBase64(imgUrl);
addLog.debug('Load image from local server', {
baseUrl: serverRequestBaseUrl,
requestUrl: imgUrl
});
const response = await axios.get(imgUrl, {
baseURL: serverRequestBaseUrl,
responseType: 'arraybuffer',
proxy: false
});
const base64 = Buffer.from(response.data, 'binary').toString('base64');
const imageType =
getFileContentTypeFromHeader(response.headers['content-type']) ||
guessBase64ImageType(base64);
return {
...item,
image_url: {
...item.image_url,
url: base64
url: `data:${imageType};base64,${base64}`
}
};
}
@@ -210,8 +223,7 @@ export const loadRequestMessages = async ({
await Promise.all(
content.map(async (item) => {
if (item.type === 'text') {
// If it is array, not need to parse image
if (item.text) return item;
if (item.text) return parseStringWithImages(item.text);
return;
}
if (item.type === 'file_url') return; // LLM not support file_url

View File

@@ -108,15 +108,7 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
return formattedFiles;
};
const getFileContent = async ({
teamId,
tmbId,
apiFileId
}: {
teamId: string;
tmbId: string;
apiFileId: string;
}) => {
const getFileContent = async ({ teamId, apiFileId }: { teamId: string; apiFileId: string }) => {
const data = await request<APIFileContentResponse>(
`/v1/file/content`,
{ id: apiFileId },
@@ -131,7 +123,6 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
if (previewUrl) {
const rawText = await readFileRawTextByUrl({
teamId,
tmbId,
url: previewUrl,
relatedId: apiFileId
});

View File

@@ -1,6 +1,6 @@
import {
DatasetCollectionTypeEnum,
DatasetCollectionDataProcessModeEnum
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { MongoDatasetCollection } from './schema';
@@ -19,14 +19,13 @@ import { predictDataLimitLength } from '../../../../global/core/dataset/utils';
import { mongoSessionRun } from '../../../common/mongo/sessionRun';
import { createTrainingUsage } from '../../../support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getEmbeddingModel, getVlmModel } from '../../ai/model';
import { getLLMModel, getEmbeddingModel } from '../../ai/model';
import { pushDataListToTrainingQueue } from '../training/controller';
import { MongoImage } from '../../../common/file/image/schema';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { addDays } from 'date-fns';
import { MongoDatasetDataText } from '../data/dataTextSchema';
import { retryFn } from '@fastgpt/global/common/system/utils';
import { getTrainingModeByCollection } from './utils';
import { delay, retryFn } from '@fastgpt/global/common/system/utils';
export const createCollectionAndInsertData = async ({
dataset,
@@ -34,7 +33,6 @@ export const createCollectionAndInsertData = async ({
relatedId,
createCollectionParams,
isQAImport = false,
billId,
session
}: {
dataset: DatasetSchemaType;
@@ -43,21 +41,13 @@ export const createCollectionAndInsertData = async ({
createCollectionParams: CreateOneCollectionParams;
isQAImport?: boolean;
billId?: string;
session?: ClientSession;
}) => {
// Adapter 4.9.0
if (createCollectionParams.trainingType === DatasetCollectionDataProcessModeEnum.auto) {
createCollectionParams.trainingType = DatasetCollectionDataProcessModeEnum.chunk;
createCollectionParams.autoIndexes = true;
}
const teamId = createCollectionParams.teamId;
const tmbId = createCollectionParams.tmbId;
// Chunk split params
const trainingType =
createCollectionParams.trainingType || DatasetCollectionDataProcessModeEnum.chunk;
const chunkSize = createCollectionParams.chunkSize || 512;
const trainingType = createCollectionParams.trainingType || TrainingModeEnum.chunk;
const chunkSize = createCollectionParams.chunkSize;
const chunkSplitter = createCollectionParams.chunkSplitter;
const qaPrompt = createCollectionParams.qaPrompt;
const usageName = createCollectionParams.name;
@@ -66,7 +56,7 @@ export const createCollectionAndInsertData = async ({
const chunks = rawText2Chunks({
rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : [],
isQAImport
});
@@ -74,14 +64,7 @@ export const createCollectionAndInsertData = async ({
// 2. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(
getTrainingModeByCollection({
trainingType,
autoIndexes: createCollectionParams.autoIndexes,
imageIndex: createCollectionParams.imageIndex
}),
chunks
)
insertLen: predictDataLimitLength(trainingType, chunks)
});
const fn = async (session: ClientSession) => {
@@ -106,20 +89,15 @@ export const createCollectionAndInsertData = async ({
});
// 4. create training bill
const traingBillId = await (async () => {
if (billId) return billId;
const { billId: newBillId } = await createTrainingUsage({
teamId,
tmbId,
appName: usageName,
billSource: UsageSourceEnum.training,
vectorModel: getEmbeddingModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
vllmModel: getVlmModel(dataset.vlmModel)?.name,
session
});
return newBillId;
})();
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: usageName,
billSource: UsageSourceEnum.training,
vectorModel: getEmbeddingModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 5. insert to training queue
const insertResults = await pushDataListToTrainingQueue({
@@ -129,14 +107,9 @@ export const createCollectionAndInsertData = async ({
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
vlmModel: dataset.vlmModel,
mode: getTrainingModeByCollection({
trainingType,
autoIndexes: createCollectionParams.autoIndexes,
imageIndex: createCollectionParams.imageIndex
}),
trainingMode: trainingType,
prompt: qaPrompt,
billId: traingBillId,
billId,
data: chunks.map((item, index) => ({
...item,
chunkIndex: index
@@ -188,15 +161,10 @@ export async function createOneCollection({
datasetId,
type,
createTime,
updateTime,
hashRawText,
rawTextLength,
metadata = {},
tags,
nextSyncTime,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
fileId,
rawLink,
@@ -204,18 +172,15 @@ export async function createOneCollection({
externalFileUrl,
apiFileId,
// Parse settings
customPdfParse,
imageIndex,
hashRawText,
rawTextLength,
metadata = {},
session,
tags,
// Chunk settings
trainingType = DatasetCollectionDataProcessModeEnum.chunk,
autoIndexes,
chunkSize = 512,
chunkSplitter,
qaPrompt,
session
createTime,
updateTime,
nextSyncTime
}: CreateOneCollectionParams) {
// Create collection tags
const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session });
@@ -231,31 +196,25 @@ export async function createOneCollection({
name,
type,
rawTextLength,
hashRawText,
tags: collectionTags,
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
metadata,
createTime,
updateTime,
nextSyncTime,
...(fileId ? { fileId } : {}),
...(rawLink ? { rawLink } : {}),
...(externalFileId ? { externalFileId } : {}),
...(externalFileUrl ? { externalFileUrl } : {}),
...(apiFileId ? { apiFileId } : {}),
// Parse settings
customPdfParse,
imageIndex,
rawTextLength,
hashRawText,
tags: collectionTags,
// Chunk settings
trainingType,
autoIndexes,
chunkSize,
chunkSplitter,
qaPrompt
createTime,
updateTime,
nextSyncTime
}
],
{ session, ordered: true }

View File

@@ -1,10 +1,7 @@
import { connectionMongo, getMongoModel } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d';
import {
DatasetCollectionTypeMap,
DatasetCollectionDataProcessModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { TrainingTypeMap, DatasetCollectionTypeMap } from '@fastgpt/global/core/dataset/constants';
import { DatasetCollectionName } from '../schema';
import {
TeamCollectionName,
@@ -34,8 +31,6 @@ const DatasetCollectionSchema = new Schema({
ref: DatasetCollectionName,
required: true
},
// Basic info
type: {
type: String,
enum: Object.keys(DatasetCollectionTypeMap),
@@ -45,11 +40,6 @@ const DatasetCollectionSchema = new Schema({
type: String,
required: true
},
tags: {
type: [String],
default: []
},
createTime: {
type: Date,
default: () => new Date()
@@ -58,8 +48,33 @@ const DatasetCollectionSchema = new Schema({
type: Date,
default: () => new Date()
},
forbid: {
type: Boolean,
default: false
},
// chunk filed
trainingType: {
type: String,
enum: Object.keys(TrainingTypeMap)
},
chunkSize: {
type: Number,
required: true
},
chunkSplitter: {
type: String
},
qaPrompt: {
type: String
},
ocrParse: Boolean,
tags: {
type: [String],
default: []
},
// Metadata
// local file collection
fileId: {
type: Schema.Types.ObjectId,
@@ -67,39 +82,22 @@ const DatasetCollectionSchema = new Schema({
},
// web link collection
rawLink: String,
// Api collection
// api collection
apiFileId: String,
// external collection(Abandoned)
// external collection
externalFileId: String,
externalFileUrl: String, // external import url
// next sync time
nextSyncTime: Date,
// metadata
rawTextLength: Number,
hashRawText: String,
metadata: {
type: Object,
default: {}
},
forbid: Boolean,
// next sync time
nextSyncTime: Date,
// Parse settings
customPdfParse: Boolean,
// Chunk settings
imageIndex: Boolean,
autoIndexes: Boolean,
trainingType: {
type: String,
enum: Object.values(DatasetCollectionDataProcessModeEnum)
},
chunkSize: {
type: Number,
required: true
},
chunkSplitter: String,
qaPrompt: String
}
});
DatasetCollectionSchema.virtual('dataset', {

View File

@@ -2,17 +2,12 @@ import { MongoDatasetCollection } from './schema';
import { ClientSession } from '../../../common/mongo';
import { MongoDatasetCollectionTags } from '../tag/schema';
import { readFromSecondary } from '../../../common/mongo/utils';
import { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type';
import {
CollectionWithDatasetType,
DatasetCollectionSchemaType
} from '@fastgpt/global/core/dataset/type';
import {
DatasetCollectionDataProcessModeEnum,
DatasetCollectionSyncResultEnum,
DatasetCollectionTypeEnum,
DatasetSourceReadTypeEnum,
DatasetTypeEnum,
TrainingModeEnum
DatasetTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
import { readDatasetSourceRawText } from '../read';
@@ -165,7 +160,6 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => {
})();
const rawText = await readDatasetSourceRawText({
teamId: collection.teamId,
tmbId: collection.tmbId,
...sourceReadType
});
@@ -226,24 +220,3 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => {
return DatasetCollectionSyncResultEnum.success;
};
/*
QA: 独立进程
Chunk: Image Index -> Auto index -> chunk index
*/
export const getTrainingModeByCollection = (collection: {
trainingType: DatasetCollectionSchemaType['trainingType'];
autoIndexes?: DatasetCollectionSchemaType['autoIndexes'];
imageIndex?: DatasetCollectionSchemaType['imageIndex'];
}) => {
if (collection.trainingType === DatasetCollectionDataProcessModeEnum.qa) {
return TrainingModeEnum.qa;
}
if (collection.imageIndex && global.feConfigs?.isPlus) {
return TrainingModeEnum.image;
}
if (collection.autoIndexes && global.feConfigs?.isPlus) {
return TrainingModeEnum.auto;
}
return TrainingModeEnum.chunk;
};

View File

@@ -7,7 +7,6 @@ import {
} from '@fastgpt/global/support/user/team/constant';
import { DatasetCollectionName } from '../schema';
import { DatasetColCollectionName } from '../collection/schema';
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';
export const DatasetDataCollectionName = 'dataset_datas';
@@ -43,14 +42,9 @@ const DatasetDataSchema = new Schema({
indexes: {
type: [
{
// Abandon
defaultIndex: {
type: Boolean
},
type: {
type: String,
enum: Object.values(DatasetDataIndexTypeEnum),
default: DatasetDataIndexTypeEnum.custom
type: Boolean,
default: false
},
dataId: {
type: String,

View File

@@ -13,15 +13,11 @@ import { POST } from '../../common/api/plusRequest';
export const readFileRawTextByUrl = async ({
teamId,
tmbId,
url,
customPdfParse,
relatedId
}: {
teamId: string;
tmbId: string;
url: string;
customPdfParse?: boolean;
relatedId: string; // externalFileId / apiFileId
}) => {
const response = await axios({
@@ -34,11 +30,8 @@ export const readFileRawTextByUrl = async ({
const buffer = Buffer.from(response.data, 'binary');
const { rawText } = await readRawContentByFileBuffer({
customPdfParse,
isQAImport: false,
extension,
teamId,
tmbId,
buffer,
encoding: 'utf-8',
metadata: {
@@ -56,7 +49,6 @@ export const readFileRawTextByUrl = async ({
*/
export const readDatasetSourceRawText = async ({
teamId,
tmbId,
type,
sourceId,
isQAImport,
@@ -64,14 +56,11 @@ export const readDatasetSourceRawText = async ({
externalFileId,
apiServer,
feishuServer,
yuqueServer,
customPdfParse
yuqueServer
}: {
teamId: string;
tmbId: string;
type: DatasetSourceReadTypeEnum;
sourceId: string;
customPdfParse?: boolean;
isQAImport?: boolean; // csv data
selector?: string; // link selector
@@ -83,11 +72,9 @@ export const readDatasetSourceRawText = async ({
if (type === DatasetSourceReadTypeEnum.fileLocal) {
const { rawText } = await readFileContentFromMongo({
teamId,
tmbId,
bucketName: BucketNameEnum.dataset,
fileId: sourceId,
isQAImport,
customPdfParse
isQAImport
});
return rawText;
} else if (type === DatasetSourceReadTypeEnum.link) {
@@ -101,10 +88,8 @@ export const readDatasetSourceRawText = async ({
if (!externalFileId) return Promise.reject('FileId not found');
const rawText = await readFileRawTextByUrl({
teamId,
tmbId,
url: sourceId,
relatedId: externalFileId,
customPdfParse
relatedId: externalFileId
});
return rawText;
} else if (type === DatasetSourceReadTypeEnum.apiFile) {
@@ -113,8 +98,7 @@ export const readDatasetSourceRawText = async ({
feishuServer,
yuqueServer,
apiFileId: sourceId,
teamId,
tmbId
teamId
});
return rawText;
}
@@ -126,18 +110,16 @@ export const readApiServerFileContent = async ({
feishuServer,
yuqueServer,
apiFileId,
teamId,
tmbId
teamId
}: {
apiServer?: APIFileServer;
feishuServer?: FeishuServer;
yuqueServer?: YuqueServer;
apiFileId: string;
teamId: string;
tmbId: string;
}) => {
if (apiServer) {
return useApiDatasetRequest({ apiServer }).getFileContent({ teamId, tmbId, apiFileId });
return useApiDatasetRequest({ apiServer }).getFileContent({ teamId, apiFileId });
}
if (feishuServer || yuqueServer) {

View File

@@ -67,7 +67,6 @@ const DatasetSchema = new Schema({
required: true,
default: 'gpt-4o-mini'
},
vlmModel: String,
intro: {
type: String,
default: ''

View File

@@ -787,6 +787,7 @@ export const defaultSearchDatasetData = async ({
...props
}: DefaultSearchDatasetDataProps): Promise<SearchDatasetDataResponse> => {
const query = props.queries[0];
const histories = props.histories;
const extensionModel = datasetSearchUsingExtensionQuery
? getLLMModel(datasetSearchExtensionModel)
@@ -796,7 +797,8 @@ export const defaultSearchDatasetData = async ({
await datasetSearchQueryExtension({
query,
extensionModel,
extensionBg: datasetSearchExtensionBg
extensionBg: datasetSearchExtensionBg,
histories
});
const result = await searchDatasetData({

View File

@@ -1,17 +1,16 @@
import { MongoDatasetTraining } from './schema';
import type {
PushDatasetDataChunkProps,
PushDatasetDataProps,
PushDatasetDataResponse
} from '@fastgpt/global/core/dataset/api.d';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { simpleText } from '@fastgpt/global/common/string/tools';
import { ClientSession } from '../../../common/mongo';
import { getLLMModel, getEmbeddingModel, getVlmModel } from '../../ai/model';
import { getLLMModel, getEmbeddingModel } from '../../ai/model';
import { addLog } from '../../../common/system/log';
import { getCollectionWithDataset } from '../controller';
import { mongoSessionRun } from '../../../common/mongo/sessionRun';
import { PushDataToTrainingQueueProps } from '@fastgpt/global/core/dataset/training/type';
import { i18nT } from '../../../../web/i18n/utils';
export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> => {
try {
@@ -29,17 +28,20 @@ export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> =>
export const pushDataListToTrainingQueueByCollectionId = async ({
collectionId,
...props
}: Omit<PushDataToTrainingQueueProps, 'datasetId' | 'agentModel' | 'vectorModel' | 'vlmModel'>) => {
}: {
teamId: string;
tmbId: string;
session?: ClientSession;
} & PushDatasetDataProps) => {
const {
dataset: { _id: datasetId, agentModel, vectorModel, vlmModel }
dataset: { _id: datasetId, agentModel, vectorModel }
} = await getCollectionWithDataset(collectionId);
return pushDataListToTrainingQueue({
...props,
datasetId,
collectionId,
vectorModel,
agentModel,
vlmModel
vectorModel
});
};
@@ -50,30 +52,30 @@ export async function pushDataListToTrainingQueue({
collectionId,
agentModel,
vectorModel,
vlmModel,
data,
prompt,
billId,
mode = TrainingModeEnum.chunk,
trainingMode = TrainingModeEnum.chunk,
session
}: PushDataToTrainingQueueProps): Promise<PushDatasetDataResponse> {
const getImageChunkMode = (data: PushDatasetDataChunkProps, mode: TrainingModeEnum) => {
if (mode !== TrainingModeEnum.image) return mode;
// 检查内容中,是否包含 ![](xxx) 的图片格式
const text = data.q + data.a || '';
const regex = /!\[\]\((.*?)\)/g;
const match = text.match(regex);
if (match) {
return TrainingModeEnum.image;
}
return mode;
};
}: {
teamId: string;
tmbId: string;
datasetId: string;
agentModel: string;
vectorModel: string;
session?: ClientSession;
} & PushDatasetDataProps): Promise<PushDatasetDataResponse> {
const { model, maxToken, weight } = await (async () => {
if (mode === TrainingModeEnum.chunk) {
const vectorModelData = getEmbeddingModel(vectorModel);
if (!vectorModelData) {
return Promise.reject(i18nT('common:error_embedding_not_config'));
}
const agentModelData = getLLMModel(agentModel);
if (!agentModelData) {
return Promise.reject(`File model ${agentModel} is inValid`);
}
const vectorModelData = getEmbeddingModel(vectorModel);
if (!vectorModelData) {
return Promise.reject(`Vector model ${vectorModel} is inValid`);
}
if (trainingMode === TrainingModeEnum.chunk) {
return {
maxToken: vectorModelData.maxToken * 1.5,
model: vectorModelData.model,
@@ -81,11 +83,7 @@ export async function pushDataListToTrainingQueue({
};
}
if (mode === TrainingModeEnum.qa || mode === TrainingModeEnum.auto) {
const agentModelData = getLLMModel(agentModel);
if (!agentModelData) {
return Promise.reject(i18nT('common:error_llm_not_config'));
}
if (trainingMode === TrainingModeEnum.qa || trainingMode === TrainingModeEnum.auto) {
return {
maxToken: agentModelData.maxContext * 0.8,
model: agentModelData.model,
@@ -93,24 +91,8 @@ export async function pushDataListToTrainingQueue({
};
}
if (mode === TrainingModeEnum.image) {
const vllmModelData = getVlmModel(vlmModel);
if (!vllmModelData) {
return Promise.reject(i18nT('common:error_vlm_not_config'));
}
return {
maxToken: vllmModelData.maxContext * 0.8,
model: vllmModelData.model,
weight: 0
};
}
return Promise.reject(`Training mode "${mode}" is inValid`);
return Promise.reject(`Training mode "${trainingMode}" is inValid`);
})();
// Filter redundant params
if (mode === TrainingModeEnum.chunk || mode === TrainingModeEnum.auto) {
prompt = undefined;
}
// filter repeat or equal content
const set = new Set();
@@ -176,7 +158,7 @@ export async function pushDataListToTrainingQueue({
datasetId,
collectionId,
billId,
mode: getImageChunkMode(item, mode),
mode: trainingMode,
prompt,
model,
q: item.q,

View File

@@ -1,15 +1,14 @@
/* 模型的知识库 */
import { connectionMongo, getMongoModel } from '../../../common/mongo';
const { Schema } = connectionMongo;
import { connectionMongo, getMongoModel, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import { DatasetColCollectionName } from '../collection/schema';
import { DatasetCollectionName } from '../schema';
import {
TeamCollectionName,
TeamMemberCollectionName
} from '@fastgpt/global/support/user/team/constant';
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';
export const DatasetTrainingCollectionName = 'dataset_trainings';
@@ -26,6 +25,7 @@ const TrainingDataSchema = new Schema({
},
datasetId: {
type: Schema.Types.ObjectId,
ref: DatasetCollectionName,
required: true
},
collectionId: {
@@ -33,13 +33,15 @@ const TrainingDataSchema = new Schema({
ref: DatasetColCollectionName,
required: true
},
billId: String,
billId: {
// concat bill
type: String
},
mode: {
type: String,
enum: Object.values(TrainingModeEnum),
enum: Object.keys(TrainingTypeMap),
required: true
},
expireAt: {
// It will be deleted after 7 days
type: Date,
@@ -86,10 +88,6 @@ const TrainingDataSchema = new Schema({
indexes: {
type: [
{
type: {
type: String,
enum: Object.values(DatasetDataIndexTypeEnum)
},
text: {
type: String,
required: true
@@ -100,19 +98,6 @@ const TrainingDataSchema = new Schema({
}
});
TrainingDataSchema.virtual('dataset', {
ref: DatasetCollectionName,
localField: 'datasetId',
foreignField: '_id',
justOne: true
});
TrainingDataSchema.virtual('collection', {
ref: DatasetColCollectionName,
localField: 'collectionId',
foreignField: '_id',
justOne: true
});
try {
// lock training data(teamId); delete training data
TrainingDataSchema.index({ teamId: 1, datasetId: 1 });

View File

@@ -1,7 +1,6 @@
import { NodeOutputKeyEnum } from '@fastgpt/global/core/workflow/constants';
import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runtime/constants';
import type {
ChatDispatchProps,
DispatchNodeResultType,
RuntimeNodeItemType
} from '@fastgpt/global/core/workflow/runtime/type';
@@ -47,7 +46,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
query,
requestOrigin,
chatConfig,
runningUserInfo,
runningAppInfo: { teamId },
externalProvider,
params: {
model,
@@ -55,8 +54,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
userChatInput,
history = 6,
fileUrlList: fileLinks,
aiChatVision,
aiChatReasoning
aiChatVision
}
} = props;
@@ -64,9 +62,6 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
const useVision = aiChatVision && toolModel.vision;
const chatHistories = getHistories(history, histories);
props.params.aiChatVision = aiChatVision && toolModel.vision;
props.params.aiChatReasoning = aiChatReasoning && toolModel.reasoning;
const toolNodeIds = filterToolNodeIdByEdges({ nodeId, edges: runtimeEdges });
// Gets the module to which the tool is connected
@@ -104,11 +99,10 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
const globalFiles = chatValue2RuntimePrompt(query).files;
const { documentQuoteText, userFiles } = await getMultiInput({
runningUserInfo,
histories: chatHistories,
requestOrigin,
maxFiles: chatConfig?.fileSelectConfig?.maxFiles || 20,
customPdfParse: chatConfig?.fileSelectConfig?.customPdfParse,
teamId,
fileLinks,
inputFiles: globalFiles,
hasReadFilesTool
@@ -295,21 +289,19 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
};
const getMultiInput = async ({
runningUserInfo,
histories,
fileLinks,
requestOrigin,
maxFiles,
customPdfParse,
teamId,
inputFiles,
hasReadFilesTool
}: {
runningUserInfo: ChatDispatchProps['runningUserInfo'];
histories: ChatItemType[];
fileLinks?: string[];
requestOrigin?: string;
maxFiles: number;
customPdfParse?: boolean;
teamId: string;
inputFiles: UserChatItemValueItemType['file'][];
hasReadFilesTool: boolean;
}) => {
@@ -337,9 +329,7 @@ const getMultiInput = async ({
urls,
requestOrigin,
maxFiles,
customPdfParse,
teamId: runningUserInfo.teamId,
tmbId: runningUserInfo.tmbId
teamId
});
return {

View File

@@ -24,12 +24,7 @@ import {
import { AIChatItemType } from '@fastgpt/global/core/chat/type';
import { GPTMessages2Chats } from '@fastgpt/global/core/chat/adapt';
import { formatToolResponse, initToolCallEdges, initToolNodes } from './utils';
import {
computedMaxToken,
llmCompletionsBodyFormat,
parseReasoningContent,
parseReasoningStreamContent
} from '../../../../ai/utils';
import { computedMaxToken, llmCompletionsBodyFormat } from '../../../../ai/utils';
import { WorkflowResponseType } from '../../type';
import { toolValueTypeList } from '@fastgpt/global/core/workflow/constants';
import { WorkflowInteractiveResponseType } from '@fastgpt/global/core/workflow/template/system/interactive/type';
@@ -63,7 +58,6 @@ export const runToolWithPromptCall = async (
temperature,
maxToken,
aiChatVision,
aiChatReasoning,
aiChatTopP,
aiChatStopSign,
aiChatResponseFormat,
@@ -222,7 +216,7 @@ export const runToolWithPromptCall = async (
const [requestMessages] = await Promise.all([
loadRequestMessages({
messages: filterMessages,
useVision: aiChatVision,
useVision: toolModel.vision && aiChatVision,
origin: requestOrigin
})
]);
@@ -257,46 +251,22 @@ export const runToolWithPromptCall = async (
}
});
const { answer, reasoning } = await (async () => {
const answer = await (async () => {
if (res && isStreamResponse) {
const { answer, reasoning } = await streamResponse({
const { answer } = await streamResponse({
res,
toolNodes,
stream: aiResponse,
workflowStreamResponse,
aiChatReasoning
workflowStreamResponse
});
return { answer, reasoning };
return answer;
} else {
const content = aiResponse.choices?.[0]?.message?.content || '';
const reasoningContent: string = aiResponse.choices?.[0]?.message?.reasoning_content || '';
const result = aiResponse as ChatCompletion;
// API already parse reasoning content
if (reasoningContent || !aiChatReasoning) {
return {
answer: content,
reasoning: reasoningContent
};
}
const [think, answer] = parseReasoningContent(content);
return {
answer,
reasoning: think
};
return result.choices?.[0]?.message?.content || '';
}
})();
if (stream && !isStreamResponse && aiChatReasoning && reasoning) {
workflowStreamResponse?.({
event: SseResponseEventEnum.fastAnswer,
data: textAdaptGptResponse({
reasoning_content: reasoning
})
});
}
const { answer: replaceAnswer, toolJson } = parseAnswer(answer);
if (!answer && !toolJson) {
return Promise.reject(getEmptyResponseTip());
@@ -324,16 +294,11 @@ export const runToolWithPromptCall = async (
}
// No tool is invoked, indicating that the process is over
const gptAssistantResponse: ChatCompletionMessageParam = {
const gptAssistantResponse: ChatCompletionAssistantMessageParam = {
role: ChatCompletionRequestMessageRoleEnum.Assistant,
content: replaceAnswer,
reasoning_text: reasoning
content: replaceAnswer
};
const completeMessages = filterMessages.concat({
...gptAssistantResponse,
reasoning_text: undefined
});
const completeMessages = filterMessages.concat(gptAssistantResponse);
const inputTokens = await countGptMessagesTokens(requestMessages);
const outputTokens = await countGptMessagesTokens([gptAssistantResponse]);
@@ -414,10 +379,9 @@ export const runToolWithPromptCall = async (
})();
// 合并工具调用的结果,使用 functionCall 格式存储。
const assistantToolMsgParams: ChatCompletionMessageParam = {
const assistantToolMsgParams: ChatCompletionAssistantMessageParam = {
role: ChatCompletionRequestMessageRoleEnum.Assistant,
function_call: toolJson,
reasoning_text: reasoning
function_call: toolJson
};
// Only toolCall tokens are counted here, Tool response tokens count towards the next reply
@@ -538,14 +502,12 @@ ANSWER: `;
async function streamResponse({
res,
stream,
workflowStreamResponse,
aiChatReasoning
workflowStreamResponse
}: {
res: NextApiResponse;
toolNodes: ToolNodeItemType[];
stream: StreamChatType;
workflowStreamResponse?: WorkflowResponseType;
aiChatReasoning?: boolean;
}) {
const write = responseWriteController({
res,
@@ -553,9 +515,7 @@ async function streamResponse({
});
let startResponseWrite = false;
let answer = '';
let reasoning = '';
const { parsePart, getStartTagBuffer } = parseReasoningStreamContent();
let textAnswer = '';
for await (const part of stream) {
if (res.closed) {
@@ -563,21 +523,13 @@ async function streamResponse({
break;
}
const [reasoningContent, content] = parsePart(part, aiChatReasoning);
answer += content;
reasoning += reasoningContent;
const responseChoice = part.choices?.[0]?.delta;
// console.log(responseChoice, '---===');
if (aiChatReasoning && reasoningContent) {
workflowStreamResponse?.({
write,
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
reasoning_content: reasoningContent
})
});
}
if (responseChoice?.content) {
const content = responseChoice?.content || '';
textAnswer += content;
if (content) {
if (startResponseWrite) {
workflowStreamResponse?.({
write,
@@ -586,20 +538,18 @@ async function streamResponse({
text: content
})
});
} else if (answer.length >= 3) {
answer = answer.trimStart();
if (/0(:|)/.test(answer)) {
} else if (textAnswer.length >= 3) {
textAnswer = textAnswer.trim();
if (textAnswer.startsWith('0')) {
startResponseWrite = true;
// find first : index
const firstIndex =
answer.indexOf('0:') !== -1 ? answer.indexOf('0:') : answer.indexOf('0');
answer = answer.substring(firstIndex + 2).trim();
const firstIndex = textAnswer.indexOf(':');
textAnswer = textAnswer.substring(firstIndex + 1).trim();
workflowStreamResponse?.({
write,
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
text: answer
text: textAnswer
})
});
}
@@ -607,23 +557,7 @@ async function streamResponse({
}
}
if (answer === '') {
answer = getStartTagBuffer();
if (/0(:|)/.test(answer)) {
// find first : index
const firstIndex = answer.indexOf('0:') !== -1 ? answer.indexOf('0:') : answer.indexOf('0');
answer = answer.substring(firstIndex + 2).trim();
workflowStreamResponse?.({
write,
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
text: answer
})
});
}
}
return { answer, reasoning };
return { answer: textAnswer.trim() };
}
const parseAnswer = (
@@ -634,7 +568,8 @@ const parseAnswer = (
} => {
str = str.trim();
// 首先使用正则表达式提取TOOL_ID和TOOL_ARGUMENTS
const prefixReg = /1(:|)/;
const prefixReg = /^1(:|)/;
const answerPrefixReg = /^0(:|)/;
if (prefixReg.test(str)) {
const toolString = sliceJsonStr(str);
@@ -650,21 +585,13 @@ const parseAnswer = (
}
};
} catch (error) {
if (/^1(:|)/.test(str)) {
return {
answer: ERROR_TEXT
};
} else {
return {
answer: str
};
}
return {
answer: ERROR_TEXT
};
}
} else {
const firstIndex = str.indexOf('0:') !== -1 ? str.indexOf('0:') : str.indexOf('0');
const answer = str.substring(firstIndex + 2).trim();
return {
answer
answer: str.replace(answerPrefixReg, '')
};
}
};

View File

@@ -22,7 +22,6 @@ export type DispatchToolModuleProps = ModuleDispatchProps<{
[NodeInputKeyEnum.aiChatTemperature]: number;
[NodeInputKeyEnum.aiChatMaxToken]: number;
[NodeInputKeyEnum.aiChatVision]?: boolean;
[NodeInputKeyEnum.aiChatReasoning]?: boolean;
[NodeInputKeyEnum.aiChatTopP]?: number;
[NodeInputKeyEnum.aiChatStopSign]?: string;
[NodeInputKeyEnum.aiChatResponseFormat]?: string;

View File

@@ -11,10 +11,7 @@ import { formatModelChars2Points } from '../../../../support/wallet/usage/utils'
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { postTextCensor } from '../../../../common/api/requestPlusApi';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import type {
ChatDispatchProps,
DispatchNodeResultType
} from '@fastgpt/global/core/workflow/runtime/type';
import type { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type';
import { countGptMessagesTokens } from '../../../../common/string/tiktoken/index';
import {
chats2GPTMessages,
@@ -72,7 +69,7 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
histories,
node: { name },
query,
runningUserInfo,
runningAppInfo: { teamId },
workflowStreamResponse,
chatConfig,
params: {
@@ -124,8 +121,7 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
stringQuoteText,
requestOrigin,
maxFiles: chatConfig?.fileSelectConfig?.maxFiles || 20,
customPdfParse: chatConfig?.fileSelectConfig?.customPdfParse,
runningUserInfo
teamId
})
]);
@@ -359,8 +355,7 @@ async function getMultiInput({
stringQuoteText,
requestOrigin,
maxFiles,
customPdfParse,
runningUserInfo
teamId
}: {
histories: ChatItemType[];
inputFiles: UserChatItemValueItemType['file'][];
@@ -368,8 +363,7 @@ async function getMultiInput({
stringQuoteText?: string; // file quote
requestOrigin?: string;
maxFiles: number;
customPdfParse?: boolean;
runningUserInfo: ChatDispatchProps['runningUserInfo'];
teamId: string;
}) {
// 旧版本适配====>
if (stringQuoteText) {
@@ -406,9 +400,7 @@ async function getMultiInput({
urls,
requestOrigin,
maxFiles,
customPdfParse,
teamId: runningUserInfo.teamId,
tmbId: runningUserInfo.tmbId
teamId
});
return {
@@ -563,15 +555,6 @@ async function streamResponse({
// if answer is empty, try to get value from startTagBuffer. (Cause: The response content is too short to exceed the minimum parse length)
if (answer === '') {
answer = getStartTagBuffer();
if (isResponseAnswerText && answer) {
workflowStreamResponse?.({
write,
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
text: answer
})
});
}
}
return { answer, reasoning };

View File

@@ -120,27 +120,144 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
2. Replace newline strings
*/
const replaceJsonBodyString = (text: string) => {
const valToStr = (val: any) => {
// Check if the variable is in quotes
const isVariableInQuotes = (text: string, variable: string) => {
const index = text.indexOf(variable);
if (index === -1) return false;
// 计算变量前面的引号数量
const textBeforeVar = text.substring(0, index);
const matches = textBeforeVar.match(/"/g) || [];
// 如果引号数量为奇数,则变量在引号内
return matches.length % 2 === 1;
};
const valToStr = (val: any, isQuoted = false) => {
if (val === undefined) return 'null';
if (val === null) return 'null';
if (typeof val === 'object') return JSON.stringify(val);
if (typeof val === 'string') {
if (isQuoted) {
return val.replace(/(?<!\\)"/g, '\\"');
}
try {
const parsed = JSON.parse(val);
if (typeof parsed === 'object') {
return JSON.stringify(parsed);
}
JSON.parse(val);
return val;
} catch (error) {
const str = JSON.stringify(val);
return str.startsWith('"') && str.endsWith('"') ? str.slice(1, -1) : str;
}
}
return String(val);
};
// Test cases for variable replacement in JSON body
// const bodyTest = () => {
// const testData = [
// // 基本字符串替换
// {
// body: `{"name":"{{name}}","age":"18"}`,
// variables: [{ key: '{{name}}', value: '测试' }],
// result: `{"name":"测试","age":"18"}`
// },
// // 特殊字符处理
// {
// body: `{"text":"{{text}}"}`,
// variables: [{ key: '{{text}}', value: '包含"引号"和\\反斜杠' }],
// result: `{"text":"包含\\"引号\\"和\\反斜杠"}`
// },
// // 数字类型处理
// {
// body: `{"count":{{count}},"price":{{price}}}`,
// variables: [
// { key: '{{count}}', value: '42' },
// { key: '{{price}}', value: '99.99' }
// ],
// result: `{"count":42,"price":99.99}`
// },
// // 布尔值处理
// {
// body: `{"isActive":{{isActive}},"hasData":{{hasData}}}`,
// variables: [
// { key: '{{isActive}}', value: 'true' },
// { key: '{{hasData}}', value: 'false' }
// ],
// result: `{"isActive":true,"hasData":false}`
// },
// // 对象类型处理
// {
// body: `{"user":{{user}},"user2":"{{user2}}"}`,
// variables: [
// { key: '{{user}}', value: `{"id":1,"name":"张三"}` },
// { key: '{{user2}}', value: `{"id":1,"name":"张三"}` }
// ],
// result: `{"user":{"id":1,"name":"张三"},"user2":"{\\"id\\":1,\\"name\\":\\"张三\\"}"}`
// },
// // 数组类型处理
// {
// body: `{"items":{{items}}}`,
// variables: [{ key: '{{items}}', value: '[1, 2, 3]' }],
// result: `{"items":[1,2,3]}`
// },
// // null 和 undefined 处理
// {
// body: `{"nullValue":{{nullValue}},"undefinedValue":{{undefinedValue}}}`,
// variables: [
// { key: '{{nullValue}}', value: 'null' },
// { key: '{{undefinedValue}}', value: 'undefined' }
// ],
// result: `{"nullValue":null,"undefinedValue":null}`
// },
// // 嵌套JSON结构
// {
// body: `{"data":{"nested":{"value":"{{nestedValue}}"}}}`,
// variables: [{ key: '{{nestedValue}}', value: '嵌套值' }],
// result: `{"data":{"nested":{"value":"嵌套值"}}}`
// },
// // 多变量替换
// {
// body: `{"first":"{{first}}","second":"{{second}}","third":{{third}}}`,
// variables: [
// { key: '{{first}}', value: '第一' },
// { key: '{{second}}', value: '第二' },
// { key: '{{third}}', value: '3' }
// ],
// result: `{"first":"第一","second":"第二","third":3}`
// },
// // JSON字符串作为变量值
// {
// body: `{"config":{{config}}}`,
// variables: [{ key: '{{config}}', value: '{"setting":"enabled","mode":"advanced"}' }],
// result: `{"config":{"setting":"enabled","mode":"advanced"}}`
// }
// ];
// for (let i = 0; i < testData.length; i++) {
// const item = testData[i];
// let bodyStr = item.body;
// for (const variable of item.variables) {
// const isQuote = isVariableInQuotes(bodyStr, variable.key);
// bodyStr = bodyStr.replace(variable.key, valToStr(variable.value, isQuote));
// }
// bodyStr = bodyStr.replace(/(".*?")\s*:\s*undefined\b/g, '$1:null');
// console.log(bodyStr === item.result, i);
// if (bodyStr !== item.result) {
// console.log(bodyStr);
// console.log(item.result);
// } else {
// try {
// JSON.parse(item.result);
// } catch (error) {
// console.log('反序列化异常', i, item.result);
// }
// }
// }
// };
// bodyTest();
// 1. Replace {{key.key}} variables
const regex1 = /\{\{\$([^.]+)\.([^$]+)\$\}\}/g;
@@ -148,6 +265,10 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
matches1.forEach((match) => {
const nodeId = match[1];
const id = match[2];
const fullMatch = match[0];
// 检查变量是否在引号内
const isInQuotes = isVariableInQuotes(text, fullMatch);
const variableVal = (() => {
if (nodeId === VARIABLE_NODE_ID) {
@@ -165,9 +286,9 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
return getReferenceVariableValue({ value: input.value, nodes: runtimeNodes, variables });
})();
const formatVal = valToStr(variableVal);
const formatVal = valToStr(variableVal, isInQuotes);
const regex = new RegExp(`\\{\\{\\$(${nodeId}\\.${id})\\$\\}\\}`, 'g');
const regex = new RegExp(`\\{\\{\\$(${nodeId}\\.${id})\\$\\}\\}`, '');
text = text.replace(regex, () => formatVal);
});
@@ -176,10 +297,16 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
const matches2 = text.match(regex2) || [];
const uniqueKeys2 = [...new Set(matches2.map((match) => match.slice(2, -2)))];
for (const key of uniqueKeys2) {
text = text.replace(new RegExp(`{{(${key})}}`, 'g'), () => valToStr(allVariables[key]));
const fullMatch = `{{${key}}}`;
// 检查变量是否在引号内
const isInQuotes = isVariableInQuotes(text, fullMatch);
text = text.replace(new RegExp(`{{(${key})}}`, ''), () =>
valToStr(allVariables[key], isInQuotes)
);
}
return text.replace(/(".*?")\s*:\s*undefined\b/g, '$1: null');
return text.replace(/(".*?")\s*:\s*undefined\b/g, '$1:null');
};
httpReqUrl = replaceStringVariables(httpReqUrl);

View File

@@ -45,14 +45,13 @@ ${content.slice(0, 100)}${content.length > 100 ? '......' : ''}
export const dispatchReadFiles = async (props: Props): Promise<Response> => {
const {
requestOrigin,
runningUserInfo: { teamId, tmbId },
runningAppInfo: { teamId },
histories,
chatConfig,
node: { version },
params: { fileUrlList = [] }
} = props;
const maxFiles = chatConfig?.fileSelectConfig?.maxFiles || 20;
const customPdfParse = chatConfig?.fileSelectConfig?.customPdfParse || false;
// Get files from histories
const filesFromHistories = version !== '489' ? [] : getHistoryFileLinks(histories);
@@ -62,9 +61,7 @@ export const dispatchReadFiles = async (props: Props): Promise<Response> => {
urls: [...fileUrlList, ...filesFromHistories],
requestOrigin,
maxFiles,
teamId,
tmbId,
customPdfParse
teamId
});
return {
@@ -108,16 +105,12 @@ export const getFileContentFromLinks = async ({
urls,
requestOrigin,
maxFiles,
teamId,
tmbId,
customPdfParse
teamId
}: {
urls: string[];
requestOrigin?: string;
maxFiles: number;
teamId: string;
tmbId: string;
customPdfParse?: boolean;
}) => {
const parseUrlList = urls
// Remove invalid urls
@@ -212,10 +205,8 @@ export const getFileContentFromLinks = async ({
extension,
isQAImport: false,
teamId,
tmbId,
buffer,
encoding,
customPdfParse
encoding
});
// Add to buffer

View File

@@ -43,7 +43,6 @@ async function getTeamMember(match: Record<string, any>): Promise<TeamTmbItemTyp
teamDomain: tmb.team?.teamDomain,
role: tmb.role,
status: tmb.status,
defaultTeam: tmb.defaultTeam,
permission: new TeamPermission({
per: Per ?? TeamDefaultPermissionVal,
isOwner: tmb.role === TeamMemberRoleEnum.owner
@@ -71,8 +70,7 @@ export async function getUserDefaultTeam({ userId }: { userId: string }) {
return Promise.reject('tmbId or userId is required');
}
return getTeamMember({
userId: new Types.ObjectId(userId),
defaultTeam: true
userId: new Types.ObjectId(userId)
});
}

View File

@@ -39,14 +39,14 @@ const TeamMemberSchema = new Schema({
updateTime: {
type: Date
},
defaultTeam: {
type: Boolean,
default: false
},
// Abandoned
role: {
type: String
},
// Abandoned
defaultTeam: {
type: Boolean
}
});

View File

@@ -117,16 +117,14 @@ export const createTrainingUsage = async ({
billSource,
vectorModel,
agentModel,
vllmModel,
session
}: {
teamId: string;
tmbId: string;
appName: string;
billSource: UsageSourceEnum;
vectorModel?: string;
agentModel?: string;
vllmModel?: string;
vectorModel: string;
agentModel: string;
session?: ClientSession;
}) => {
const [{ _id }] = await MongoUsage.create(
@@ -138,46 +136,27 @@ export const createTrainingUsage = async ({
source: billSource,
totalPoints: 0,
list: [
...(vectorModel
? [
{
moduleName: i18nT('account_usage:embedding_index'),
model: vectorModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
}
]
: []),
...(agentModel
? [
{
moduleName: i18nT('account_usage:qa'),
model: agentModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
},
{
moduleName: i18nT('account_usage:auto_index'),
model: agentModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
}
]
: []),
...(vllmModel
? [
{
moduleName: i18nT('account_usage:image_parse'),
model: vllmModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
}
]
: [])
{
moduleName: i18nT('common:support.wallet.moduleName.index'),
model: vectorModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
},
{
moduleName: i18nT('common:support.wallet.moduleName.qa'),
model: agentModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
},
{
moduleName: i18nT('common:core.dataset.training.Auto mode'),
model: agentModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
}
]
}
],
@@ -186,31 +165,3 @@ export const createTrainingUsage = async ({
return { billId: String(_id) };
};
export const createPdfParseUsage = async ({
teamId,
tmbId,
pages
}: {
teamId: string;
tmbId: string;
pages: number;
}) => {
const unitPrice = global.systemEnv?.customPdfParse?.price || 0;
const totalPoints = pages * unitPrice;
createUsage({
teamId,
tmbId,
appName: i18nT('account_usage:pdf_enhanced_parse'),
totalPoints,
source: UsageSourceEnum.pdfParse,
list: [
{
moduleName: i18nT('account_usage:pdf_enhanced_parse'),
amount: totalPoints,
pages
}
]
});
};

View File

@@ -9,7 +9,7 @@ import { readXlsxRawText } from './extension/xlsx';
import { readCsvRawText } from './extension/csv';
parentPort?.on('message', async (props: ReadRawTextProps<Uint8Array>) => {
const read = async (params: ReadRawTextByBuffer) => {
const readRawContentByFileBuffer = async (params: ReadRawTextByBuffer) => {
switch (params.extension) {
case 'txt':
case 'md':
@@ -27,9 +27,7 @@ parentPort?.on('message', async (props: ReadRawTextProps<Uint8Array>) => {
case 'csv':
return readCsvRawText(params);
default:
return Promise.reject(
`Only support .txt, .md, .html, .pdf, .docx, pptx, .csv, .xlsx. "${params.extension}" is not supported.`
);
return Promise.reject('Only support .txt, .md, .html, .pdf, .docx, pptx, .csv, .xlsx');
}
};
@@ -43,7 +41,7 @@ parentPort?.on('message', async (props: ReadRawTextProps<Uint8Array>) => {
try {
parentPort?.postMessage({
type: 'success',
data: await read(newProps)
data: await readRawContentByFileBuffer(newProps)
});
} catch (error) {
console.log(error);

View File

@@ -17,7 +17,7 @@ const MyPhotoView = (props: ImageProps) => {
loadingElement={<Loading fixed={false} />}
>
<PhotoView src={props.src}>
<MyImage cursor={'pointer'} {...props} title={props.title || props.src} />
<MyImage cursor={'pointer'} {...props} />
</PhotoView>
</PhotoProvider>
);

View File

@@ -11,8 +11,8 @@ type Props = BoxProps & {
const MyBox = ({ text, isLoading, children, size, ...props }: Props, ref: any) => {
return (
<Box ref={ref} position={isLoading ? 'relative' : 'unset'} {...props}>
{children}
{isLoading && <Loading fixed={false} text={text} size={size} />}
{children}
</Box>
);
};

View File

@@ -1,24 +1,26 @@
import React from 'react';
import { Box, Flex, useTheme, Grid, type GridProps, HStack } from '@chakra-ui/react';
import { useTranslation } from 'next-i18next';
import MyTooltip from '../MyTooltip';
import QuestionTip from '../MyTooltip/QuestionTip';
type Props<T> = Omit<GridProps, 'onChange'> & {
// @ts-ignore
interface Props extends GridProps {
list: {
title: string;
desc?: string;
value: T;
value: any;
children?: React.ReactNode;
tooltip?: string;
}[];
align?: 'flex-top' | 'center';
value: T;
value: any;
defaultBg?: string;
activeBg?: string;
onChange: (e: T) => void;
};
onChange: (e: any) => void;
}
const LeftRadio = <T = any,>({
const LeftRadio = ({
list,
value,
align = 'flex-top',
@@ -28,7 +30,7 @@ const LeftRadio = <T = any,>({
activeBg = 'primary.50',
onChange,
...props
}: Props<T>) => {
}: Props) => {
const { t } = useTranslation();
const theme = useTheme();
@@ -37,7 +39,7 @@ const LeftRadio = <T = any,>({
{list.map((item) => (
<Flex
alignItems={item.desc ? align : 'center'}
key={item.value as any}
key={item.value}
cursor={'pointer'}
userSelect={'none'}
px={px}
@@ -96,7 +98,7 @@ const LeftRadio = <T = any,>({
fontSize={'sm'}
>
<Box>{typeof item.title === 'string' ? t(item.title as any) : item.title}</Box>
{!!item.tooltip && <QuestionTip label={item.tooltip} color={'myGray.600'} />}
{!!item.tooltip && <QuestionTip label={item.tooltip} ml={1} color={'myGray.600'} />}
</HStack>
</Flex>
{!!item.desc && (

View File

@@ -21,7 +21,6 @@
"edit_channel": "Channel configuration",
"enable_channel": "Enable",
"forbid_channel": "Disabled",
"input maxToken_tip": "The model max_tokens parameter, if left blank, means that the model does not support it.",
"key_type": "API key format:",
"log": "Call log",
"log_detail": "Log details",
@@ -29,7 +28,6 @@
"log_status": "Status",
"mapping": "Model Mapping",
"mapping_tip": "A valid Json is required. \nThe model can be mapped when sending a request to the actual address. \nFor example:\n{\n \n \"gpt-4o\": \"gpt-4o-test\"\n\n}\n\nWhen FastGPT requests the gpt-4o model, the gpt-4o-test model is sent to the actual address, instead of gpt-4o.",
"max_temperature_tip": "If the model temperature parameter is not filled in, it means that the model does not support the temperature parameter.",
"model": "Model",
"model_name": "Model name",
"model_test": "Model testing",
@@ -45,7 +43,5 @@
"selected_model_empty": "Choose at least one model",
"start_test": "Start testing {{num}} models",
"test_failed": "There are {{num}} models that report errors",
"vlm_model": "Vlm",
"vlm_model_tip": "Used to generate additional indexing of images in a document in the knowledge base",
"waiting_test": "Waiting for testing"
}

View File

@@ -2,7 +2,6 @@
"ai_model": "AI model",
"all": "all",
"app_name": "Application name",
"auto_index": "Auto index",
"billing_module": "Deduction module",
"confirm_export": "A total of {{total}} pieces of data were filtered out. Are you sure to export?",
"current_filter_conditions": "Current filter conditions",
@@ -10,7 +9,6 @@
"details": "Details",
"dingtalk": "DingTalk",
"duration_seconds": "Duration (seconds)",
"embedding_index": "Embedding",
"every_day": "Day",
"every_month": "Moon",
"export_confirm": "Export confirmation",
@@ -18,7 +16,6 @@
"export_title": "Time,Members,Type,Project name,AI points",
"feishu": "Feishu",
"generation_time": "Generation time",
"image_parse": "Image tagging",
"input_token_length": "input tokens",
"member": "member",
"member_name": "Member name",
@@ -28,12 +25,8 @@
"official_account": "Official Account",
"order_number": "Order number",
"output_token_length": "output tokens",
"pages": "Pages",
"pdf_enhanced_parse": "PDF Enhanced Analysis",
"pdf_parse": "PDF Analysis",
"points": "Points",
"project_name": "Project name",
"qa": "QA",
"select_member_and_source_first": "Please select members and types first",
"share": "Share Link",
"source": "source",

View File

@@ -105,9 +105,6 @@
"open_vision_function_tip": "Models with icon switches have image recognition capabilities. \nAfter being turned on, the model will parse the pictures in the file link and automatically parse the pictures in the user's question (user question ≤ 500 words).",
"or_drag_JSON": "or drag in JSON file",
"paste_config_or_drag": "Paste config or drag JSON file here",
"pdf_enhance_parse": "PDF enhancement analysis",
"pdf_enhance_parse_price": "{{price}}Points/page",
"pdf_enhance_parse_tips": "Calling PDF recognition model for parsing, you can convert it into Markdown and retain pictures in the document. At the same time, you can also identify scanned documents, which will take a long time to identify them.",
"permission.des.manage": "Based on write permissions, you can configure publishing channels, view conversation logs, and assign permissions to the application.",
"permission.des.read": "Use the app to have conversations",
"permission.des.write": "Can view and edit apps",

View File

@@ -562,7 +562,10 @@
"core.dataset.file": "File",
"core.dataset.folder": "Directory",
"core.dataset.import.Auto mode Estimated Price Tips": "Requires calling the file processing model, which consumes a lot of tokens: {{price}} points/1K tokens",
"core.dataset.import.Auto process": "Automatic",
"core.dataset.import.Auto process desc": "Automatically set segmentation and preprocessing rules",
"core.dataset.import.Chunk Range": "Range: {{min}}~{{max}}",
"core.dataset.import.Chunk Split": "Chunks",
"core.dataset.import.Chunk Split Tip": "Segment the text according to certain rules and convert it into a format that can be semantically searched. Suitable for most scenarios. No additional model processing is required, and the cost is low.",
"core.dataset.import.Continue upload": "Continue upload",
"core.dataset.import.Custom process": "Custom Rules",
@@ -572,6 +575,7 @@
"core.dataset.import.Custom split char Tips": "Allows you to segment based on custom separators. Usually used for pre-processed data, using specific separators for precise segmentation.",
"core.dataset.import.Custom text": "Custom Text",
"core.dataset.import.Custom text desc": "Manually enter a piece of text as a dataset",
"core.dataset.import.Data Preprocessing": "Data Processing",
"core.dataset.import.Data process params": "Data Processing Parameters",
"core.dataset.import.Down load csv template": "Click to Download CSV Template",
"core.dataset.import.Embedding Estimated Price Tips": "Only use the index model, consuming a small amount of AI points: {{price}} points/1K tokens",
@@ -593,6 +597,7 @@
"core.dataset.import.Source name": "Source Name",
"core.dataset.import.Sources list": "Sources",
"core.dataset.import.Start upload": "Start Upload",
"core.dataset.import.Total files": "Total {{total}} Files",
"core.dataset.import.Upload complete": "Upload complete",
"core.dataset.import.Upload data": "Confirm Upload",
"core.dataset.import.Upload file progress": "File Upload Progress",
@@ -644,10 +649,10 @@
"core.dataset.training.Agent queue": "QA Training Queue",
"core.dataset.training.Auto mode": "Auto index",
"core.dataset.training.Auto mode Tip": "Increase the semantic richness of data blocks by generating related questions and summaries through sub-indexes and calling models, making it more conducive to retrieval. Requires more storage space and increases AI call times.",
"core.dataset.training.Chunk mode": "Chunk",
"core.dataset.training.Chunk mode": "Default",
"core.dataset.training.Full": "Estimated Over 5 Minutes",
"core.dataset.training.Leisure": "Idle",
"core.dataset.training.QA mode": "QA",
"core.dataset.training.QA mode": "QA Chunks",
"core.dataset.training.Vector queue": "Index Queue",
"core.dataset.training.Waiting": "Estimated 5 Minutes",
"core.dataset.training.Website Sync": "Website Sync",
@@ -856,6 +861,7 @@
"dataset.collections.Select Collection": "Select File",
"dataset.collections.Select One Collection To Store": "Select a File to Store",
"dataset.data.Can not edit": "No Edit Permission",
"dataset.data.Custom Index Number": "Custom Index {{number}}",
"dataset.data.Default Index": "Default Index",
"dataset.data.Delete Tip": "Confirm to Delete This Data?",
"dataset.data.Index Placeholder": "Enter Index Text Content",
@@ -883,9 +889,6 @@
"error.upload_image_error": "File upload failed",
"error.username_empty": "Account cannot be empty",
"error_collection_not_exist": "The collection does not exist",
"error_embedding_not_config": "Unconfigured index model",
"error_llm_not_config": "Unconfigured file understanding model",
"error_vlm_not_config": "Image comprehension model not configured",
"extraction_results": "Extraction Results",
"field_name": "Field Name",
"free": "Free",
@@ -953,7 +956,6 @@
"new_create": "Create New",
"no": "No",
"no_laf_env": "System Not Configured with Laf Environment",
"not_model_config": "No related model configured",
"not_yet_introduced": "No Introduction Yet",
"option": "Option",
"pay.amount": "Amount",
@@ -1119,6 +1121,7 @@
"support.wallet.invoice_detail": "Invoice Details",
"support.wallet.invoice_info": "The invoice will be sent to the email within 3-7 working days, please wait patiently",
"support.wallet.invoicing": "Invoicing",
"support.wallet.moduleName.index": "Index Generation",
"support.wallet.moduleName.qa": "QA Split",
"support.wallet.noBill": "No Bill Records",
"support.wallet.no_invoice": "No Invoice Records",

View File

@@ -3,16 +3,11 @@
"add_file": "Import",
"api_file": "API Dataset",
"api_url": "API Url",
"auto_indexes": "Automatically generate supplementary indexes",
"auto_indexes_tips": "Additional index generation is performed through large models to improve semantic richness and improve retrieval accuracy.",
"chunk_max_tokens": "max_tokens",
"close_auto_sync": "Are you sure you want to turn off automatic sync?",
"collection.Create update time": "Creation/Update Time",
"collection.Training type": "Training",
"collection.training_type": "Chunk type",
"collection_data_count": "Data amount",
"collection_metadata_custom_pdf_parse": "PDF enhancement analysis",
"collection_metadata_image_parse": "Image tagging",
"collection_not_support_retraining": "This collection type does not support retuning parameters",
"collection_not_support_sync": "This collection does not support synchronization",
"collection_sync": "Sync data",
@@ -27,21 +22,12 @@
"custom_data_process_params_desc": "Customize data processing rules",
"data.ideal_chunk_length": "ideal block length",
"data_amount": "{{dataAmount}} Datas, {{indexAmount}} Indexes",
"data_index_custom": "Custom index",
"data_index_default": "Default index",
"data_index_image": "Image Index",
"data_index_num": "Index {{index}}",
"data_index_question": "Inferred question index",
"data_index_summary": "Summary index",
"data_process_params": "Params",
"data_process_setting": "Processing config",
"dataset.Unsupported operation": "dataset.Unsupported operation",
"dataset.no_collections": "No datasets available",
"dataset.no_tags": "No tags available",
"default_params": "default",
"default_params_desc": "Use system default parameters and rules",
"edit_dataset_config": "Edit knowledge base configuration",
"enhanced_indexes": "Index enhancement",
"error.collectionNotFound": "Collection not found~",
"external_file": "External File Library",
"external_file_dataset_desc": "Import files from an external file library to build a Dataset. The files will not be stored again.",
@@ -52,38 +38,19 @@
"feishu_dataset": "Feishu Dataset",
"feishu_dataset_config": "Feishu Dataset Config",
"feishu_dataset_desc": "Can build a dataset using Feishu documents by configuring permissions, without secondary storage",
"file_list": "File list",
"file_model_function_tip": "Enhances indexing and QA generation",
"filename": "Filename",
"folder_dataset": "Folder",
"ideal_chunk_length": "ideal block length",
"ideal_chunk_length_tips": "Segment according to the end symbol and combine multiple segments into one block. This value determines the estimated size of the block, if there is any fluctuation.",
"image_auto_parse": "Automatic image indexing",
"image_auto_parse_tips": "Call VLM to automatically label the pictures in the document and generate additional search indexes",
"import.Auto mode Estimated Price Tips": "The text understanding model needs to be called, which requires more points: {{price}} points/1K tokens",
"import.Embedding Estimated Price Tips": "Only use the index model and consume a small amount of AI points: {{price}} points/1K tokens",
"import_confirm": "Confirm upload",
"import_data_preview": "Data preview",
"import_data_process_setting": "Data processing method settings",
"import_file_parse_setting": "File parsing settings",
"import_model_config": "Model selection",
"import_param_setting": "Parameter settings",
"import_select_file": "Select a file",
"is_open_schedule": "Enable scheduled synchronization",
"keep_image": "Keep the picture",
"move.hint": "After moving, the selected knowledge base/folder will inherit the permission settings of the new folder, and the original permission settings will become invalid.",
"open_auto_sync": "After scheduled synchronization is turned on, the system will try to synchronize the collection from time to time every day. During the collection synchronization period, the collection data will not be searched.",
"params_setting": "Parameter settings",
"pdf_enhance_parse": "PDF enhancement analysis",
"pdf_enhance_parse_price": "{{price}} points/page",
"pdf_enhance_parse_tips": "Calling PDF recognition model for parsing, you can convert it into Markdown and retain pictures in the document. At the same time, you can also identify scanned documents, which will take a long time to identify them.",
"permission.des.manage": "Can manage the entire knowledge base data and information",
"permission.des.read": "View knowledge base content",
"permission.des.write": "Ability to add and change knowledge base content",
"preview_chunk": "Preview chunks",
"preview_chunk_empty": "Unable to read the contents of the file",
"preview_chunk_intro": "Display up to 10 pieces",
"preview_chunk_not_selected": "Click on the file on the left to preview",
"rebuild_embedding_start_tip": "Index model switching task has started",
"rebuilding_index_count": "Number of indexes being rebuilt: {{count}}",
"request_headers": "Request headers, will automatically append 'Bearer '",
@@ -105,10 +72,8 @@
"tag.tags": "Tags",
"tag.total_tags": "Total {{total}} tags",
"the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "The Dataset has indexes that are being trained or rebuilt",
"total_num_files": "Total {{total}} files",
"training_mode": "Chunk mode",
"vector_model_max_tokens_tip": "Each chunk of data has a maximum length of 3000 tokens",
"vllm_model": "Image understanding model",
"website_dataset": "Website Sync",
"website_dataset_desc": "Website sync allows you to build a Dataset directly using a web link.",
"yuque_dataset": "Yuque Dataset",

Some files were not shown because too many files have changed in this diff Show More