Compare commits

..

11 Commits

Author SHA1 Message Date
archer
081a843d7e update action 2025-03-05 18:45:24 +08:00
Archer
e53646d13e pdf parse doc (#3990) 2025-03-05 18:33:53 +08:00
Archer
693db35a42 fix: link (#3987) 2025-03-05 17:08:18 +08:00
Archer
9717be8522 simple mode tool reason (#3984)
* simple mode tool reason

* model config cannot set empty

* perf: read files code

* perf: mongo gridfs chunks

* perf: doc
2025-03-05 15:55:02 +08:00
archer
02685f7a3e update init sh 2025-03-05 15:09:49 +08:00
archer
e1b021af71 doc 2025-03-05 15:09:48 +08:00
Archer
051b590284 feat: prompt call tool support reason;perf: ai proxy doc (#3982)
* update schema

* perf: ai proxy doc

* feat: prompt call tool support reason
2025-03-05 15:09:48 +08:00
heheer
60f0c18997 ai proxy docker compose & doc (#3947) 2025-03-05 15:09:47 +08:00
Archer
6a3bd30add Add markdown format; Update doc (#3969)
* update doc

* markdown
2025-03-05 15:09:46 +08:00
Archer
2c89752f67 feat: pg vector 0.8.0;perf: app pdf enhance parse (#3962)
* perf: app pdf enhance parse

* feat: pg vector 0.8.0

* update schema default

* model sort and default image

* perf: i18n

* perf: ui tip
2025-03-05 15:09:46 +08:00
Archer
139b142293 Add image index and pdf parse (#3956)
* feat: think tag parse

* feat: parse think tag test

* feat: pdf parse ux

* feat: doc2x parse

* perf: rewrite training mode setting

* feat: image parse queue

* perf: image index

* feat: image parse process

* feat: add init sh

* fix: ts
2025-03-05 15:09:41 +08:00
32 changed files with 212 additions and 573 deletions

View File

@@ -6,6 +6,8 @@ on:
- 'docSite/**' - 'docSite/**'
branches: branches:
- 'main' - 'main'
tags:
- 'v*.*.*'
jobs: jobs:
build-fastgpt-docs-images: build-fastgpt-docs-images:

View File

@@ -7,6 +7,8 @@ on:
- 'docSite/**' - 'docSite/**'
branches: branches:
- 'main' - 'main'
tags:
- 'v*.*.*'
# A workflow run is made up of one or more jobs that can run sequentially or in parallel # A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs: jobs:

View File

@@ -4,6 +4,8 @@ on:
pull_request_target: pull_request_target:
paths: paths:
- 'docSite/**' - 'docSite/**'
branches:
- 'main'
workflow_dispatch: workflow_dispatch:
# A workflow run is made up of one or more jobs that can run sequentially or in parallel # A workflow run is made up of one or more jobs that can run sequentially or in parallel

View File

@@ -130,7 +130,6 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
## 🌿 第三方生态 ## 🌿 第三方生态
- [AI Proxy国内模型聚合服务](https://sealos.run/aiproxy/?k=fastgpt-github/)
- [SiliconCloud (硅基流动) —— 开源模型在线体验平台](https://cloud.siliconflow.cn/i/TR9Ym0c4) - [SiliconCloud (硅基流动) —— 开源模型在线体验平台](https://cloud.siliconflow.cn/i/TR9Ym0c4)
- [COW 个人微信/企微机器人](https://doc.tryfastgpt.ai/docs/use-cases/external-integration/onwechat/) - [COW 个人微信/企微机器人](https://doc.tryfastgpt.ai/docs/use-cases/external-integration/onwechat/)

View File

@@ -181,6 +181,8 @@ services:
depends_on: depends_on:
aiproxy_pg: aiproxy_pg:
condition: service_healthy condition: service_healthy
ports:
- '3002:3000'
networks: networks:
- fastgpt - fastgpt
environment: environment:
@@ -202,8 +204,8 @@ services:
timeout: 5s timeout: 5s
retries: 10 retries: 10
aiproxy_pg: aiproxy_pg:
image: pgvector/pgvector:0.8.0-pg15 # docker hub # image: pgvector/pgvector:0.8.0-pg15 # docker hub
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云 image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云
restart: unless-stopped restart: unless-stopped
container_name: aiproxy_pg container_name: aiproxy_pg
volumes: volumes:

View File

@@ -28,8 +28,8 @@ services:
# image: mongo:4.4.29 # cpu不支持AVX时候使用 # image: mongo:4.4.29 # cpu不支持AVX时候使用
container_name: mongo container_name: mongo
restart: always restart: always
# ports: ports:
# - 27017:27017 - 27017:27017
networks: networks:
- fastgpt - fastgpt
command: mongod --keyFile /data/mongodb.key --replSet rs0 command: mongod --keyFile /data/mongodb.key --replSet rs0
@@ -138,6 +138,8 @@ services:
depends_on: depends_on:
aiproxy_pg: aiproxy_pg:
condition: service_healthy condition: service_healthy
ports:
- '3002:3000'
networks: networks:
- fastgpt - fastgpt
environment: environment:
@@ -159,8 +161,8 @@ services:
timeout: 5s timeout: 5s
retries: 10 retries: 10
aiproxy_pg: aiproxy_pg:
image: pgvector/pgvector:0.8.0-pg15 # docker hub # image: pgvector/pgvector:0.8.0-pg15 # docker hub
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云 image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云
restart: unless-stopped restart: unless-stopped
container_name: aiproxy_pg container_name: aiproxy_pg
volumes: volumes:

View File

@@ -119,6 +119,8 @@ services:
depends_on: depends_on:
aiproxy_pg: aiproxy_pg:
condition: service_healthy condition: service_healthy
ports:
- '3002:3000'
networks: networks:
- fastgpt - fastgpt
environment: environment:
@@ -140,8 +142,8 @@ services:
timeout: 5s timeout: 5s
retries: 10 retries: 10
aiproxy_pg: aiproxy_pg:
image: pgvector/pgvector:0.8.0-pg15 # docker hub # image: pgvector/pgvector:0.8.0-pg15 # docker hub
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云 image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云
restart: unless-stopped restart: unless-stopped
container_name: aiproxy_pg container_name: aiproxy_pg
volumes: volumes:

View File

@@ -24,9 +24,10 @@ PDF 是一个相对复杂的文件格式,在 FastGPT 内置的 pdf 解析器
这里介绍快速 Docker 安装的方法: 这里介绍快速 Docker 安装的方法:
```dockerfile ```dockerfile
docker pull crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:v0.2 docker pull crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:latest
docker run --gpus all -itd -p 7231:7232 --name model_pdf_v2 -e PROCESSES_PER_GPU="2" crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:v0.2 docker run --gpus all -itd -p 7231:7231 --name model_pdf_v1 crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:latest
``` ```
### 2. 添加 FastGPT 文件配置 ### 2. 添加 FastGPT 文件配置
```json ```json
@@ -35,7 +36,7 @@ docker run --gpus all -itd -p 7231:7232 --name model_pdf_v2 -e PROCESSES_PER_GPU
"systemEnv": { "systemEnv": {
xxx xxx
"customPdfParse": { "customPdfParse": {
"url": "http://xxxx.com/v2/parse/file", // 自定义 PDF 解析服务地址 marker v0.2 "url": "http://xxxx.com/v1/parse/file", // 自定义 PDF 解析服务地址
"key": "", // 自定义 PDF 解析服务密钥 "key": "", // 自定义 PDF 解析服务密钥
"doc2xKey": "", // doc2x 服务密钥 "doc2xKey": "", // doc2x 服务密钥
"price": 0 // PDF 解析服务价格 "price": 0 // PDF 解析服务价格
@@ -80,24 +81,3 @@ docker run --gpus all -itd -p 7231:7232 --name model_pdf_v2 -e PROCESSES_PER_GPU
上图是分块后的结果,下图是 pdf 原文。整体图片、公式、表格都可以提取出来,效果还是杠杠的。 上图是分块后的结果,下图是 pdf 原文。整体图片、公式、表格都可以提取出来,效果还是杠杠的。
不过要注意的是,[Marker](https://github.com/VikParuchuri/marker) 的协议是`GPL-3.0 license`,请在遵守协议的前提下使用。 不过要注意的是,[Marker](https://github.com/VikParuchuri/marker) 的协议是`GPL-3.0 license`,请在遵守协议的前提下使用。
## 旧版 Marker 使用方法
FastGPT V4.9.0 版本之前,可以用以下方式,试用 Marker 解析服务。
安装和运行 Marker 服务:
```dockerfile
docker pull crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:v0.1
docker run --gpus all -itd -p 7231:7231 --name model_pdf_v1 -e PROCESSES_PER_GPU="2" crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:v0.1
```
并修改 FastGPT 环境变量:
```
CUSTOM_READ_FILE_URL=http://xxxx.com/v1/parse/file
CUSTOM_READ_FILE_EXTENSION=pdf
```
* CUSTOM_READ_FILE_URL - 自定义解析服务的地址，host 改成解析服务的访问地址，path 不能变动。
* CUSTOM_READ_FILE_EXTENSION - 支持的文件后缀,多个文件类型,可用逗号隔开。

View File

@@ -1063,12 +1063,10 @@ curl --location --request DELETE 'http://localhost:3000/api/core/dataset/collect
| 字段 | 类型 | 说明 | 必填 | | 字段 | 类型 | 说明 | 必填 |
| --- | --- | --- | --- | | --- | --- | --- | --- |
| type | String | 可选索引类型default-默认索引; custom-自定义索引; summary-总结索引; question-问题索引; image-图片索引 | | | defaultIndex | Boolean | 是否为默认索引 | |
| dataId | String | 关联的向量ID,变更数据时候传入该 ID会进行差量更新而不是全量更新 | | | dataId | String | 关联的向量ID | |
| text | String | 文本内容 | ✅ | | text | String | 文本内容 | ✅ |
`type` 不填则默认为 `custom` 索引,还会基于 q/a 组成一个默认索引。如果传入了默认索引,则不会额外创建。
### 为集合批量添加数据 ### 为集合批量添加数据
注意,每次最多推送 200 组数据。 注意,每次最多推送 200 组数据。
@@ -1300,7 +1298,8 @@ curl --location --request GET 'http://localhost:3000/api/core/dataset/data/detai
"chunkIndex": 0, "chunkIndex": 0,
"indexes": [ "indexes": [
{ {
"type": "default", "defaultIndex": true,
"type": "chunk",
"dataId": "3720083", "dataId": "3720083",
"text": "N o . 2 0 2 2 1 2中 国 信 息 通 信 研 究 院京东探索研究院2022年 9月人工智能生成内容AIGC白皮书(2022 年)版权声明本白皮书版权属于中国信息通信研究院和京东探索研究院,并受法律保护。转载、摘编或利用其它方式使用本白皮书文字或者观点的,应注明“来源:中国信息通信研究院和京东探索研究院”。违反上述声明者,编者将追究其相关法律责任。前 言习近平总书记曾指出“数字技术正以新理念、新业态、新模式全面融入人类经济、政治、文化、社会、生态文明建设各领域和全过程”。在当前数字世界和物理世界加速融合的大背景下人工智能生成内容Artificial Intelligence Generated Content简称 AIGC正在悄然引导着一场深刻的变革重塑甚至颠覆数字内容的生产方式和消费模式将极大地丰富人们的数字生活是未来全面迈向数字文明新时代不可或缺的支撑力量。", "text": "N o . 2 0 2 2 1 2中 国 信 息 通 信 研 究 院京东探索研究院2022年 9月人工智能生成内容AIGC白皮书(2022 年)版权声明本白皮书版权属于中国信息通信研究院和京东探索研究院,并受法律保护。转载、摘编或利用其它方式使用本白皮书文字或者观点的,应注明“来源:中国信息通信研究院和京东探索研究院”。违反上述声明者,编者将追究其相关法律责任。前 言习近平总书记曾指出“数字技术正以新理念、新业态、新模式全面融入人类经济、政治、文化、社会、生态文明建设各领域和全过程”。在当前数字世界和物理世界加速融合的大背景下人工智能生成内容Artificial Intelligence Generated Content简称 AIGC正在悄然引导着一场深刻的变革重塑甚至颠覆数字内容的生产方式和消费模式将极大地丰富人们的数字生活是未来全面迈向数字文明新时代不可或缺的支撑力量。",
"_id": "65abd4b29d1448617cba61dc" "_id": "65abd4b29d1448617cba61dc"
@@ -1335,19 +1334,13 @@ curl --location --request PUT 'http://localhost:3000/api/core/dataset/data/updat
"q":"测试111", "q":"测试111",
"a":"sss", "a":"sss",
"indexes":[ "indexes":[
{
"dataId": "xxxx",
"type": "default",
"text": "默认索引"
},
{ {
"dataId": "xxx", "dataId": "xxx",
"type": "custom", "defaultIndex":false,
"text": "旧的自定义索引1" "text":"自定义索引1"
}, },
{ {
"type":"custom", "text":"修改后的自定义索引2。会删除原来的自定义索引2并插入新的自定义索引2"
"text":"新增的自定义索引"
} }
] ]
}' }'

View File

@@ -14,138 +14,7 @@ weight: 801
### 2. 更新镜像 ### 2. 更新镜像
- 更新 FastGPT 镜像 tag: v4.9.0-alpha ### 3. 运行升级脚本
- 更新 FastGPT 商业版镜像 tag: v4.9.0-alpha
- Sandbox 镜像,可以不更新
### 3. 替换 OneAPI可选
如果需要使用 AI Proxy 替换 OneAPI 的用户可执行该步骤。
#### 1. 修改 yml 文件
参考[最新的 yml](https://raw.githubusercontent.com/labring/FastGPT/main/deploy/docker/docker-compose-pgvector.yml) 文件。里面已移除 OneAPI 并添加了 AIProxy 配置。包含一个服务和一个 PgSQL 数据库。将 `aiproxy` 的配置`追加`到 OneAPI 的配置后面(先不要删除 OneAPI，有一个初始化会自动同步 OneAPI 的配置)
{{% details title="AI Proxy Yml 配置" closed="true" %}}
```
# AI Proxy
aiproxy:
image: 'ghcr.io/labring/sealos-aiproxy-service:latest'
container_name: aiproxy
restart: unless-stopped
depends_on:
aiproxy_pg:
condition: service_healthy
networks:
- fastgpt
environment:
# 对应 fastgpt 里的AIPROXY_API_TOKEN
- ADMIN_KEY=aiproxy
# 错误日志详情保存时间(小时)
- LOG_DETAIL_STORAGE_HOURS=1
# 数据库连接地址
- SQL_DSN=postgres://postgres:aiproxy@aiproxy_pg:5432/aiproxy
# 最大重试次数
- RetryTimes=3
# 不需要计费
- BILLING_ENABLED=false
# 不需要严格检测模型
- DISABLE_MODEL_CONFIG=true
healthcheck:
test: ['CMD', 'curl', '-f', 'http://localhost:3000/api/status']
interval: 5s
timeout: 5s
retries: 10
aiproxy_pg:
image: pgvector/pgvector:0.8.0-pg15 # docker hub
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云
restart: unless-stopped
container_name: aiproxy_pg
volumes:
- ./aiproxy_pg:/var/lib/postgresql/data
networks:
- fastgpt
environment:
TZ: Asia/Shanghai
POSTGRES_USER: postgres
POSTGRES_DB: aiproxy
POSTGRES_PASSWORD: aiproxy
healthcheck:
test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy']
interval: 5s
timeout: 5s
retries: 10
```
{{% /details %}}
#### 2. 增加 FastGPT 环境变量:
修改 yml 文件中fastgpt 容器的环境变量:
```
# AI Proxy 的地址,如果配了该地址,优先使用
- AIPROXY_API_ENDPOINT=http://aiproxy:3000
# AI Proxy 的 Admin Token与 AI Proxy 中的环境变量 ADMIN_KEY
- AIPROXY_API_TOKEN=aiproxy
```
#### 3. 重载服务
`docker-compose down` 停止服务,然后 `docker-compose up -d` 启动服务,此时会追加 `aiproxy` 服务,并修改 FastGPT 的配置。
#### 4. 执行OneAPI迁移AI proxy脚本
- 可联网方案:
```bash
# 进入 aiproxy 容器
docker exec -it aiproxy sh
# 安装 curl
apk add curl
# 执行脚本
curl --location --request POST 'http://localhost:3000/api/channels/import/oneapi' \
--header 'Authorization: Bearer aiproxy' \
--header 'Content-Type: application/json' \
--data-raw '{
"dsn": "mysql://root:oneapimmysql@tcp(mysql:3306)/oneapi"
}'
# 返回 {"data":[],"success":true} 代表成功
```
- 无法联网时,可打开`aiproxy`的外网暴露端口,然后在本地执行脚本。
aiProxy 暴露端口：3003:3000，修改后重新 `docker-compose up -d` 启动服务。
```bash
# 在终端执行脚本
curl --location --request POST 'http://localhost:3003/api/channels/import/oneapi' \
--header 'Authorization: Bearer aiproxy' \
--header 'Content-Type: application/json' \
--data-raw '{
"dsn": "mysql://root:oneapimmysql@tcp(mysql:3306)/oneapi"
}'
# 返回 {"data":[],"success":true} 代表成功
```
- 如果不熟悉 docker 操作,建议不要走脚本迁移,直接删除 OneAPI 所有内容,然后手动重新添加渠道。
#### 5. 进入 FastGPT 检查`AI Proxy` 服务是否正常启动。
登录 root 账号后，在`账号-模型提供商`页面，可以看到多出了`模型渠道`和`调用日志`两个选项，打开模型渠道，可以看到之前 OneAPI 的渠道，说明迁移完成，此时可以手动再检查下渠道是否正常。
#### 6. 删除 OneAPI 服务
```bash
# 停止服务,或者针对性停止 OneAPI 和其 Mysql
docker-compose down
# yml 文件中删除 OneAPI 和其 Mysql 依赖
# 重启服务
docker-compose up -d
```
### 4. 运行 FastGPT 升级脚本
从任意终端，发起 1 个 HTTP 请求。其中 {{rootkey}} 替换成环境变量里的 `rootkey`；{{host}} 替换成**FastGPT 域名**。 从任意终端，发起 1 个 HTTP 请求。其中 {{rootkey}} 替换成环境变量里的 `rootkey`；{{host}} 替换成**FastGPT 域名**。
@@ -159,7 +28,7 @@ curl --location --request POST 'https://{{host}}/api/admin/initv490' \
1. 升级 PG Vector 插件版本 1. 升级 PG Vector 插件版本
2. 全量更新知识库集合字段。 2. 全量更新知识库集合字段。
3. 全量更新知识库数据中index 的 type 类型。(时间较长，最后可能提示 timeout，可忽略，数据库不崩都会一直增量执行) 3. 全量更新知识库数据中index 的 type 类型。(时间较长)
## 兼容 & 弃用 ## 兼容 & 弃用
@@ -173,7 +42,6 @@ curl --location --request POST 'https://{{host}}/api/admin/initv490' \
1. PDF增强解析交互添加到页面上。同时内嵌 Doc2x 服务,可直接使用 Doc2x 服务解析 PDF 文件。 1. PDF增强解析交互添加到页面上。同时内嵌 Doc2x 服务,可直接使用 Doc2x 服务解析 PDF 文件。
2. 图片自动标注,同时修改知识库文件上传部分数据逻辑和交互。 2. 图片自动标注,同时修改知识库文件上传部分数据逻辑和交互。
3. pg vector 插件升级 0.8.0 版本,引入迭代搜索,减少部分数据无法被检索的情况。 3. pg vector 插件升级 0.8.0 版本,引入迭代搜索,减少部分数据无法被检索的情况。
4. 新增 qwen-qwq 系列模型配置。
## ⚙️ 优化 ## ⚙️ 优化
@@ -181,7 +49,6 @@ curl --location --request POST 'https://{{host}}/api/admin/initv490' \
2. Markdown 解析,增加链接后中文标点符号检测,增加空格。 2. Markdown 解析,增加链接后中文标点符号检测,增加空格。
3. Prompt 模式工具调用,支持思考模型。同时优化其格式检测,减少空输出的概率。 3. Prompt 模式工具调用,支持思考模型。同时优化其格式检测,减少空输出的概率。
4. Mongo 文件读取流合并,减少计算量。同时优化存储 chunks极大提高大文件读取速度。50M PDF 读取时间提高 3 倍。 4. Mongo 文件读取流合并,减少计算量。同时优化存储 chunks极大提高大文件读取速度。50M PDF 读取时间提高 3 倍。
5. HTTP Body 适配,增加对字符串对象的适配。
## 🐛 修复 ## 🐛 修复

View File

@@ -168,7 +168,7 @@ export const markdownProcess = async ({
return simpleMarkdownText(imageProcess); return simpleMarkdownText(imageProcess);
}; };
export const matchMdImg = (text: string) => { export const matchMdImgTextAndUpload = (text: string) => {
const base64Regex = /!\[([^\]]*)\]\((data:image\/[^;]+;base64[^)]+)\)/g; const base64Regex = /!\[([^\]]*)\]\((data:image\/[^;]+;base64[^)]+)\)/g;
const imageList: ImageType[] = []; const imageList: ImageType[] = [];

View File

@@ -10,6 +10,7 @@ export type AuthTeamRoleProps = {
export type CreateTeamProps = { export type CreateTeamProps = {
name: string; name: string;
avatar?: string; avatar?: string;
defaultTeam?: boolean;
memberName?: string; memberName?: string;
memberAvatar?: string; memberAvatar?: string;
notificationAccount?: string; notificationAccount?: string;

View File

@@ -47,6 +47,7 @@ export type TeamMemberSchema = {
role: `${TeamMemberRoleEnum}`; role: `${TeamMemberRoleEnum}`;
status: `${TeamMemberStatusEnum}`; status: `${TeamMemberStatusEnum}`;
avatar: string; avatar: string;
defaultTeam: boolean;
}; };
export type TeamMemberWithTeamAndUserSchema = TeamMemberSchema & { export type TeamMemberWithTeamAndUserSchema = TeamMemberSchema & {
@@ -64,6 +65,7 @@ export type TeamTmbItemType = {
balance?: number; balance?: number;
tmbId: string; tmbId: string;
teamDomain: string; teamDomain: string;
defaultTeam: boolean;
role: `${TeamMemberRoleEnum}`; role: `${TeamMemberRoleEnum}`;
status: `${TeamMemberStatusEnum}`; status: `${TeamMemberStatusEnum}`;
notificationAccount?: string; notificationAccount?: string;

View File

@@ -6,7 +6,6 @@ import { guessBase64ImageType } from '../utils';
import { readFromSecondary } from '../../mongo/utils'; import { readFromSecondary } from '../../mongo/utils';
import { addHours } from 'date-fns'; import { addHours } from 'date-fns';
import { imageFileType } from '@fastgpt/global/common/file/constants'; import { imageFileType } from '@fastgpt/global/common/file/constants';
import { retryFn } from '@fastgpt/global/common/system/utils';
export const maxImgSize = 1024 * 1024 * 12; export const maxImgSize = 1024 * 1024 * 12;
const base64MimeRegex = /data:image\/([^\)]+);base64/; const base64MimeRegex = /data:image\/([^\)]+);base64/;
@@ -41,15 +40,13 @@ export async function uploadMongoImg({
return Promise.reject(`Invalid image file type: ${mime}`); return Promise.reject(`Invalid image file type: ${mime}`);
} }
const { _id } = await retryFn(() => const { _id } = await MongoImage.create({
MongoImage.create({
teamId, teamId,
binary, binary,
metadata: Object.assign({ mime }, metadata), metadata: Object.assign({ mime }, metadata),
shareId, shareId,
expiredTime: forever ? undefined : addHours(new Date(), 1) expiredTime: forever ? undefined : addHours(new Date(), 1)
}) });
);
return `${process.env.NEXT_PUBLIC_BASE_URL || ''}${imageBaseUrl}${String(_id)}.${extension}`; return `${process.env.NEXT_PUBLIC_BASE_URL || ''}${imageBaseUrl}${String(_id)}.${extension}`;
} }

View File

@@ -2,30 +2,23 @@ import axios from 'axios';
import { addLog } from '../../system/log'; import { addLog } from '../../system/log';
import { serverRequestBaseUrl } from '../../api/serverRequest'; import { serverRequestBaseUrl } from '../../api/serverRequest';
import { getFileContentTypeFromHeader, guessBase64ImageType } from '../utils'; import { getFileContentTypeFromHeader, guessBase64ImageType } from '../utils';
import { retryFn } from '@fastgpt/global/common/system/utils';
export const getImageBase64 = async (url: string) => { export const getImageBase64 = async (url: string) => {
addLog.debug(`Load image to base64: ${url}`); addLog.debug(`Load image to base64: ${url}`);
try { try {
const response = await retryFn(() => const response = await axios.get(url, {
axios.get(url, {
baseURL: serverRequestBaseUrl, baseURL: serverRequestBaseUrl,
responseType: 'arraybuffer', responseType: 'arraybuffer',
proxy: false proxy: false
}) });
);
const base64 = Buffer.from(response.data, 'binary').toString('base64'); const base64 = Buffer.from(response.data, 'binary').toString('base64');
const imageType = const imageType =
getFileContentTypeFromHeader(response.headers['content-type']) || getFileContentTypeFromHeader(response.headers['content-type']) ||
guessBase64ImageType(base64); guessBase64ImageType(base64);
return { return `data:${imageType};base64,${base64}`;
completeBase64: `data:${imageType};base64,${base64}`,
base64,
mime: imageType
};
} catch (error) { } catch (error) {
addLog.debug(`Load image to base64 failed: ${url}`); addLog.debug(`Load image to base64 failed: ${url}`);
console.log(error); console.log(error);

View File

@@ -6,12 +6,11 @@ import type { ImageType, ReadFileResponse } from '../../../worker/readFile/type'
import axios from 'axios'; import axios from 'axios';
import { addLog } from '../../system/log'; import { addLog } from '../../system/log';
import { batchRun } from '@fastgpt/global/common/system/utils'; import { batchRun } from '@fastgpt/global/common/system/utils';
import { htmlTable2Md, matchMdImg } from '@fastgpt/global/common/string/markdown'; import { htmlTable2Md, matchMdImgTextAndUpload } from '@fastgpt/global/common/string/markdown';
import { createPdfParseUsage } from '../../../support/wallet/usage/controller'; import { createPdfParseUsage } from '../../../support/wallet/usage/controller';
import { getErrText } from '@fastgpt/global/common/error/utils'; import { getErrText } from '@fastgpt/global/common/error/utils';
import { delay } from '@fastgpt/global/common/system/utils'; import { delay } from '@fastgpt/global/common/system/utils';
import { getNanoid } from '@fastgpt/global/common/string/tools'; import { getNanoid } from '@fastgpt/global/common/string/tools';
import { getImageBase64 } from '../image/utils';
export type readRawTextByLocalFileParams = { export type readRawTextByLocalFileParams = {
teamId: string; teamId: string;
@@ -100,7 +99,7 @@ export const readRawContentByFileBuffer = async ({
addLog.info(`Custom file parsing is complete, time: ${Date.now() - start}ms`); addLog.info(`Custom file parsing is complete, time: ${Date.now() - start}ms`);
const rawText = response.markdown; const rawText = response.markdown;
const { text, imageList } = matchMdImg(rawText); const { text, imageList } = matchMdImgTextAndUpload(rawText);
createPdfParseUsage({ createPdfParseUsage({
teamId, teamId,
@@ -121,8 +120,8 @@ export const readRawContentByFileBuffer = async ({
const parseTextImage = async (text: string) => { const parseTextImage = async (text: string) => {
// Extract image links and convert to base64 // Extract image links and convert to base64
const imageList: { id: string; url: string }[] = []; const imageList: { id: string; url: string }[] = [];
let processedText = text.replace(/!\[.*?\]\((http[^)]+)\)/g, (match, url) => { const processedText = text.replace(/!\[.*?\]\((http[^)]+)\)/g, (match, url) => {
const id = `IMAGE_${getNanoid()}_IMAGE`; const id = getNanoid();
imageList.push({ imageList.push({
id, id,
url url
@@ -130,24 +129,22 @@ export const readRawContentByFileBuffer = async ({
return `![](${id})`; return `![](${id})`;
}); });
// Get base64 from image url
let resultImageList: ImageType[] = []; let resultImageList: ImageType[] = [];
await batchRun( await Promise.all(
imageList, imageList.map(async (item) => {
async (item) => {
try { try {
const { base64, mime } = await getImageBase64(item.url); const response = await axios.get(item.url, { responseType: 'arraybuffer' });
const mime = response.headers['content-type'] || 'image/jpeg';
const base64 = response.data.toString('base64');
resultImageList.push({ resultImageList.push({
uuid: item.id, uuid: item.id,
mime, mime,
base64 base64
}); });
} catch (error) { } catch (error) {
processedText = processedText.replace(item.id, item.url);
addLog.warn(`Failed to get image from ${item.url}: ${getErrText(error)}`); addLog.warn(`Failed to get image from ${item.url}: ${getErrText(error)}`);
} }
}, })
5
); );
return { return {
@@ -315,14 +312,14 @@ export const readRawContentByFileBuffer = async ({
return await uploadMongoImg({ return await uploadMongoImg({
base64Img: `data:${item.mime};base64,${item.base64}`, base64Img: `data:${item.mime};base64,${item.base64}`,
teamId, teamId,
// expiredTime: addHours(new Date(), 1),
metadata: { metadata: {
...metadata, ...metadata,
mime: item.mime mime: item.mime
} }
}); });
} catch (error) { } catch (error) {
addLog.warn('Upload file image error', { error }); return '';
return 'Upload load image error';
} }
})(); })();
rawText = rawText.replace(item.uuid, src); rawText = rawText.replace(item.uuid, src);

View File

@@ -19,7 +19,7 @@ export async function connectMongo(): Promise<Mongoose> {
// Remove existing listeners to prevent duplicates // Remove existing listeners to prevent duplicates
connectionMongo.connection.removeAllListeners('error'); connectionMongo.connection.removeAllListeners('error');
connectionMongo.connection.removeAllListeners('disconnected'); connectionMongo.connection.removeAllListeners('disconnected');
connectionMongo.set('strictQuery', 'throw'); connectionMongo.set('strictQuery', false);
connectionMongo.connection.on('error', async (error) => { connectionMongo.connection.on('error', async (error) => {
console.log('mongo error', error); console.log('mongo error', error);

View File

@@ -122,56 +122,6 @@
"showTopP": true, "showTopP": true,
"showStopSign": true "showStopSign": true
}, },
{
"model": "qwq-plus",
"name": "qwq-plus",
"maxContext": 128000,
"maxResponse": 8000,
"quoteMaxToken": 100000,
"maxTemperature": null,
"vision": false,
"reasoning": true,
"toolChoice": true,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": false,
"usedInClassify": false,
"customCQPrompt": "",
"usedInExtractFields": false,
"usedInQueryExtension": false,
"customExtractPrompt": "",
"usedInToolCall": true,
"defaultConfig": {},
"fieldMap": {},
"type": "llm",
"showTopP": false,
"showStopSign": false
},
{
"model": "qwq-32b",
"name": "qwq-32b",
"maxContext": 128000,
"maxResponse": 8000,
"quoteMaxToken": 100000,
"maxTemperature": null,
"vision": false,
"reasoning": true,
"toolChoice": true,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": false,
"usedInClassify": false,
"customCQPrompt": "",
"usedInExtractFields": false,
"usedInQueryExtension": false,
"customExtractPrompt": "",
"usedInToolCall": true,
"defaultConfig": {},
"fieldMap": {},
"type": "llm",
"showTopP": false,
"showStopSign": false
},
{ {
"model": "qwen-coder-turbo", "model": "qwen-coder-turbo",
"name": "qwen-coder-turbo", "name": "qwen-coder-turbo",

View File

@@ -165,7 +165,7 @@ export const loadRequestMessages = async ({
try { try {
// If imgUrl is a local path, load image from local, and set url to base64 // If imgUrl is a local path, load image from local, and set url to base64
if (imgUrl.startsWith('/') || process.env.MULTIPLE_DATA_TO_BASE64 === 'true') { if (imgUrl.startsWith('/') || process.env.MULTIPLE_DATA_TO_BASE64 === 'true') {
const { completeBase64: base64 } = await getImageBase64(imgUrl); const base64 = await getImageBase64(imgUrl);
return { return {
...item, ...item,

View File

@@ -264,7 +264,7 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
} }
})(); })();
if (!answerText && !reasoningText) { if (!answerText) {
return Promise.reject(getEmptyResponseTip()); return Promise.reject(getEmptyResponseTip());
} }

View File

@@ -120,144 +120,27 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
2. Replace newline strings 2. Replace newline strings
*/ */
const replaceJsonBodyString = (text: string) => { const replaceJsonBodyString = (text: string) => {
// Check if the variable is in quotes const valToStr = (val: any) => {
const isVariableInQuotes = (text: string, variable: string) => {
const index = text.indexOf(variable);
if (index === -1) return false;
// 计算变量前面的引号数量
const textBeforeVar = text.substring(0, index);
const matches = textBeforeVar.match(/"/g) || [];
// 如果引号数量为奇数,则变量在引号内
return matches.length % 2 === 1;
};
const valToStr = (val: any, isQuoted = false) => {
if (val === undefined) return 'null'; if (val === undefined) return 'null';
if (val === null) return 'null'; if (val === null) return 'null';
if (typeof val === 'object') return JSON.stringify(val); if (typeof val === 'object') return JSON.stringify(val);
if (typeof val === 'string') { if (typeof val === 'string') {
if (isQuoted) {
return val.replace(/(?<!\\)"/g, '\\"');
}
try { try {
JSON.parse(val); const parsed = JSON.parse(val);
if (typeof parsed === 'object') {
return JSON.stringify(parsed);
}
return val; return val;
} catch (error) { } catch (error) {
const str = JSON.stringify(val); const str = JSON.stringify(val);
return str.startsWith('"') && str.endsWith('"') ? str.slice(1, -1) : str; return str.startsWith('"') && str.endsWith('"') ? str.slice(1, -1) : str;
} }
} }
return String(val); return String(val);
}; };
// Test cases for variable replacement in JSON body
// const bodyTest = () => {
// const testData = [
// // 基本字符串替换
// {
// body: `{"name":"{{name}}","age":"18"}`,
// variables: [{ key: '{{name}}', value: '测试' }],
// result: `{"name":"测试","age":"18"}`
// },
// // 特殊字符处理
// {
// body: `{"text":"{{text}}"}`,
// variables: [{ key: '{{text}}', value: '包含"引号"和\\反斜杠' }],
// result: `{"text":"包含\\"引号\\"和\\反斜杠"}`
// },
// // 数字类型处理
// {
// body: `{"count":{{count}},"price":{{price}}}`,
// variables: [
// { key: '{{count}}', value: '42' },
// { key: '{{price}}', value: '99.99' }
// ],
// result: `{"count":42,"price":99.99}`
// },
// // 布尔值处理
// {
// body: `{"isActive":{{isActive}},"hasData":{{hasData}}}`,
// variables: [
// { key: '{{isActive}}', value: 'true' },
// { key: '{{hasData}}', value: 'false' }
// ],
// result: `{"isActive":true,"hasData":false}`
// },
// // 对象类型处理
// {
// body: `{"user":{{user}},"user2":"{{user2}}"}`,
// variables: [
// { key: '{{user}}', value: `{"id":1,"name":"张三"}` },
// { key: '{{user2}}', value: `{"id":1,"name":"张三"}` }
// ],
// result: `{"user":{"id":1,"name":"张三"},"user2":"{\\"id\\":1,\\"name\\":\\"张三\\"}"}`
// },
// // 数组类型处理
// {
// body: `{"items":{{items}}}`,
// variables: [{ key: '{{items}}', value: '[1, 2, 3]' }],
// result: `{"items":[1,2,3]}`
// },
// // null 和 undefined 处理
// {
// body: `{"nullValue":{{nullValue}},"undefinedValue":{{undefinedValue}}}`,
// variables: [
// { key: '{{nullValue}}', value: 'null' },
// { key: '{{undefinedValue}}', value: 'undefined' }
// ],
// result: `{"nullValue":null,"undefinedValue":null}`
// },
// // 嵌套JSON结构
// {
// body: `{"data":{"nested":{"value":"{{nestedValue}}"}}}`,
// variables: [{ key: '{{nestedValue}}', value: '嵌套值' }],
// result: `{"data":{"nested":{"value":"嵌套值"}}}`
// },
// // 多变量替换
// {
// body: `{"first":"{{first}}","second":"{{second}}","third":{{third}}}`,
// variables: [
// { key: '{{first}}', value: '第一' },
// { key: '{{second}}', value: '第二' },
// { key: '{{third}}', value: '3' }
// ],
// result: `{"first":"第一","second":"第二","third":3}`
// },
// // JSON字符串作为变量值
// {
// body: `{"config":{{config}}}`,
// variables: [{ key: '{{config}}', value: '{"setting":"enabled","mode":"advanced"}' }],
// result: `{"config":{"setting":"enabled","mode":"advanced"}}`
// }
// ];
// for (let i = 0; i < testData.length; i++) {
// const item = testData[i];
// let bodyStr = item.body;
// for (const variable of item.variables) {
// const isQuote = isVariableInQuotes(bodyStr, variable.key);
// bodyStr = bodyStr.replace(variable.key, valToStr(variable.value, isQuote));
// }
// bodyStr = bodyStr.replace(/(".*?")\s*:\s*undefined\b/g, '$1:null');
// console.log(bodyStr === item.result, i);
// if (bodyStr !== item.result) {
// console.log(bodyStr);
// console.log(item.result);
// } else {
// try {
// JSON.parse(item.result);
// } catch (error) {
// console.log('反序列化异常', i, item.result);
// }
// }
// }
// };
// bodyTest();
// 1. Replace {{key.key}} variables // 1. Replace {{key.key}} variables
const regex1 = /\{\{\$([^.]+)\.([^$]+)\$\}\}/g; const regex1 = /\{\{\$([^.]+)\.([^$]+)\$\}\}/g;
@@ -265,10 +148,6 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
matches1.forEach((match) => { matches1.forEach((match) => {
const nodeId = match[1]; const nodeId = match[1];
const id = match[2]; const id = match[2];
const fullMatch = match[0];
// 检查变量是否在引号内
const isInQuotes = isVariableInQuotes(text, fullMatch);
const variableVal = (() => { const variableVal = (() => {
if (nodeId === VARIABLE_NODE_ID) { if (nodeId === VARIABLE_NODE_ID) {
@@ -286,9 +165,9 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
return getReferenceVariableValue({ value: input.value, nodes: runtimeNodes, variables }); return getReferenceVariableValue({ value: input.value, nodes: runtimeNodes, variables });
})(); })();
const formatVal = valToStr(variableVal, isInQuotes); const formatVal = valToStr(variableVal);
const regex = new RegExp(`\\{\\{\\$(${nodeId}\\.${id})\\$\\}\\}`, ''); const regex = new RegExp(`\\{\\{\\$(${nodeId}\\.${id})\\$\\}\\}`, 'g');
text = text.replace(regex, () => formatVal); text = text.replace(regex, () => formatVal);
}); });
@@ -297,13 +176,7 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
const matches2 = text.match(regex2) || []; const matches2 = text.match(regex2) || [];
const uniqueKeys2 = [...new Set(matches2.map((match) => match.slice(2, -2)))]; const uniqueKeys2 = [...new Set(matches2.map((match) => match.slice(2, -2)))];
for (const key of uniqueKeys2) { for (const key of uniqueKeys2) {
const fullMatch = `{{${key}}}`; text = text.replace(new RegExp(`{{(${key})}}`, 'g'), () => valToStr(allVariables[key]));
// 检查变量是否在引号内
const isInQuotes = isVariableInQuotes(text, fullMatch);
text = text.replace(new RegExp(`{{(${key})}}`, ''), () =>
valToStr(allVariables[key], isInQuotes)
);
} }
return text.replace(/(".*?")\s*:\s*undefined\b/g, '$1: null'); return text.replace(/(".*?")\s*:\s*undefined\b/g, '$1: null');

View File

@@ -43,6 +43,7 @@ async function getTeamMember(match: Record<string, any>): Promise<TeamTmbItemTyp
teamDomain: tmb.team?.teamDomain, teamDomain: tmb.team?.teamDomain,
role: tmb.role, role: tmb.role,
status: tmb.status, status: tmb.status,
defaultTeam: tmb.defaultTeam,
permission: new TeamPermission({ permission: new TeamPermission({
per: Per ?? TeamDefaultPermissionVal, per: Per ?? TeamDefaultPermissionVal,
isOwner: tmb.role === TeamMemberRoleEnum.owner isOwner: tmb.role === TeamMemberRoleEnum.owner
@@ -70,7 +71,8 @@ export async function getUserDefaultTeam({ userId }: { userId: string }) {
return Promise.reject('tmbId or userId is required'); return Promise.reject('tmbId or userId is required');
} }
return getTeamMember({ return getTeamMember({
userId: new Types.ObjectId(userId) userId: new Types.ObjectId(userId),
defaultTeam: true
}); });
} }

View File

@@ -39,14 +39,14 @@ const TeamMemberSchema = new Schema({
updateTime: { updateTime: {
type: Date type: Date
}, },
defaultTeam: {
type: Boolean,
default: false
},
// Abandoned // Abandoned
role: { role: {
type: String type: String
},
// Abandoned
defaultTeam: {
type: Boolean
} }
}); });

View File

@@ -1,6 +1,6 @@
import TurndownService from 'turndown'; import TurndownService from 'turndown';
import { ImageType } from '../readFile/type'; import { ImageType } from '../readFile/type';
import { matchMdImg } from '@fastgpt/global/common/string/markdown'; import { matchMdImgTextAndUpload } from '@fastgpt/global/common/string/markdown';
import { getNanoid } from '@fastgpt/global/common/string/tools'; import { getNanoid } from '@fastgpt/global/common/string/tools';
// @ts-ignore // @ts-ignore
const turndownPluginGfm = require('joplin-turndown-plugin-gfm'); const turndownPluginGfm = require('joplin-turndown-plugin-gfm');
@@ -46,7 +46,7 @@ export const html2md = (
// Base64 img to id, otherwise it will occupy memory when going to md // Base64 img to id, otherwise it will occupy memory when going to md
const { processedHtml, images } = processBase64Images(html); const { processedHtml, images } = processBase64Images(html);
const md = turndownService.turndown(processedHtml); const md = turndownService.turndown(processedHtml);
const { text, imageList } = matchMdImg(md); const { text, imageList } = matchMdImgTextAndUpload(md);
return { return {
rawText: text, rawText: text,

View File

@@ -70,7 +70,7 @@ export PROCESSES_PER_GPU="1"
python api_mp.py python api_mp.py
``` ```
# 镜像打包和部署(推荐) # 镜像打包和部署
## 本地构建镜像 ## 本地构建镜像
@@ -83,39 +83,26 @@ export PROCESSES_PER_GPU="1"
```bash ```bash
sudo docker run --gpus all -itd -p 7231:7231 --name model_pdf_v1 -e PROCESSES_PER_GPU="2" model_pdf sudo docker run --gpus all -itd -p 7231:7231 --name model_pdf_v1 -e PROCESSES_PER_GPU="2" model_pdf
``` ```
## 快速构建镜像
## 快速构建镜像(推荐)
```dockerfile ```dockerfile
docker pull crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:v0.2 docker pull crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:latest
docker run --gpus all -itd -p 7231:7232 --name model_pdf_v2 -e PROCESSES_PER_GPU="2" crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:v0.2 docker run --gpus all -itd -p 7231:7231 --name model_pdf_v1 -e PROCESSES_PER_GPU="2" crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:latest
``` ```
*注意*参数PROCESSES_PER_GPU设置每张显卡上文件处理的并行数量24G的显卡可以设置为2。在多显卡的环境中会自动切换显卡来运行多文件的并行处理。
# 访问示例 # 访问示例
marker v0.1用Post方法访问端口为 `7321 ` 的 `v1/parse/file` 服务 用Post方法访问端口为 `7321 ` 的 `v1/parse/file` 服务
marker v0.2用Post方法访问端口为 `7321 ` 的 `v2/parse/file` 服务
参数file-->本地文件的地址
- 访问方法 - 访问方法
- v0.2
```
curl --location --request POST "http://localhost:7231/v2/parse/file" \
--header "Authorization: Bearer your_access_token" \
--form "file=@./file/chinese_test.pdf"
```
- v0.1
``` ```
curl --location --request POST "http://localhost:7231/v1/parse/file" \ curl --location --request POST "http://localhost:7231/v1/parse/file" \
--header "Authorization: Bearer your_access_token" \ --header "Authorization: Bearer your_access_token" \
--form "file=@./file/chinese_test.pdf" --form "file=@./file/chinese_test.pdf"
``` ```
参数file-->本地文件的地址
- 多文件测试数据 - 多文件测试数据
运行 `test` 文件下的 `test.py` 文件,修改里面的 `file_paths` 为自己仓库的 `url` 即可 运行 `test` 文件下的 `test.py` 文件,修改里面的 `file_paths` 为自己仓库的 `url` 即可

View File

@@ -1,21 +1,13 @@
### FastGPT V4.9.0 更新说明 ### FastGPT V4.8.20 更新说明
#### 弃用 & 兼容 1. 新增 - 使用记录导出和仪表盘。
2. 新增 - DeepSeek resoner 模型支持输出思考过程。
1. 弃用 - 之前私有化部署的自定义文件解析方案,请同步更新到最新的配置方案。[点击查看 PDF 增强解析配置](/docs/development/configuration/#使用-doc2x-解析-pdf-文件) 3. 新增 - markdown 语法扩展,支持音视频(代码块 audio 和 video
2. 弃用 - 弃用旧版本地文件上传 API/api/core/dataset/collection/create/file以前仅商业版可用的 API该接口已放切换成/api/core/dataset/collection/create/localFile 4. 新增 - 飞书/语雀知识库。
3. 停止维护,即将弃用 - 外部文件库相关 API可通过 API 文件库替代 5. 新增 - 工作流知识库检索支持按知识库权限进行过滤
4. API更新 - 上传文件至知识库、创建连接集合、API 文件库、推送分块数据等带有 `trainingType` 字段的接口,`trainingType`字段未来仅支持`chunk``QA`两种模式。增强索引模式将设置单独字段:`autoIndexes`,目前仍有适配旧版`trainingType=auto`代码,但请尽快变更成新接口类型。具体可见:[知识库 OpenAPI 文档](/docs/development/openapi/dataset.md) 6. 新增 - 流程等待插件,可以等待 n 毫秒后继续执行流程。
7. 新增 - 飞书机器人接入,支持配置私有化飞书地址。
#### 功能更新 8. 新增 - 支持通过 JSON 配置直接创建应用。
9. 新增 - 支持通过 CURL 脚本快速创建 HTTP 插件。
1. 新增 - PDF 增强解析,可以识别图片、公式、扫描件,并将内容转化成 Markdown 格式。 10. 新增 - 支持部门架构权限模式。
2. 新增 - 支持对文档中的图片链接,进行图片索引,提高图片内容的检索精度。
3. 新增 - 语义检索增加迭代搜索,减少漏检。
4. 优化 - 知识库数据不再限制索引数量,可无限自定义。同时可自动更新输入文本的索引,不影响自定义索引。
5. 优化 - Markdown 解析,增加链接后中文标点符号检测,增加空格。
6. 优化 - Prompt 模式工具调用,支持思考模型。同时优化其格式检测,减少空输出的概率。
7. 优化 - 优化文件读取代码极大提高大文件读取速度。50M PDF 读取时间提高 3 倍。
8. 优化 - HTTP Body 适配,增加对字符串对象的适配。
9. 修复 - 批量运行时,全局变量未进一步传递到下一次运行中,导致最终变量更新错误。

View File

@@ -93,7 +93,7 @@ function MemberTable({ Tabs }: { Tabs: React.ReactNode }) {
const { runAsync: onLeaveTeam } = useRequest2( const { runAsync: onLeaveTeam } = useRequest2(
async () => { async () => {
const defaultTeam = myTeams[0]; const defaultTeam = myTeams.find((item) => item.defaultTeam) || myTeams[0];
// change to personal team // change to personal team
onSwitchTeam(defaultTeam.teamId); onSwitchTeam(defaultTeam.teamId);
return delLeaveTeam(); return delLeaveTeam();

View File

@@ -159,11 +159,12 @@ function DataProcess() {
gridTemplateColumns={'repeat(2, 1fr)'} gridTemplateColumns={'repeat(2, 1fr)'}
/> />
</Box> </Box>
{trainingType === DatasetCollectionDataProcessModeEnum.chunk && feConfigs?.isPlus && ( {trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
<Box mt={6}> <Box mt={6}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}> <Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:enhanced_indexes')} {t('dataset:enhanced_indexes')}
</Box> </Box>
{feConfigs?.isPlus && (
<HStack gap={[3, 7]}> <HStack gap={[3, 7]}>
<HStack flex={'1'} spacing={1}> <HStack flex={'1'} spacing={1}>
<Checkbox {...register('autoIndexes')}> <Checkbox {...register('autoIndexes')}>
@@ -175,13 +176,17 @@ function DataProcess() {
<MyTooltip <MyTooltip
label={!datasetDetail?.vlmModel ? t('common:error_vlm_not_config') : ''} label={!datasetDetail?.vlmModel ? t('common:error_vlm_not_config') : ''}
> >
<Checkbox isDisabled={!datasetDetail?.vlmModel} {...register('imageIndex')}> <Checkbox
isDisabled={!datasetDetail?.vlmModel}
{...register('imageIndex')}
>
<FormLabel>{t('dataset:image_auto_parse')}</FormLabel> <FormLabel>{t('dataset:image_auto_parse')}</FormLabel>
</Checkbox> </Checkbox>
</MyTooltip> </MyTooltip>
<QuestionTip label={t('dataset:image_auto_parse_tips')} /> <QuestionTip label={t('dataset:image_auto_parse_tips')} />
</HStack> </HStack>
</HStack> </HStack>
)}
</Box> </Box>
)} )}
<Box mt={6}> <Box mt={6}>

View File

@@ -1,5 +1,5 @@
import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'; import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import { Box, Flex, Button, Textarea } from '@chakra-ui/react'; import { Box, Flex, Button, Textarea, useTheme } from '@chakra-ui/react';
import { import {
FieldArrayWithId, FieldArrayWithId,
UseFieldArrayRemove, UseFieldArrayRemove,
@@ -19,7 +19,8 @@ import MyModal from '@fastgpt/web/components/common/MyModal';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip'; import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import { useQuery } from '@tanstack/react-query'; import { useQuery } from '@tanstack/react-query';
import { useTranslation } from 'next-i18next'; import { useTranslation } from 'next-i18next';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest'; import { useRequest, useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils'; import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils';
import { DatasetDataIndexItemType } from '@fastgpt/global/core/dataset/type'; import { DatasetDataIndexItemType } from '@fastgpt/global/core/dataset/type';
import DeleteIcon from '@fastgpt/web/components/common/Icon/delete'; import DeleteIcon from '@fastgpt/web/components/common/Icon/delete';
@@ -29,12 +30,10 @@ import MyBox from '@fastgpt/web/components/common/MyBox';
import { getErrText } from '@fastgpt/global/common/error/utils'; import { getErrText } from '@fastgpt/global/common/error/utils';
import { useSystemStore } from '@/web/common/system/useSystemStore'; import { useSystemStore } from '@/web/common/system/useSystemStore';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip'; import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
import { useSystem } from '@fastgpt/web/hooks/useSystem';
import LightRowTabs from '@fastgpt/web/components/common/Tabs/LightRowTabs'; import LightRowTabs from '@fastgpt/web/components/common/Tabs/LightRowTabs';
import styles from './styles.module.scss'; import styles from './styles.module.scss';
import { import { getDatasetIndexMapData } from '@fastgpt/global/core/dataset/data/constants';
DatasetDataIndexTypeEnum,
getDatasetIndexMapData
} from '@fastgpt/global/core/dataset/data/constants';
export type InputDataType = { export type InputDataType = {
q: string; q: string;
@@ -63,10 +62,11 @@ const InputDataModal = ({
onSuccess: (data: InputDataType & { dataId: string }) => void; onSuccess: (data: InputDataType & { dataId: string }) => void;
}) => { }) => {
const { t } = useTranslation(); const { t } = useTranslation();
const theme = useTheme();
const { toast } = useToast(); const { toast } = useToast();
const [currentTab, setCurrentTab] = useState(TabEnum.content); const [currentTab, setCurrentTab] = useState(TabEnum.content);
const { embeddingModelList, defaultModels } = useSystemStore(); const { embeddingModelList, defaultModels } = useSystemStore();
const { isPc } = useSystem();
const { register, handleSubmit, reset, control } = useForm<InputDataType>(); const { register, handleSubmit, reset, control } = useForm<InputDataType>();
const { const {
fields: indexes, fields: indexes,
@@ -112,6 +112,11 @@ const InputDataModal = ({
} }
]; ];
const { ConfirmModal, openConfirm } = useConfirm({
content: t('common:dataset.data.Delete Tip'),
type: 'delete'
});
const { data: collection = defaultCollectionDetail } = useQuery( const { data: collection = defaultCollectionDetail } = useQuery(
['loadCollectionId', collectionId], ['loadCollectionId', collectionId],
() => { () => {
@@ -158,8 +163,8 @@ const InputDataModal = ({
}, [collection.dataset.vectorModel, defaultModels.embedding, embeddingModelList]); }, [collection.dataset.vectorModel, defaultModels.embedding, embeddingModelList]);
// import new data // import new data
const { runAsync: sureImportData, loading: isImporting } = useRequest2( const { mutate: sureImportData, isLoading: isImporting } = useRequest({
async (e: InputDataType) => { mutationFn: async (e: InputDataType) => {
if (!e.q) { if (!e.q) {
setCurrentTab(TabEnum.content); setCurrentTab(TabEnum.content);
return Promise.reject(t('common:dataset.data.input is empty')); return Promise.reject(t('common:dataset.data.input is empty'));
@@ -176,8 +181,12 @@ const InputDataModal = ({
collectionId: collection._id, collectionId: collection._id,
q: e.q, q: e.q,
a: e.a, a: e.a,
// Contains no default index // remove dataId
indexes: e.indexes indexes:
e.indexes?.map((index) => ({
...index,
dataId: undefined
})) || []
}); });
return { return {
@@ -185,7 +194,6 @@ const InputDataModal = ({
dataId dataId
}; };
}, },
{
successToast: t('common:dataset.data.Input Success Tip'), successToast: t('common:dataset.data.Input Success Tip'),
onSuccess(e) { onSuccess(e) {
reset({ reset({
@@ -197,8 +205,7 @@ const InputDataModal = ({
onSuccess(e); onSuccess(e);
}, },
errorToast: t('common:common.error.unKnow') errorToast: t('common:common.error.unKnow')
} });
);
// update // update
const { runAsync: onUpdateData, loading: isUpdating } = useRequest2( const { runAsync: onUpdateData, loading: isUpdating } = useRequest2(
@@ -232,7 +239,6 @@ const InputDataModal = ({
() => getSourceNameIcon({ sourceName: collection.sourceName, sourceId: collection.sourceId }), () => getSourceNameIcon({ sourceName: collection.sourceName, sourceId: collection.sourceId }),
[collection] [collection]
); );
return ( return (
<MyModal <MyModal
isOpen={true} isOpen={true}
@@ -285,8 +291,9 @@ const InputDataModal = ({
p={0} p={0}
onClick={() => onClick={() =>
appendIndexes({ appendIndexes({
type: DatasetDataIndexTypeEnum.custom, type: 'custom',
text: '' text: '',
dataId: `${Date.now()}`
}) })
} }
> >
@@ -324,6 +331,7 @@ const InputDataModal = ({
</MyTooltip> </MyTooltip>
</Flex> </Flex>
</MyBox> </MyBox>
<ConfirmModal />
</MyModal> </MyModal>
); );
}; };

View File

@@ -80,10 +80,8 @@ const testLLMModel = async (model: LLMModelItemType) => {
}); });
const responseText = response.choices?.[0]?.message?.content; const responseText = response.choices?.[0]?.message?.content;
// @ts-ignore
const reasoning_content = response.choices?.[0]?.message?.reasoning_content;
if (!responseText && !reasoning_content) { if (!responseText) {
return Promise.reject('Model response empty'); return Promise.reject('Model response empty');
} }

View File

@@ -204,6 +204,7 @@ async function handler(req: NextApiRequest, res: NextApiResponse) {
}); });
// save chat // save chat
if (!res.closed) {
const isInteractiveRequest = !!getLastInteractiveValue(histories); const isInteractiveRequest = !!getLastInteractiveValue(histories);
const { text: userInteractiveVal } = chatValue2RuntimePrompt(userQuestion.value); const { text: userInteractiveVal } = chatValue2RuntimePrompt(userQuestion.value);
@@ -241,6 +242,7 @@ async function handler(req: NextApiRequest, res: NextApiResponse) {
content: [userQuestion, aiResponse] content: [userQuestion, aiResponse]
}); });
} }
}
createChatUsage({ createChatUsage({
appName, appName,

View File

@@ -25,35 +25,16 @@ const formatIndexes = ({
a?: string; a?: string;
}) => { }) => {
indexes = indexes || []; indexes = indexes || [];
// If index not type, set it to custom const defaultIndex = getDefaultIndex({ q, a });
indexes = indexes
.map((item) => ({
text: typeof item.text === 'string' ? item.text : String(item.text),
type: item.type || DatasetDataIndexTypeEnum.custom,
dataId: item.dataId
}))
.filter((item) => !!item.text.trim());
// Recompute default indexes, Merge ids of the same index, reduce the number of rebuilds // 1. Reset default index
const defaultIndexes = getDefaultIndex({ q, a });
const concatDefaultIndexes = defaultIndexes.map((item) => {
const oldIndex = indexes!.find((index) => index.text === item.text);
if (oldIndex) {
return {
type: DatasetDataIndexTypeEnum.default,
text: item.text,
dataId: oldIndex.dataId
};
} else {
return item;
}
});
indexes = indexes.filter((item) => item.type !== DatasetDataIndexTypeEnum.default); indexes = indexes.filter((item) => item.type !== DatasetDataIndexTypeEnum.default);
indexes.push(...concatDefaultIndexes); // 2. Add default index
indexes.unshift(...defaultIndex);
// Filter same text // 3. Filter same text
indexes = indexes.filter( indexes = indexes.filter(
(item, index, self) => index === self.findIndex((t) => t.text === item.text) (item, index, self) =>
!!item.text.trim() && index === self.findIndex((t) => t.text === item.text)
); );
return indexes.map((index) => ({ return indexes.map((index) => ({
@@ -248,7 +229,7 @@ export async function updateData2Dataset({
const newIndexes = patchResult const newIndexes = patchResult
.filter((item) => item.type !== 'delete') .filter((item) => item.type !== 'delete')
.map((item) => item.index) as DatasetDataIndexItemType[]; .map((item) => item.index) as DatasetDataIndexItemType[];
console.log(newIndexes, '---');
// console.log(clonePatchResult2Insert); // console.log(clonePatchResult2Insert);
await mongoSessionRun(async (session) => { await mongoSessionRun(async (session) => {
// Update MongoData // Update MongoData