Compare commits
67 Commits
v4.8.22
...
v4.9.0-fix
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
36da8c862f | ||
|
|
b50cf49cc7 | ||
|
|
2270e149eb | ||
|
|
4957bdcba1 | ||
|
|
bca5cf738a | ||
|
|
c35bb5841c | ||
|
|
6e045093b1 | ||
|
|
a1b114e426 | ||
|
|
54fde7630c | ||
|
|
467c408ad7 | ||
|
|
c005a94454 | ||
|
|
c8a35822d6 | ||
|
|
d05259dedd | ||
|
|
8980664b8a | ||
|
|
43f30b3790 | ||
|
|
3ddbb37612 | ||
|
|
7c419a26b3 | ||
|
|
e131465d25 | ||
|
|
a345e56508 | ||
|
|
32ce032995 | ||
|
|
0bc075aa4e | ||
|
|
3e3f2165db | ||
|
|
e1aa068858 | ||
|
|
e98d6f1d30 | ||
|
|
54eb5c0547 | ||
|
|
adf5377ebe | ||
|
|
08b6f594df | ||
|
|
90d13ee3df | ||
|
|
5c718abd50 | ||
|
|
2d351c3654 | ||
|
|
662a4a4671 | ||
|
|
3fadabd28b | ||
|
|
dbf25cef88 | ||
|
|
b2e2fa6b76 | ||
|
|
576c60bd55 | ||
|
|
33617ab5dc | ||
|
|
b4dda6a41b | ||
|
|
e860c56b77 | ||
|
|
efac5312b4 | ||
|
|
4bc7f21182 | ||
|
|
113e8f711f | ||
|
|
abc6dffb41 | ||
|
|
f7b2a57ca3 | ||
|
|
cf0aaa1091 | ||
|
|
ac4255ea0c | ||
|
|
df4d6f86ce | ||
|
|
e697fda82f | ||
|
|
1aa319e7aa | ||
|
|
fc9e614f88 | ||
|
|
1121ea33bd | ||
|
|
9bbee60cde | ||
|
|
9f57ad0017 | ||
|
|
c3d3b30d7e | ||
|
|
fb0eb49196 | ||
|
|
27ebd2e8cf | ||
|
|
81a06718d8 | ||
|
|
3c382d1240 | ||
|
|
747bb303ec | ||
|
|
cf9c8e9f6a | ||
|
|
5d5bee9e41 | ||
|
|
4f0dd96699 | ||
|
|
fb6dbaf2d6 | ||
|
|
ffc1520f4c | ||
|
|
255764400f | ||
|
|
3bfe802c48 | ||
|
|
2bf17dbb87 | ||
|
|
8d766372fe |
2
.github/workflows/docs-deploy-kubeconfig.yml
vendored
@@ -6,8 +6,6 @@ on:
|
||||
- 'docSite/**'
|
||||
branches:
|
||||
- 'main'
|
||||
tags:
|
||||
- 'v*.*.*'
|
||||
|
||||
jobs:
|
||||
build-fastgpt-docs-images:
|
||||
|
||||
2
.github/workflows/docs-deploy-vercel.yml
vendored
@@ -7,8 +7,6 @@ on:
|
||||
- 'docSite/**'
|
||||
branches:
|
||||
- 'main'
|
||||
tags:
|
||||
- 'v*.*.*'
|
||||
|
||||
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||
jobs:
|
||||
|
||||
2
.github/workflows/docs-preview.yml
vendored
@@ -4,8 +4,6 @@ on:
|
||||
pull_request_target:
|
||||
paths:
|
||||
- 'docSite/**'
|
||||
branches:
|
||||
- 'main'
|
||||
workflow_dispatch:
|
||||
|
||||
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||
|
||||
@@ -26,7 +26,7 @@ jobs:
|
||||
with:
|
||||
driver-opts: network=host
|
||||
- name: Cache Docker layers
|
||||
uses: actions/cache@v2
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: /tmp/.buildx-cache
|
||||
key: ${{ runner.os }}-buildx-${{ github.sha }}
|
||||
@@ -108,7 +108,7 @@ jobs:
|
||||
with:
|
||||
driver-opts: network=host
|
||||
- name: Cache Docker layers
|
||||
uses: actions/cache@v2
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: /tmp/.buildx-cache
|
||||
key: ${{ runner.os }}-buildx-${{ github.sha }}
|
||||
@@ -191,7 +191,7 @@ jobs:
|
||||
with:
|
||||
driver-opts: network=host
|
||||
- name: Cache Docker layers
|
||||
uses: actions/cache@v2
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: /tmp/.buildx-cache
|
||||
key: ${{ runner.os }}-buildx-${{ github.sha }}
|
||||
@@ -68,14 +68,3 @@ jobs:
|
||||
SEALOS_TYPE: 'pr_comment'
|
||||
SEALOS_FILENAME: 'report.md'
|
||||
SEALOS_REPLACE_TAG: 'DEFAULT_REPLACE_DEPLOY'
|
||||
|
||||
helm-check:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- name: Helm Check
|
||||
run: |
|
||||
helm dependency update files/helm/fastgpt
|
||||
helm lint files/helm/fastgpt
|
||||
helm package files/helm/fastgpt
|
||||
4
.github/workflows/helm-release.yaml
vendored
@@ -24,6 +24,6 @@ jobs:
|
||||
export APP_VERSION=${{ steps.vars.outputs.tag }}
|
||||
export HELM_VERSION=${{ steps.vars.outputs.tag }}
|
||||
export HELM_REPO=ghcr.io/${{ github.repository_owner }}
|
||||
helm dependency update files/helm/fastgpt
|
||||
helm package files/helm/fastgpt --version ${HELM_VERSION}-helm --app-version ${APP_VERSION} -d bin
|
||||
helm dependency update deploy/helm/fastgpt
|
||||
helm package deploy/helm/fastgpt --version ${HELM_VERSION}-helm --app-version ${APP_VERSION} -d bin
|
||||
helm push bin/fastgpt-${HELM_VERSION}-helm.tgz oci://${HELM_REPO}
|
||||
|
||||
@@ -25,7 +25,7 @@ jobs:
|
||||
with:
|
||||
driver-opts: network=host
|
||||
- name: Cache Docker layers
|
||||
uses: actions/cache@v2
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: /tmp/.buildx-cache
|
||||
key: ${{ runner.os }}-buildx-${{ github.sha }}
|
||||
2
.vscode/nextapi.code-snippets
vendored
@@ -58,7 +58,7 @@
|
||||
"body": [
|
||||
"import '@/pages/api/__mocks__/base';",
|
||||
"import { root } from '@/pages/api/__mocks__/db/init';",
|
||||
"import { getTestRequest } from '@/test/utils';",
|
||||
"import { getTestRequest } from '@fastgpt/service/test/utils';",
|
||||
"import { AppErrEnum } from '@fastgpt/global/common/error/code/app';",
|
||||
"import handler from './demo';",
|
||||
"",
|
||||
|
||||
4
.vscode/settings.json
vendored
@@ -27,7 +27,5 @@
|
||||
},
|
||||
"markdown.copyFiles.destination": {
|
||||
"/docSite/content/**/*": "${documentWorkspaceFolder}/docSite/assets/imgs/"
|
||||
},
|
||||
"markdown.copyFiles.overwriteBehavior": "nameIncrementally",
|
||||
"markdown.copyFiles.transformPath": "const filename = uri.path.split('/').pop(); return `/imgs/${filename}`;"
|
||||
}
|
||||
}
|
||||
25
README.md
@@ -114,16 +114,6 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
|
||||
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">
|
||||
</a>
|
||||
|
||||
## 🏘️ 社区交流群
|
||||
|
||||
扫码加入飞书话题群:
|
||||
|
||||

|
||||
|
||||
<a href="#readme">
|
||||
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">
|
||||
</a>
|
||||
|
||||
## 🏘️ 加入我们
|
||||
|
||||
我们正在寻找志同道合的小伙伴,加速 FastGPT 的发展。你可以通过 [FastGPT 2025 招聘](https://fael3z0zfze.feishu.cn/wiki/P7FOwEmPziVcaYkvVaacnVX1nvg)了解 FastGPT 的招聘信息。
|
||||
@@ -133,17 +123,26 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
|
||||
- [Laf:3 分钟快速接入三方应用](https://github.com/labring/laf)
|
||||
- [Sealos:快速部署集群应用](https://github.com/labring/sealos)
|
||||
- [One API:多模型管理,支持 Azure、文心一言等](https://github.com/songquanpeng/one-api)
|
||||
- [TuShan:5 分钟搭建后台管理系统](https://github.com/msgbyte/tushan)
|
||||
|
||||
<a href="#readme">
|
||||
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">
|
||||
</a>
|
||||
|
||||
|
||||
## 🌿 第三方生态
|
||||
|
||||
- [COW 个人微信/企微机器人](https://doc.tryfastgpt.ai/docs/use-cases/external-integration/onwechat/)
|
||||
- [AI Proxy:国内模型聚合服务](https://sealos.run/aiproxy/?k=fastgpt-github/)
|
||||
- [SiliconCloud (硅基流动) —— 开源模型在线体验平台](https://cloud.siliconflow.cn/i/TR9Ym0c4)
|
||||
- [COW 个人微信/企微机器人](https://doc.tryfastgpt.ai/docs/use-cases/external-integration/onwechat/)
|
||||
|
||||
<a href="#readme">
|
||||
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">
|
||||
</a>
|
||||
|
||||
## 🏘️ 社区交流群
|
||||
|
||||
扫码加入飞书话题群:
|
||||
|
||||

|
||||
|
||||
<a href="#readme">
|
||||
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">
|
||||
|
||||
26
SECURITY.md
Normal file
@@ -0,0 +1,26 @@
|
||||
# 安全策略
|
||||
|
||||
## 漏洞报告
|
||||
|
||||
如果您发现了 FastGPT 的安全漏洞,请按照以下步骤进行报告:
|
||||
|
||||
1. **报告方式**
|
||||
发送邮件至:yujinlong@sealos.io
|
||||
请备注版本以及您的 GitHub 账号
|
||||
|
||||
3. **响应时间**
|
||||
- 我们会在 48 小时内确认收到您的报告
|
||||
- 一般在 3 个工作日内给出初步评估结果
|
||||
|
||||
4. **漏洞处理流程**
|
||||
- 确认漏洞:我们会验证漏洞的存在性和影响范围
|
||||
- 修复开发:针对已确认的漏洞进行修复
|
||||
- 版本发布:在下一个版本更新中发布安全补丁
|
||||
- 公开披露:在修复完成后,我们会在更新日志中公布相关信息
|
||||
|
||||
5. **注意事项**
|
||||
- 在漏洞未修复前,请勿公开披露漏洞详情
|
||||
- 我们欢迎负责任的漏洞披露
|
||||
- 对于重大贡献者,我们会在项目致谢名单中提及
|
||||
|
||||
感谢您为 FastGPT 的安全性做出贡献!
|
||||
@@ -114,15 +114,15 @@ services:
|
||||
# fastgpt
|
||||
sandbox:
|
||||
container_name: sandbox
|
||||
image: ghcr.io/labring/fastgpt-sandbox:v4.8.21-fix # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.8.21-fix # 阿里云
|
||||
image: ghcr.io/labring/fastgpt-sandbox:v4.9.0 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.0 # 阿里云
|
||||
networks:
|
||||
- fastgpt
|
||||
restart: always
|
||||
fastgpt:
|
||||
container_name: fastgpt
|
||||
image: ghcr.io/labring/fastgpt:v4.8.21-fix # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.21-fix # 阿里云
|
||||
image: ghcr.io/labring/fastgpt:v4.9.0 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.0 # 阿里云
|
||||
ports:
|
||||
- 3000:3000
|
||||
networks:
|
||||
@@ -133,14 +133,17 @@ services:
|
||||
- sandbox
|
||||
restart: always
|
||||
environment:
|
||||
# 前端访问地址: http://localhost:3000
|
||||
# 前端外部可访问的地址,用于自动补全文件资源路径。例如 https://fastgpt.cn,不能填 localhost。这个值可以不填,不填则发给模型的图片会是一个相对路径,而不是全路径,模型可能伪造Host。
|
||||
- FE_DOMAIN=
|
||||
# root 密码,用户名为: root。如果需要修改 root 密码,直接修改这个环境变量,并重启即可。
|
||||
- DEFAULT_ROOT_PSW=1234
|
||||
# AI模型的API地址哦。务必加 /v1。这里默认填写了OneApi的访问地址。
|
||||
- OPENAI_BASE_URL=http://oneapi:3000/v1
|
||||
# AI模型的API Key。(这里默认填写了OneAPI的快速默认key,测试通后,务必及时修改)
|
||||
- CHAT_API_KEY=sk-fastgpt
|
||||
# AI Proxy 的地址,如果配了该地址,优先使用
|
||||
- AIPROXY_API_ENDPOINT=http://aiproxy:3000
|
||||
# AI Proxy 的 Admin Token,与 AI Proxy 中的环境变量 ADMIN_KEY
|
||||
- AIPROXY_API_TOKEN=aiproxy
|
||||
# 模型中转地址(如果用了 AI Proxy,下面 2 个就不需要了,旧版 OneAPI 用户,使用下面的变量)
|
||||
# - OPENAI_BASE_URL=http://oneapi:3000/v1
|
||||
# - CHAT_API_KEY=sk-fastgpt
|
||||
# 数据库最大连接数
|
||||
- DB_MAX_LINK=30
|
||||
# 登录凭证密钥
|
||||
@@ -170,48 +173,52 @@ services:
|
||||
volumes:
|
||||
- ./config.json:/app/data/config.json
|
||||
|
||||
# oneapi
|
||||
mysql:
|
||||
image: registry.cn-hangzhou.aliyuncs.com/fastgpt/mysql:8.0.36 # 阿里云
|
||||
# image: mysql:8.0.36
|
||||
container_name: mysql
|
||||
restart: always
|
||||
ports:
|
||||
- 3306:3306
|
||||
networks:
|
||||
- fastgpt
|
||||
command: --default-authentication-plugin=mysql_native_password
|
||||
environment:
|
||||
# 默认root密码,仅首次运行有效
|
||||
MYSQL_ROOT_PASSWORD: oneapimmysql
|
||||
MYSQL_DATABASE: oneapi
|
||||
volumes:
|
||||
- ./mysql:/var/lib/mysql
|
||||
oneapi:
|
||||
container_name: oneapi
|
||||
image: ghcr.io/songquanpeng/one-api:v0.6.7
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/one-api:v0.6.6 # 阿里云
|
||||
ports:
|
||||
- 3001:3000
|
||||
# AI Proxy
|
||||
aiproxy:
|
||||
image: 'ghcr.io/labring/sealos-aiproxy-service:latest'
|
||||
container_name: aiproxy
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- mysql
|
||||
aiproxy_pg:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- fastgpt
|
||||
restart: always
|
||||
environment:
|
||||
# mysql 连接参数
|
||||
- SQL_DSN=root:oneapimmysql@tcp(mysql:3306)/oneapi
|
||||
# 登录凭证加密密钥
|
||||
- SESSION_SECRET=oneapikey
|
||||
# 内存缓存
|
||||
- MEMORY_CACHE_ENABLED=true
|
||||
# 启动聚合更新,减少数据交互频率
|
||||
- BATCH_UPDATE_ENABLED=true
|
||||
# 聚合更新时长
|
||||
- BATCH_UPDATE_INTERVAL=10
|
||||
# 初始化的 root 密钥(建议部署完后更改,否则容易泄露)
|
||||
- INITIAL_ROOT_TOKEN=fastgpt
|
||||
# 对应 fastgpt 里的AIPROXY_API_TOKEN
|
||||
- ADMIN_KEY=aiproxy
|
||||
# 错误日志详情保存时间(小时)
|
||||
- LOG_DETAIL_STORAGE_HOURS=1
|
||||
# 数据库连接地址
|
||||
- SQL_DSN=postgres://postgres:aiproxy@aiproxy_pg:5432/aiproxy
|
||||
# 最大重试次数
|
||||
- RetryTimes=3
|
||||
# 不需要计费
|
||||
- BILLING_ENABLED=false
|
||||
# 不需要严格检测模型
|
||||
- DISABLE_MODEL_CONFIG=true
|
||||
healthcheck:
|
||||
test: ['CMD', 'curl', '-f', 'http://localhost:3000/api/status']
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
aiproxy_pg:
|
||||
image: pgvector/pgvector:0.8.0-pg15 # docker hub
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云
|
||||
restart: unless-stopped
|
||||
container_name: aiproxy_pg
|
||||
volumes:
|
||||
- ./oneapi:/data
|
||||
- ./aiproxy_pg:/var/lib/postgresql/data
|
||||
networks:
|
||||
- fastgpt
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_DB: aiproxy
|
||||
POSTGRES_PASSWORD: aiproxy
|
||||
healthcheck:
|
||||
test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy']
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
networks:
|
||||
fastgpt:
|
||||
@@ -7,12 +7,12 @@ version: '3.3'
|
||||
services:
|
||||
# db
|
||||
pg:
|
||||
image: pgvector/pgvector:0.7.0-pg15 # docker hub
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.7.0 # 阿里云
|
||||
image: pgvector/pgvector:0.8.0-pg15 # docker hub
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云
|
||||
container_name: pg
|
||||
restart: always
|
||||
ports: # 生产环境建议不要暴露
|
||||
- 5432:5432
|
||||
# ports: # 生产环境建议不要暴露
|
||||
# - 5432:5432
|
||||
networks:
|
||||
- fastgpt
|
||||
environment:
|
||||
@@ -28,8 +28,8 @@ services:
|
||||
# image: mongo:4.4.29 # cpu不支持AVX时候使用
|
||||
container_name: mongo
|
||||
restart: always
|
||||
ports:
|
||||
- 27017:27017
|
||||
# ports:
|
||||
# - 27017:27017
|
||||
networks:
|
||||
- fastgpt
|
||||
command: mongod --keyFile /data/mongodb.key --replSet rs0
|
||||
@@ -72,15 +72,15 @@ services:
|
||||
# fastgpt
|
||||
sandbox:
|
||||
container_name: sandbox
|
||||
image: ghcr.io/labring/fastgpt-sandbox:v4.8.21-fix # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.8.21-fix # 阿里云
|
||||
image: ghcr.io/labring/fastgpt-sandbox:v4.9.0 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.0 # 阿里云
|
||||
networks:
|
||||
- fastgpt
|
||||
restart: always
|
||||
fastgpt:
|
||||
container_name: fastgpt
|
||||
image: ghcr.io/labring/fastgpt:v4.8.21-fix # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.21-fix # 阿里云
|
||||
image: ghcr.io/labring/fastgpt:v4.9.0 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.0 # 阿里云
|
||||
ports:
|
||||
- 3000:3000
|
||||
networks:
|
||||
@@ -91,14 +91,17 @@ services:
|
||||
- sandbox
|
||||
restart: always
|
||||
environment:
|
||||
# 前端访问地址: http://localhost:3000
|
||||
# 前端外部可访问的地址,用于自动补全文件资源路径。例如 https://fastgpt.cn,不能填 localhost。这个值可以不填,不填则发给模型的图片会是一个相对路径,而不是全路径,模型可能伪造Host。
|
||||
- FE_DOMAIN=
|
||||
# root 密码,用户名为: root。如果需要修改 root 密码,直接修改这个环境变量,并重启即可。
|
||||
- DEFAULT_ROOT_PSW=1234
|
||||
# AI模型的API地址哦。务必加 /v1。这里默认填写了OneApi的访问地址。
|
||||
- OPENAI_BASE_URL=http://oneapi:3000/v1
|
||||
# AI模型的API Key。(这里默认填写了OneAPI的快速默认key,测试通后,务必及时修改)
|
||||
- CHAT_API_KEY=sk-fastgpt
|
||||
# AI Proxy 的地址,如果配了该地址,优先使用
|
||||
- AIPROXY_API_ENDPOINT=http://aiproxy:3000
|
||||
# AI Proxy 的 Admin Token,与 AI Proxy 中的环境变量 ADMIN_KEY
|
||||
- AIPROXY_API_TOKEN=aiproxy
|
||||
# 模型中转地址(如果用了 AI Proxy,下面 2 个就不需要了,旧版 OneAPI 用户,使用下面的变量)
|
||||
# - OPENAI_BASE_URL=http://oneapi:3000/v1
|
||||
# - CHAT_API_KEY=sk-fastgpt
|
||||
# 数据库最大连接数
|
||||
- DB_MAX_LINK=30
|
||||
# 登录凭证密钥
|
||||
@@ -127,48 +130,52 @@ services:
|
||||
volumes:
|
||||
- ./config.json:/app/data/config.json
|
||||
|
||||
# oneapi
|
||||
mysql:
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/mysql:8.0.36 # 阿里云
|
||||
image: mysql:8.0.36
|
||||
container_name: mysql
|
||||
restart: always
|
||||
ports:
|
||||
- 3306:3306
|
||||
networks:
|
||||
- fastgpt
|
||||
command: --default-authentication-plugin=mysql_native_password
|
||||
environment:
|
||||
# 默认root密码,仅首次运行有效
|
||||
MYSQL_ROOT_PASSWORD: oneapimmysql
|
||||
MYSQL_DATABASE: oneapi
|
||||
volumes:
|
||||
- ./mysql:/var/lib/mysql
|
||||
oneapi:
|
||||
container_name: oneapi
|
||||
image: ghcr.io/songquanpeng/one-api:v0.6.7
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/one-api:v0.6.6 # 阿里云
|
||||
ports:
|
||||
- 3001:3000
|
||||
# AI Proxy
|
||||
aiproxy:
|
||||
image: 'ghcr.io/labring/sealos-aiproxy-service:latest'
|
||||
container_name: aiproxy
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- mysql
|
||||
aiproxy_pg:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- fastgpt
|
||||
restart: always
|
||||
environment:
|
||||
# mysql 连接参数
|
||||
- SQL_DSN=root:oneapimmysql@tcp(mysql:3306)/oneapi
|
||||
# 登录凭证加密密钥
|
||||
- SESSION_SECRET=oneapikey
|
||||
# 内存缓存
|
||||
- MEMORY_CACHE_ENABLED=true
|
||||
# 启动聚合更新,减少数据交互频率
|
||||
- BATCH_UPDATE_ENABLED=true
|
||||
# 聚合更新时长
|
||||
- BATCH_UPDATE_INTERVAL=10
|
||||
# 初始化的 root 密钥(建议部署完后更改,否则容易泄露)
|
||||
- INITIAL_ROOT_TOKEN=fastgpt
|
||||
# 对应 fastgpt 里的AIPROXY_API_TOKEN
|
||||
- ADMIN_KEY=aiproxy
|
||||
# 错误日志详情保存时间(小时)
|
||||
- LOG_DETAIL_STORAGE_HOURS=1
|
||||
# 数据库连接地址
|
||||
- SQL_DSN=postgres://postgres:aiproxy@aiproxy_pg:5432/aiproxy
|
||||
# 最大重试次数
|
||||
- RetryTimes=3
|
||||
# 不需要计费
|
||||
- BILLING_ENABLED=false
|
||||
# 不需要严格检测模型
|
||||
- DISABLE_MODEL_CONFIG=true
|
||||
healthcheck:
|
||||
test: ['CMD', 'curl', '-f', 'http://localhost:3000/api/status']
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
aiproxy_pg:
|
||||
image: pgvector/pgvector:0.8.0-pg15 # docker hub
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云
|
||||
restart: unless-stopped
|
||||
container_name: aiproxy_pg
|
||||
volumes:
|
||||
- ./oneapi:/data
|
||||
- ./aiproxy_pg:/var/lib/postgresql/data
|
||||
networks:
|
||||
- fastgpt
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_DB: aiproxy
|
||||
POSTGRES_PASSWORD: aiproxy
|
||||
healthcheck:
|
||||
test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy']
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
networks:
|
||||
fastgpt:
|
||||
@@ -53,15 +53,15 @@ services:
|
||||
wait $$!
|
||||
sandbox:
|
||||
container_name: sandbox
|
||||
image: ghcr.io/labring/fastgpt-sandbox:v4.8.21-fix # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.8.21-fix # 阿里云
|
||||
image: ghcr.io/labring/fastgpt-sandbox:v4.9.0 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.0 # 阿里云
|
||||
networks:
|
||||
- fastgpt
|
||||
restart: always
|
||||
fastgpt:
|
||||
container_name: fastgpt
|
||||
image: ghcr.io/labring/fastgpt:v4.8.21-fix # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.21-fix # 阿里云
|
||||
image: ghcr.io/labring/fastgpt:v4.9.0 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.0 # 阿里云
|
||||
ports:
|
||||
- 3000:3000
|
||||
networks:
|
||||
@@ -71,14 +71,17 @@ services:
|
||||
- sandbox
|
||||
restart: always
|
||||
environment:
|
||||
# 前端访问地址: http://localhost:3000
|
||||
# 前端外部可访问的地址,用于自动补全文件资源路径。例如 https://fastgpt.cn,不能填 localhost。这个值可以不填,不填则发给模型的图片会是一个相对路径,而不是全路径,模型可能伪造Host。
|
||||
- FE_DOMAIN=
|
||||
# root 密码,用户名为: root。如果需要修改 root 密码,直接修改这个环境变量,并重启即可。
|
||||
- DEFAULT_ROOT_PSW=1234
|
||||
# AI模型的API地址哦。务必加 /v1。这里默认填写了OneApi的访问地址。
|
||||
- OPENAI_BASE_URL=http://oneapi:3000/v1
|
||||
# AI模型的API Key。(这里默认填写了OneAPI的快速默认key,测试通后,务必及时修改)
|
||||
- CHAT_API_KEY=sk-fastgpt
|
||||
# AI Proxy 的地址,如果配了该地址,优先使用
|
||||
- AIPROXY_API_ENDPOINT=http://aiproxy:3000
|
||||
# AI Proxy 的 Admin Token,与 AI Proxy 中的环境变量 ADMIN_KEY
|
||||
- AIPROXY_API_TOKEN=aiproxy
|
||||
# 模型中转地址(如果用了 AI Proxy,下面 2 个就不需要了,旧版 OneAPI 用户,使用下面的变量)
|
||||
# - OPENAI_BASE_URL=http://oneapi:3000/v1
|
||||
# - CHAT_API_KEY=sk-fastgpt
|
||||
# 数据库最大连接数
|
||||
- DB_MAX_LINK=30
|
||||
# 登录凭证密钥
|
||||
@@ -108,48 +111,52 @@ services:
|
||||
volumes:
|
||||
- ./config.json:/app/data/config.json
|
||||
|
||||
# oneapi
|
||||
mysql:
|
||||
image: registry.cn-hangzhou.aliyuncs.com/fastgpt/mysql:8.0.36 # 阿里云
|
||||
# image: mysql:8.0.36
|
||||
container_name: mysql
|
||||
restart: always
|
||||
ports:
|
||||
- 3306:3306
|
||||
networks:
|
||||
- fastgpt
|
||||
command: --default-authentication-plugin=mysql_native_password
|
||||
environment:
|
||||
# 默认root密码,仅首次运行有效
|
||||
MYSQL_ROOT_PASSWORD: oneapimmysql
|
||||
MYSQL_DATABASE: oneapi
|
||||
volumes:
|
||||
- ./mysql:/var/lib/mysql
|
||||
oneapi:
|
||||
container_name: oneapi
|
||||
image: ghcr.io/songquanpeng/one-api:v0.6.7
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/one-api:v0.6.6 # 阿里云
|
||||
ports:
|
||||
- 3001:3000
|
||||
# AI Proxy
|
||||
aiproxy:
|
||||
image: 'ghcr.io/labring/sealos-aiproxy-service:latest'
|
||||
container_name: aiproxy
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- mysql
|
||||
aiproxy_pg:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- fastgpt
|
||||
restart: always
|
||||
environment:
|
||||
# mysql 连接参数
|
||||
- SQL_DSN=root:oneapimmysql@tcp(mysql:3306)/oneapi
|
||||
# 登录凭证加密密钥
|
||||
- SESSION_SECRET=oneapikey
|
||||
# 内存缓存
|
||||
- MEMORY_CACHE_ENABLED=true
|
||||
# 启动聚合更新,减少数据交互频率
|
||||
- BATCH_UPDATE_ENABLED=true
|
||||
# 聚合更新时长
|
||||
- BATCH_UPDATE_INTERVAL=10
|
||||
# 初始化的 root 密钥(建议部署完后更改,否则容易泄露)
|
||||
- INITIAL_ROOT_TOKEN=fastgpt
|
||||
# 对应 fastgpt 里的AIPROXY_API_TOKEN
|
||||
- ADMIN_KEY=aiproxy
|
||||
# 错误日志详情保存时间(小时)
|
||||
- LOG_DETAIL_STORAGE_HOURS=1
|
||||
# 数据库连接地址
|
||||
- SQL_DSN=postgres://postgres:aiproxy@aiproxy_pg:5432/aiproxy
|
||||
# 最大重试次数
|
||||
- RetryTimes=3
|
||||
# 不需要计费
|
||||
- BILLING_ENABLED=false
|
||||
# 不需要严格检测模型
|
||||
- DISABLE_MODEL_CONFIG=true
|
||||
healthcheck:
|
||||
test: ['CMD', 'curl', '-f', 'http://localhost:3000/api/status']
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
aiproxy_pg:
|
||||
image: pgvector/pgvector:0.8.0-pg15 # docker hub
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云
|
||||
restart: unless-stopped
|
||||
container_name: aiproxy_pg
|
||||
volumes:
|
||||
- ./oneapi:/data
|
||||
- ./aiproxy_pg:/var/lib/postgresql/data
|
||||
networks:
|
||||
- fastgpt
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_DB: aiproxy
|
||||
POSTGRES_PASSWORD: aiproxy
|
||||
healthcheck:
|
||||
test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy']
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
networks:
|
||||
fastgpt:
|
||||
@@ -6,6 +6,7 @@ data:
|
||||
"openapiPrefix": "fastgpt",
|
||||
"vectorMaxProcess": 15,
|
||||
"qaMaxProcess": 15,
|
||||
"vlmMaxProcess": 15,
|
||||
"pgHNSWEfSearch": 100
|
||||
},
|
||||
"llmModels": [
|
||||
BIN
docSite/assets/imgs/aiproxy-1.jpg
Normal file
|
After Width: | Height: | Size: 198 KiB |
BIN
docSite/assets/imgs/aiproxy-1.png
Normal file
|
After Width: | Height: | Size: 198 KiB |
BIN
docSite/assets/imgs/aiproxy-10.png
Normal file
|
After Width: | Height: | Size: 229 KiB |
BIN
docSite/assets/imgs/aiproxy-11.png
Normal file
|
After Width: | Height: | Size: 422 KiB |
BIN
docSite/assets/imgs/aiproxy-2.png
Normal file
|
After Width: | Height: | Size: 235 KiB |
BIN
docSite/assets/imgs/aiproxy-3.png
Normal file
|
After Width: | Height: | Size: 341 KiB |
BIN
docSite/assets/imgs/aiproxy-4.png
Normal file
|
After Width: | Height: | Size: 212 KiB |
BIN
docSite/assets/imgs/aiproxy-5.png
Normal file
|
After Width: | Height: | Size: 240 KiB |
BIN
docSite/assets/imgs/aiproxy-6.png
Normal file
|
After Width: | Height: | Size: 342 KiB |
BIN
docSite/assets/imgs/aiproxy-7.png
Normal file
|
After Width: | Height: | Size: 363 KiB |
BIN
docSite/assets/imgs/aiproxy-8.png
Normal file
|
After Width: | Height: | Size: 348 KiB |
BIN
docSite/assets/imgs/aiproxy-9.png
Normal file
|
After Width: | Height: | Size: 222 KiB |
BIN
docSite/assets/imgs/aiproxy1.png
Normal file
|
After Width: | Height: | Size: 135 KiB |
BIN
docSite/assets/imgs/image copy.png
Normal file
|
After Width: | Height: | Size: 329 KiB |
BIN
docSite/assets/imgs/marker2.png
Normal file
|
After Width: | Height: | Size: 216 KiB |
BIN
docSite/assets/imgs/marker3.png
Normal file
|
After Width: | Height: | Size: 85 KiB |
@@ -23,8 +23,54 @@ weight: 707
|
||||
"systemEnv": {
|
||||
"vectorMaxProcess": 15, // 向量处理线程数量
|
||||
"qaMaxProcess": 15, // 问答拆分线程数量
|
||||
"vlmMaxProcess": 15, // 图片理解模型最大处理进程
|
||||
"tokenWorkers": 50, // Token 计算线程保持数,会持续占用内存,不能设置太大。
|
||||
"pgHNSWEfSearch": 100 // 向量搜索参数。越大,搜索越精确,但是速度越慢。设置为100,有99%+精度。
|
||||
"pgHNSWEfSearch": 100, // 向量搜索参数。越大,搜索越精确,但是速度越慢。设置为100,有99%+精度。
|
||||
"customPdfParse": { // 4.9.0 新增配置
|
||||
"url": "", // 自定义 PDF 解析服务地址
|
||||
"key": "", // 自定义 PDF 解析服务密钥
|
||||
"doc2xKey": "", // doc2x 服务密钥
|
||||
"price": 0 // PDF 解析服务价格
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 自定义 PDF 解析配置
|
||||
|
||||
自定义 PDF 服务解析的优先级高于 Doc2x 服务,所以如果使用 Doc2x 服务,请勿配置自定义 PDF 服务。
|
||||
|
||||
### 使用 Sealos PDF 解析服务
|
||||
|
||||
#### 1. 申请 Sealos AI proxy API Key
|
||||
|
||||
[点击打开 Sealos Pdf parser 官网](https://cloud.sealos.run/?uid=fnWRt09fZP&openapp=system-aiproxy),并进行对应 API Key 的申请。
|
||||
|
||||
#### 2. 修改 FastGPT 配置文件
|
||||
|
||||
`systemEnv.customPdfParse.url`填写成`https://aiproxy.hzh.sealos.run/v1/parse/pdf?model=parse-pdf`
|
||||
`systemEnv.customPdfParse.key`填写成在 Sealos AI proxy 中申请的 API Key。
|
||||
|
||||

|
||||
|
||||
### 使用 Doc2x 解析 PDF 文件
|
||||
|
||||
`Doc2x`是一个国内的专业 PDF 解析服务。
|
||||
|
||||
#### 1. 申请 Doc2x 服务
|
||||
|
||||
[点击打开 Doc2x 官网](https://doc2x.noedgeai.com?inviteCode=9EACN2),并进行对应 API Key 的申请。
|
||||
|
||||
#### 2. 修改 FastGPT 配置文件
|
||||
|
||||
开源版用户在 `config.json` 文件中添加 `systemEnv.customPdfParse.doc2xKey` 配置,并填写上申请到的 API Key。并重启服务。
|
||||
|
||||
商业版用户在 Admin 后台根据表单指引填写 Doc2x 服务密钥。
|
||||
|
||||
#### 3. 开始使用
|
||||
|
||||
在知识库导入数据或应用文件上传配置中,勾选`PDF 增强解析`后,解析 PDF 时会使用 Doc2x 服务进行解析。
|
||||
|
||||
### 使用 Marker 解析 PDF 文件
|
||||
|
||||
[点击查看 Marker 接入教程](/docs/development/custom-models/marker)
|
||||
@@ -31,9 +31,9 @@ weight: 920
|
||||
|
||||
3 个模型代码分别为:
|
||||
|
||||
1. [https://github.com/labring/FastGPT/tree/main/python/bge-rerank/bge-reranker-base](https://github.com/labring/FastGPT/tree/main/python/bge-rerank/bge-reranker-base)
|
||||
2. [https://github.com/labring/FastGPT/tree/main/python/bge-rerank/bge-reranker-large](https://github.com/labring/FastGPT/tree/main/python/bge-rerank/bge-reranker-large)
|
||||
3. [https://github.com/labring/FastGPT/tree/main/python/bge-rerank/bge-reranker-v2-m3](https://github.com/labring/FastGPT/tree/main/python/bge-rerank/bge-reranker-v2-m3)
|
||||
1. [https://github.com/labring/FastGPT/tree/main/plugins/rerank-bge/bge-reranker-base](https://github.com/labring/FastGPT/tree/main/plugins/rerank-bge/bge-reranker-base)
|
||||
2. [https://github.com/labring/FastGPT/tree/main/plugins/rerank-bge/bge-reranker-large](https://github.com/labring/FastGPT/tree/main/plugins/rerank-bge/bge-reranker-large)
|
||||
3. [https://github.com/labring/FastGPT/tree/main/plugins/rerank-bge/bge-reranker-v2-m3](https://github.com/labring/FastGPT/tree/main/plugins/rerank-bge/bge-reranker-v2-m3)
|
||||
|
||||
### 3. 安装依赖
|
||||
|
||||
|
||||
@@ -11,39 +11,51 @@ weight: 909
|
||||
|
||||
PDF 是一个相对复杂的文件格式,在 FastGPT 内置的 pdf 解析器中,依赖的是 pdfjs 库解析,该库基于逻辑解析,无法有效的理解复杂的 pdf 文件。所以我们在解析 pdf 时候,如果遇到图片、表格、公式等非简单文本内容,会发现解析效果不佳。
|
||||
|
||||
市面上目前有多种解析 PDF 的方法,比如使用 [Marker](https://github.com/VikParuchuri/marker),该项目使用了 Surya 模型,基于视觉解析,可以有效提取图片、表格、公式等复杂内容。为了可以让 Marker 快速接入 FastGPT,我们做了一个自定义解析的拓展 Demo。
|
||||
市面上目前有多种解析 PDF 的方法,比如使用 [Marker](https://github.com/VikParuchuri/marker),该项目使用了 Surya 模型,基于视觉解析,可以有效提取图片、表格、公式等复杂内容。
|
||||
|
||||
在 FastGPT 4.8.15 版本中,你可以通过增加一个环境变量,来替换掉 FastGPT 系统内置解析器,实现自定义的文档解析服务。该功能只是 Demo 阶段,后期配置模式和交互规则会发生改动。
|
||||
在 `FastGPT v4.9.0` 版本中,开源版用户可以在`config.json`文件中添加`systemEnv.customPdfParse`配置,来使用 Marker 解析 PDF 文件。商业版用户直接在 Admin 后台根据表单指引填写即可。需重新拉取 Marker 镜像,接口格式已变动。
|
||||
|
||||
## 使用教程
|
||||
|
||||
### 1. 按照 Marker
|
||||
### 1. 安装 Marker
|
||||
|
||||
参考文档 [Marker 安装教程](https://github.com/labring/FastGPT/tree/main/python/pdf-marker),安装 Marker 模型。封装的 API 已经适配了 FastGPT 自定义解析服务。
|
||||
参考文档 [Marker 安装教程](https://github.com/labring/FastGPT/tree/main/plugins/model/pdf-marker),安装 Marker 模型。封装的 API 已经适配了 FastGPT 自定义解析服务。
|
||||
|
||||
这里介绍快速 Docker 安装的方法:
|
||||
|
||||
```dockerfile
|
||||
docker pull crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:latest
|
||||
docker run --gpus all -itd -p 7231:7231 --name model_pdf_v1 crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:latest
|
||||
docker pull crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:v0.2
|
||||
docker run --gpus all -itd -p 7231:7232 --name model_pdf_v2 -e PROCESSES_PER_GPU="2" crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:v0.2
|
||||
```
|
||||
### 2. 添加 FastGPT 文件配置
|
||||
|
||||
```json
|
||||
{
|
||||
xxx
|
||||
"systemEnv": {
|
||||
xxx
|
||||
"customPdfParse": {
|
||||
"url": "http://xxxx.com/v2/parse/file", // 自定义 PDF 解析服务地址 marker v0.2
|
||||
"key": "", // 自定义 PDF 解析服务密钥
|
||||
"doc2xKey": "", // doc2x 服务密钥
|
||||
"price": 0 // PDF 解析服务价格
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2. 添加 FastGPT 环境变量
|
||||
|
||||
```
|
||||
CUSTOM_READ_FILE_URL=http://xxxx.com/v1/parse/file
|
||||
CUSTOM_READ_FILE_EXTENSION=pdf
|
||||
```
|
||||
|
||||
* CUSTOM_READ_FILE_URL - 自定义解析服务的地址, host改成解析服务的访问地址,path 不能变动。
|
||||
* CUSTOM_READ_FILE_EXTENSION - 支持的文件后缀,多个文件类型,可用逗号隔开。
|
||||
需要重启服务。
|
||||
|
||||
### 3. 测试效果
|
||||
|
||||
通过知识库上传一个 pdf 文件,并确认上传,可以在日志中看到 LOG (LOG_LEVEL需要设置 info 或者 debug):
|
||||
通过知识库上传一个 pdf 文件,并勾选上 `PDF 增强解析`。
|
||||
|
||||

|
||||
|
||||
确认上传后,可以在日志中看到 LOG (LOG_LEVEL需要设置 info 或者 debug):
|
||||
|
||||
```
|
||||
[Info] 2024-12-05 15:04:42 Parsing files from an external service
|
||||
[Info] 2024-12-05 15:04:42 Parsing files from an external service
|
||||
[Info] 2024-12-05 15:07:08 Custom file parsing is complete, time: 1316ms
|
||||
```
|
||||
|
||||
@@ -51,6 +63,10 @@ CUSTOM_READ_FILE_EXTENSION=pdf
|
||||
|
||||

|
||||
|
||||
同样的,在应用中,你可以在文件上传配置里,勾选上 `PDF 增强解析`。
|
||||
|
||||

|
||||
|
||||
|
||||
## 效果展示
|
||||
|
||||
@@ -63,4 +79,25 @@ CUSTOM_READ_FILE_EXTENSION=pdf
|
||||
|
||||
上图是分块后的结果,下图是 pdf 原文。整体图片、公式、表格都可以提取出来,效果还是杠杠的。
|
||||
|
||||
不过要注意的是,[Marker](https://github.com/VikParuchuri/marker) 的协议是`GPL-3.0 license`,请在遵守协议的前提下使用。
|
||||
不过要注意的是,[Marker](https://github.com/VikParuchuri/marker) 的协议是`GPL-3.0 license`,请在遵守协议的前提下使用。
|
||||
|
||||
## 旧版 Marker 使用方法
|
||||
|
||||
FastGPT V4.9.0 版本之前,可以用以下方式,试用 Marker 解析服务。
|
||||
|
||||
安装和运行 Marker 服务:
|
||||
|
||||
```dockerfile
|
||||
docker pull crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:v0.1
|
||||
docker run --gpus all -itd -p 7231:7231 --name model_pdf_v1 -e PROCESSES_PER_GPU="2" crpi-h3snc261q1dosroc.cn-hangzhou.personal.cr.aliyuncs.com/marker11/marker_images:v0.1
|
||||
```
|
||||
|
||||
并修改 FastGPT 环境变量:
|
||||
|
||||
```
|
||||
CUSTOM_READ_FILE_URL=http://xxxx.com/v1/parse/file
|
||||
CUSTOM_READ_FILE_EXTENSION=pdf
|
||||
```
|
||||
|
||||
* CUSTOM_READ_FILE_URL - 自定义解析服务的地址, host改成解析服务的访问地址,path 不能变动。
|
||||
* CUSTOM_READ_FILE_EXTENSION - 支持的文件后缀,多个文件类型,可用逗号隔开。
|
||||
@@ -30,7 +30,7 @@ weight: 707
|
||||
|
||||
### PgVector版本
|
||||
|
||||
非常轻量,适合数据量在 5000 万以下。
|
||||
非常轻量,适合知识库索引量在 5000 万以下。
|
||||
|
||||
{{< table "table-hover table-striped-columns" >}}
|
||||
| 环境 | 最低配置(单节点) | 推荐配置 |
|
||||
@@ -118,7 +118,7 @@ brew install orbstack
|
||||
非 Linux 环境或无法访问外网环境,可手动创建一个目录,并下载配置文件和对应版本的`docker-compose.yml`,在这个文件夹中依据下载的配置文件运行docker,若作为本地开发使用推荐`docker-compose-pgvector`版本,并且自行拉取并运行`sandbox`和`fastgpt`,并在docker配置文件中注释掉`sandbox`和`fastgpt`的部分
|
||||
|
||||
- [config.json](https://raw.githubusercontent.com/labring/FastGPT/refs/heads/main/projects/app/data/config.json)
|
||||
- [docker-compose.yml](https://github.com/labring/FastGPT/blob/main/files/docker) (注意,不同向量库版本的文件不一样)
|
||||
- [docker-compose.yml](https://github.com/labring/FastGPT/blob/main/deploy/docker) (注意,不同向量库版本的文件不一样)
|
||||
|
||||
{{% alert icon="🤖" context="success" %}}
|
||||
|
||||
@@ -134,11 +134,11 @@ cd fastgpt
|
||||
curl -O https://raw.githubusercontent.com/labring/FastGPT/main/projects/app/data/config.json
|
||||
|
||||
# pgvector 版本(测试推荐,简单快捷)
|
||||
curl -o docker-compose.yml https://raw.githubusercontent.com/labring/FastGPT/main/files/docker/docker-compose-pgvector.yml
|
||||
curl -o docker-compose.yml https://raw.githubusercontent.com/labring/FastGPT/main/deploy/docker/docker-compose-pgvector.yml
|
||||
# milvus 版本
|
||||
# curl -o docker-compose.yml https://raw.githubusercontent.com/labring/FastGPT/main/files/docker/docker-compose-milvus.yml
|
||||
# curl -o docker-compose.yml https://raw.githubusercontent.com/labring/FastGPT/main/deploy/docker/docker-compose-milvus.yml
|
||||
# zilliz 版本
|
||||
# curl -o docker-compose.yml https://raw.githubusercontent.com/labring/FastGPT/main/files/docker/docker-compose-zilliz.yml
|
||||
# curl -o docker-compose.yml https://raw.githubusercontent.com/labring/FastGPT/main/deploy/docker/docker-compose-zilliz.yml
|
||||
```
|
||||
|
||||
### 2. 修改环境变量
|
||||
@@ -149,18 +149,14 @@ curl -o docker-compose.yml https://raw.githubusercontent.com/labring/FastGPT/mai
|
||||
{{< tab tabName="PgVector版本" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```
|
||||
FE_DOMAIN=你的前端你访问地址,例如 http://192.168.0.1:3000;https://cloud.fastgpt.cn
|
||||
```
|
||||
无需操作
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
{{< tab tabName="Milvus版本" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```
|
||||
FE_DOMAIN=你的前端你访问地址,例如 http://192.168.0.1:3000;https://cloud.fastgpt.cn
|
||||
```
|
||||
无需操作
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
@@ -174,7 +170,6 @@ FE_DOMAIN=你的前端你访问地址,例如 http://192.168.0.1:3000;https://clo
|
||||
{{% alert icon="🤖" context="success" %}}
|
||||
|
||||
1. 修改`MILVUS_ADDRESS`和`MILVUS_TOKEN`链接参数,分别对应 `zilliz` 的 `Public Endpoint` 和 `Api key`,记得把自己ip加入白名单。
|
||||
2. 修改FE_DOMAIN=你的前端你访问地址,例如 http://192.168.0.1:3000;https://cloud.fastgpt.cn
|
||||
|
||||
{{% /alert %}}
|
||||
|
||||
@@ -189,34 +184,28 @@ FE_DOMAIN=你的前端你访问地址,例如 http://192.168.0.1:3000;https://clo
|
||||
```bash
|
||||
# 启动容器
|
||||
docker-compose up -d
|
||||
# 等待10s,OneAPI第一次总是要重启几次才能连上Mysql
|
||||
sleep 10
|
||||
# 重启一次oneapi(由于OneAPI的默认Key有点问题,不重启的话会提示找不到渠道,临时手动重启一次解决,等待作者修复)
|
||||
docker restart oneapi
|
||||
```
|
||||
|
||||
### 4. 打开 OneAPI 添加模型
|
||||
### 4. 访问 FastGPT
|
||||
|
||||
可以通过`ip:3001`访问OneAPI,默认账号为`root`密码为`123456`。
|
||||
|
||||
在OneApi中添加合适的AI模型渠道。[点击查看相关教程](/docs/development/modelconfig/one-api/)
|
||||
|
||||
### 5. 访问 FastGPT
|
||||
|
||||
目前可以通过 `ip:3000` 直接访问(注意防火墙)。登录用户名为 `root`,密码为`docker-compose.yml`环境变量里设置的 `DEFAULT_ROOT_PSW`。
|
||||
目前可以通过 `ip:3000` 直接访问(注意开放防火墙)。登录用户名为 `root`,密码为`docker-compose.yml`环境变量里设置的 `DEFAULT_ROOT_PSW`。
|
||||
|
||||
如果需要域名访问,请自行安装并配置 Nginx。
|
||||
|
||||
首次运行,会自动初始化 root 用户,密码为 `1234`(与环境变量中的`DEFAULT_ROOT_PSW`一致),日志里会提示一次`MongoServerError: Unable to read from a snapshot due to pending collection catalog changes;`可忽略。
|
||||
首次运行,会自动初始化 root 用户,密码为 `1234`(与环境变量中的`DEFAULT_ROOT_PSW`一致),日志可能会提示一次`MongoServerError: Unable to read from a snapshot due to pending collection catalog changes;`可忽略。
|
||||
|
||||
### 6. 配置模型
|
||||
### 5. 配置模型
|
||||
|
||||
务必先配置至少一组模型,否则系统无法正常使用。
|
||||
|
||||
[点击查看模型配置教程](/docs/development/modelConfig/intro/)
|
||||
- 首次登录FastGPT后,系统会提示未配置`语言模型`和`索引模型`,并自动跳转模型配置页面。系统必须至少有这两类模型才能正常使用。
|
||||
- 如果系统未正常跳转,可以在`账号-模型提供商`页面,进行模型配置。[点击查看相关教程](/docs/development/modelconfig/ai-proxy)
|
||||
- 目前已知可能问题:首次进入系统后,整个浏览器 tab 无法响应。此时需要删除该tab,重新打开一次即可。
|
||||
|
||||
## FAQ
|
||||
|
||||
### 登录系统后,浏览器无法响应
|
||||
|
||||
无法点击任何内容,刷新也无效。此时需要删除该tab,重新打开一次即可。
|
||||
|
||||
### Mongo 副本集自动初始化失败
|
||||
|
||||
最新的 docker-compose 示例优化了 Mongo 副本集初始化,实现了全自动。目前在 ubuntu 20/22、centos7、wsl2、mac、windows 上均通过测试。如果仍无法正常启动,大部分是因为 cpu 不支持 AVX 指令集,可以切换到 Mongo4.x 版本。
|
||||
|
||||
@@ -70,6 +70,7 @@ Mongo 数据库需要注意,需要注意在连接地址中增加 `directConnec
|
||||
|
||||
- `vectorMaxProcess`: 向量生成最大进程,根据数据库和 key 的并发数来决定,通常单个 120 号,2c4g 服务器设置 10~15。
|
||||
- `qaMaxProcess`: QA 生成最大进程
|
||||
- `vlmMaxProcess`: 图片理解模型最大进程
|
||||
- `pgHNSWEfSearch`: PostgreSQL vector 索引参数,越大搜索精度越高但是速度越慢,具体可看 pgvector 官方说明。
|
||||
|
||||
### 5. 运行
|
||||
|
||||
@@ -7,9 +7,18 @@ draft: false
|
||||
images: []
|
||||
---
|
||||
|
||||
## Copy文件
|
||||
## 1. 停止服务
|
||||
|
||||
```bash
|
||||
docker-compose down
|
||||
```
|
||||
|
||||
|
||||
## 2. Copy文件夹
|
||||
|
||||
Docker 部署数据库都会通过 volume 挂载本地的目录进入容器,如果要迁移,直接复制这些目录即可。
|
||||
|
||||
`PG 数据`: pg/data
|
||||
`Mongo 数据`: mongo/data
|
||||
`Mongo 数据`: mongo/data
|
||||
|
||||
直接把pg 和 mongo目录全部复制走即可。
|
||||
129
docSite/content/zh-cn/docs/development/modelConfig/ai-proxy.md
Normal file
@@ -0,0 +1,129 @@
|
||||
---
|
||||
title: '通过 AI Proxy 接入模型'
|
||||
description: '通过 AI Proxy 接入模型'
|
||||
icon: 'api'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 744
|
||||
---
|
||||
|
||||
从 `FastGPT 4.8.23` 版本开始,引入 AI Proxy 来进一步方便模型的配置。
|
||||
|
||||
AI Proxy 与 One API 类似,也是作为一个 OpenAI 接口管理 & 分发系统,可以通过标准的 OpenAI API 格式访问所有的大模型,开箱即用。
|
||||
|
||||
## 部署
|
||||
|
||||
### Docker 版本
|
||||
|
||||
`docker-compose.yml` 文件已加入了 AI Proxy 配置,可直接使用。[点击查看最新的 yml 配置](https://raw.githubusercontent.com/labring/FastGPT/main/deploy/docker/docker-compose-pgvector.yml)
|
||||
|
||||
从旧版升级的用户,可以复制 yml 里,ai proxy 的配置,加入到旧的 yml 文件中。
|
||||
|
||||
## 运行原理
|
||||
|
||||
AI proxy 核心模块:
|
||||
|
||||
1. 渠道管理:管理各家模型提供商的 API Key 和可用模型列表。
|
||||
2. 模型调用:根据请求的模型,选中对应的渠道;根据渠道的 API 格式,构造请求体,发送请求;格式化响应体成标准格式返回。
|
||||
3. 调用日志:详细记录模型调用的日志,并在错误时候可以记录其入参和报错信息,方便排查。
|
||||
|
||||
运行流程:
|
||||
|
||||

|
||||
|
||||
## 在 FastGPT 中使用
|
||||
|
||||
AI proxy 相关功能,可以在`账号-模型提供商`页面找到。
|
||||
|
||||
### 1. 创建渠道
|
||||
|
||||
在`模型提供商`的配置页面,点击`模型渠道`,进入渠道配置页面
|
||||
|
||||

|
||||
|
||||
点击右上角的“新增渠道”,即可进入渠道配置页面
|
||||
|
||||

|
||||
|
||||
以阿里云的模型为例,进行如下配置
|
||||
|
||||

|
||||
|
||||
1. 渠道名:展示在外部的渠道名称,仅作标识;
|
||||
2. 厂商:模型对应的厂商,不同厂商对应不同的默认地址和 API 密钥格式;
|
||||
3. 模型:当前渠道具体可以使用的模型,系统内置了主流的一些模型,如果下拉框中没有想要的选项,可以点击“新增模型”,[增加自定义模型](/docs/development/modelconfig/intro/#新增自定义模型);
|
||||
4. 模型映射:将 FastGPT 请求的模型,映射到具体提供的模型上。例如:
|
||||
|
||||
```json
|
||||
{
|
||||
"gpt-4o-test": "gpt-4o",
|
||||
}
|
||||
```
|
||||
|
||||
FastGPT 中的模型为 `gpt-4o-test`,向 AI Proxy 发起请求时也是 `gpt-4o-test`。AI proxy 在向上游发送请求时,实际的`model`为 `gpt-4o`。
|
||||
|
||||
5. 代理地址:具体请求的地址,系统给每个主流渠道配置了默认的地址,如果无需改动则不用填。
|
||||
6. API 密钥:从模型厂商处获取的 API 凭证。注意部分厂商需要提供多个密钥组合,可以根据提示进行输入。
|
||||
|
||||
最后点击“新增”,就能在“模型渠道”下看到刚刚配置的渠道
|
||||
|
||||

|
||||
|
||||
### 2. 渠道测试
|
||||
|
||||
然后可以对渠道进行测试,确保配置的模型有效
|
||||
|
||||

|
||||
|
||||
点击“模型测试”,可以看到配置的模型列表,点击“开始测试”
|
||||
|
||||

|
||||
|
||||
等待模型测试完成后,会输出每个模型的测试结果以及请求时长
|
||||
|
||||

|
||||
|
||||
### 3. 启用模型
|
||||
|
||||
最后在`模型配置`中,可以选择启用对应的模型,这样就能在平台中使用了,更多模型配置可以参考[模型配置](/docs/development/modelconfig/intro)
|
||||
|
||||

|
||||
|
||||
## 其他功能介绍
|
||||
|
||||
### 优先级
|
||||
|
||||
范围1~100。数值越大,越容易被优先选中。
|
||||
|
||||

|
||||
|
||||
### 启用/禁用
|
||||
|
||||
在渠道右侧的控制菜单中,还可以控制渠道的启用或禁用,被禁用的渠道将无法再提供模型服务
|
||||
|
||||

|
||||
|
||||
### 调用日志
|
||||
|
||||
在 `调用日志` 页面,会展示发送到模型处的请求记录,包括具体的输入输出 tokens、请求时间、请求耗时、请求地址等等。错误的请求,则会记录详细的入参和错误信息,方便排查,但仅会保留 1 小时(环境变量里可配置)。
|
||||
|
||||

|
||||
|
||||
## 从 OneAPI 迁移到 AI Proxy
|
||||
|
||||
可以从任意终端,发起 1 个 HTTP 请求。其中 `{{host}}` 替换成 AI Proxy 地址,`{{admin_key}}` 替换成 AI Proxy 中 `ADMIN_KEY` 的值。
|
||||
|
||||
Body 参数 `dsn` 为 OneAPI 的 mysql 连接串。
|
||||
|
||||
```bash
|
||||
curl --location --request POST '{{host}}/api/channels/import/oneapi' \
|
||||
--header 'Authorization: Bearer {{admin_key}}' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"dsn": "mysql://root:s5mfkwst@tcp(dbconn.sealoshzh.site:33123)/mydb"
|
||||
}'
|
||||
```
|
||||
|
||||
执行成功的情况下会返回 "success": true
|
||||
|
||||
迁移脚本目前并不完全精确,仅做简单的数据映射,主要迁移`代理地址`、`模型`和`API 密钥`,建议迁移后再手动检查一遍。
|
||||
@@ -13,9 +13,15 @@ weight: 744
|
||||
|
||||
## 配置模型
|
||||
|
||||
### 1. 使用 OneAPI 对接模型提供商
|
||||
### 1. 对接模型提供商
|
||||
|
||||
可以使用 [OneAPI 接入教程](/docs/development/modelconfig/one-api) 来进行模型聚合,从而可以对接更多模型提供商。你需要先在各服务商申请好 API 接入 OneAPI 后,才能在 FastGPT 中使用这些模型。示例流程如下:
|
||||
#### AI Proxy
|
||||
|
||||
从 4.8.23 版本开始, FastGPT 支持在页面上配置模型提供商,即使用 [AI Proxy 接入教程](/docs/development/modelconfig/ai-proxy) 来进行模型聚合,从而可以对接更多模型提供商。
|
||||
|
||||
#### One API
|
||||
|
||||
也可以使用 [OneAPI 接入教程](/docs/development/modelconfig/one-api)。你需要先在各服务商申请好 API 接入 OneAPI 后,才能在 FastGPT 中使用这些模型。示例流程如下:
|
||||
|
||||

|
||||
|
||||
@@ -23,22 +29,12 @@ weight: 744
|
||||
|
||||
{{% alert icon=" " context="info" %}}
|
||||
- [SiliconCloud(硅基流动)](https://cloud.siliconflow.cn/i/TR9Ym0c4): 提供开源模型调用的平台。
|
||||
- [Sealos AIProxy](https://hzh.sealos.run/?openapp=system-aiproxy): 提供国内各家模型代理,无需逐一申请 api。
|
||||
- [Sealos AIProxy](https://cloud.sealos.run/?uid=fnWRt09fZP&openapp=system-aiproxy): 提供国内各家模型代理,无需逐一申请 api。
|
||||
{{% /alert %}}
|
||||
|
||||
在 OneAPI 配置好模型后,你就可以打开 FastGPT 页面,启用对应模型了。
|
||||
|
||||
### 2. 登录 root 用户
|
||||
|
||||
仅 root 用户可以进行模型配置。
|
||||
|
||||
### 3. 进入模型配置页面
|
||||
|
||||
登录 root 用户后,在`账号-模型提供商-模型配置`中,你可以看到所有内置的模型和自定义模型,以及哪些模型启用了。
|
||||
|
||||

|
||||
|
||||
### 4. 配置介绍
|
||||
### 2. 配置介绍
|
||||
|
||||
{{% alert icon="🤖 " context="success" %}}
|
||||
注意:
|
||||
@@ -467,4 +463,4 @@ OneAPI 的语言识别接口,无法正确的识别其他模型(会始终识
|
||||
"charsPointsPrice": 0
|
||||
}
|
||||
}
|
||||
```
|
||||
```
|
||||
|
||||
@@ -20,10 +20,6 @@ FastGPT 目前采用模型分离的部署方案,FastGPT 中只兼容 OpenAI
|
||||
|
||||
## 部署
|
||||
|
||||
### Docker 版本
|
||||
|
||||
`docker-compose.yml` 文件已加入了 OneAPI 配置,可直接使用。默认暴露在 3001 端口。
|
||||
|
||||
### Sealos 版本
|
||||
|
||||
* 北京区: [点击部署 OneAPI](https://hzh.sealos.run/?openapp=system-template%3FtemplateName%3Done-api)
|
||||
|
||||
@@ -35,7 +35,7 @@ CHAT_API_KEY=sk-xxxxxx
|
||||
|
||||

|
||||
|
||||
## 5. 体验测试
|
||||
## 4. 体验测试
|
||||
|
||||
### 测试对话和图片识别
|
||||
|
||||
|
||||
@@ -297,7 +297,9 @@ curl --location --request DELETE 'http://localhost:3000/api/core/dataset/delete?
|
||||
| --- | --- | --- |
|
||||
| datasetId | 知识库ID | ✅ |
|
||||
| parentId: | 父级ID,不填则默认为根目录 | |
|
||||
| trainingType | 训练模式。chunk: 按文本长度进行分割;qa: QA拆分;auto: 增强训练 | ✅ |
|
||||
| trainingType | 数据处理方式。chunk: 按文本长度进行分割;qa: 问答对提取 | ✅ |
|
||||
| autoIndexes | 是否自动生成索引(仅商业版支持) | |
|
||||
| imageIndex | 是否自动生成图片索引(仅商业版支持) | |
|
||||
| chunkSize | 预估块大小 | |
|
||||
| chunkSplitter | 自定义最高优先分割符号 | |
|
||||
| qaPrompt | qa拆分提示词 | |
|
||||
@@ -1061,10 +1063,12 @@ curl --location --request DELETE 'http://localhost:3000/api/core/dataset/collect
|
||||
|
||||
| 字段 | 类型 | 说明 | 必填 |
|
||||
| --- | --- | --- | --- |
|
||||
| defaultIndex | Boolean | 是否为默认索引 | ✅ |
|
||||
| dataId | String | 关联的向量ID | ✅ |
|
||||
| type | String | 可选索引类型:default-默认索引; custom-自定义索引; summary-总结索引; question-问题索引; image-图片索引 | |
|
||||
| dataId | String | 关联的向量ID,变更数据时候传入该 ID,会进行差量更新,而不是全量更新 | |
|
||||
| text | String | 文本内容 | ✅ |
|
||||
|
||||
`type` 不填则默认为 `custom` 索引,还会基于 q/a 组成一个默认索引。如果传入了默认索引,则不会额外创建。
|
||||
|
||||
### 为集合批量添加数据
|
||||
|
||||
注意,每次最多推送 200 组数据。
|
||||
@@ -1079,7 +1083,7 @@ curl --location --request POST 'https://api.fastgpt.in/api/core/dataset/data/pus
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"collectionId": "64663f451ba1676dbdef0499",
|
||||
"trainingMode": "chunk",
|
||||
"trainingType": "chunk",
|
||||
"prompt": "可选。qa 拆分引导词,chunk 模式下忽略",
|
||||
"billId": "可选。如果有这个值,本次的数据会被聚合到一个订单中,这个值可以重复使用。可以参考 [创建训练订单] 获取该值。",
|
||||
"data": [
|
||||
@@ -1296,8 +1300,7 @@ curl --location --request GET 'http://localhost:3000/api/core/dataset/data/detai
|
||||
"chunkIndex": 0,
|
||||
"indexes": [
|
||||
{
|
||||
"defaultIndex": true,
|
||||
"type": "chunk",
|
||||
"type": "default",
|
||||
"dataId": "3720083",
|
||||
"text": "N o . 2 0 2 2 1 2中 国 信 息 通 信 研 究 院京东探索研究院2022年 9月人工智能生成内容(AIGC)白皮书(2022 年)版权声明本白皮书版权属于中国信息通信研究院和京东探索研究院,并受法律保护。转载、摘编或利用其它方式使用本白皮书文字或者观点的,应注明“来源:中国信息通信研究院和京东探索研究院”。违反上述声明者,编者将追究其相关法律责任。前 言习近平总书记曾指出,“数字技术正以新理念、新业态、新模式全面融入人类经济、政治、文化、社会、生态文明建设各领域和全过程”。在当前数字世界和物理世界加速融合的大背景下,人工智能生成内容(Artificial Intelligence Generated Content,简称 AIGC)正在悄然引导着一场深刻的变革,重塑甚至颠覆数字内容的生产方式和消费模式,将极大地丰富人们的数字生活,是未来全面迈向数字文明新时代不可或缺的支撑力量。",
|
||||
"_id": "65abd4b29d1448617cba61dc"
|
||||
@@ -1333,12 +1336,18 @@ curl --location --request PUT 'http://localhost:3000/api/core/dataset/data/updat
|
||||
"a":"sss",
|
||||
"indexes":[
|
||||
{
|
||||
"dataId": "xxx",
|
||||
"defaultIndex":false,
|
||||
"text":"自定义索引1"
|
||||
"dataId": "xxxx",
|
||||
"type": "default",
|
||||
"text": "默认索引"
|
||||
},
|
||||
{
|
||||
"text":"修改后的自定义索引2。(会删除原来的自定义索引2,并插入新的自定义索引2)"
|
||||
"dataId": "xxx",
|
||||
"type": "custom",
|
||||
"text": "旧的自定义索引1"
|
||||
},
|
||||
{
|
||||
"type":"custom",
|
||||
"text":"新增的自定义索引"
|
||||
}
|
||||
]
|
||||
}'
|
||||
|
||||
@@ -20,7 +20,7 @@ SANDBOX_URL=内网地址
|
||||
|
||||
## Docker 部署
|
||||
|
||||
可以拉取最新 [docker-compose.yml](https://github.com/labring/FastGPT/blob/main/files/docker/docker-compose.yml) 文件参考
|
||||
可以拉取最新 [docker-compose.yml](https://github.com/labring/FastGPT/blob/main/deploy/docker/docker-compose.yml) 文件参考
|
||||
|
||||
1. 新增一个容器 `sandbox`
|
||||
2. fastgpt 和 fastgpt-pro(商业版) 容器新增环境变量: `SANDBOX_URL`
|
||||
|
||||
@@ -35,7 +35,7 @@ curl --location --request POST 'https://{{host}}/api/admin/initv4820' \
|
||||
|
||||
## 完整更新内容
|
||||
|
||||
1. 新增 - 可视化模型参数配置,取代原配置文件配置模型。预设超过 100 个模型配置。同时支持所有类型模型的一键测试。(预计下个版本会完全支持在页面上配置渠道)。
|
||||
1. 新增 - 可视化模型参数配置,取代原配置文件配置模型。预设超过 100 个模型配置。同时支持所有类型模型的一键测试。(预计下个版本会完全支持在页面上配置渠道)。[点击查看模型配置方案](/docs/development/modelconfig/intro/)
|
||||
2. 新增 - DeepSeek reasoner 模型支持输出思考过程。
|
||||
3. 新增 - 使用记录导出和仪表盘。
|
||||
4. 新增 - markdown 语法扩展,支持音视频(代码块 audio 和 video)。
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: 'V4.8.22(进行中)'
|
||||
title: 'V4.8.22(包含升级脚本)'
|
||||
description: 'FastGPT V4.8.22 更新说明'
|
||||
icon: 'upgrade'
|
||||
draft: false
|
||||
@@ -13,8 +13,8 @@ weight: 802
|
||||
|
||||
### 2. 更新镜像:
|
||||
|
||||
- 更新 fastgpt 镜像 tag: v4.8.22-alpha
|
||||
- 更新 fastgpt-pro 商业版镜像 tag: v4.8.22-alpha
|
||||
- 更新 fastgpt 镜像 tag: v4.8.22
|
||||
- 更新 fastgpt-pro 商业版镜像 tag: v4.8.22
|
||||
- Sandbox 镜像无需更新
|
||||
|
||||
### 3. 运行升级脚本
|
||||
|
||||
54
docSite/content/zh-cn/docs/development/upgrading/4823.md
Normal file
@@ -0,0 +1,54 @@
|
||||
---
|
||||
title: 'V4.8.23'
|
||||
description: 'FastGPT V4.8.23 更新说明'
|
||||
icon: 'upgrade'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 802
|
||||
---
|
||||
|
||||
## 更新指南
|
||||
|
||||
### 1. 做好数据库备份
|
||||
|
||||
### 2. 更新镜像:
|
||||
|
||||
- 更新 fastgpt 镜像 tag: v4.8.23-fix
|
||||
- 更新 fastgpt-pro 商业版镜像 tag: v4.8.23-fix
|
||||
- Sandbox 镜像无需更新
|
||||
|
||||
### 3. 运行升级脚本
|
||||
|
||||
从任意终端,发起 1 个 HTTP 请求。其中 {{rootkey}} 替换成环境变量里的 `rootkey`;{{host}} 替换成**FastGPT 域名**。
|
||||
|
||||
```bash
|
||||
curl --location --request POST 'https://{{host}}/api/admin/initv4823' \
|
||||
--header 'rootkey: {{rootkey}}' \
|
||||
--header 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
脚本会清理一些知识库脏数据,主要是多余的全文索引。
|
||||
|
||||
## 🚀 新增内容
|
||||
|
||||
1. 增加默认“知识库文本理解模型”配置
|
||||
2. AI proxy V1版,可替换 OneAPI使用,同时提供完整模型调用日志,便于排查问题。
|
||||
3. 增加工单入口支持。
|
||||
|
||||
## ⚙️ 优化
|
||||
|
||||
1. 模型配置表单,增加必填项校验。
|
||||
2. 集合列表数据统计方式,提高大数据量统计性能。
|
||||
3. 优化数学公式,转义 Latex 格式成 Markdown 格式。
|
||||
4. 解析文档图片,图片太大时,自动忽略。
|
||||
5. 时间选择器,当天开始时间自动设为 0 点,结束时间设为 23:59:59,避免 UI 与实际逻辑偏差。
|
||||
6. 升级 mongoose 库版本依赖。
|
||||
|
||||
## 🐛 修复
|
||||
|
||||
1. 标签过滤时,子文件夹未成功过滤。
|
||||
2. 暂时移除 md 阅读优化,避免链接分割错误。
|
||||
3. 离开团队时,未刷新成员列表。
|
||||
4. PPTX 编码错误,导致解析失败。
|
||||
5. 删除知识库单条数据时,全文索引未跟随删除。
|
||||
6. 修复 Mongo Dataset text 索引在查询数据时未生效。
|
||||
@@ -15,7 +15,7 @@ weight: 821
|
||||
|
||||
## V4.8.3 更新说明
|
||||
|
||||
1. 新增 - 支持 Milvus 数据库, 可参考最新的 [docker-compose-milvus.yml](https://github.com/labring/FastGPT/blob/main/files/docker/docker-compose-milvus.yml).
|
||||
1. 新增 - 支持 Milvus 数据库, 可参考最新的 [docker-compose-milvus.yml](https://github.com/labring/FastGPT/blob/main/deploy/docker/docker-compose-milvus.yml).
|
||||
2. 新增 - 给 chat 接口 empty answer 增加 log,便于排查模型问题。
|
||||
3. 新增 - ifelse判断器,字符串支持正则。
|
||||
4. 新增 - 代码运行支持 console.log 输出调试。
|
||||
|
||||
190
docSite/content/zh-cn/docs/development/upgrading/490.md
Normal file
@@ -0,0 +1,190 @@
|
||||
---
|
||||
title: 'V4.9.0(包含升级脚本)'
|
||||
description: 'FastGPT V4.9.0 更新说明'
|
||||
icon: 'upgrade'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 801
|
||||
---
|
||||
|
||||
|
||||
## 更新指南
|
||||
|
||||
### 1. 做好数据库备份
|
||||
|
||||
### 2. 更新镜像和 PG 容器
|
||||
|
||||
- 更新 FastGPT 镜像 tag: v4.9.0
|
||||
- 更新 FastGPT 商业版镜像 tag: v4.9.0
|
||||
- Sandbox 镜像,可以不更新
|
||||
- 更新 PG 容器为 v0.8.0-pg15, 可以查看[最新的 yml](https://raw.githubusercontent.com/labring/FastGPT/main/deploy/docker/docker-compose-pgvector.yml)
|
||||
|
||||
### 3. 替换 OneAPI(可选)
|
||||
|
||||
需要使用 AI Proxy 替换 OneAPI 的用户可执行该步骤。
|
||||
|
||||
#### 1. 修改 yml 文件
|
||||
|
||||
参考[最新的 yml](https://raw.githubusercontent.com/labring/FastGPT/main/deploy/docker/docker-compose-pgvector.yml) 文件。里面已移除 OneAPI 并添加了 AIProxy配置。包含一个服务和一个 PgSQL 数据库。将 `aiproxy` 的配置`追加`到 OneAPI 的配置后面(先不要删除 OneAPI,有一个初始化会自动同步 OneAPI 的配置)
|
||||
|
||||
{{% details title="AI Proxy Yml 配置" closed="true" %}}
|
||||
|
||||
```
|
||||
# AI Proxy
|
||||
aiproxy:
|
||||
image: 'ghcr.io/labring/sealos-aiproxy-service:latest'
|
||||
container_name: aiproxy
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
aiproxy_pg:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- fastgpt
|
||||
environment:
|
||||
# 对应 fastgpt 里的AIPROXY_API_TOKEN
|
||||
- ADMIN_KEY=aiproxy
|
||||
# 错误日志详情保存时间(小时)
|
||||
- LOG_DETAIL_STORAGE_HOURS=1
|
||||
# 数据库连接地址
|
||||
- SQL_DSN=postgres://postgres:aiproxy@aiproxy_pg:5432/aiproxy
|
||||
# 最大重试次数
|
||||
- RetryTimes=3
|
||||
# 不需要计费
|
||||
- BILLING_ENABLED=false
|
||||
# 不需要严格检测模型
|
||||
- DISABLE_MODEL_CONFIG=true
|
||||
healthcheck:
|
||||
test: ['CMD', 'curl', '-f', 'http://localhost:3000/api/status']
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
aiproxy_pg:
|
||||
image: pgvector/pgvector:0.8.0-pg15 # docker hub
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # 阿里云
|
||||
restart: unless-stopped
|
||||
container_name: aiproxy_pg
|
||||
volumes:
|
||||
- ./aiproxy_pg:/var/lib/postgresql/data
|
||||
networks:
|
||||
- fastgpt
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_DB: aiproxy
|
||||
POSTGRES_PASSWORD: aiproxy
|
||||
healthcheck:
|
||||
test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy']
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
```
|
||||
|
||||
{{% /details %}}
|
||||
|
||||
#### 2. 增加 FastGPT 环境变量:
|
||||
|
||||
修改 yml 文件中,fastgpt 容器的环境变量:
|
||||
|
||||
```
|
||||
# AI Proxy 的地址,如果配了该地址,优先使用
|
||||
- AIPROXY_API_ENDPOINT=http://aiproxy:3000
|
||||
# AI Proxy 的 Admin Token,与 AI Proxy 中的环境变量 ADMIN_KEY 保持一致
|
||||
- AIPROXY_API_TOKEN=aiproxy
|
||||
```
|
||||
|
||||
#### 3. 重载服务
|
||||
|
||||
`docker-compose down` 停止服务,然后 `docker-compose up -d` 启动服务,此时会追加 `aiproxy` 服务,并修改 FastGPT 的配置。
|
||||
|
||||
#### 4. 执行OneAPI迁移AI proxy脚本
|
||||
|
||||
- 可联网方案:
|
||||
|
||||
```bash
|
||||
# 进入 aiproxy 容器
|
||||
docker exec -it aiproxy sh
|
||||
# 安装 curl
|
||||
apk add curl
|
||||
# 执行脚本
|
||||
curl --location --request POST 'http://localhost:3000/api/channels/import/oneapi' \
|
||||
--header 'Authorization: Bearer aiproxy' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"dsn": "mysql://root:oneapimmysql@tcp(mysql:3306)/oneapi"
|
||||
}'
|
||||
# 返回 {"data":[],"success":true} 代表成功
|
||||
```
|
||||
|
||||
- 无法联网时,可打开`aiproxy`的外网暴露端口,然后在本地执行脚本。
|
||||
|
||||
aiProxy 暴露端口:3003:3000,修改后重新 `docker-compose up -d` 启动服务。
|
||||
|
||||
```bash
|
||||
# 在终端执行脚本
|
||||
curl --location --request POST 'http://localhost:3003/api/channels/import/oneapi' \
|
||||
--header 'Authorization: Bearer aiproxy' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"dsn": "mysql://root:oneapimmysql@tcp(mysql:3306)/oneapi"
|
||||
}'
|
||||
# 返回 {"data":[],"success":true} 代表成功
|
||||
```
|
||||
|
||||
- 如果不熟悉 docker 操作,建议不要走脚本迁移,直接删除 OneAPI 所有内容,然后手动重新添加渠道。
|
||||
|
||||
#### 5. 进入 FastGPT 检查`AI Proxy` 服务是否正常启动。
|
||||
|
||||
登录 root 账号后,在`账号-模型提供商`页面,可以看到多出了`模型渠道`和`调用日志`两个选项,打开模型渠道,可以看到之前 OneAPI 的渠道,说明迁移完成,此时可以手动再检查下渠道是否正常。
|
||||
|
||||
#### 6. 删除 OneAPI 服务
|
||||
|
||||
```bash
|
||||
# 停止服务,或者针对性停止 OneAPI 和其 Mysql
|
||||
docker-compose down
|
||||
# yml 文件中删除 OneAPI 和其 Mysql 依赖
|
||||
# 重启服务
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
### 4. 运行 FastGPT 升级脚本
|
||||
|
||||
从任意终端,发起 1 个 HTTP 请求。其中 {{rootkey}} 替换成环境变量里的 `rootkey`;{{host}} 替换成**FastGPT 域名**。
|
||||
|
||||
```bash
|
||||
curl --location --request POST 'https://{{host}}/api/admin/initv490' \
|
||||
--header 'rootkey: {{rootkey}}' \
|
||||
--header 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
**脚本功能**
|
||||
|
||||
1. 升级 PG Vector 插件版本
|
||||
2. 全量更新知识库集合字段。
|
||||
3. 全量更新知识库数据中,index 的 type 类型。(时间较长,最后可能提示 timeout,可忽略,数据库不崩都会一直增量执行)
|
||||
|
||||
## 兼容 & 弃用
|
||||
|
||||
1. 弃用 - 之前私有化部署的自定义文件解析方案,请同步更新到最新的配置方案。[点击查看 PDF 增强解析配置](/docs/development/configuration/#使用-doc2x-解析-pdf-文件)
|
||||
2. 弃用 - 弃用旧版本地文件上传 API:/api/core/dataset/collection/create/file(以前仅商业版可用的 API,该接口已切换成:/api/core/dataset/collection/create/localFile)
|
||||
3. 停止维护,即将弃用 - 外部文件库相关 API,可通过 API 文件库替代。
|
||||
4. API更新 - 上传文件至知识库、创建连接集合、API 文件库、推送分块数据等带有 `trainingType` 字段的接口,`trainingType`字段未来仅支持`chunk`和`qa`两种模式。增强索引模式将设置单独字段:`autoIndexes`,目前仍有适配旧版`trainingType=auto`代码,但请尽快变更成新接口类型。具体可见:[知识库 OpenAPI 文档](/docs/development/openapi/dataset.md)
|
||||
|
||||
## 🚀 新增内容
|
||||
|
||||
1. PDF增强解析交互添加到页面上。同时内嵌 Doc2x 服务,可直接使用 Doc2x 服务解析 PDF 文件。
|
||||
2. 图片自动标注,同时修改知识库文件上传部分数据逻辑和交互。
|
||||
3. pg vector 插件升级 0.8.0 版本,引入迭代搜索,减少部分数据无法被检索的情况。
|
||||
4. 新增 qwen-qwq 系列模型配置。
|
||||
|
||||
## ⚙️ 优化
|
||||
|
||||
1. 知识库数据不再限制索引数量,可无限自定义。同时可自动更新输入文本的索引,不影响自定义索引。
|
||||
2. Markdown 解析,增加链接后中文标点符号检测,增加空格。
|
||||
3. Prompt 模式工具调用,支持思考模型。同时优化其格式检测,减少空输出的概率。
|
||||
4. Mongo 文件读取流合并,减少计算量。同时优化存储 chunks,极大提高大文件读取速度。50M PDF 读取速度提高 3 倍。
|
||||
5. HTTP Body 适配,增加对字符串对象的适配。
|
||||
|
||||
## 🐛 修复
|
||||
|
||||
1. 增加网页抓取安全链接校验。
|
||||
2. 批量运行时,全局变量未进一步传递到下一次运行中,导致最终变量更新错误。
|
||||
@@ -7,11 +7,11 @@ toc: true
|
||||
weight: 102
|
||||
---
|
||||
|
||||
更多使用技巧,[查看视屏教程](https://www.bilibili.com/video/BV1sH4y1T7s9)
|
||||
更多使用技巧,[查看视频教程](https://www.bilibili.com/video/BV1sH4y1T7s9)
|
||||
|
||||
## 知识库
|
||||
|
||||
开始前,请准备一份测试电子文档,WORD,PDF,TXT,excel,markdown 都可以,比如公司休假制度,不涉密的销售说辞,产品知识等等。
|
||||
开始前,请准备一份测试电子文档,WORD、PDF、TXT、excel、markdown 都可以,比如公司休假制度、不涉密的销售说辞、产品知识等等。
|
||||
|
||||
这里使用 FastGPT 中文 README 文件为例。
|
||||
|
||||
@@ -31,7 +31,7 @@ weight: 102
|
||||
|
||||

|
||||
|
||||
点击上传后我们需要等待数据处理完成,等到我们上传的文件状态为可用。
|
||||
点击上传后我们需要等待数据处理完成,直到我们上传的文件状态为可用。
|
||||
|
||||

|
||||
|
||||
|
||||
@@ -89,6 +89,9 @@ weight: 506
|
||||
47.99.59.223
|
||||
112.124.46.5
|
||||
121.40.46.247
|
||||
120.26.145.73
|
||||
120.26.147.199
|
||||
121.43.125.163
|
||||
```
|
||||
|
||||
## 4. 获取AES Key,选择加密方式
|
||||
|
||||
3
packages/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# 目录说明
|
||||
|
||||
该目录为 FastGPT 的依赖包,多端复用。
|
||||
@@ -4,6 +4,7 @@ import { ErrType } from '../errorCode';
|
||||
/* dataset: 501000 */
|
||||
export enum DatasetErrEnum {
|
||||
unExist = 'unExistDataset',
|
||||
unExistCollection = 'unExistCollection',
|
||||
unAuthDataset = 'unAuthDataset',
|
||||
unCreateCollection = 'unCreateCollection',
|
||||
unAuthDatasetCollection = 'unAuthDatasetCollection',
|
||||
@@ -28,6 +29,10 @@ const datasetErr = [
|
||||
statusText: DatasetErrEnum.unExist,
|
||||
message: 'core.dataset.error.unExistDataset'
|
||||
},
|
||||
{
|
||||
statusText: DatasetErrEnum.unExistCollection,
|
||||
message: i18nT('common:error_collection_not_exist')
|
||||
},
|
||||
{
|
||||
statusText: DatasetErrEnum.unAuthDataset,
|
||||
message: 'core.dataset.error.unAuthDataset'
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
/**
 * Call a synchronous function and call it again whenever it throws.
 *
 * The call is not awaited, so only synchronous throws are retried; a promise
 * returned by `fn` is handed back to the caller as-is.
 *
 * @param fn - The function to invoke.
 * @param retry - How many additional attempts are allowed after the first failure.
 * @returns Whatever `fn` returns on its first successful call.
 * @throws The error from the last attempt once no retries remain.
 */
export const retryRun = <T>(fn: () => T, retry = 2): T => {
  let attemptsLeft = retry;

  for (;;) {
    try {
      return fn();
    } catch (error) {
      // Written as a negated "> 0" so non-numeric budgets behave like "no retries left".
      if (!(attemptsLeft > 0)) {
        throw error;
      }
      attemptsLeft -= 1;
    }
  }
};
|
||||
|
||||
/**
 * Run `fn` over every element of `arr` with at most `batchSize` calls in flight.
 *
 * Compared with a naive `shift()`-based queue this version:
 * - leaves the caller's array untouched,
 * - processes falsy elements (`0`, `''`, `null`, `undefined`) instead of stopping on them,
 * - returns results in the same order as the input, regardless of completion order.
 *
 * @param arr - Items to process.
 * @param fn - Handler invoked once per item; may be sync or async.
 * @param batchSize - Maximum number of concurrent handlers. Values below 1 or NaN fall back to 1.
 * @returns The handler results, index-aligned with `arr`.
 * @throws Rejects with the first error thrown/rejected by `fn`.
 */
export const batchRun = async <T>(arr: T[], fn: (arr: T) => any, batchSize = 10) => {
  const result: any[] = [];

  // Never spawn fewer than one worker, and never more workers than there are items.
  const workerCount = Math.min(
    Math.max(1, Math.floor(batchSize) || 1),
    Math.max(1, arr.length)
  );

  // Shared cursor: each worker claims the next unprocessed index.
  let cursor = 0;
  const worker = async () => {
    while (cursor < arr.length) {
      const index = cursor++;
      result[index] = await fn(arr[index]);
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => worker()));

  return result;
};
|
||||
@@ -1,4 +1,4 @@
|
||||
import { batchRun } from '../fn/utils';
|
||||
import { batchRun } from '../system/utils';
|
||||
import { getNanoid, simpleText } from './tools';
|
||||
import type { ImageType } from '../../../service/worker/readFile/type';
|
||||
|
||||
@@ -37,6 +37,80 @@ export const simpleMarkdownText = (rawText: string) => {
|
||||
return rawText.trim();
|
||||
};
|
||||
|
||||
/**
 * Replace every `<table>…</table>` fragment in `content` with a Markdown pipe table.
 *
 * - Only attribute-less `<table>` and `<tr>` tags are recognised; anything else is left as-is.
 * - Both `<td>` and `<th>` cells are read, so tables with real header cells keep their header.
 * - `colspan` / `rowspan` are expanded: the first slot holds the text, the remaining
 *   covered slots are rendered as blank cells.
 * - Every row, including the header and separator, is padded to the widest row so the
 *   resulting Markdown table is rectangular.
 * - If a fragment cannot be parsed, the original HTML is returned unchanged.
 *
 * @param content - Text that may contain HTML tables.
 * @returns The text with parsable tables converted to Markdown.
 */
export const htmlTable2Md = (content: string): string => {
  // Marker stored in slots covered by a neighbouring cell's colspan/rowspan.
  const SPAN_PLACEHOLDER = '^^';

  return content.replace(/<table>[\s\S]*?<\/table>/g, (htmlTable) => {
    try {
      // Drop newlines (and the indentation after them) so each row is on a single line.
      const cleanHtml = htmlTable.replace(/\n\s*/g, '');
      const rows = cleanHtml.match(/<tr>(.*?)<\/tr>/g);
      if (!rows) return htmlTable;

      // Grid of cell texts, indexed [row][column].
      const tableData: string[][] = [];
      let maxColumns = 0;

      rows.forEach((row, rowIndex) => {
        if (!tableData[rowIndex]) {
          tableData[rowIndex] = [];
        }
        let colIndex = 0;
        const cells = row.match(/<t[dh]\b.*?>(.*?)<\/t[dh]>/g) || [];

        cells.forEach((cell) => {
          // Skip slots already reserved by a rowspan coming from an earlier row.
          while (tableData[rowIndex][colIndex]) {
            colIndex++;
          }
          const colspan = parseInt(cell.match(/colspan="(\d+)"/)?.[1] || '1', 10);
          const rowspan = parseInt(cell.match(/rowspan="(\d+)"/)?.[1] || '1', 10);
          const text = cell.replace(/<t[dh]\b.*?>|<\/t[dh]>/g, '').trim();

          for (let i = 0; i < rowspan; i++) {
            for (let j = 0; j < colspan; j++) {
              if (!tableData[rowIndex + i]) {
                tableData[rowIndex + i] = [];
              }
              tableData[rowIndex + i][colIndex + j] = i === 0 && j === 0 ? text : SPAN_PLACEHOLDER;
            }
          }
          colIndex += colspan;
          maxColumns = Math.max(maxColumns, colIndex);
        });
      });

      // Render one grid row, blanking empty/covered slots and padding to the full width.
      const toMdRow = (row: string[]): string => {
        const cells = Array.from({ length: maxColumns }, (_, i) => {
          const cell = row[i];
          return !cell || cell === SPAN_PLACEHOLDER ? ' ' : cell;
        });
        return '| ' + cells.join(' | ') + ' |';
      };

      const chunks: string[] = [
        toMdRow(tableData[0]),
        '| ' + Array(maxColumns).fill('---').join(' | ') + ' |',
        ...tableData.slice(1).map(toMdRow)
      ];

      return chunks.join('\n');
    } catch (error) {
      // Best effort: keep the original HTML rather than failing the whole document.
      return htmlTable;
    }
  });
};
|
||||
|
||||
/**
|
||||
* format markdown
|
||||
* 1. upload base64
|
||||
@@ -94,7 +168,7 @@ export const markdownProcess = async ({
|
||||
return simpleMarkdownText(imageProcess);
|
||||
};
|
||||
|
||||
export const matchMdImgTextAndUpload = (text: string) => {
|
||||
export const matchMdImg = (text: string) => {
|
||||
const base64Regex = /!\[([^\]]*)\]\((data:image\/[^;]+;base64[^)]+)\)/g;
|
||||
const imageList: ImageType[] = [];
|
||||
|
||||
|
||||
@@ -7,12 +7,14 @@ import { i18nT } from '../../../web/i18n/utils';
|
||||
dayjs.extend(utc);
|
||||
dayjs.extend(timezone);
|
||||
|
||||
export const formatTime2YMDHMW = (time?: Date) => dayjs(time).format('YYYY-MM-DD HH:mm:ss dddd');
|
||||
export const formatTime2YMDHMS = (time?: Date) =>
|
||||
export const formatTime2YMDHMW = (time?: Date | number) =>
|
||||
dayjs(time).format('YYYY-MM-DD HH:mm:ss dddd');
|
||||
export const formatTime2YMDHMS = (time?: Date | number) =>
|
||||
time ? dayjs(time).format('YYYY-MM-DD HH:mm:ss') : '';
|
||||
export const formatTime2YMDHM = (time?: Date) =>
|
||||
export const formatTime2YMDHM = (time?: Date | number) =>
|
||||
time ? dayjs(time).format('YYYY-MM-DD HH:mm') : '';
|
||||
export const formatTime2YMD = (time?: Date) => (time ? dayjs(time).format('YYYY-MM-DD') : '');
|
||||
export const formatTime2YMD = (time?: Date | number) =>
|
||||
time ? dayjs(time).format('YYYY-MM-DD') : '';
|
||||
export const formatTime2HM = (time: Date = new Date()) => dayjs(time).format('HH:mm');
|
||||
|
||||
/**
|
||||
|
||||
32
packages/global/common/system/types/index.d.ts
vendored
@@ -41,11 +41,16 @@ export type FastGPTConfigFileType = {
|
||||
};
|
||||
|
||||
export type FastGPTFeConfigsType = {
|
||||
show_workorder?: boolean;
|
||||
show_emptyChat?: boolean;
|
||||
isPlus?: boolean;
|
||||
register_method?: ['email' | 'phone' | 'sync'];
|
||||
login_method?: ['email' | 'phone']; // Attention: login method is different from oauth
|
||||
find_password_method?: ['email' | 'phone'];
|
||||
bind_notification_method?: ['email' | 'phone'];
|
||||
googleClientVerKey?: string;
|
||||
|
||||
show_emptyChat?: boolean;
|
||||
show_appStore?: boolean;
|
||||
show_git?: boolean;
|
||||
show_pay?: boolean;
|
||||
@@ -53,17 +58,22 @@ export type FastGPTFeConfigsType = {
|
||||
show_promotion?: boolean;
|
||||
show_team_chat?: boolean;
|
||||
show_compliance_copywriting?: boolean;
|
||||
show_aiproxy?: boolean;
|
||||
concatMd?: string;
|
||||
|
||||
concatMd?: string;
|
||||
docUrl?: string;
|
||||
openAPIDocUrl?: string;
|
||||
systemPluginCourseUrl?: string;
|
||||
appTemplateCourse?: string;
|
||||
customApiDomain?: string;
|
||||
customSharePageDomain?: string;
|
||||
|
||||
systemTitle?: string;
|
||||
systemDescription?: string;
|
||||
googleClientVerKey?: string;
|
||||
isPlus?: boolean;
|
||||
scripts?: { [key: string]: string }[];
|
||||
favicon?: string;
|
||||
|
||||
sso?: {
|
||||
icon?: string;
|
||||
title?: string;
|
||||
@@ -89,13 +99,14 @@ export type FastGPTFeConfigsType = {
|
||||
exportDatasetLimitMinutes?: number;
|
||||
websiteSyncLimitMinuted?: number;
|
||||
};
|
||||
scripts?: { [key: string]: string }[];
|
||||
favicon?: string;
|
||||
customApiDomain?: string;
|
||||
customSharePageDomain?: string;
|
||||
|
||||
uploadFileMaxAmount?: number;
|
||||
uploadFileMaxSize?: number;
|
||||
|
||||
// Compute by systemEnv.customPdfParse
|
||||
showCustomPdfParse?: boolean;
|
||||
customPdfParsePrice?: number;
|
||||
|
||||
lafEnv?: string;
|
||||
navbarItems?: NavbarItemType[];
|
||||
externalProviderWorkflowVariables?: ExternalProviderWorkflowVarType[];
|
||||
@@ -105,9 +116,18 @@ export type SystemEnvType = {
|
||||
openapiPrefix?: string;
|
||||
vectorMaxProcess: number;
|
||||
qaMaxProcess: number;
|
||||
vlmMaxProcess: number;
|
||||
pgHNSWEfSearch: number;
|
||||
tokenWorkers: number; // token count max worker
|
||||
|
||||
oneapiUrl?: string;
|
||||
chatApiKey?: string;
|
||||
|
||||
customPdfParse?: {
|
||||
url?: string;
|
||||
key?: string;
|
||||
|
||||
doc2xKey?: string;
|
||||
price?: number; // n points/1 page
|
||||
};
|
||||
};
|
||||
|
||||
@@ -16,3 +16,24 @@ export const retryFn = async <T>(fn: () => Promise<T>, retryTimes = 3): Promise<
|
||||
return Promise.reject(error);
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Run `fn` over every element of `arr` with at most `batchSize` calls in flight.
 *
 * Compared with a naive `shift()`-based queue this version:
 * - leaves the caller's array untouched,
 * - processes falsy elements (`0`, `''`, `null`, `undefined`) instead of stopping on them,
 * - returns results in the same order as the input, regardless of completion order.
 *
 * @param arr - Items to process.
 * @param fn - Handler invoked once per item; may be sync or async.
 * @param batchSize - Maximum number of concurrent handlers. Values below 1 or NaN fall back to 1.
 * @returns The handler results, index-aligned with `arr`.
 * @throws Rejects with the first error thrown/rejected by `fn`.
 */
export const batchRun = async <T>(arr: T[], fn: (arr: T) => any, batchSize = 10) => {
  const result: any[] = [];

  // Never spawn fewer than one worker, and never more workers than there are items.
  const workerCount = Math.min(
    Math.max(1, Math.floor(batchSize) || 1),
    Math.max(1, arr.length)
  );

  // Shared cursor: each worker claims the next unprocessed index.
  let cursor = 0;
  const worker = async () => {
    while (cursor < arr.length) {
      const index = cursor++;
      result[index] = await fn(arr[index]);
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => worker()));

  return result;
};
|
||||
|
||||
2
packages/global/core/ai/model.d.ts
vendored
@@ -17,6 +17,8 @@ type BaseModelItemType = {
|
||||
isActive?: boolean;
|
||||
isCustom?: boolean;
|
||||
isDefault?: boolean;
|
||||
isDefaultDatasetTextModel?: boolean;
|
||||
isDefaultDatasetImageModel?: boolean;
|
||||
|
||||
// If has requestUrl, it will request the model directly
|
||||
requestUrl?: string;
|
||||
|
||||
@@ -22,7 +22,7 @@ export const defaultQAModels: LLMModelItemType[] = [
|
||||
maxTemperature: 1.2,
|
||||
charsPointsPrice: 0,
|
||||
censor: false,
|
||||
vision: false,
|
||||
vision: true,
|
||||
datasetProcess: true,
|
||||
toolChoice: true,
|
||||
functionCall: false,
|
||||
@@ -59,10 +59,17 @@ export const defaultSTTModels: STTModelType[] = [
|
||||
export const getModelFromList = (
|
||||
modelList: { provider: ModelProviderIdType; name: string; model: string }[],
|
||||
model: string
|
||||
) => {
|
||||
):
|
||||
| {
|
||||
avatar: string;
|
||||
provider: ModelProviderIdType;
|
||||
name: string;
|
||||
model: string;
|
||||
}
|
||||
| undefined => {
|
||||
const modelData = modelList.find((item) => item.model === model) ?? modelList[0];
|
||||
if (!modelData) {
|
||||
throw new Error('No Key model is configured');
|
||||
return;
|
||||
}
|
||||
const provider = getModelProvider(modelData.provider);
|
||||
return {
|
||||
|
||||
1
packages/global/core/app/type.d.ts
vendored
@@ -188,6 +188,7 @@ export type AppAutoExecuteConfigType = {
|
||||
// File
|
||||
export type AppFileSelectConfigType = {
|
||||
canSelectFile: boolean;
|
||||
customPdfParse?: boolean;
|
||||
canSelectImg: boolean;
|
||||
maxFiles: number;
|
||||
};
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
import type {
|
||||
AIChatItemValueItemType,
|
||||
ChatItemType,
|
||||
ChatItemValueItemType,
|
||||
RuntimeUserPromptType,
|
||||
UserChatItemType
|
||||
SystemChatItemValueItemType,
|
||||
UserChatItemType,
|
||||
UserChatItemValueItemType
|
||||
} from '../../core/chat/type.d';
|
||||
import { ChatFileTypeEnum, ChatItemValueTypeEnum, ChatRoleEnum } from '../../core/chat/constants';
|
||||
import type {
|
||||
@@ -174,137 +177,24 @@ export const GPTMessages2Chats = (
|
||||
): ChatItemType[] => {
|
||||
const chatMessages = messages
|
||||
.map((item) => {
|
||||
const value: ChatItemType['value'] = [];
|
||||
const obj = GPT2Chat[item.role];
|
||||
|
||||
if (
|
||||
obj === ChatRoleEnum.System &&
|
||||
item.role === ChatCompletionRequestMessageRoleEnum.System
|
||||
) {
|
||||
if (Array.isArray(item.content)) {
|
||||
item.content.forEach((item) => [
|
||||
value.push({
|
||||
type: ChatItemValueTypeEnum.text,
|
||||
text: {
|
||||
content: item.text
|
||||
}
|
||||
})
|
||||
]);
|
||||
} else {
|
||||
value.push({
|
||||
type: ChatItemValueTypeEnum.text,
|
||||
text: {
|
||||
content: item.content
|
||||
}
|
||||
});
|
||||
}
|
||||
} else if (
|
||||
obj === ChatRoleEnum.Human &&
|
||||
item.role === ChatCompletionRequestMessageRoleEnum.User
|
||||
) {
|
||||
if (typeof item.content === 'string') {
|
||||
value.push({
|
||||
type: ChatItemValueTypeEnum.text,
|
||||
text: {
|
||||
content: item.content
|
||||
}
|
||||
});
|
||||
} else if (Array.isArray(item.content)) {
|
||||
item.content.forEach((item) => {
|
||||
if (item.type === 'text') {
|
||||
const value = (() => {
|
||||
if (
|
||||
obj === ChatRoleEnum.System &&
|
||||
item.role === ChatCompletionRequestMessageRoleEnum.System
|
||||
) {
|
||||
const value: SystemChatItemValueItemType[] = [];
|
||||
|
||||
if (Array.isArray(item.content)) {
|
||||
item.content.forEach((item) => [
|
||||
value.push({
|
||||
type: ChatItemValueTypeEnum.text,
|
||||
text: {
|
||||
content: item.text
|
||||
}
|
||||
});
|
||||
} else if (item.type === 'image_url') {
|
||||
value.push({
|
||||
//@ts-ignore
|
||||
type: ChatItemValueTypeEnum.file,
|
||||
file: {
|
||||
type: ChatFileTypeEnum.image,
|
||||
name: '',
|
||||
url: item.image_url.url
|
||||
}
|
||||
});
|
||||
} else if (item.type === 'file_url') {
|
||||
value.push({
|
||||
// @ts-ignore
|
||||
type: ChatItemValueTypeEnum.file,
|
||||
file: {
|
||||
type: ChatFileTypeEnum.file,
|
||||
name: item.name,
|
||||
url: item.url
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
} else if (
|
||||
obj === ChatRoleEnum.AI &&
|
||||
item.role === ChatCompletionRequestMessageRoleEnum.Assistant
|
||||
) {
|
||||
if (item.tool_calls && reserveTool) {
|
||||
// save tool calls
|
||||
const toolCalls = item.tool_calls as ChatCompletionMessageToolCall[];
|
||||
value.push({
|
||||
//@ts-ignore
|
||||
type: ChatItemValueTypeEnum.tool,
|
||||
tools: toolCalls.map((tool) => {
|
||||
let toolResponse =
|
||||
messages.find(
|
||||
(msg) =>
|
||||
msg.role === ChatCompletionRequestMessageRoleEnum.Tool &&
|
||||
msg.tool_call_id === tool.id
|
||||
)?.content || '';
|
||||
toolResponse =
|
||||
typeof toolResponse === 'string' ? toolResponse : JSON.stringify(toolResponse);
|
||||
|
||||
return {
|
||||
id: tool.id,
|
||||
toolName: tool.toolName || '',
|
||||
toolAvatar: tool.toolAvatar || '',
|
||||
functionName: tool.function.name,
|
||||
params: tool.function.arguments,
|
||||
response: toolResponse as string
|
||||
};
|
||||
})
|
||||
});
|
||||
} else if (item.function_call && reserveTool) {
|
||||
const functionCall = item.function_call as ChatCompletionMessageFunctionCall;
|
||||
const functionResponse = messages.find(
|
||||
(msg) =>
|
||||
msg.role === ChatCompletionRequestMessageRoleEnum.Function &&
|
||||
msg.name === item.function_call?.name
|
||||
) as ChatCompletionFunctionMessageParam;
|
||||
|
||||
if (functionResponse) {
|
||||
value.push({
|
||||
//@ts-ignore
|
||||
type: ChatItemValueTypeEnum.tool,
|
||||
tools: [
|
||||
{
|
||||
id: functionCall.id || '',
|
||||
toolName: functionCall.toolName || '',
|
||||
toolAvatar: functionCall.toolAvatar || '',
|
||||
functionName: functionCall.name,
|
||||
params: functionCall.arguments,
|
||||
response: functionResponse.content || ''
|
||||
}
|
||||
]
|
||||
});
|
||||
}
|
||||
} else if (item.interactive) {
|
||||
value.push({
|
||||
//@ts-ignore
|
||||
type: ChatItemValueTypeEnum.interactive,
|
||||
interactive: item.interactive
|
||||
});
|
||||
} else if (typeof item.content === 'string') {
|
||||
const lastValue = value[value.length - 1];
|
||||
if (lastValue && lastValue.type === ChatItemValueTypeEnum.text && lastValue.text) {
|
||||
lastValue.text.content += item.content;
|
||||
})
|
||||
]);
|
||||
} else {
|
||||
value.push({
|
||||
type: ChatItemValueTypeEnum.text,
|
||||
@@ -313,8 +203,145 @@ export const GPTMessages2Chats = (
|
||||
}
|
||||
});
|
||||
}
|
||||
return value;
|
||||
} else if (
|
||||
obj === ChatRoleEnum.Human &&
|
||||
item.role === ChatCompletionRequestMessageRoleEnum.User
|
||||
) {
|
||||
const value: UserChatItemValueItemType[] = [];
|
||||
|
||||
if (typeof item.content === 'string') {
|
||||
value.push({
|
||||
type: ChatItemValueTypeEnum.text,
|
||||
text: {
|
||||
content: item.content
|
||||
}
|
||||
});
|
||||
} else if (Array.isArray(item.content)) {
|
||||
item.content.forEach((item) => {
|
||||
if (item.type === 'text') {
|
||||
value.push({
|
||||
type: ChatItemValueTypeEnum.text,
|
||||
text: {
|
||||
content: item.text
|
||||
}
|
||||
});
|
||||
} else if (item.type === 'image_url') {
|
||||
value.push({
|
||||
//@ts-ignore
|
||||
type: ChatItemValueTypeEnum.file,
|
||||
file: {
|
||||
type: ChatFileTypeEnum.image,
|
||||
name: '',
|
||||
url: item.image_url.url
|
||||
}
|
||||
});
|
||||
} else if (item.type === 'file_url') {
|
||||
value.push({
|
||||
// @ts-ignore
|
||||
type: ChatItemValueTypeEnum.file,
|
||||
file: {
|
||||
type: ChatFileTypeEnum.file,
|
||||
name: item.name,
|
||||
url: item.url
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
return value;
|
||||
} else if (
|
||||
obj === ChatRoleEnum.AI &&
|
||||
item.role === ChatCompletionRequestMessageRoleEnum.Assistant
|
||||
) {
|
||||
const value: AIChatItemValueItemType[] = [];
|
||||
|
||||
if (typeof item.reasoning_text === 'string') {
|
||||
value.push({
|
||||
type: ChatItemValueTypeEnum.reasoning,
|
||||
reasoning: {
|
||||
content: item.reasoning_text
|
||||
}
|
||||
});
|
||||
}
|
||||
if (item.tool_calls && reserveTool) {
|
||||
// save tool calls
|
||||
const toolCalls = item.tool_calls as ChatCompletionMessageToolCall[];
|
||||
value.push({
|
||||
//@ts-ignore
|
||||
type: ChatItemValueTypeEnum.tool,
|
||||
tools: toolCalls.map((tool) => {
|
||||
let toolResponse =
|
||||
messages.find(
|
||||
(msg) =>
|
||||
msg.role === ChatCompletionRequestMessageRoleEnum.Tool &&
|
||||
msg.tool_call_id === tool.id
|
||||
)?.content || '';
|
||||
toolResponse =
|
||||
typeof toolResponse === 'string' ? toolResponse : JSON.stringify(toolResponse);
|
||||
|
||||
return {
|
||||
id: tool.id,
|
||||
toolName: tool.toolName || '',
|
||||
toolAvatar: tool.toolAvatar || '',
|
||||
functionName: tool.function.name,
|
||||
params: tool.function.arguments,
|
||||
response: toolResponse as string
|
||||
};
|
||||
})
|
||||
});
|
||||
}
|
||||
if (item.function_call && reserveTool) {
|
||||
const functionCall = item.function_call as ChatCompletionMessageFunctionCall;
|
||||
const functionResponse = messages.find(
|
||||
(msg) =>
|
||||
msg.role === ChatCompletionRequestMessageRoleEnum.Function &&
|
||||
msg.name === item.function_call?.name
|
||||
) as ChatCompletionFunctionMessageParam;
|
||||
|
||||
if (functionResponse) {
|
||||
value.push({
|
||||
//@ts-ignore
|
||||
type: ChatItemValueTypeEnum.tool,
|
||||
tools: [
|
||||
{
|
||||
id: functionCall.id || '',
|
||||
toolName: functionCall.toolName || '',
|
||||
toolAvatar: functionCall.toolAvatar || '',
|
||||
functionName: functionCall.name,
|
||||
params: functionCall.arguments,
|
||||
response: functionResponse.content || ''
|
||||
}
|
||||
]
|
||||
});
|
||||
}
|
||||
}
|
||||
if (item.interactive) {
|
||||
value.push({
|
||||
//@ts-ignore
|
||||
type: ChatItemValueTypeEnum.interactive,
|
||||
interactive: item.interactive
|
||||
});
|
||||
}
|
||||
if (typeof item.content === 'string') {
|
||||
const lastValue = value[value.length - 1];
|
||||
if (lastValue && lastValue.type === ChatItemValueTypeEnum.text && lastValue.text) {
|
||||
lastValue.text.content += item.content;
|
||||
} else {
|
||||
value.push({
|
||||
type: ChatItemValueTypeEnum.text,
|
||||
text: {
|
||||
content: item.content
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
return [];
|
||||
})();
|
||||
|
||||
return {
|
||||
dataId: item.dataId,
|
||||
|
||||
1
packages/global/core/chat/type.d.ts
vendored
@@ -77,6 +77,7 @@ export type AIChatItemValueItemType = {
|
||||
| ChatItemValueTypeEnum.reasoning
|
||||
| ChatItemValueTypeEnum.tool
|
||||
| ChatItemValueTypeEnum.interactive;
|
||||
|
||||
text?: {
|
||||
content: string;
|
||||
};
|
||||
|
||||
19
packages/global/core/dataset/api.d.ts
vendored
@@ -1,5 +1,5 @@
|
||||
import { DatasetDataIndexItemType, DatasetSchemaType } from './type';
|
||||
import { TrainingModeEnum, DatasetCollectionTypeEnum } from './constants';
|
||||
import { DatasetCollectionTypeEnum, DatasetCollectionDataProcessModeEnum } from './constants';
|
||||
import type { LLMModelItemType } from '../ai/model.d';
|
||||
import { ParentIdType } from 'common/parentFolder/type';
|
||||
|
||||
@@ -10,9 +10,11 @@ export type DatasetUpdateBody = {
|
||||
name?: string;
|
||||
avatar?: string;
|
||||
intro?: string;
|
||||
agentModel?: LLMModelItemType;
|
||||
status?: DatasetSchemaType['status'];
|
||||
|
||||
agentModel?: string;
|
||||
vlmModel?: string;
|
||||
|
||||
websiteConfig?: DatasetSchemaType['websiteConfig'];
|
||||
externalReadUrl?: DatasetSchemaType['externalReadUrl'];
|
||||
defaultPermission?: DatasetSchemaType['defaultPermission'];
|
||||
@@ -27,7 +29,10 @@ export type DatasetUpdateBody = {
|
||||
/* ================= collection ===================== */
|
||||
export type DatasetCollectionChunkMetadataType = {
|
||||
parentId?: string;
|
||||
trainingType?: TrainingModeEnum;
|
||||
customPdfParse?: boolean;
|
||||
trainingType?: DatasetCollectionDataProcessModeEnum;
|
||||
imageIndex?: boolean;
|
||||
autoIndexes?: boolean;
|
||||
chunkSize?: number;
|
||||
chunkSplitter?: string;
|
||||
qaPrompt?: string;
|
||||
@@ -131,9 +136,15 @@ export type PostWebsiteSyncParams = {
|
||||
export type PushDatasetDataProps = {
|
||||
collectionId: string;
|
||||
data: PushDatasetDataChunkProps[];
|
||||
trainingMode: TrainingModeEnum;
|
||||
trainingType?: DatasetCollectionDataProcessModeEnum;
|
||||
autoIndexes?: boolean;
|
||||
imageIndex?: boolean;
|
||||
prompt?: string;
|
||||
|
||||
billId?: string;
|
||||
|
||||
// Abandon
|
||||
trainingMode?: DatasetCollectionDataProcessModeEnum;
|
||||
};
|
||||
export type PushDatasetDataResponse = {
|
||||
insertLen: number;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { DatasetCollectionTypeEnum, TrainingModeEnum, TrainingTypeMap } from '../constants';
|
||||
import { DatasetCollectionTypeEnum } from '../constants';
|
||||
import { DatasetCollectionSchemaType } from '../type';
|
||||
|
||||
export const getCollectionSourceData = (collection?: DatasetCollectionSchemaType) => {
|
||||
@@ -16,9 +16,3 @@ export const getCollectionSourceData = (collection?: DatasetCollectionSchemaType
|
||||
export const checkCollectionIsFolder = (type: DatasetCollectionTypeEnum) => {
|
||||
return type === DatasetCollectionTypeEnum.folder || type === DatasetCollectionTypeEnum.virtual;
|
||||
};
|
||||
|
||||
export const getTrainingTypeLabel = (type?: TrainingModeEnum) => {
|
||||
if (!type) return '';
|
||||
if (!TrainingTypeMap[type]) return '';
|
||||
return TrainingTypeMap[type].label;
|
||||
};
|
||||
|
||||
@@ -109,6 +109,26 @@ export const DatasetCollectionSyncResultMap = {
|
||||
}
|
||||
};
|
||||
|
||||
export enum DatasetCollectionDataProcessModeEnum {
|
||||
chunk = 'chunk',
|
||||
qa = 'qa',
|
||||
auto = 'auto' // abandon
|
||||
}
|
||||
export const DatasetCollectionDataProcessModeMap = {
|
||||
[DatasetCollectionDataProcessModeEnum.chunk]: {
|
||||
label: i18nT('common:core.dataset.training.Chunk mode'),
|
||||
tooltip: i18nT('common:core.dataset.import.Chunk Split Tip')
|
||||
},
|
||||
[DatasetCollectionDataProcessModeEnum.qa]: {
|
||||
label: i18nT('common:core.dataset.training.QA mode'),
|
||||
tooltip: i18nT('common:core.dataset.import.QA Import Tip')
|
||||
},
|
||||
[DatasetCollectionDataProcessModeEnum.auto]: {
|
||||
label: i18nT('common:core.dataset.training.Auto mode'),
|
||||
tooltip: i18nT('common:core.dataset.training.Auto mode Tip')
|
||||
}
|
||||
};
|
||||
|
||||
/* ------------ data -------------- */
|
||||
|
||||
/* ------------ training -------------- */
|
||||
@@ -124,28 +144,11 @@ export enum ImportDataSourceEnum {
|
||||
|
||||
export enum TrainingModeEnum {
|
||||
chunk = 'chunk',
|
||||
qa = 'qa',
|
||||
auto = 'auto',
|
||||
qa = 'qa'
|
||||
image = 'image'
|
||||
}
|
||||
|
||||
export const TrainingTypeMap = {
|
||||
[TrainingModeEnum.chunk]: {
|
||||
label: i18nT('common:core.dataset.training.Chunk mode'),
|
||||
tooltip: i18nT('common:core.dataset.import.Chunk Split Tip'),
|
||||
openSource: true
|
||||
},
|
||||
[TrainingModeEnum.auto]: {
|
||||
label: i18nT('common:core.dataset.training.Auto mode'),
|
||||
tooltip: i18nT('common:core.dataset.training.Auto mode Tip'),
|
||||
openSource: false
|
||||
},
|
||||
[TrainingModeEnum.qa]: {
|
||||
label: i18nT('common:core.dataset.training.QA mode'),
|
||||
tooltip: i18nT('common:core.dataset.import.QA Import Tip'),
|
||||
openSource: true
|
||||
}
|
||||
};
|
||||
|
||||
/* ------------ search -------------- */
|
||||
export enum DatasetSearchModeEnum {
|
||||
embedding = 'embedding',
|
||||
|
||||
25
packages/global/core/dataset/controller.d.ts
vendored
@@ -20,9 +20,22 @@ export type UpdateDatasetDataProps = {
|
||||
})[];
|
||||
};
|
||||
|
||||
export type PatchIndexesProps = {
|
||||
type: 'create' | 'update' | 'delete' | 'unChange';
|
||||
index: Omit<DatasetDataIndexItemType, 'dataId'> & {
|
||||
dataId?: string;
|
||||
};
|
||||
};
|
||||
export type PatchIndexesProps =
|
||||
| {
|
||||
type: 'create';
|
||||
index: Omit<DatasetDataIndexItemType, 'dataId'> & {
|
||||
dataId?: string;
|
||||
};
|
||||
}
|
||||
| {
|
||||
type: 'update';
|
||||
index: DatasetDataIndexItemType;
|
||||
}
|
||||
| {
|
||||
type: 'delete';
|
||||
index: DatasetDataIndexItemType;
|
||||
}
|
||||
| {
|
||||
type: 'unChange';
|
||||
index: DatasetDataIndexItemType;
|
||||
};
|
||||
|
||||
42
packages/global/core/dataset/data/constants.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
import { i18nT } from '../../../../web/i18n/utils';
|
||||
|
||||
export enum DatasetDataIndexTypeEnum {
|
||||
default = 'default',
|
||||
custom = 'custom',
|
||||
summary = 'summary',
|
||||
question = 'question',
|
||||
image = 'image'
|
||||
}
|
||||
|
||||
export const DatasetDataIndexMap: Record<
|
||||
`${DatasetDataIndexTypeEnum}`,
|
||||
{
|
||||
label: any;
|
||||
color: string;
|
||||
}
|
||||
> = {
|
||||
[DatasetDataIndexTypeEnum.default]: {
|
||||
label: i18nT('dataset:data_index_default'),
|
||||
color: 'gray'
|
||||
},
|
||||
[DatasetDataIndexTypeEnum.custom]: {
|
||||
label: i18nT('dataset:data_index_custom'),
|
||||
color: 'blue'
|
||||
},
|
||||
[DatasetDataIndexTypeEnum.summary]: {
|
||||
label: i18nT('dataset:data_index_summary'),
|
||||
color: 'green'
|
||||
},
|
||||
[DatasetDataIndexTypeEnum.question]: {
|
||||
label: i18nT('dataset:data_index_question'),
|
||||
color: 'red'
|
||||
},
|
||||
[DatasetDataIndexTypeEnum.image]: {
|
||||
label: i18nT('dataset:data_index_image'),
|
||||
color: 'purple'
|
||||
}
|
||||
};
|
||||
export const defaultDatasetIndexData = DatasetDataIndexMap[DatasetDataIndexTypeEnum.custom];
|
||||
export const getDatasetIndexMapData = (type: `${DatasetDataIndexTypeEnum}`) => {
|
||||
return DatasetDataIndexMap[type] || defaultDatasetIndexData;
|
||||
};
|
||||
20
packages/global/core/dataset/training/type.d.ts
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
import { PushDatasetDataChunkProps } from '../api';
|
||||
import { TrainingModeEnum } from '../constants';
|
||||
|
||||
export type PushDataToTrainingQueueProps = {
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
datasetId: string;
|
||||
collectionId: string;
|
||||
|
||||
mode?: TrainingModeEnum;
|
||||
data: PushDatasetDataChunkProps[];
|
||||
prompt?: string;
|
||||
|
||||
agentModel: string;
|
||||
vectorModel: string;
|
||||
vlmModel?: string;
|
||||
|
||||
billId?: string;
|
||||
session?: ClientSession;
|
||||
};
|
||||
46
packages/global/core/dataset/type.d.ts
vendored
@@ -2,6 +2,7 @@ import type { LLMModelItemType, EmbeddingModelItemType } from '../../core/ai/mod
|
||||
import { PermissionTypeEnum } from '../../support/permission/constant';
|
||||
import { PushDatasetDataChunkProps } from './api';
|
||||
import {
|
||||
DatasetCollectionDataProcessModeEnum,
|
||||
DatasetCollectionTypeEnum,
|
||||
DatasetStatusEnum,
|
||||
DatasetTypeEnum,
|
||||
@@ -12,6 +13,7 @@ import { DatasetPermission } from '../../support/permission/dataset/controller';
|
||||
import { Permission } from '../../support/permission/controller';
|
||||
import { APIFileServer, FeishuServer, YuqueServer } from './apiDataset';
|
||||
import { SourceMemberType } from 'support/user/type';
|
||||
import { DatasetDataIndexTypeEnum } from './data/constants';
|
||||
|
||||
export type DatasetSchemaType = {
|
||||
_id: string;
|
||||
@@ -23,11 +25,14 @@ export type DatasetSchemaType = {
|
||||
|
||||
avatar: string;
|
||||
name: string;
|
||||
vectorModel: string;
|
||||
agentModel: string;
|
||||
intro: string;
|
||||
type: `${DatasetTypeEnum}`;
|
||||
status: `${DatasetStatusEnum}`;
|
||||
|
||||
vectorModel: string;
|
||||
agentModel: string;
|
||||
vlmModel?: string;
|
||||
|
||||
websiteConfig?: {
|
||||
url: string;
|
||||
selector: string;
|
||||
@@ -52,26 +57,22 @@ export type DatasetCollectionSchemaType = {
|
||||
parentId?: string;
|
||||
name: string;
|
||||
type: DatasetCollectionTypeEnum;
|
||||
createTime: Date;
|
||||
updateTime: Date;
|
||||
forbid?: boolean;
|
||||
|
||||
trainingType: TrainingModeEnum;
|
||||
chunkSize: number;
|
||||
chunkSplitter?: string;
|
||||
qaPrompt?: string;
|
||||
ocrParse?: boolean;
|
||||
|
||||
tags?: string[];
|
||||
|
||||
createTime: Date;
|
||||
updateTime: Date;
|
||||
|
||||
// Status
|
||||
forbid?: boolean;
|
||||
nextSyncTime?: Date;
|
||||
|
||||
// Collection metadata
|
||||
fileId?: string; // local file id
|
||||
rawLink?: string; // link url
|
||||
externalFileId?: string; //external file id
|
||||
apiFileId?: string; // api file id
|
||||
externalFileUrl?: string; // external import url
|
||||
|
||||
nextSyncTime?: Date;
|
||||
|
||||
rawTextLength?: number;
|
||||
hashRawText?: string;
|
||||
metadata?: {
|
||||
@@ -80,6 +81,16 @@ export type DatasetCollectionSchemaType = {
|
||||
|
||||
[key: string]: any;
|
||||
};
|
||||
|
||||
// Parse settings
|
||||
customPdfParse?: boolean;
|
||||
// Chunk settings
|
||||
autoIndexes?: boolean;
|
||||
imageIndex?: boolean;
|
||||
trainingType: DatasetCollectionDataProcessModeEnum;
|
||||
chunkSize: number;
|
||||
chunkSplitter?: string;
|
||||
qaPrompt?: string;
|
||||
};
|
||||
|
||||
export type DatasetCollectionTagsSchemaType = {
|
||||
@@ -90,7 +101,7 @@ export type DatasetCollectionTagsSchemaType = {
|
||||
};
|
||||
|
||||
export type DatasetDataIndexItemType = {
|
||||
defaultIndex: boolean;
|
||||
type: `${DatasetDataIndexTypeEnum}`;
|
||||
dataId: string; // pg data id
|
||||
text: string;
|
||||
};
|
||||
@@ -141,6 +152,7 @@ export type DatasetTrainingSchemaType = {
|
||||
chunkIndex: number;
|
||||
weight: number;
|
||||
indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
|
||||
retryCount: number;
|
||||
};
|
||||
|
||||
export type CollectionWithDatasetType = DatasetCollectionSchemaType & {
|
||||
@@ -169,9 +181,10 @@ export type DatasetListItemType = {
|
||||
sourceMember?: SourceMemberType;
|
||||
};
|
||||
|
||||
export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel' | 'agentModel'> & {
|
||||
export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel' | 'agentModel' | 'vlmModel'> & {
|
||||
vectorModel: EmbeddingModelItemType;
|
||||
agentModel: LLMModelItemType;
|
||||
vlmModel?: LLMModelItemType;
|
||||
permission: DatasetPermission;
|
||||
};
|
||||
|
||||
@@ -192,6 +205,7 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & {
|
||||
sourceId?: string;
|
||||
file?: DatasetFileSchema;
|
||||
permission: DatasetPermission;
|
||||
indexAmount: number;
|
||||
};
|
||||
|
||||
/* ================= data ===================== */
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { TrainingModeEnum, DatasetCollectionTypeEnum } from './constants';
|
||||
import { getFileIcon } from '../../common/file/icon';
|
||||
import { strIsLink } from '../../common/string/tools';
|
||||
import { DatasetDataIndexTypeEnum } from './data/constants';
|
||||
|
||||
export function getCollectionIcon(
|
||||
type: DatasetCollectionTypeEnum = DatasetCollectionTypeEnum.file,
|
||||
@@ -38,14 +39,23 @@ export function getSourceNameIcon({
|
||||
}
|
||||
|
||||
/* get dataset data default index */
|
||||
export function getDefaultIndex(props?: { q?: string; a?: string; dataId?: string }) {
|
||||
const { q = '', a, dataId } = props || {};
|
||||
const qaStr = `${q}\n${a}`.trim();
|
||||
return {
|
||||
defaultIndex: true,
|
||||
text: a ? qaStr : q,
|
||||
dataId
|
||||
};
|
||||
export function getDefaultIndex(props?: { q?: string; a?: string }) {
|
||||
const { q = '', a } = props || {};
|
||||
|
||||
return [
|
||||
{
|
||||
text: q,
|
||||
type: DatasetDataIndexTypeEnum.default
|
||||
},
|
||||
...(a
|
||||
? [
|
||||
{
|
||||
text: a,
|
||||
type: DatasetDataIndexTypeEnum.default
|
||||
}
|
||||
]
|
||||
: [])
|
||||
];
|
||||
}
|
||||
|
||||
export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {
|
||||
|
||||
@@ -420,137 +420,3 @@ export function rewriteNodeOutputByHistories(
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Splits a complete (non-streamed) response into reasoning and answer text.
 *
 * Looks for the first `<think>…</think>` pair. When one is found, the trimmed
 * text between the tags is the reasoning and everything after the closing tag
 * is the answer (text before the opening tag is discarded). When no pair is
 * found, the reasoning is empty and the whole input is the answer.
 *
 * @param text - Full response text.
 * @returns A `[reasoning, answer]` tuple.
 */
export const parseReasoningContent = (text: string): [string, string] => {
  const thinkBlock = /<think>([\s\S]*?)<\/think>/.exec(text);

  // No think block at all: the entire text is the answer.
  if (thinkBlock === null) {
    return ['', text];
  }

  const answerStart = thinkBlock.index + thinkBlock[0].length;

  return [thinkBlock[1].trim(), text.slice(answerStart)];
};
|
||||
|
||||
/**
 * Creates a stateful parser that splits a streamed response into reasoning
 * ("think") text and answer text, chunk by chunk.
 *
 * The returned object exposes:
 * - `parsePart(part, parseReasoning)`: feed one stream chunk, get back
 *   `[reasoningDelta, answerDelta]` for that chunk.
 * - `getStartTagBuffer()`: the text buffered so far while deciding whether the
 *   stream starts with `<think>`.
 */
export const parseReasoningStreamContent = () => {
  // undefined: not decided yet; true: inside <think>; false: outside / finished.
  let isInThinkTag: boolean | undefined;

  const startTag = '<think>';
  let startTagBuffer = '';

  const endTag = '</think>';
  // Holds a trailing fragment that might be the beginning of `</think>`.
  let endTagBuffer = '';

  /**
   * Handles text that is known to be inside the think section.
   *
   * The held-back fragment (if any) is prepended, then:
   * - if a full `</think>` is present, everything before it is reasoning and
   *   everything after it is answer, and think mode ends;
   * - otherwise the longest tail that could still grow into `</think>` is held
   *   back for the next chunk and the rest is emitted as reasoning.
   */
  const parseThinkText = (text: string): [string, string] => {
    const buffer = endTagBuffer + text;
    endTagBuffer = '';

    const endIndex = buffer.indexOf(endTag);
    if (endIndex !== -1) {
      isInThinkTag = false;
      return [buffer.slice(0, endIndex), buffer.slice(endIndex + endTag.length)];
    }

    // Longest suffix of the buffer that is a proper prefix of the end tag.
    for (let i = Math.min(endTag.length - 1, buffer.length); i > 0; i--) {
      if (buffer.endsWith(endTag.slice(0, i))) {
        endTagBuffer = buffer.slice(-i);
        return [buffer.slice(0, -i), ''];
      }
    }

    // No trace of the end tag: still plain reasoning text.
    return [buffer, ''];
  };

  /*
    parseReasoning only controls whether <think></think> is actively parsed
    out of `content`. If the API already returns `reasoning_content`, that
    reasoning text is still returned.
  */
  const parsePart = (
    part: {
      choices: {
        delta: {
          content?: string;
          reasoning_content?: string;
        };
      }[];
    },
    parseReasoning = false
  ): [string, string] => {
    const content = part.choices?.[0]?.delta?.content || '';

    const reasoningContent = part.choices?.[0]?.delta?.reasoning_content || '';
    // The API already separated the reasoning, or tag parsing is disabled:
    // pass both fields through unchanged and stop looking for tags.
    if (reasoningContent || !parseReasoning) {
      isInThinkTag = false;
      return [reasoningContent, content];
    }

    if (!content) {
      return ['', ''];
    }

    // Already decided that we are outside the think section: all answer.
    if (isInThinkTag === false) {
      return ['', content];
    }

    // Still deciding whether the stream starts with <think>.
    if (isInThinkTag === undefined) {
      startTagBuffer += content;
      // Too little text to decide yet; keep buffering.
      if (startTagBuffer.length < startTag.length) {
        return ['', ''];
      }

      if (startTagBuffer.startsWith(startTag)) {
        isInThinkTag = true;
        // The remainder may already contain (part of) the end tag.
        return parseThinkText(startTagBuffer.slice(startTag.length));
      }

      // Not a think block: release the buffered text as answer.
      isInThinkTag = false;
      return ['', startTagBuffer];
    }

    // Inside the think section: emit reasoning and watch for </think>.
    return parseThinkText(content);
  };

  const getStartTagBuffer = () => startTagBuffer;

  return {
    parsePart,
    getStartTagBuffer
  };
};
|
||||
|
||||
@@ -55,7 +55,7 @@ export const AiChatModule: FlowNodeTemplateType = {
|
||||
showStatus: true,
|
||||
isTool: true,
|
||||
courseUrl: '/docs/guide/workbench/workflow/ai_chat/',
|
||||
version: '4813',
|
||||
version: '490',
|
||||
inputs: [
|
||||
Input_Template_SettingAiModel,
|
||||
// --- settings modal
|
||||
|
||||
@@ -58,6 +58,13 @@ export const ToolModule: FlowNodeTemplateType = {
|
||||
valueType: WorkflowIOValueTypeEnum.boolean,
|
||||
value: true
|
||||
},
|
||||
{
|
||||
key: NodeInputKeyEnum.aiChatReasoning,
|
||||
renderTypeList: [FlowNodeInputTypeEnum.hidden],
|
||||
label: '',
|
||||
valueType: WorkflowIOValueTypeEnum.boolean,
|
||||
value: true
|
||||
},
|
||||
{
|
||||
key: NodeInputKeyEnum.aiChatTopP,
|
||||
renderTypeList: [FlowNodeInputTypeEnum.hidden],
|
||||
|
||||
@@ -10,7 +10,6 @@ export type AuthTeamRoleProps = {
|
||||
export type CreateTeamProps = {
|
||||
name: string;
|
||||
avatar?: string;
|
||||
defaultTeam?: boolean;
|
||||
memberName?: string;
|
||||
memberAvatar?: string;
|
||||
notificationAccount?: string;
|
||||
|
||||
2
packages/global/support/user/team/type.d.ts
vendored
@@ -47,7 +47,6 @@ export type TeamMemberSchema = {
|
||||
role: `${TeamMemberRoleEnum}`;
|
||||
status: `${TeamMemberStatusEnum}`;
|
||||
avatar: string;
|
||||
defaultTeam: boolean;
|
||||
};
|
||||
|
||||
export type TeamMemberWithTeamAndUserSchema = TeamMemberSchema & {
|
||||
@@ -65,7 +64,6 @@ export type TeamTmbItemType = {
|
||||
balance?: number;
|
||||
tmbId: string;
|
||||
teamDomain: string;
|
||||
defaultTeam: boolean;
|
||||
role: `${TeamMemberRoleEnum}`;
|
||||
status: `${TeamMemberStatusEnum}`;
|
||||
notificationAccount?: string;
|
||||
|
||||
1
packages/global/support/user/type.d.ts
vendored
@@ -27,7 +27,6 @@ export type UserType = {
|
||||
timezone: string;
|
||||
promotionRate: UserModelSchema['promotionRate'];
|
||||
team: TeamTmbItemType;
|
||||
standardInfo?: standardInfoType;
|
||||
notificationAccount?: string;
|
||||
permission: TeamPermission;
|
||||
contact?: string;
|
||||
|
||||
@@ -10,7 +10,8 @@ export enum UsageSourceEnum {
|
||||
wecom = 'wecom',
|
||||
feishu = 'feishu',
|
||||
dingtalk = 'dingtalk',
|
||||
official_account = 'official_account'
|
||||
official_account = 'official_account',
|
||||
pdfParse = 'pdfParse'
|
||||
}
|
||||
|
||||
export const UsageSourceMap = {
|
||||
@@ -43,5 +44,8 @@ export const UsageSourceMap = {
|
||||
},
|
||||
[UsageSourceEnum.dingtalk]: {
|
||||
label: i18nT('account_usage:dingtalk')
|
||||
},
|
||||
[UsageSourceEnum.pdfParse]: {
|
||||
label: i18nT('account_usage:pdf_parse')
|
||||
}
|
||||
};
|
||||
|
||||
@@ -7,6 +7,7 @@ export type UsageListItemCountType = {
|
||||
outputTokens?: number;
|
||||
charsLength?: number;
|
||||
duration?: number;
|
||||
pages?: number;
|
||||
|
||||
// deprecated
|
||||
tokens?: number;
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
"nodeId": "lmpb9v2lo2lk",
|
||||
"name": "插件开始",
|
||||
"intro": "自定义配置外部输入,使用插件时,仅暴露自定义配置的输入",
|
||||
"avatar": "/imgs/workflow/input.png",
|
||||
"avatar": "core/workflow/template/workflowStart",
|
||||
"flowNodeType": "pluginInput",
|
||||
"showStatus": false,
|
||||
"position": {
|
||||
@@ -26,14 +26,16 @@
|
||||
"version": "481",
|
||||
"inputs": [
|
||||
{
|
||||
"renderTypeList": ["reference"],
|
||||
"renderTypeList": ["input", "reference"],
|
||||
"selectedTypeIndex": 0,
|
||||
"valueType": "string",
|
||||
"key": "url",
|
||||
"label": "url",
|
||||
"description": "需要读取的网页链接",
|
||||
"required": true,
|
||||
"toolDescription": "需要读取的网页链接"
|
||||
"toolDescription": "需要读取的网页链接",
|
||||
"list": [],
|
||||
"defaultValue": ""
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
@@ -50,12 +52,12 @@
|
||||
"nodeId": "i7uow4wj2wdp",
|
||||
"name": "插件输出",
|
||||
"intro": "自定义配置外部输出,使用插件时,仅暴露自定义配置的输出",
|
||||
"avatar": "/imgs/workflow/output.png",
|
||||
"avatar": "core/workflow/template/pluginOutput",
|
||||
"flowNodeType": "pluginOutput",
|
||||
"showStatus": false,
|
||||
"position": {
|
||||
"x": 1607.7142331269129,
|
||||
"y": -150.8808596935447
|
||||
"x": 1853.935047606551,
|
||||
"y": -154.13661665265613
|
||||
},
|
||||
"version": "481",
|
||||
"inputs": [
|
||||
@@ -81,12 +83,12 @@
|
||||
"nodeId": "ebLCxU43hHuZ",
|
||||
"name": "HTTP 请求",
|
||||
"intro": "可以发出一个 HTTP 请求,实现更为复杂的操作(联网搜索、数据库查询等)",
|
||||
"avatar": "/imgs/workflow/http.png",
|
||||
"avatar": "core/workflow/template/httpRequest",
|
||||
"flowNodeType": "httpRequest468",
|
||||
"showStatus": true,
|
||||
"position": {
|
||||
"x": 1050.9890727421412,
|
||||
"y": -415.2085119990912
|
||||
"x": 1054.2940501177068,
|
||||
"y": -503.13661665265613
|
||||
},
|
||||
"version": "481",
|
||||
"inputs": [
|
||||
@@ -96,7 +98,7 @@
|
||||
"valueType": "dynamic",
|
||||
"label": "",
|
||||
"required": false,
|
||||
"description": "core.module.input.description.HTTP Dynamic Input",
|
||||
"description": "common:core.module.input.description.HTTP Dynamic Input",
|
||||
"customInputConfig": {
|
||||
"selectValueTypeList": [
|
||||
"string",
|
||||
@@ -107,16 +109,19 @@
|
||||
"arrayNumber",
|
||||
"arrayBoolean",
|
||||
"arrayObject",
|
||||
"arrayAny",
|
||||
"any",
|
||||
"chatHistory",
|
||||
"datasetQuote",
|
||||
"dynamic",
|
||||
"selectApp",
|
||||
"selectDataset"
|
||||
"selectDataset",
|
||||
"selectApp"
|
||||
],
|
||||
"showDescription": false,
|
||||
"showDefaultValue": true
|
||||
}
|
||||
},
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpMethod",
|
||||
@@ -124,17 +129,33 @@
|
||||
"valueType": "string",
|
||||
"label": "",
|
||||
"value": "POST",
|
||||
"required": true
|
||||
"required": true,
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpTimeout",
|
||||
"renderTypeList": ["custom"],
|
||||
"valueType": "number",
|
||||
"label": "",
|
||||
"value": 30,
|
||||
"min": 5,
|
||||
"max": 600,
|
||||
"required": true,
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpReqUrl",
|
||||
"renderTypeList": ["hidden"],
|
||||
"valueType": "string",
|
||||
"label": "",
|
||||
"description": "core.module.input.description.Http Request Url",
|
||||
"description": "common:core.module.input.description.Http Request Url",
|
||||
"placeholder": "https://api.ai.com/getInventory",
|
||||
"required": false,
|
||||
"value": "fetchUrl"
|
||||
"value": "fetchUrl",
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpHeader",
|
||||
@@ -142,9 +163,11 @@
|
||||
"valueType": "any",
|
||||
"value": [],
|
||||
"label": "",
|
||||
"description": "core.module.input.description.Http Request Header",
|
||||
"placeholder": "core.module.input.description.Http Request Header",
|
||||
"required": false
|
||||
"description": "common:core.module.input.description.Http Request Header",
|
||||
"placeholder": "common:core.module.input.description.Http Request Header",
|
||||
"required": false,
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpParams",
|
||||
@@ -152,7 +175,9 @@
|
||||
"valueType": "any",
|
||||
"value": [],
|
||||
"label": "",
|
||||
"required": false
|
||||
"required": false,
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpJsonBody",
|
||||
@@ -160,7 +185,29 @@
|
||||
"valueType": "any",
|
||||
"value": "{\n \"url\": \"{{url}}\"\n}",
|
||||
"label": "",
|
||||
"required": false
|
||||
"required": false,
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpFormBody",
|
||||
"renderTypeList": ["hidden"],
|
||||
"valueType": "any",
|
||||
"value": [],
|
||||
"label": "",
|
||||
"required": false,
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpContentType",
|
||||
"renderTypeList": ["hidden"],
|
||||
"valueType": "string",
|
||||
"value": "json",
|
||||
"label": "",
|
||||
"required": false,
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"renderTypeList": ["reference"],
|
||||
@@ -178,12 +225,13 @@
|
||||
"arrayNumber",
|
||||
"arrayBoolean",
|
||||
"arrayObject",
|
||||
"arrayAny",
|
||||
"any",
|
||||
"chatHistory",
|
||||
"datasetQuote",
|
||||
"dynamic",
|
||||
"selectApp",
|
||||
"selectDataset"
|
||||
"selectDataset",
|
||||
"selectApp"
|
||||
],
|
||||
"showDescription": false,
|
||||
"showDefaultValue": true
|
||||
@@ -193,6 +241,23 @@
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"id": "error",
|
||||
"key": "error",
|
||||
"label": "workflow:request_error",
|
||||
"description": "HTTP请求错误信息,成功时返回空",
|
||||
"valueType": "object",
|
||||
"type": "static"
|
||||
},
|
||||
{
|
||||
"id": "httpRawResponse",
|
||||
"key": "httpRawResponse",
|
||||
"required": true,
|
||||
"label": "workflow:raw_response",
|
||||
"description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。",
|
||||
"valueType": "any",
|
||||
"type": "static"
|
||||
},
|
||||
{
|
||||
"id": "system_addOutputParam",
|
||||
"key": "system_addOutputParam",
|
||||
@@ -220,23 +285,6 @@
|
||||
"showDefaultValue": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "error",
|
||||
"key": "error",
|
||||
"label": "请求错误",
|
||||
"description": "HTTP请求错误信息,成功时返回空",
|
||||
"valueType": "object",
|
||||
"type": "static"
|
||||
},
|
||||
{
|
||||
"id": "httpRawResponse",
|
||||
"key": "httpRawResponse",
|
||||
"label": "原始响应",
|
||||
"required": true,
|
||||
"description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。",
|
||||
"valueType": "any",
|
||||
"type": "static"
|
||||
},
|
||||
{
|
||||
"id": "rH4tMV02robs",
|
||||
"valueType": "string",
|
||||
@@ -260,6 +308,34 @@
|
||||
"sourceHandle": "ebLCxU43hHuZ-source-right",
|
||||
"targetHandle": "i7uow4wj2wdp-target-left"
|
||||
}
|
||||
]
|
||||
],
|
||||
"chatConfig": {
|
||||
"welcomeText": "",
|
||||
"variables": [],
|
||||
"questionGuide": {
|
||||
"open": false,
|
||||
"model": "gpt-4o-mini",
|
||||
"customPrompt": "You are an AI assistant tasked with predicting the user's next question based on the conversation history. Your goal is to generate 3 potential questions that will guide the user to continue the conversation. When generating these questions, adhere to the following rules:\n\n1. Use the same language as the user's last question in the conversation history.\n2. Keep each question under 20 characters in length.\n\nAnalyze the conversation history provided to you and use it as context to generate relevant and engaging follow-up questions. Your predictions should be logical extensions of the current topic or related areas that the user might be interested in exploring further.\n\nRemember to maintain consistency in tone and style with the existing conversation while providing diverse options for the user to choose from. Your goal is to keep the conversation flowing naturally and help the user delve deeper into the subject matter or explore related topics."
|
||||
},
|
||||
"ttsConfig": {
|
||||
"type": "web"
|
||||
},
|
||||
"whisperConfig": {
|
||||
"open": false,
|
||||
"autoSend": false,
|
||||
"autoTTSResponse": false
|
||||
},
|
||||
"chatInputGuide": {
|
||||
"open": false,
|
||||
"textList": [],
|
||||
"customUrl": ""
|
||||
},
|
||||
"instruction": "",
|
||||
"autoExecute": {
|
||||
"open": false,
|
||||
"defaultPrompt": ""
|
||||
},
|
||||
"_id": "677b59849d672185a5671b45"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,10 +18,10 @@ export function getGFSCollection(bucket: `${BucketNameEnum}`) {
|
||||
MongoDatasetFileSchema;
|
||||
MongoChatFileSchema;
|
||||
|
||||
return connectionMongo.connection.db.collection(`${bucket}.files`);
|
||||
return connectionMongo.connection.db!.collection(`${bucket}.files`);
|
||||
}
|
||||
export function getGridBucket(bucket: `${BucketNameEnum}`) {
|
||||
return new connectionMongo.mongo.GridFSBucket(connectionMongo.connection.db, {
|
||||
return new connectionMongo.mongo.GridFSBucket(connectionMongo.connection.db!, {
|
||||
bucketName: bucket,
|
||||
// @ts-ignore
|
||||
readPreference: ReadPreference.SECONDARY_PREFERRED // Read from secondary node
|
||||
@@ -52,7 +52,9 @@ export async function uploadFile({
|
||||
const stats = await fsp.stat(path);
|
||||
if (!stats.isFile()) return Promise.reject(`${path} is not a file`);
|
||||
|
||||
const readStream = fs.createReadStream(path);
|
||||
const readStream = fs.createReadStream(path, {
|
||||
highWaterMark: 256 * 1024
|
||||
});
|
||||
|
||||
// Add default metadata
|
||||
metadata.teamId = teamId;
|
||||
@@ -62,9 +64,27 @@ export async function uploadFile({
|
||||
// create a gridfs bucket
|
||||
const bucket = getGridBucket(bucketName);
|
||||
|
||||
const fileSize = stats.size;
|
||||
const chunkSizeBytes = (() => {
|
||||
// 计算理想块大小:文件大小 ÷ 目标块数(10)
|
||||
const idealChunkSize = Math.ceil(fileSize / 10);
|
||||
|
||||
// 确保块大小至少为512KB
|
||||
const minChunkSize = 512 * 1024; // 512KB
|
||||
|
||||
// 取理想块大小和最小块大小中的较大值
|
||||
let chunkSize = Math.max(idealChunkSize, minChunkSize);
|
||||
|
||||
// 将块大小向上取整到最接近的64KB的倍数,使其更整齐
|
||||
chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
|
||||
|
||||
return chunkSize;
|
||||
})();
|
||||
|
||||
const stream = bucket.openUploadStream(filename, {
|
||||
metadata,
|
||||
contentType
|
||||
contentType,
|
||||
chunkSizeBytes
|
||||
});
|
||||
|
||||
// save to gridfs
|
||||
@@ -186,20 +206,25 @@ export async function getDownloadStream({
|
||||
|
||||
export const readFileContentFromMongo = async ({
|
||||
teamId,
|
||||
tmbId,
|
||||
bucketName,
|
||||
fileId,
|
||||
isQAImport = false
|
||||
isQAImport = false,
|
||||
customPdfParse = false
|
||||
}: {
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
bucketName: `${BucketNameEnum}`;
|
||||
fileId: string;
|
||||
isQAImport?: boolean;
|
||||
customPdfParse?: boolean;
|
||||
}): Promise<{
|
||||
rawText: string;
|
||||
filename: string;
|
||||
}> => {
|
||||
const bufferId = `${fileId}-${customPdfParse}`;
|
||||
// read buffer
|
||||
const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: fileId }, undefined, {
|
||||
const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: bufferId }, undefined, {
|
||||
...readFromSecondary
|
||||
}).lean();
|
||||
if (fileBuffer) {
|
||||
@@ -227,9 +252,11 @@ export const readFileContentFromMongo = async ({
|
||||
|
||||
// Get raw text
|
||||
const { rawText } = await readRawContentByFileBuffer({
|
||||
customPdfParse,
|
||||
extension,
|
||||
isQAImport,
|
||||
teamId,
|
||||
tmbId,
|
||||
buffer: fileBuffers,
|
||||
encoding,
|
||||
metadata: {
|
||||
@@ -240,7 +267,7 @@ export const readFileContentFromMongo = async ({
|
||||
// < 14M
|
||||
if (fileBuffers.length < 14 * 1024 * 1024 && rawText.trim()) {
|
||||
MongoRawTextBuffer.create({
|
||||
sourceId: fileId,
|
||||
sourceId: bufferId,
|
||||
rawText,
|
||||
metadata: {
|
||||
filename: file.filename
|
||||
|
||||
@@ -3,13 +3,14 @@ import { PassThrough } from 'stream';
|
||||
|
||||
export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
|
||||
return new Promise<Buffer>((resolve, reject) => {
|
||||
let tmpBuffer: Buffer = Buffer.from([]);
|
||||
const chunks: Uint8Array[] = [];
|
||||
|
||||
stream.on('data', (chunk) => {
|
||||
tmpBuffer = Buffer.concat([tmpBuffer, chunk]);
|
||||
chunks.push(chunk);
|
||||
});
|
||||
stream.on('end', () => {
|
||||
resolve(tmpBuffer);
|
||||
const resultBuffer = Buffer.concat(chunks); // 一次性拼接
|
||||
resolve(resultBuffer);
|
||||
});
|
||||
stream.on('error', (err) => {
|
||||
reject(err);
|
||||
@@ -18,25 +19,26 @@ export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
|
||||
};
|
||||
|
||||
export const stream2Encoding = async (stream: NodeJS.ReadableStream) => {
|
||||
const start = Date.now();
|
||||
const copyStream = stream.pipe(new PassThrough());
|
||||
|
||||
/* get encoding */
|
||||
const buffer = await (() => {
|
||||
return new Promise<Buffer>((resolve, reject) => {
|
||||
let tmpBuffer: Buffer = Buffer.from([]);
|
||||
const chunks: Uint8Array[] = [];
|
||||
let totalLength = 0;
|
||||
|
||||
stream.on('data', (chunk) => {
|
||||
if (tmpBuffer.length < 200) {
|
||||
tmpBuffer = Buffer.concat([tmpBuffer, chunk]);
|
||||
if (totalLength < 200) {
|
||||
chunks.push(chunk);
|
||||
totalLength += chunk.length;
|
||||
|
||||
if (tmpBuffer.length >= 200) {
|
||||
resolve(tmpBuffer);
|
||||
if (totalLength >= 200) {
|
||||
resolve(Buffer.concat(chunks));
|
||||
}
|
||||
}
|
||||
});
|
||||
stream.on('end', () => {
|
||||
resolve(tmpBuffer);
|
||||
resolve(Buffer.concat(chunks));
|
||||
});
|
||||
stream.on('error', (err) => {
|
||||
reject(err);
|
||||
|
||||