Compare commits
6 commits
gru/projec ... test-openG

| Author | SHA1 | Date |
|---|---|---|
| | 63028dacb2 | |
| | b4ecfb0b79 | |
| | 331b851a78 | |
| | 50d235c42a | |
| | 9838593451 | |
| | c25cd48e72 | |
@@ -132,15 +132,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # Alibaba Cloud
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # Alibaba Cloud
     ports:
       - 3005:3000
     networks:
@@ -150,8 +150,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt:v4.9.10 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # Alibaba Cloud
     ports:
       - 3000:3000
     networks:

@@ -109,15 +109,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # Alibaba Cloud
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # Alibaba Cloud
     ports:
       - 3005:3000
     networks:
@@ -127,8 +127,8 @@ services:
       - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt:v4.9.10 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # Alibaba Cloud
     ports:
       - 3000:3000
     networks:

deploy/docker/docker-compose-opengauss.yml (new file, 218 lines)
@@ -0,0 +1,218 @@
# The database's default account and password only take effect on first run
# If you change the account or password, remember to update both the database and the project's connection settings, not just one of them
# This config file is only for quick starts and testing. For production use, be sure to change the account passwords and tune the knowledge-base parameters, shared memory, etc.
# If you cannot access dockerhub and git, you can use Alibaba Cloud (Alibaba Cloud has no arm images)

version: '3.3'
services:
  # db
  gs:
    image: opengauss/opengauss:7.0.0-RC1 # docker hub
    container_name: gs
    restart: always
    # ports: # recommended not to expose in production
    #   - 5432:5432
    networks:
      - fastgpt
    environment:
      # These settings only take effect on first run. Changing them and restarting the image has no effect; delete the persisted data and restart for changes to apply
      - GS_USER=username
      - GS_PASSWORD=password
      - GS_DB=postgres
    volumes:
      - ./opengauss/data:/var/lib/opengauss/data
    healthcheck:
      test: ['CMD-SHELL', 'netstat -lntp | grep tcp6 > /dev/null 2>&1']
      interval: 10s
      timeout: 10s
      retries: 10
  mongo:
    image: mongo:5.0.18 # dockerhub
    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/mongo:5.0.18 # Alibaba Cloud
    # image: mongo:4.4.29 # use when the CPU does not support AVX
    container_name: mongo
    restart: always
    # ports:
    #   - 27017:27017
    networks:
      - fastgpt
    command: mongod --keyFile /data/mongodb.key --replSet rs0
    environment:
      - MONGO_INITDB_ROOT_USERNAME=myusername
      - MONGO_INITDB_ROOT_PASSWORD=mypassword
    volumes:
      - ./mongo/data:/data/db
    entrypoint:
      - bash
      - -c
      - |
        openssl rand -base64 128 > /data/mongodb.key
        chmod 400 /data/mongodb.key
        chown 999:999 /data/mongodb.key
        echo 'const isInited = rs.status().ok === 1
        if(!isInited){
          rs.initiate({
            _id: "rs0",
            members: [
              { _id: 0, host: "mongo:27017" }
            ]
          })
        }' > /data/initReplicaSet.js
        # Start the MongoDB service
        exec docker-entrypoint.sh "$$@" &

        # Wait for the MongoDB service to start
        until mongo -u myusername -p mypassword --authenticationDatabase admin --eval "print('waited for connection')"; do
          echo "Waiting for MongoDB to start..."
          sleep 2
        done

        # Run the replica set initialization script
        mongo -u myusername -p mypassword --authenticationDatabase admin /data/initReplicaSet.js

        # Wait for the MongoDB process started by docker-entrypoint.sh
        wait $$!

  redis:
    image: redis:7.2-alpine
    container_name: redis
    # ports:
    #   - 6379:6379
    networks:
      - fastgpt
    restart: always
    command: |
      redis-server --requirepass mypassword --loglevel warning --maxclients 10000 --appendonly yes --save 60 10 --maxmemory 4gb --maxmemory-policy noeviction
    healthcheck:
      test: ['CMD', 'redis-cli', '-a', 'mypassword', 'ping']
      interval: 10s
      timeout: 3s
      retries: 3
      start_period: 30s
    volumes:
      - ./redis/data:/data

  # fastgpt
  sandbox:
    container_name: sandbox
    image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # git
    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # Alibaba Cloud
    networks:
      - fastgpt
    restart: always
  fastgpt-mcp-server:
    container_name: fastgpt-mcp-server
    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # git
    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # Alibaba Cloud
    ports:
      - 3005:3000
    networks:
      - fastgpt
    restart: always
    environment:
      - FASTGPT_ENDPOINT=http://fastgpt:3000
  fastgpt:
    container_name: fastgpt
    image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # git
    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # Alibaba Cloud
    # image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/ghcr.io/labring/fastgpt:v4.8.4-linuxarm64 # openGauss performs better on the arm architecture
    ports:
      - 3000:3000
    networks:
      - fastgpt
    depends_on:
      - mongo
      - gs
      - sandbox
    restart: always
    environment:
      # Externally reachable frontend address, used to auto-complete file resource paths, e.g. https://fastgpt.cn; must not be localhost. May be left empty; if empty, images sent to the model use a relative path instead of a full path and the model may forge the Host.
      - FE_DOMAIN=
      # root password; the username is root. To change the root password, change this environment variable and restart.
      - DEFAULT_ROOT_PSW=1234
      # AI Proxy address; if configured, it takes priority
      - AIPROXY_API_ENDPOINT=http://aiproxy:3000
      # AI Proxy Admin Token, matching the ADMIN_KEY environment variable in AI Proxy
      - AIPROXY_API_TOKEN=aiproxy
      # Maximum number of database connections
      - DB_MAX_LINK=30
      # Login credential key
      - TOKEN_KEY=any
      # root key, commonly used for initialization requests during upgrades
      - ROOT_KEY=root_key
      # File reading encryption
      - FILE_TOKEN_KEY=filetoken
      # MongoDB connection settings. Username myusername, password mypassword.
      - MONGODB_URI=mongodb://myusername:mypassword@mongo:27017/fastgpt?authSource=admin
      # openGauss connection settings
      - OPENGAUSS_URL=opengauss://gaussdb:Huawei12%23%24@gs:9999/test
      # Redis connection settings
      - REDIS_URL=redis://default:mypassword@redis:6379
      # sandbox address
      - SANDBOX_URL=http://sandbox:3000
      # Log level: debug, info, warn, error
      - LOG_LEVEL=info
      - STORE_LOG_LEVEL=warn
      # Maximum workflow run count
      - WORKFLOW_MAX_RUN_TIMES=1000
      # Batch execution node, maximum input length
      - WORKFLOW_MAX_LOOP_TIMES=100
      # Custom CORS; when unset, all origins are allowed (separate multiple domains with commas)
      - ALLOWED_ORIGINS=
      # Whether to enable the IP limit; disabled by default
      - USE_IP_LIMIT=false
      # Chat file expiration in days
      - CHAT_FILE_EXPIRE_TIME=7
    volumes:
      - ./config.json:/app/data/config.json

  # AI Proxy
  aiproxy:
    image: ghcr.io/labring/aiproxy:v0.1.7
    # image: registry.cn-hangzhou.aliyuncs.com/labring/aiproxy:v0.1.7 # Alibaba Cloud
    container_name: aiproxy
    restart: unless-stopped
    depends_on:
      aiproxy_pg:
        condition: service_healthy
    networks:
      - fastgpt
    environment:
      # Matches AIPROXY_API_TOKEN in fastgpt
      - ADMIN_KEY=aiproxy
      # Error log detail retention (hours)
      - LOG_DETAIL_STORAGE_HOURS=1
      # Database connection address
      - SQL_DSN=postgres://postgres:aiproxy@aiproxy_pg:5432/aiproxy
      # Maximum retry count
      - RETRY_TIMES=3
      # Billing not needed
      - BILLING_ENABLED=false
      # Strict model checks not needed
      - DISABLE_MODEL_CONFIG=true
    healthcheck:
      test: ['CMD', 'curl', '-f', 'http://localhost:3000/api/status']
      interval: 5s
      timeout: 5s
      retries: 10
  aiproxy_pg:
    image: pgvector/pgvector:0.8.0-pg15 # docker hub
    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # Alibaba Cloud
    restart: unless-stopped
    container_name: aiproxy_pg
    volumes:
      - ./aiproxy_pg:/var/lib/postgresql/data
    networks:
      - fastgpt
    environment:
      TZ: Asia/Shanghai
      POSTGRES_USER: postgres
      POSTGRES_DB: aiproxy
      POSTGRES_PASSWORD: aiproxy
    healthcheck:
      test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy']
      interval: 5s
      timeout: 5s
      retries: 10
networks:
  fastgpt:

@@ -96,15 +96,15 @@ services:
   # fastgpt
   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # Alibaba Cloud
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # Alibaba Cloud
     ports:
       - 3005:3000
     networks:
@@ -114,8 +114,8 @@ services:
      - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt:v4.9.10 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # Alibaba Cloud
     ports:
       - 3000:3000
     networks:

@@ -72,15 +72,15 @@ services:

   sandbox:
     container_name: sandbox
-    image: ghcr.io/labring/fastgpt-sandbox:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # Alibaba Cloud
     networks:
       - fastgpt
     restart: always
   fastgpt-mcp-server:
     container_name: fastgpt-mcp-server
-    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # Alibaba Cloud
     ports:
       - 3005:3000
     networks:
@@ -90,8 +90,8 @@ services:
      - FASTGPT_ENDPOINT=http://fastgpt:3000
   fastgpt:
     container_name: fastgpt
-    image: ghcr.io/labring/fastgpt:v4.9.9 # git
-    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.9 # Alibaba Cloud
+    image: ghcr.io/labring/fastgpt:v4.9.10 # git
+    # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # Alibaba Cloud
     ports:
       - 3000:3000
     networks:

@@ -1,5 +1,5 @@
 ---
-title: 'V4.9.10 (in progress)'
+title: 'V4.9.10'
 description: 'FastGPT V4.9.10 release notes'
 icon: 'upgrade'
 draft: false
@@ -7,13 +7,28 @@ toc: true
 weight: 790
 ---

+## Upgrade guide
+
+Important: this update rebuilds the full-text index. While the index is being rebuilt, full-text search returns empty results; rebuilding 7 million full-text index entries takes roughly 25 minutes on 4c16g. For a seamless upgrade you must implement your own table synchronization.
+
+### 1. Back up your data
+
+### 2. Update image tags
+
+- Update the FastGPT image tag: v4.9.10
+- Update the FastGPT commercial-edition image tag: v4.9.10
+- mcp_server: no update needed
+- Sandbox: no update needed
+- AIProxy: no update needed
+
 ## 🚀 New

 1. Support the PG `systemEnv.hnswMaxScanTuples` setting to raise the total amount of data covered by iterative search.
-2. Workflows now use single-direction inbound and outbound connections, with quick adding of the next node.
-3. Feishu and Yuque knowledge bases opened up to the open-source edition.
-4. Latest gemini and claude model presets.
+2. Knowledge-base preprocessing adds a "chunk condition" parameter that can skip chunking in certain cases.
+3. Knowledge-base preprocessing adds a "paragraph-first" mode with a configurable maximum paragraph depth. The former "length-first" mode no longer embeds paragraph-first logic.
+4. Workflows now use single-direction inbound and outbound connections, with quick adding of the next node.
+5. Feishu and Yuque knowledge bases opened up to the open-source edition.
+6. Latest gemini and claude model presets.

 ## ⚙️ Optimizations

@@ -31,4 +46,5 @@ weight: 790
 3. Tool-call mode did not save the reasoning output.
 4. The knowledge-base indexSize parameter did not take effect.
 5. After nesting workflows 2 levels deep, preview citations and context were fetched incorrectly.
 6. An extra leading space appeared when converting xlsx to Markdown.
+7. When reading Markdown files, Base64 images were not converted and saved separately.
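Reviewer note on item 1 of the "New" list above: a minimal TypeScript sketch of how such a setting could be turned into session settings for a pgvector HNSW query. The property names come from the changelog; the config shape, helper name, and default values are assumptions, not FastGPT's actual implementation (the openGauss controller in this PR, for instance, sets ob_hnsw_ef_search instead).

// Sketch only: property names from the changelog; the config shape is an assumption.
type SystemEnv = {
  hnswEfSearch?: number; // HNSW candidate-list size
  hnswMaxScanTuples?: number; // cap on tuples scanned during iterative search
};

// Hypothetical helper: pgvector >= 0.8.0 exposes hnsw.max_scan_tuples for
// iterative index scans; FastGPT's actual SQL may differ.
const buildHnswSessionSettings = (systemEnv?: SystemEnv): string =>
  [
    `SET hnsw.ef_search = ${systemEnv?.hnswEfSearch ?? 100};`,
    `SET hnsw.max_scan_tuples = ${systemEnv?.hnswMaxScanTuples ?? 20000};`
  ].join('\n');

console.log(buildHnswSessionSettings({ hnswMaxScanTuples: 100000 }));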
docSite/content/zh-cn/docs/development/upgrading/4911.md (new file, 21 lines)
@@ -0,0 +1,21 @@
---
title: 'V4.9.11 (in progress)'
description: 'FastGPT V4.9.11 release notes'
icon: 'upgrade'
draft: false
toc: true
weight: 789
---


## 🚀 New

1. In workflows, sub-workflow version control can be set to "keep latest version", so no manual update is needed.

## ⚙️ Optimizations


## 🐛 Fixes

1. In workflows, global system tools declared by an administrator could not be version-managed.
env.d.ts (vendored, 1 line added)
@@ -15,6 +15,7 @@ declare global {
     MONGODB_LOG_URI?: string;
     PG_URL: string;
     OCEANBASE_URL: string;
+    OPENGAUSS_URL: string;
     MILVUS_ADDRESS: string;
     MILVUS_TOKEN: string;
     SANDBOX_URL: string;

@@ -7,6 +7,10 @@ export const CUSTOM_SPLIT_SIGN = '-----CUSTOM_SPLIT_SIGN-----';
 type SplitProps = {
   text: string;
   chunkSize: number;
+
+  paragraphChunkDeep?: number; // Paragraph deep
+  paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
+
   maxSize?: number;
   overlapRatio?: number;
   customReg?: string[];
@@ -108,6 +112,8 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   let {
     text = '',
     chunkSize,
+    paragraphChunkDeep = 5,
+    paragraphChunkMinSize = 100,
     maxSize = defaultMaxChunkSize,
     overlapRatio = 0.15,
     customReg = []
@@ -123,7 +129,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   text = text.replace(/(```[\s\S]*?```|~~~[\s\S]*?~~~)/g, function (match) {
     return match.replace(/\n/g, codeBlockMarker);
   });
-  // 2. Table handling - extract tables separately and merge their headers
+  // 2. Markdown table handling - extract tables separately and merge their headers
   const tableReg =
     /(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n?)*)(?:\n|$)/g;
   const tableDataList = text.match(tableReg);
@@ -143,25 +149,40 @@ const commonSplit = (props: SplitProps): SplitResponse => {
   text = text.replace(/(\r?\n|\r){3,}/g, '\n\n\n');

   // The larger maxLen is, the next sentence is less likely to trigger splitting
-  const markdownIndex = 4;
-  const forbidOverlapIndex = 8;
+  const customRegLen = customReg.length;
+  const markdownIndex = paragraphChunkDeep - 1;
+  const forbidOverlapIndex = customRegLen + markdownIndex + 4;
+
+  const markdownHeaderRules = ((deep?: number): { reg: RegExp; maxLen: number }[] => {
+    if (!deep || deep === 0) return [];
+
+    const maxDeep = Math.min(deep, 8); // Maximum 8 levels
+    const rules: { reg: RegExp; maxLen: number }[] = [];
+
+    for (let i = 1; i <= maxDeep; i++) {
+      const hashSymbols = '#'.repeat(i);
+      rules.push({
+        reg: new RegExp(`^(${hashSymbols}\\s[^\\n]+\\n)`, 'gm'),
+        maxLen: chunkSize
+      });
+    }
+
+    return rules;
+  })(paragraphChunkDeep);

   const stepReges: { reg: RegExp | string; maxLen: number }[] = [
     ...customReg.map((text) => ({
       reg: text.replaceAll('\\n', '\n'),
       maxLen: chunkSize
     })),
-    { reg: /^(#\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(##\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(###\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(####\s[^\n]+\n)/gm, maxLen: chunkSize },
-    { reg: /^(#####\s[^\n]+\n)/gm, maxLen: chunkSize },
+    ...markdownHeaderRules,

     { reg: /([\n](```[\s\S]*?```|~~~[\s\S]*?~~~))/g, maxLen: maxSize }, // code block
     // keep the HTML Table tag as intact as possible
     {
       reg: /(\n\|(?:(?:[^\n|]+\|){1,})\n\|(?:[:\-\s]+\|){1,}\n(?:\|(?:[^\n|]+\|)*\n)*)/g,
-      maxLen: Math.min(chunkSize * 1.5, maxSize)
-    }, // keep the table as intact as possible
+      maxLen: chunkSize
+    }, // keep the Markdown table as intact as possible
     { reg: /(\n{2,})/g, maxLen: chunkSize },
     { reg: /([\n])/g, maxLen: chunkSize },
     // ------ There's no overlap on the top
@@ -172,12 +193,10 @@ const commonSplit = (props: SplitProps): SplitResponse => {
     { reg: /([,]|,\s)/g, maxLen: chunkSize }
   ];

-  const customRegLen = customReg.length;
   const checkIsCustomStep = (step: number) => step < customRegLen;
   const checkIsMarkdownSplit = (step: number) =>
     step >= customRegLen && step <= markdownIndex + customRegLen;
-
-  const checkForbidOverlap = (step: number) => step <= forbidOverlapIndex + customRegLen;
+  const checkForbidOverlap = (step: number) => step <= forbidOverlapIndex;

   // if use markdown title split, Separate record title
   const getSplitTexts = ({ text, step }: { text: string; step: number }) => {
@@ -301,6 +320,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
     const splitTexts = getSplitTexts({ text, step });

     const chunks: string[] = [];
+
     for (let i = 0; i < splitTexts.length; i++) {
       const item = splitTexts[i];

@@ -443,7 +463,6 @@ const commonSplit = (props: SplitProps): SplitResponse => {
 */
 export const splitText2Chunks = (props: SplitProps): SplitResponse => {
   let { text = '' } = props;
-  const start = Date.now();
   const splitWithCustomSign = text.split(CUSTOM_SPLIT_SIGN);

   const splitResult = splitWithCustomSign.map((item) => {

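Reviewer note on the header-rule change above: the hardcoded five `#`-level regexes are replaced by a generator driven by paragraphChunkDeep. A standalone sketch reusing the diff's own logic (the chunkSize value is illustrative) shows the rules produced for a depth of 3:

// Standalone sketch of the rule generator from the diff; chunkSize is illustrative.
const chunkSize = 512;

const markdownHeaderRules = ((deep?: number): { reg: RegExp; maxLen: number }[] => {
  if (!deep || deep === 0) return [];
  const maxDeep = Math.min(deep, 8); // at most 8 heading levels (# through ########)
  const rules: { reg: RegExp; maxLen: number }[] = [];
  for (let i = 1; i <= maxDeep; i++) {
    const hashSymbols = '#'.repeat(i);
    rules.push({
      // `^(#\s...)` requires a space right after the hashes, so the level-1 rule
      // does not accidentally match "## ..." headings.
      reg: new RegExp(`^(${hashSymbols}\\s[^\\n]+\\n)`, 'gm'),
      maxLen: chunkSize
    });
  }
  return rules;
})(3);

// Three rules: /^(#\s[^\n]+\n)/gm, /^(##\s[^\n]+\n)/gm, /^(###\s[^\n]+\n)/gm
console.log(markdownHeaderRules.map((r) => String(r.reg)));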
@@ -10,6 +10,8 @@ import { AppTypeEnum } from './constants';
 import { AppErrEnum } from '../../common/error/code/app';
 import { PluginErrEnum } from '../../common/error/code/plugin';
 import { i18nT } from '../../../web/i18n/utils';
+import appErrList from '../../common/error/code/app';
+import pluginErrList from '../../common/error/code/plugin';

 export const getDefaultAppForm = (): AppSimpleEditFormType => {
   return {
@@ -190,17 +192,10 @@ export const getAppType = (config?: WorkflowTemplateBasicType | AppSimpleEditFor
   return '';
 };

-export const formatToolError = (error?: string) => {
-  const unExistError: Array<string> = [
-    AppErrEnum.unAuthApp,
-    AppErrEnum.unExist,
-    PluginErrEnum.unAuth,
-    PluginErrEnum.unExist
-  ];
+export const formatToolError = (error?: any) => {
+  if (!error || typeof error !== 'string') return;

-  if (error && unExistError.includes(error)) {
-    return i18nT('app:un_auth');
-  } else {
-    return error;
-  }
+  const errorText = appErrList[error]?.message || pluginErrList[error]?.message;
+
+  return errorText || error;
 };

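A quick sketch of the new formatToolError behavior. The error-list shapes below are assumptions that mirror the lookup in the diff (appErrList/pluginErrList mapping error codes to entries with a message); the actual maps and messages live in the error/code modules:

// Hypothetical shapes, mirroring the lookup in the diff.
const appErrList: Record<string, { message: string }> = {
  unAuthApp: { message: 'app:un_auth_app' }
};
const pluginErrList: Record<string, { message: string }> = {
  unExist: { message: 'plugin:not_exist' }
};

const formatToolError = (error?: any) => {
  // Non-string errors (or empty values) are ignored.
  if (!error || typeof error !== 'string') return;
  // Known error codes map to their message; anything else passes through.
  const errorText = appErrList[error]?.message || pluginErrList[error]?.message;
  return errorText || error;
};

console.log(formatToolError('unExist')); // 'plugin:not_exist'
console.log(formatToolError('boom')); // 'boom' (unknown codes pass through)
console.log(formatToolError({ any: 'object' })); // undefined (non-string input)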
packages/global/core/dataset/api.d.ts (vendored, 7 lines added)
@@ -124,6 +124,13 @@ export type PgSearchRawType = {
   collection_id: string;
   score: number;
 };

+export type GsSearchRawType = {
+  id: string;
+  collection_id: string;
+  score: number;
+};
+
 export type PushDatasetDataChunkProps = {
   q: string; // embedding content
   a?: string; // bonus content

@@ -120,7 +120,6 @@ export const computeChunkSize = (params: {

   return Math.min(params.chunkSize ?? chunkAutoChunkSize, getLLMMaxChunkSize(params.llmModel));
 };

 export const computeChunkSplitter = (params: {
   chunkSettingMode?: ChunkSettingModeEnum;
   chunkSplitMode?: DataChunkSplitModeEnum;
@@ -129,8 +128,21 @@ export const computeChunkSplitter = (params: {
   if (params.chunkSettingMode === ChunkSettingModeEnum.auto) {
     return undefined;
   }
-  if (params.chunkSplitMode === DataChunkSplitModeEnum.size) {
+  if (params.chunkSplitMode !== DataChunkSplitModeEnum.char) {
     return undefined;
   }
   return params.chunkSplitter;
 };
+
+export const computeParagraphChunkDeep = (params: {
+  chunkSettingMode?: ChunkSettingModeEnum;
+  chunkSplitMode?: DataChunkSplitModeEnum;
+  paragraphChunkDeep?: number;
+}) => {
+  if (params.chunkSettingMode === ChunkSettingModeEnum.auto) {
+    return 5;
+  }
+  if (params.chunkSplitMode === DataChunkSplitModeEnum.paragraph) {
+    return params.paragraphChunkDeep;
+  }
+  return 0;
+};

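The three branches of computeParagraphChunkDeep above summarize as: auto mode uses a fixed depth of 5, paragraph mode honors the configured depth, and everything else disables paragraph splitting. A small self-contained sketch (the enum string values are stand-ins for illustration, not the repo's actual constants):

// Illustrative stand-ins for the real enums referenced in the diff.
enum ChunkSettingModeEnum { auto = 'auto', custom = 'custom' }
enum DataChunkSplitModeEnum { paragraph = 'paragraph', size = 'size', char = 'char' }

const computeParagraphChunkDeep = (params: {
  chunkSettingMode?: ChunkSettingModeEnum;
  chunkSplitMode?: DataChunkSplitModeEnum;
  paragraphChunkDeep?: number;
}) => {
  // Auto mode: fixed default depth of 5 heading levels.
  if (params.chunkSettingMode === ChunkSettingModeEnum.auto) return 5;
  // Paragraph mode: honor the user's configured depth.
  if (params.chunkSplitMode === DataChunkSplitModeEnum.paragraph) return params.paragraphChunkDeep;
  // Size/char modes: paragraph-first splitting disabled.
  return 0;
};

console.log(computeParagraphChunkDeep({ chunkSettingMode: ChunkSettingModeEnum.auto })); // 5
console.log(
  computeParagraphChunkDeep({
    chunkSettingMode: ChunkSettingModeEnum.custom,
    chunkSplitMode: DataChunkSplitModeEnum.paragraph,
    paragraphChunkDeep: 3
  })
); // 3
console.log(
  computeParagraphChunkDeep({
    chunkSettingMode: ChunkSettingModeEnum.custom,
    chunkSplitMode: DataChunkSplitModeEnum.size
  })
); // 0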
packages/global/core/dataset/type.d.ts (vendored, 8 lines changed)
@@ -9,7 +9,8 @@ import type {
   DatasetTypeEnum,
   SearchScoreTypeEnum,
   TrainingModeEnum,
-  ChunkSettingModeEnum
+  ChunkSettingModeEnum,
+  ChunkTriggerConfigTypeEnum
 } from './constants';
 import type { DatasetPermission } from '../../support/permission/dataset/controller';
 import type { APIFileServer, FeishuServer, YuqueServer } from './apiDataset';
@@ -37,11 +38,10 @@ export type ChunkSettingsType = {
   paragraphChunkAIMode?: ParagraphChunkAIModeEnum;
   paragraphChunkDeep?: number; // Paragraph deep
   paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
-  paragraphChunkMaxSize?: number; // Paragraph max size, if too large, it will split
   // Size split
-  chunkSize?: number;
+  chunkSize?: number; // chunk/qa chunk size, Paragraph max chunk size.
   // Char split
-  chunkSplitter?: string;
+  chunkSplitter?: string; // chunk/qa chunk splitter
   indexSize?: number;

   qaPrompt?: string;

packages/global/core/workflow/type/node.d.ts (vendored, 1 line changed)
@@ -59,7 +59,6 @@ export type FlowNodeCommonType = {
 };

 export type PluginDataType = {
-  version?: string;
   diagram?: string;
   userGuide?: string;
   courseUrl?: string;

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4816",
   "name": "DingTalk webhook",
   "avatar": "plugins/dingding",
   "intro": "Sends a webhook request to a DingTalk bot.",

@@ -1,6 +1,5 @@
 {
   "author": "Menghuan1918",
-  "version": "488",
   "name": "PDF recognition",
   "avatar": "plugins/doc2x",
   "intro": "Sends a PDF file to Doc2X for parsing and returns structured text (markdown) with LaTeX formulas; accepts a String URL or a file-link variable from the flow output",

@@ -1,6 +1,5 @@
 {
   "author": "Menghuan1918",
-  "version": "488",
   "name": "Doc2X service",
   "avatar": "plugins/doc2x",
   "intro": "Sends incoming images or PDF files to Doc2X for parsing and returns markdown text with LaTeX formulas.",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4816",
   "name": "WeCom webhook",
   "avatar": "plugins/qiwei",
   "intro": "Sends a webhook request to a WeCom bot. Only usable in internal groups.",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4811",
   "name": "Bing search",
   "avatar": "core/workflow/template/bing",
   "intro": "Search on Bing.",

@@ -1,6 +1,5 @@
 {
   "author": "silencezhang",
-  "version": "4811",
   "name": "Database connection",
   "avatar": "core/workflow/template/datasource",
   "intro": "Connects to common databases and executes SQL",

@@ -1,6 +1,5 @@
 {
   "author": "collin",
-  "version": "4817",
   "name": "Flow wait",
   "avatar": "core/workflow/template/sleep",
   "intro": "Makes the workflow wait a specified time before continuing",

@@ -1,6 +1,5 @@
 {
   "author": "silencezhang",
-  "version": "4817",
   "name": "Basic charts",
   "avatar": "core/workflow/template/baseChart",
   "intro": "Generates charts from data; can produce bar, line, and pie charts according to chartType",

@@ -1,6 +1,5 @@
 {
   "author": "silencezhang",
-  "version": "486",
   "name": "BI charts",
   "avatar": "core/workflow/template/BI",
   "intro": "BI chart feature that can generate common charts such as pie, bar, and line charts",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo web search",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "Web search with DuckDuckGo",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo image search",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "Image search with DuckDuckGo",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo news search",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "News search with DuckDuckGo",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo video search",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "Video search with DuckDuckGo",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "DuckDuckGo service",
   "avatar": "core/workflow/template/duckduckgo",
   "intro": "DuckDuckGo service, including web search, image search, news search, and more.",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "488",
   "name": "Feishu webhook",
   "avatar": "core/app/templates/plugin-feishu",
   "intro": "Sends a webhook request to a Feishu bot.",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "Web content fetch",
   "avatar": "core/workflow/template/fetchUrl",
   "intro": "Fetches the content of a web page link and outputs it in Markdown format; only static sites are supported.",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "481",
   "templateType": "tools",
   "name": "Get current time",
   "avatar": "core/workflow/template/getTime",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4811",
   "name": "Google search",
   "avatar": "core/workflow/template/google",
   "intro": "Search on Google.",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "486",
   "name": "Math expression execution",
   "avatar": "core/workflow/template/mathCall",
   "intro": "A tool for evaluating mathematical expressions; runs the expression with the js expr-eval library and returns the result.",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4816",
   "name": "Search XNG search",
   "avatar": "core/workflow/template/searxng",
   "intro": "Searches using the Search XNG service.",

@@ -1,6 +1,5 @@
 {
   "author": "cloudpense",
-  "version": "1.0.0",
   "name": "Email sending",
   "avatar": "plugins/email",
   "intro": "Sends email over SMTP (nodemailer)",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "489",
   "name": "Text processing",
   "avatar": "/imgs/workflow/textEditor.svg",
   "intro": "Processes fixed or incoming text and outputs it; non-string data is ultimately converted to a string.",

@@ -1,6 +1,5 @@
 {
   "author": "",
-  "version": "4811",
   "name": "Wiki search",
   "avatar": "core/workflow/template/wiki",
   "intro": "Looks up definitions on Wiki.",

@@ -3,5 +3,6 @@ export const DatasetVectorTableName = 'modeldata';

 export const PG_ADDRESS = process.env.PG_URL;
 export const OCEANBASE_ADDRESS = process.env.OCEANBASE_URL;
+export const OPENGAUSS_ADDRESS = process.env.OPENGAUSS_URL;
 export const MILVUS_ADDRESS = process.env.MILVUS_ADDRESS;
 export const MILVUS_TOKEN = process.env.MILVUS_TOKEN;

@@ -1,10 +1,11 @@
 /* vector crud */
 import { PgVectorCtrl } from './pg';
 import { ObVectorCtrl } from './oceanbase';
+import { GsVectorCtrl } from './opengauss';
 import { getVectorsByText } from '../../core/ai/embedding';
 import { type DelDatasetVectorCtrlProps, type InsertVectorProps } from './controller.d';
 import { type EmbeddingModelItemType } from '@fastgpt/global/core/ai/model.d';
-import { MILVUS_ADDRESS, PG_ADDRESS, OCEANBASE_ADDRESS } from './constants';
+import { MILVUS_ADDRESS, PG_ADDRESS, OCEANBASE_ADDRESS, OPENGAUSS_ADDRESS } from './constants';
 import { MilvusCtrl } from './milvus';
 import { setRedisCache, getRedisCache, delRedisCache, CacheKeyEnum } from '../redis/cache';
 import { throttle } from 'lodash';
@@ -14,6 +15,7 @@ const getVectorObj = () => {
   if (PG_ADDRESS) return new PgVectorCtrl();
   if (OCEANBASE_ADDRESS) return new ObVectorCtrl();
   if (MILVUS_ADDRESS) return new MilvusCtrl();
+  if (OPENGAUSS_ADDRESS) return new GsVectorCtrl();

   return new PgVectorCtrl();
 };

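One detail worth noting in getVectorObj above: the checks run in order, so OPENGAUSS_URL is only honored when PG_URL, OCEANBASE_URL, and MILVUS_ADDRESS are all unset, and an unset everything still falls back to PgVectorCtrl. A minimal sketch of the selection order (controllers stubbed as strings):

// Sketch of the selection order in getVectorObj, with controllers stubbed.
const pickVectorCtrl = (env: Record<string, string | undefined>) => {
  if (env.PG_URL) return 'PgVectorCtrl';
  if (env.OCEANBASE_URL) return 'ObVectorCtrl';
  if (env.MILVUS_ADDRESS) return 'MilvusCtrl';
  if (env.OPENGAUSS_URL) return 'GsVectorCtrl';
  return 'PgVectorCtrl'; // default fallback
};

console.log(pickVectorCtrl({ OPENGAUSS_URL: 'opengauss://...' })); // 'GsVectorCtrl'
console.log(pickVectorCtrl({ PG_URL: 'postgres://...', OPENGAUSS_URL: 'opengauss://...' })); // 'PgVectorCtrl'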
packages/service/common/vectorDB/opengauss/controller.ts (new file, 188 lines)
@@ -0,0 +1,188 @@
import { delay } from '@fastgpt/global/common/system/utils';
import { addLog } from '../../system/log';
import { Pool } from 'pg';
import type { QueryResultRow } from 'pg';
import { OPENGAUSS_ADDRESS } from '../constants';

export const connectGs = async (): Promise<Pool> => {
  if (global.gsClient) {
    return global.gsClient;
  }

  global.gsClient = new Pool({
    connectionString: OPENGAUSS_ADDRESS,
    max: Number(process.env.DB_MAX_LINK || 20),
    min: 10,
    keepAlive: true,
    idleTimeoutMillis: 600000,
    connectionTimeoutMillis: 20000,
    query_timeout: 30000,
    statement_timeout: 40000,
    idle_in_transaction_session_timeout: 60000
  });

  global.gsClient.on('error', async (err) => {
    addLog.error(`openGauss error`, err);
    global.gsClient?.end();
    global.gsClient = null;

    await delay(1000);
    addLog.info(`Retry connect openGauss`);
    connectGs();
  });

  try {
    await global.gsClient.connect();
    console.log('openGauss connected');
    return global.gsClient;
  } catch (error) {
    addLog.error(`openGauss connect error`, error);
    global.gsClient?.end();
    global.gsClient = null;

    await delay(1000);
    addLog.info(`Retry connect openGauss`);

    return connectGs();
  }
};

type WhereProps = (string | [string, string | number])[];
type GetProps = {
  fields?: string[];
  where?: WhereProps;
  order?: { field: string; mode: 'DESC' | 'ASC' | string }[];
  limit?: number;
  offset?: number;
};

type DeleteProps = {
  where: WhereProps;
};

type ValuesProps = { key: string; value?: string | number }[];
type UpdateProps = {
  values: ValuesProps;
  where: WhereProps;
};
type InsertProps = {
  values: ValuesProps[];
};

class GsClass {
  private getWhereStr(where?: WhereProps) {
    return where
      ? `WHERE ${where
          .map((item) => {
            if (typeof item === 'string') {
              return item;
            }
            const val = typeof item[1] === 'number' ? item[1] : `'${String(item[1])}'`;
            return `${item[0]}=${val}`;
          })
          .join(' ')}`
      : '';
  }
  private getUpdateValStr(values: ValuesProps) {
    return values
      .map((item) => {
        const val =
          typeof item.value === 'number'
            ? item.value
            : `'${String(item.value).replace(/\'/g, '"')}'`;

        return `${item.key}=${val}`;
      })
      .join(',');
  }
  private getInsertValStr(values: ValuesProps[]) {
    return values
      .map(
        (items) =>
          `(${items
            .map((item) =>
              typeof item.value === 'number'
                ? item.value
                : `'${String(item.value).replace(/\'/g, '"')}'`
            )
            .join(',')})`
      )
      .join(',');
  }
  async select<T extends QueryResultRow = any>(table: string, props: GetProps) {
    const sql = `SELECT ${
      !props.fields || props.fields?.length === 0 ? '*' : props.fields?.join(',')
    }
      FROM ${table}
      ${this.getWhereStr(props.where)}
      ${
        props.order
          ? `ORDER BY ${props.order.map((item) => `${item.field} ${item.mode}`).join(',')}`
          : ''
      }
      LIMIT ${props.limit || 10} OFFSET ${props.offset || 0}
    `;

    const gs = await connectGs();
    return gs.query<T>(sql);
  }
  async count(table: string, props: GetProps) {
    const sql = `SELECT COUNT(${props?.fields?.[0] || '*'})
      FROM ${table}
      ${this.getWhereStr(props.where)}
    `;

    const gs = await connectGs();
    return gs.query(sql).then((res) => Number(res.rows[0]?.count || 0));
  }
  async delete(table: string, props: DeleteProps) {
    const sql = `DELETE FROM ${table} ${this.getWhereStr(props.where)}`;
    const gs = await connectGs();
    return gs.query(sql);
  }
  async update(table: string, props: UpdateProps) {
    if (props.values.length === 0) {
      return {
        rowCount: 0
      };
    }

    const sql = `UPDATE ${table} SET ${this.getUpdateValStr(props.values)} ${this.getWhereStr(
      props.where
    )}`;
    const gs = await connectGs();
    return gs.query(sql);
  }
  async insert(table: string, props: InsertProps) {
    if (props.values.length === 0) {
      return {
        rowCount: 0,
        rows: []
      };
    }

    const fields = props.values[0].map((item) => item.key).join(',');
    const sql = `INSERT INTO ${table} (${fields}) VALUES ${this.getInsertValStr(
      props.values
    )} RETURNING id`;

    const gs = await connectGs();
    return gs.query<{ id: string }>(sql);
  }
  async query<T extends QueryResultRow = any>(sql: string) {
    const gs = await connectGs();
    const start = Date.now();
    return gs.query<T>(sql).then((res) => {
      const time = Date.now() - start;

      if (time > 300) {
        addLog.warn(`gs query time: ${time}ms, sql: ${sql}`);
      }

      return res;
    });
  }
}

export const GsClient = new GsClass();
export const Gs = global.gsClient;

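A short usage sketch for the GsClient helper above. The table and column names come from the vector table created in the next file; the values and the exact call site are illustrative, not code from this PR:

// Illustrative usage of the GsClient query builder from the new controller.
import { GsClient } from './controller';

const demo = async () => {
  // Builds: SELECT id,collection_id FROM modeldata WHERE team_id='t1' LIMIT 10 OFFSET 0
  const { rows } = await GsClient.select<{ id: string; collection_id: string }>('modeldata', {
    fields: ['id', 'collection_id'],
    where: [['team_id', 't1']],
    limit: 10
  });

  // Builds: INSERT INTO modeldata (team_id,dataset_id) VALUES ('t1','d1') RETURNING id
  const inserted = await GsClient.insert('modeldata', {
    values: [[{ key: 'team_id', value: 't1' }, { key: 'dataset_id', value: 'd1' }]]
  });

  console.log(rows.length, inserted.rows[0]?.id);
};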
packages/service/common/vectorDB/opengauss/index.ts (new file, 253 lines)
@@ -0,0 +1,253 @@
/* pg vector crud */
import { DatasetVectorTableName } from '../constants';
import { delay } from '@fastgpt/global/common/system/utils';
import { GsClient, connectGs } from './controller';
import { GsSearchRawType } from '@fastgpt/global/core/dataset/api';
import type {
  DelDatasetVectorCtrlProps,
  EmbeddingRecallCtrlProps,
  EmbeddingRecallResponse,
  InsertVectorControllerProps
} from '../controller.d';
import dayjs from 'dayjs';
import { addLog } from '../../system/log';

export class GsVectorCtrl {
  constructor() {}
  init = async () => {
    try {
      await connectGs();
      await GsClient.query(`
        CREATE EXTENSION IF NOT EXISTS vector;
        CREATE TABLE IF NOT EXISTS ${DatasetVectorTableName} (
            id BIGSERIAL PRIMARY KEY,
            vector VECTOR(1536) NOT NULL,
            team_id VARCHAR(50) NOT NULL,
            dataset_id VARCHAR(50) NOT NULL,
            collection_id VARCHAR(50) NOT NULL,
            createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );
      `);

      await GsClient.query(
        `CREATE INDEX CONCURRENTLY IF NOT EXISTS vector_index ON ${DatasetVectorTableName} USING hnsw (vector vector_ip_ops) WITH (m = 32, ef_construction = 128);`
      );
      await GsClient.query(
        `CREATE INDEX CONCURRENTLY IF NOT EXISTS team_dataset_collection_index ON ${DatasetVectorTableName} USING btree(team_id, dataset_id, collection_id);`
      );
      await GsClient.query(
        `CREATE INDEX CONCURRENTLY IF NOT EXISTS create_time_index ON ${DatasetVectorTableName} USING btree(createtime);`
      );

      addLog.info('init pg successful');
    } catch (error) {
      addLog.error('init pg error', error);
    }
  };
  insert = async (props: InsertVectorControllerProps): Promise<{ insertId: string }> => {
    const { teamId, datasetId, collectionId, vector, retry = 3 } = props;

    try {
      const { rowCount, rows } = await GsClient.insert(DatasetVectorTableName, {
        values: [
          [
            { key: 'vector', value: `[${vector}]` },
            { key: 'team_id', value: String(teamId) },
            { key: 'dataset_id', value: String(datasetId) },
            { key: 'collection_id', value: String(collectionId) }
          ]
        ]
      });

      if (rowCount === 0) {
        return Promise.reject('insertDatasetData: no insert');
      }

      return {
        insertId: rows[0].id
      };
    } catch (error) {
      if (retry <= 0) {
        return Promise.reject(error);
      }
      await delay(500);
      return this.insert({
        ...props,
        retry: retry - 1
      });
    }
  };
  delete = async (props: DelDatasetVectorCtrlProps): Promise<any> => {
    const { teamId, retry = 2 } = props;

    const teamIdWhere = `team_id='${String(teamId)}' AND`;

    const where = await (() => {
      if ('id' in props && props.id) return `${teamIdWhere} id=${props.id}`;

      if ('datasetIds' in props && props.datasetIds) {
        const datasetIdWhere = `dataset_id IN (${props.datasetIds
          .map((id) => `'${String(id)}'`)
          .join(',')})`;

        if ('collectionIds' in props && props.collectionIds) {
          return `${teamIdWhere} ${datasetIdWhere} AND collection_id IN (${props.collectionIds
            .map((id) => `'${String(id)}'`)
            .join(',')})`;
        }

        return `${teamIdWhere} ${datasetIdWhere}`;
      }

      if ('idList' in props && Array.isArray(props.idList)) {
        if (props.idList.length === 0) return;
        return `${teamIdWhere} id IN (${props.idList.map((id) => String(id)).join(',')})`;
      }
      return Promise.reject('deleteDatasetData: no where');
    })();

    if (!where) return;

    try {
      await GsClient.delete(DatasetVectorTableName, {
        where: [where]
      });
    } catch (error) {
      if (retry <= 0) {
        return Promise.reject(error);
      }
      await delay(500);
      return this.delete({
        ...props,
        retry: retry - 1
      });
    }
  };
  embRecall = async (props: EmbeddingRecallCtrlProps): Promise<EmbeddingRecallResponse> => {
    const {
      teamId,
      datasetIds,
      vector,
      limit,
      forbidCollectionIdList,
      filterCollectionIdList,
      retry = 2
    } = props;

    // Get forbid collection
    const formatForbidCollectionIdList = (() => {
      if (!filterCollectionIdList) return forbidCollectionIdList;
      const list = forbidCollectionIdList
        .map((id) => String(id))
        .filter((id) => !filterCollectionIdList.includes(id));
      return list;
    })();
    const forbidCollectionSql =
      formatForbidCollectionIdList.length > 0
        ? `AND collection_id NOT IN (${formatForbidCollectionIdList.map((id) => `'${id}'`).join(',')})`
        : '';

    // Filter by collectionId
    const formatFilterCollectionId = (() => {
      if (!filterCollectionIdList) return;

      return filterCollectionIdList
        .map((id) => String(id))
        .filter((id) => !forbidCollectionIdList.includes(id));
    })();
    const filterCollectionIdSql = formatFilterCollectionId
      ? `AND collection_id IN (${formatFilterCollectionId.map((id) => `'${id}'`).join(',')})`
      : '';
    // Empty data
    if (formatFilterCollectionId && formatFilterCollectionId.length === 0) {
      return { results: [] };
    }

    try {
      const results: any = await GsClient.query(
        `BEGIN;
          SET ob_hnsw_ef_search = ${global.systemEnv?.hnswEfSearch || 100};
          SELECT id, collection_id, inner_product(vector, [${vector}]) AS score
            FROM ${DatasetVectorTableName}
            WHERE team_id='${teamId}'
              AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
              ${filterCollectionIdSql}
              ${forbidCollectionSql}
            ORDER BY score desc APPROXIMATE LIMIT ${limit};
        COMMIT;`
      );
      const rows = results?.[3]?.rows as GsSearchRawType[];

      if (!Array.isArray(rows)) {
        return {
          results: []
        };
      }

      return {
        results: rows.map((item) => ({
          id: String(item.id),
          collectionId: item.collection_id,
          score: item.score * -1
        }))
      };
    } catch (error) {
      if (retry <= 0) {
        return Promise.reject(error);
      }
      return this.embRecall({
        ...props,
        retry: retry - 1
      });
    }
  };
  getVectorDataByTime = async (start: Date, end: Date) => {
    const { rows } = await GsClient.query<{
      id: string;
      team_id: string;
      dataset_id: string;
    }>(`SELECT id, team_id, dataset_id
      FROM ${DatasetVectorTableName}
      WHERE createtime BETWEEN '${dayjs(start).format('YYYY-MM-DD HH:mm:ss')}' AND '${dayjs(
        end
      ).format('YYYY-MM-DD HH:mm:ss')}';
    `);

    return rows.map((item) => ({
      id: String(item.id),
      teamId: item.team_id,
      datasetId: item.dataset_id
    }));
  };
  getVectorCountByTeamId = async (teamId: string) => {
    const total = await GsClient.count(DatasetVectorTableName, {
      where: [['team_id', String(teamId)]]
    });

    return total;
  };
  getVectorCountByDatasetId = async (teamId: string, datasetId: string) => {
    const total = await GsClient.count(DatasetVectorTableName, {
      where: [['team_id', String(teamId)], 'and', ['dataset_id', String(datasetId)]]
    });

    return total;
  };
  getVectorCountByCollectionId = async (
    teamId: string,
    datasetId: string,
    collectionId: string
  ) => {
    const total = await GsClient.count(DatasetVectorTableName, {
      where: [
        ['team_id', String(teamId)],
        'and',
        ['dataset_id', String(datasetId)],
        'and',
        ['collection_id', String(collectionId)]
      ]
    });

    return total;
  };
}

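For orientation, a hedged sketch of how GsVectorCtrl.embRecall would be called. The argument shape follows its destructuring above, but the authoritative EmbeddingRecallCtrlProps type lives in ../controller.d and may include more fields:

import { GsVectorCtrl } from './index';

const recallDemo = async () => {
  const ctrl = new GsVectorCtrl();
  const { results } = await ctrl.embRecall({
    teamId: 'team-id',
    datasetIds: ['dataset-id'],
    vector: Array.from({ length: 1536 }, () => 0), // dummy 1536-dim embedding
    limit: 30,
    forbidCollectionIdList: [],
    filterCollectionIdList: undefined
  } as Parameters<GsVectorCtrl['embRecall']>[0]);

  // embRecall negates the inner product, so a higher score means more similar.
  console.log(results.map((r) => `${r.collectionId}: ${r.score}`));
};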
packages/service/common/vectorDB/type.d.ts (vendored, 1 line added)
@@ -6,6 +6,7 @@ declare global {
   var pgClient: Pool | null;
   var obClient: MysqlPool | null;
   var milvusClient: MilvusClient | null;
+  var gsClient: Pool | null;
 }

 export type EmbeddingRecallItemType = {

@@ -30,8 +30,7 @@ import { Types } from 'mongoose';
   community: community-id
   commercial: commercial-id
 */
-export async function splitCombinePluginId(id: string) {
+export function splitCombineToolId(id: string) {
   const splitRes = id.split('-');
   if (splitRes.length === 1) {
     // app id
@@ -42,7 +41,7 @@ export function splitCombineToolId(id: string) {
   }

   const [source, pluginId] = id.split('-') as [PluginSourceEnum, string];
-  if (!source || !pluginId) return Promise.reject('pluginId not found');
+  if (!source || !pluginId) throw new Error('pluginId not found');

   return { source, pluginId: id };
 }
@@ -54,7 +53,7 @@ const getSystemPluginTemplateById = async (
   versionId?: string
 ): Promise<ChildAppType> => {
   const item = getSystemPluginTemplates().find((plugin) => plugin.id === pluginId);
-  if (!item) return Promise.reject(PluginErrEnum.unAuth);
+  if (!item) return Promise.reject(PluginErrEnum.unExist);

   const plugin = cloneDeep(item);

@@ -64,10 +63,10 @@ const getSystemPluginTemplateById = async (
       { pluginId: plugin.id, 'customConfig.associatedPluginId': plugin.associatedPluginId },
       'associatedPluginId'
     ).lean();
-    if (!systemPlugin) return Promise.reject(PluginErrEnum.unAuth);
+    if (!systemPlugin) return Promise.reject(PluginErrEnum.unExist);

     const app = await MongoApp.findById(plugin.associatedPluginId).lean();
-    if (!app) return Promise.reject(PluginErrEnum.unAuth);
+    if (!app) return Promise.reject(PluginErrEnum.unExist);

     const version = versionId
       ? await getAppVersionById({
@@ -77,6 +76,12 @@ const getSystemPluginTemplateById = async (
         })
       : await getAppLatestVersion(plugin.associatedPluginId, app);
     if (!version.versionId) return Promise.reject('App version not found');
+    const isLatest = version.versionId
+      ? await checkIsLatestVersion({
+          appId: plugin.associatedPluginId,
+          versionId: version.versionId
+        })
+      : true;

     return {
       ...plugin,
@@ -85,12 +90,19 @@ const getSystemPluginTemplateById = async (
         edges: version.edges,
         chatConfig: version.chatConfig
       },
-      version: versionId || String(version.versionId),
+      version: versionId ? version?.versionId : '',
+      versionLabel: version?.versionName,
+      isLatestVersion: isLatest,
       teamId: String(app.teamId),
       tmbId: String(app.tmbId)
     };
   }
-  return plugin;
+
+  return {
+    ...plugin,
+    version: undefined,
+    isLatestVersion: true
+  };
 };

 /* Format plugin to workflow preview node data */
@@ -102,11 +114,11 @@ export async function getChildAppPreviewNode({
   versionId?: string;
 }): Promise<FlowNodeTemplateType> {
   const app: ChildAppType = await (async () => {
-    const { source, pluginId } = await splitCombinePluginId(appId);
+    const { source, pluginId } = splitCombineToolId(appId);

     if (source === PluginSourceEnum.personal) {
       const item = await MongoApp.findById(appId).lean();
-      if (!item) return Promise.reject('plugin not found');
+      if (!item) return Promise.reject(PluginErrEnum.unExist);

       const version = await getAppVersionById({ appId, versionId, app: item });

@@ -132,8 +144,8 @@ export async function getChildAppPreviewNode({
       },
       templateType: FlowNodeTemplateTypeEnum.teamApp,

-        version: version.versionId,
-        versionLabel: version?.versionName || '',
+        version: versionId ? version?.versionId : '',
+        versionLabel: version?.versionName,
         isLatestVersion: isLatest,

         originCost: 0,
@@ -142,7 +154,7 @@ export async function getChildAppPreviewNode({
         pluginOrder: 0
       };
     } else {
-      return getSystemPluginTemplateById(pluginId);
+      return getSystemPluginTemplateById(pluginId, versionId);
     }
   })();

@@ -216,12 +228,12 @@ export async function getChildAppRuntimeById(
   id: string,
   versionId?: string
 ): Promise<PluginRuntimeType> {
-  const app: ChildAppType = await (async () => {
-    const { source, pluginId } = await splitCombinePluginId(id);
+  const app = await (async () => {
+    const { source, pluginId } = splitCombineToolId(id);

     if (source === PluginSourceEnum.personal) {
       const item = await MongoApp.findById(id).lean();
-      if (!item) return Promise.reject('plugin not found');
+      if (!item) return Promise.reject(PluginErrEnum.unExist);

       const version = await getAppVersionById({
         appId: id,
@@ -244,8 +256,6 @@ export async function getChildAppRuntimeById(
       },
       templateType: FlowNodeTemplateTypeEnum.teamApp,

-      // not used
-      version: item?.pluginData?.nodeVersion,
       originCost: 0,
       currentCost: 0,
       hasTokenFee: false,

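The rename from async splitCombinePluginId to the synchronous splitCombineToolId also changes the failure mode from a rejected promise to a thrown error. A small sketch of how callers interact with it now, with the id-prefix convention taken from the comment in the hunk above:

// Sketch: splitCombineToolId is now synchronous and throws instead of rejecting.
const splitCombineToolId = (id: string) => {
  const splitRes = id.split('-');
  if (splitRes.length === 1) {
    // bare id => personal app
    return { source: 'personal', pluginId: id };
  }
  const [source, pluginId] = splitRes;
  if (!source || !pluginId) throw new Error('pluginId not found');
  return { source, pluginId: id };
};

console.log(splitCombineToolId('community-dingtalk')); // { source: 'community', pluginId: 'community-dingtalk' }
try {
  splitCombineToolId('-'); // both parts empty
} catch (e) {
  console.log((e as Error).message); // 'pluginId not found'
}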
@@ -1,6 +1,6 @@
 import { type ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
 import { type PluginRuntimeType } from '@fastgpt/global/core/plugin/type';
-import { splitCombinePluginId } from './controller';
+import { splitCombineToolId } from './controller';
 import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';

 /*
@@ -20,7 +20,7 @@ export const computedPluginUsage = async ({
   childrenUsage: ChatNodeUsageType[];
   error?: boolean;
 }) => {
-  const { source } = await splitCombinePluginId(plugin.id);
+  const { source } = splitCombineToolId(plugin.id);
   const childrenUsages = childrenUsage.reduce((sum, item) => sum + (item.totalPoints || 0), 0);

   if (source !== PluginSourceEnum.personal) {

@@ -1,14 +1,13 @@
 import { MongoDataset } from '../dataset/schema';
 import { getEmbeddingModel } from '../ai/model';
-import {
-  AppNodeFlowNodeTypeMap,
-  FlowNodeTypeEnum
-} from '@fastgpt/global/core/workflow/node/constant';
+import { FlowNodeTypeEnum } from '@fastgpt/global/core/workflow/node/constant';
 import { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
 import type { StoreNodeItemType } from '@fastgpt/global/core/workflow/type/node';
-import { MongoAppVersion } from './version/schema';
-import { checkIsLatestVersion } from './version/controller';
-import { Types } from '../../common/mongo';
+import { getChildAppPreviewNode, splitCombineToolId } from './plugin/controller';
+import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
+import { authAppByTmbId } from '../../support/permission/app/auth';
+import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
+import { getErrText } from '@fastgpt/global/common/error/utils';

 export async function listAppDatasetDataByTeamIdAndDatasetIds({
   teamId,
@@ -33,53 +32,58 @@ export async function listAppDatasetDataByTeamIdAndDatasetIds({
 export async function rewriteAppWorkflowToDetail({
   nodes,
   teamId,
-  isRoot
+  isRoot,
+  ownerTmbId
 }: {
   nodes: StoreNodeItemType[];
   teamId: string;
   isRoot: boolean;
+  ownerTmbId: string;
 }) {
   const datasetIdSet = new Set<string>();

-  // Add node(App Type) versionlabel and latest sign
-  const appNodes = nodes.filter((node) => AppNodeFlowNodeTypeMap[node.flowNodeType]);
-  const versionIds = appNodes
-    .filter((node) => node.version && Types.ObjectId.isValid(node.version))
-    .map((node) => node.version);
-
-  if (versionIds.length > 0) {
-    const versionDataList = await MongoAppVersion.find(
-      {
-        _id: { $in: versionIds }
-      },
-      '_id versionName appId time'
-    ).lean();
-
-    const versionMap: Record<string, any> = {};
-
-    const isLatestChecks = await Promise.all(
-      versionDataList.map(async (version) => {
-        const isLatest = await checkIsLatestVersion({
-          appId: version.appId,
-          versionId: version._id
-        });
-
-        return { versionId: String(version._id), isLatest };
-      })
-    );
-    const isLatestMap = new Map(isLatestChecks.map((item) => [item.versionId, item.isLatest]));
-    versionDataList.forEach((version) => {
-      versionMap[String(version._id)] = version;
-    });
-    appNodes.forEach((node) => {
-      if (!node.version) return;
-      const versionData = versionMap[String(node.version)];
-      if (versionData) {
-        node.versionLabel = versionData.versionName;
-        node.isLatestVersion = isLatestMap.get(String(node.version)) || false;
-      }
-    });
-  }
+  /* Add node(App Type) versionlabel and latest sign ==== */
+  await Promise.all(
+    nodes.map(async (node) => {
+      if (!node.pluginId) return;
+      const { source } = splitCombineToolId(node.pluginId);
+
+      try {
+        const [preview] = await Promise.all([
+          getChildAppPreviewNode({
+            appId: node.pluginId,
+            versionId: node.version
+          }),
+          ...(source === PluginSourceEnum.personal
+            ? [
+                authAppByTmbId({
+                  tmbId: ownerTmbId,
+                  appId: node.pluginId,
+                  per: ReadPermissionVal
+                })
+              ]
+            : [])
+        ]);
+
+        node.pluginData = {
+          diagram: preview.diagram,
+          userGuide: preview.userGuide,
+          courseUrl: preview.courseUrl,
+          name: preview.name,
+          avatar: preview.avatar
+        };
+        node.versionLabel = preview.versionLabel;
+        node.isLatestVersion = preview.isLatestVersion;
+        node.version = preview.version;
+      } catch (error) {
+        node.pluginData = {
+          error: getErrText(error)
+        };
+      }
+    })
+  );
+
+  /* Add node(App Type) versionlabel and latest sign ==== */

   // Get all dataset ids from nodes
   nodes.forEach((node) => {

@@ -68,6 +68,9 @@ export const checkIsLatestVersion = async ({
   appId: string;
   versionId: string;
 }) => {
+  if (!Types.ObjectId.isValid(versionId)) {
+    return false;
+  }
   const version = await MongoAppVersion.findOne(
     {
       appId,

@@ -34,6 +34,7 @@ import { getTrainingModeByCollection } from './utils';
|
||||
import {
|
||||
computeChunkSize,
|
||||
computeChunkSplitter,
|
||||
computeParagraphChunkDeep,
|
||||
getLLMMaxChunkSize
|
||||
} from '@fastgpt/global/core/dataset/training/utils';
|
||||
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';
|
||||
@@ -74,6 +75,8 @@ export const createCollectionAndInsertData = async ({
|
||||
llmModel: getLLMModel(dataset.agentModel)
|
||||
});
|
||||
const chunkSplitter = computeChunkSplitter(createCollectionParams);
|
||||
const paragraphChunkDeep = computeParagraphChunkDeep(createCollectionParams);
|
||||
|
||||
if (trainingType === DatasetCollectionDataProcessModeEnum.qa) {
|
||||
delete createCollectionParams.chunkTriggerType;
|
||||
delete createCollectionParams.chunkTriggerMinSize;
|
||||
@@ -87,7 +90,11 @@ export const createCollectionAndInsertData = async ({
|
||||
// 1. split chunks
|
||||
const chunks = rawText2Chunks({
|
||||
rawText,
|
||||
chunkTriggerType: createCollectionParams.chunkTriggerType,
|
||||
chunkTriggerMinSize: createCollectionParams.chunkTriggerMinSize,
|
||||
chunkSize,
|
||||
paragraphChunkDeep,
|
||||
paragraphChunkMinSize: createCollectionParams.paragraphChunkMinSize,
|
||||
maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)),
|
||||
overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0,
|
||||
customReg: chunkSplitter ? [chunkSplitter] : [],
|
||||
@@ -112,6 +119,7 @@ export const createCollectionAndInsertData = async ({
|
||||
const { _id: collectionId } = await createOneCollection({
|
||||
...createCollectionParams,
|
||||
trainingType,
|
||||
paragraphChunkDeep,
|
||||
chunkSize,
|
||||
chunkSplitter,
|
||||
|
||||
@@ -212,46 +220,19 @@ export type CreateOneCollectionParams = CreateDatasetCollectionParams & {
tmbId: string;
session?: ClientSession;
};
export async function createOneCollection({
teamId,
tmbId,
name,
parentId,
datasetId,
type,
export async function createOneCollection({ session, ...props }: CreateOneCollectionParams) {
const {
teamId,
parentId,
datasetId,
tags,

createTime,
updateTime,

hashRawText,
rawTextLength,
metadata = {},
tags,

nextSyncTime,

fileId,
rawLink,
externalFileId,
externalFileUrl,
apiFileId,

// Parse settings
customPdfParse,
imageIndex,
autoIndexes,

// Chunk settings
trainingType,
chunkSettingMode,
chunkSplitMode,
chunkSize,
indexSize,
chunkSplitter,
qaPrompt,

session
}: CreateOneCollectionParams) {
fileId,
rawLink,
externalFileId,
externalFileUrl,
apiFileId
} = props;
// Create collection tags
const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session });

@@ -259,41 +240,18 @@ export async function createOneCollection({
const [collection] = await MongoDatasetCollection.create(
[
{
...props,
teamId,
tmbId,
parentId: parentId || null,
datasetId,
name,
type,

rawTextLength,
hashRawText,
tags: collectionTags,
metadata,

createTime,
updateTime,
nextSyncTime,

...(fileId ? { fileId } : {}),
...(rawLink ? { rawLink } : {}),
...(externalFileId ? { externalFileId } : {}),
...(externalFileUrl ? { externalFileUrl } : {}),
...(apiFileId ? { apiFileId } : {}),

// Parse settings
customPdfParse,
imageIndex,
autoIndexes,

// Chunk settings
trainingType,
chunkSettingMode,
chunkSplitMode,
chunkSize,
indexSize,
chunkSplitter,
qaPrompt
...(apiFileId ? { apiFileId } : {})
}
],
{ session, ordered: true }
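The refactor above replaces a long destructured parameter list with a { session, ...props } split, so every remaining collection field is forwarded to MongoDatasetCollection.create through a single spread, and only the fields that need normalization are pulled out afterwards. A minimal TypeScript sketch of the pattern, with hypothetical field names:

// Hypothetical shape; only `session` and `parentId` get special handling.
type CreateParams = {
  name: string;
  parentId?: string;
  session?: unknown; // e.g. a mongoose ClientSession
  [key: string]: unknown; // all remaining collection fields
};

async function createOne({ session, ...props }: CreateParams) {
  const { parentId } = props;
  return {
    ...props, // unknown fields flow through untouched
    parentId: parentId || null // normalized field overrides the spread value
  };
}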
@@ -1,5 +1,8 @@
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import {
ChunkTriggerConfigTypeEnum,
DatasetSourceReadTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { readFileContentFromMongo } from '../../common/file/gridfs/controller';
import { urlsFetch } from '../../common/string/cheerio';
import { type TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
@@ -179,11 +182,17 @@ export const readApiServerFileContent = async ({

export const rawText2Chunks = ({
rawText,
chunkTriggerType = ChunkTriggerConfigTypeEnum.minSize,
chunkTriggerMinSize = 1000,
backupParse,
chunkSize = 512,
...splitProps
}: {
rawText: string;

chunkTriggerType?: ChunkTriggerConfigTypeEnum;
chunkTriggerMinSize?: number; // maxSize from agent model, not store

backupParse?: boolean;
tableParse?: boolean;
} & TextSplitProps): {
@@ -209,6 +218,28 @@ export const rawText2Chunks = ({
};
};

// Chunk condition
// 1. Max-size condition: chunking is triggered only when the text exceeds the max size (defaults to 0.7 * the model's max)
if (chunkTriggerType === ChunkTriggerConfigTypeEnum.maxSize) {
const textLength = rawText.trim().length;
const maxSize = splitProps.maxSize ? splitProps.maxSize * 0.7 : 16000;
if (textLength < maxSize) {
return [
{
q: rawText,
a: ''
}
];
}
}
// 2. Min-size condition: chunking is triggered only when the text exceeds the manually set min size
if (chunkTriggerType !== ChunkTriggerConfigTypeEnum.forceChunk) {
const textLength = rawText.trim().length;
if (textLength < chunkTriggerMinSize) {
return [{ q: rawText, a: '' }];
}
}

if (backupParse) {
return parseDatasetBackup2Chunks(rawText).chunks;
}
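Taken together, the two guard branches above decide whether splitting runs at all. A minimal sketch of that decision, pulled out into a hypothetical standalone helper (the real checks live inline in rawText2Chunks):

import { ChunkTriggerConfigTypeEnum } from '@fastgpt/global/core/dataset/constants';

// Returns true when the raw text should be split into chunks,
// false when it should be stored whole as a single { q: rawText, a: '' } item.
const shouldChunk = (
  rawText: string,
  chunkTriggerType: ChunkTriggerConfigTypeEnum,
  chunkTriggerMinSize = 1000,
  maxSize?: number
): boolean => {
  const textLength = rawText.trim().length;
  // maxSize mode: store whole unless the text exceeds 70% of the model's max context
  if (
    chunkTriggerType === ChunkTriggerConfigTypeEnum.maxSize &&
    textLength < (maxSize ? maxSize * 0.7 : 16000)
  ) {
    return false;
  }
  // every mode except forceChunk also respects the manual minimum
  if (
    chunkTriggerType !== ChunkTriggerConfigTypeEnum.forceChunk &&
    textLength < chunkTriggerMinSize
  ) {
    return false;
  }
  return true;
};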
@@ -47,7 +47,6 @@ export const ChunkSettings = {
},
paragraphChunkDeep: Number,
paragraphChunkMinSize: Number,
paragraphChunkMaxSize: Number,
chunkSize: Number,
chunkSplitter: String,
@@ -658,7 +658,7 @@ export async function searchDatasetData(
tokenLen: 0
};
} catch (error) {
addLog.error('multiQueryRecall error', error);
addLog.error('Full text search error', error);
return {
fullTextRecallResults: [],
tokenLen: 0
@@ -10,7 +10,7 @@ import { AppPermission } from '@fastgpt/global/support/permission/app/controller
import { type PermissionValueType } from '@fastgpt/global/support/permission/type';
import { AppFolderTypeList } from '@fastgpt/global/core/app/constants';
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import { splitCombinePluginId } from '../../../core/app/plugin/controller';
import { splitCombineToolId } from '../../../core/app/plugin/controller';
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
import { type AuthModeType, type AuthResponseType } from '../type';
import { AppDefaultPermissionVal } from '@fastgpt/global/support/permission/app/constant';

@@ -24,7 +24,7 @@ export const authPluginByTmbId = async ({
appId: string;
per: PermissionValueType;
}) => {
const { source } = await splitCombinePluginId(appId);
const { source } = splitCombineToolId(appId);
if (source === PluginSourceEnum.personal) {
const { app } = await authAppByTmbId({
appId,
@@ -1,5 +1,6 @@
import iconv from 'iconv-lite';
import { type ReadRawTextByBuffer, type ReadFileResponse } from '../type';
import { matchMdImg } from '@fastgpt/global/common/string/markdown';

const rawEncodingList = [
'ascii',

@@ -34,7 +35,10 @@ export const readFileRawText = ({ buffer, encoding }: ReadRawTextByBuffer): Read
}
})();

const { text, imageList } = matchMdImg(content);

return {
rawText: content
rawText: text,
imageList
};
};
@@ -54,7 +54,7 @@ const RadioGroup = <T = any,>({ list, value, onChange, ...props }: Props<T>) =>
/>
</Flex>
</Box>
<HStack spacing={1} color={'myGray.900'} whiteSpace={'nowrap'} fontSize={'sm'}>
<HStack spacing={0.5} color={'myGray.900'} whiteSpace={'nowrap'} fontSize={'sm'}>
<Box>{typeof item.title === 'string' ? t(item.title as any) : item.title}</Box>
{!!item.tooltip && <QuestionTip label={item.tooltip} color={'myGray.600'} />}
</HStack>

@@ -18,7 +18,7 @@ export default function Variable({ variableLabel }: { variableLabel: string }) {
: { bg: 'red.50', color: 'red.600' })}
>
{variableLabel ? (
<Flex alignItems={'center'}>{variableLabel}</Flex>
<Flex alignItems={'center'}>{t(variableLabel as any)}</Flex>
) : (
<Box>{t('common:invalid_variable')}</Box>
)}
@@ -187,7 +187,7 @@ export function useScrollPagination<
scrollLoadType = 'bottom',

pageSize = 10,
params = {},
params,
EmptyTip,
showErrorToast = true,
disalbed = false,

@@ -196,7 +196,7 @@ export function useScrollPagination<
scrollLoadType?: 'top' | 'bottom';

pageSize?: number;
params?: Record<string, any>;
params?: Omit<TParams, 'offset' | 'pageSize'>;
EmptyTip?: React.JSX.Element;
showErrorToast?: boolean;
disalbed?: boolean;
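The params change above narrows useScrollPagination from an untyped record to exactly the fetch function's own parameters minus the pagination keys. A small sketch of what Omit<TParams, 'offset' | 'pageSize'> buys, using hypothetical types:

type ListReq = { appId: string; offset: number; pageSize: number };
type ScrollParams = Omit<ListReq, 'offset' | 'pageSize'>; // { appId: string }

const ok: ScrollParams = { appId: '1' }; // compiles
// const bad: ScrollParams = { apId: '1' }; // rejected at compile time;
// with params?: Record<string, any>, this typo would have slipped through silently.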
@@ -85,6 +85,7 @@
"interval.per_hour": "Every Hour",
"intro": "A comprehensive model application orchestration system that offers out-of-the-box data processing and model invocation capabilities. It allows for rapid Dataset construction and workflow orchestration through Flow visualization, enabling complex Dataset scenarios!",
"invalid_json_format": "JSON format error",
"keep_the_latest": "Keep the latest",
"llm_not_support_vision": "This model does not support image recognition",
"llm_use_vision": "Vision",
"llm_use_vision_tip": "After clicking on the model selection, you can see whether the model supports image recognition and the ability to control whether to start image recognition. \nAfter starting image recognition, the model will read the image content in the file link, and if the user question is less than 500 words, it will automatically parse the image in the user question.",

@@ -145,8 +145,8 @@
"code_error.outlink_error.invalid_link": "Invalid Share Link",
"code_error.outlink_error.link_not_exist": "Share Link Does Not Exist",
"code_error.outlink_error.un_auth_user": "Identity Verification Failed",
"code_error.plugin_error.not_exist": "Plugin Does Not Exist",
"code_error.plugin_error.un_auth": "Unauthorized to Operate This Plugin",
"code_error.plugin_error.not_exist": "The tool does not exist",
"code_error.plugin_error.un_auth": "No permission to operate the tool",
"code_error.system_error.community_version_num_limit": "Exceeded Open Source Version Limit, Please Upgrade to Commercial Version: https://tryfastgpt.ai",
"code_error.system_error.license_app_amount_limit": "Exceed the maximum number of applications in the system",
"code_error.system_error.license_dataset_amount_limit": "Exceed the maximum number of knowledge bases in the system",

@@ -15,7 +15,13 @@
"backup_dataset_tip": "You can reimport the downloaded csv file when exporting the knowledge base.",
"backup_mode": "Backup import",
"chunk_max_tokens": "max_tokens",
"chunk_process_params": "Chunk processing parameters",
"chunk_size": "Chunk size",
"chunk_trigger": "Chunking conditions",
"chunk_trigger_force_chunk": "Forced chunking",
"chunk_trigger_max_size": "The original text length is greater than 70% of the file processing model's maximum context",
"chunk_trigger_min_size": "The original text is greater than",
"chunk_trigger_tips": "Chunked storage is triggered only when certain conditions are met; otherwise the original text is stored in full",
"close_auto_sync": "Are you sure you want to turn off automatic sync?",
"collection.Create update time": "Creation/Update Time",
"collection.Training type": "Training",

@@ -29,6 +35,7 @@
"collection_tags": "Collection Tags",
"common_dataset": "General Dataset",
"common_dataset_desc": "Building a knowledge base by importing files, web page links, or manual entry",
"condition": "condition",
"config_sync_schedule": "Configure scheduled synchronization",
"confirm_to_rebuild_embedding_tip": "Are you sure you want to switch the index for the Dataset?\nSwitching the index is a significant operation that requires re-indexing all data in your Dataset, which may take a long time. Please ensure your account has sufficient remaining points.\n\nAdditionally, you need to update the applications that use this Dataset to avoid conflicts with other indexed model Datasets.",
"core.dataset.import.Adjust parameters": "Adjust parameters",

@@ -100,6 +107,7 @@
"is_open_schedule": "Enable scheduled synchronization",
"keep_image": "Keep the picture",
"loading": "Loading...",
"max_chunk_size": "Maximum chunk size",
"move.hint": "After moving, the selected knowledge base/folder will inherit the permission settings of the new folder, and the original permission settings will become invalid.",
"noChildren": "No subdirectories",
"noSelectedFolder": "No selected folder",

@@ -107,8 +115,10 @@
"noValidId": "No valid ID",
"open_auto_sync": "After scheduled synchronization is turned on, the system will try to synchronize the collection from time to time every day. During the collection synchronization period, the collection data will not be searched.",
"other_dataset": "Third-party knowledge base",
"paragraph_max_deep": "Maximum paragraph depth",
"paragraph_split": "Partition by paragraph",
"paragraph_split_tip": "Priority is given to chunking according to the Markdown title paragraph. If the chunking is too long, then chunking is done according to the length.",
"params_config": "Config",
"params_setting": "Parameter settings",
"pdf_enhance_parse": "PDF enhancement analysis",
"pdf_enhance_parse_price": "{{price}} points/page",
"pdf_enhance_parse_tips": "Calling PDF recognition model for parsing, you can convert it into Markdown and retain pictures in the document. At the same time, you can also identify scanned documents, which will take a long time to identify them.",
@@ -85,6 +85,7 @@
"interval.per_hour": "每小时",
"intro": "是一个大模型应用编排系统,提供开箱即用的数据处理、模型调用等能力,可以快速的构建知识库并通过 Flow 可视化进行工作流编排,实现复杂的知识库场景!",
"invalid_json_format": "JSON 格式错误",
"keep_the_latest": "保持最新版本",
"llm_not_support_vision": "该模型不支持图片识别",
"llm_use_vision": "图片识别",
"llm_use_vision_tip": "点击模型选择后,可以看到模型是否支持图片识别以及控制是否启动图片识别的能力。启动图片识别后,模型会读取文件链接里图片内容,并且如果用户问题少于 500 字,会自动解析用户问题中的图片。",

@@ -145,8 +145,8 @@
"code_error.outlink_error.invalid_link": "分享链接无效",
"code_error.outlink_error.link_not_exist": "分享链接不存在",
"code_error.outlink_error.un_auth_user": "身份校验失败",
"code_error.plugin_error.not_exist": "插件不存在",
"code_error.plugin_error.un_auth": "无权操作该插件",
"code_error.plugin_error.not_exist": "工具不存在",
"code_error.plugin_error.un_auth": "无权操作该工具",
"code_error.system_error.community_version_num_limit": "超出开源版数量限制,请升级商业版: https://fastgpt.in",
"code_error.system_error.license_app_amount_limit": "超出系统最大应用数量",
"code_error.system_error.license_dataset_amount_limit": "超出系统最大知识库数量",

@@ -554,7 +554,7 @@
"core.dataset.training.Agent queue": "QA 训练排队",
"core.dataset.training.Auto mode": "补充索引",
"core.dataset.training.Auto mode Tip": "通过子索引以及调用模型生成相关问题与摘要,来增加数据块的语义丰富度,更利于检索。需要消耗更多的存储空间和增加 AI 调用次数。",
"core.dataset.training.Chunk mode": "直接分块",
"core.dataset.training.Chunk mode": "分块存储",
"core.dataset.training.Full": "预计 20 分钟以上",
"core.dataset.training.Leisure": "空闲",
"core.dataset.training.QA mode": "问答对提取",

@@ -15,7 +15,13 @@
"backup_dataset_tip": "可以将导出知识库时,下载的 csv 文件重新导入。",
"backup_mode": "备份导入",
"chunk_max_tokens": "分块上限",
"chunk_process_params": "分块处理参数",
"chunk_size": "分块大小",
"chunk_trigger": "分块条件",
"chunk_trigger_force_chunk": "强制分块",
"chunk_trigger_max_size": "原文长度大于文件处理模型最大上下文70%",
"chunk_trigger_min_size": "原文长度大于",
"chunk_trigger_tips": "当满足一定条件时才触发分块存储,否则会直接完整存储原文",
"close_auto_sync": "确认关闭自动同步功能?",
"collection.Create update time": "创建/更新时间",
"collection.Training type": "训练模式",

@@ -29,6 +35,7 @@
"collection_tags": "集合标签",
"common_dataset": "通用知识库",
"common_dataset_desc": "通过导入文件、网页链接或手动录入形式构建知识库",
"condition": "条件",
"config_sync_schedule": "配置定时同步",
"confirm_to_rebuild_embedding_tip": "确认为知识库切换索引?\n切换索引是一个非常重量的操作,需要对您知识库内所有数据进行重新索引,时间可能较长,请确保账号内剩余积分充足。\n\n此外,你还需要注意修改选择该知识库的应用,避免它们与其他索引模型知识库混用。",
"core.dataset.import.Adjust parameters": "调整参数",

@@ -100,6 +107,7 @@
"is_open_schedule": "启用定时同步",
"keep_image": "保留图片",
"loading": "加载中...",
"max_chunk_size": "最大分块大小",
"move.hint": "移动后,所选知识库/文件夹将继承新文件夹的权限设置,原先的权限设置失效。",
"noChildren": "无子目录",
"noSelectedFolder": "没有选择文件夹",

@@ -107,8 +115,10 @@
"noValidId": "没有有效的 ID",
"open_auto_sync": "开启定时同步后,系统将会每天不定时尝试同步集合,集合同步期间,会出现无法搜索到该集合数据现象。",
"other_dataset": "第三方知识库",
"paragraph_max_deep": "最大段落深度",
"paragraph_split": "按段落分块",
"paragraph_split_tip": "优先按 Markdown 标题段落进行分块,如果分块过长,再按长度进行二次分块",
"params_config": "配置",
"params_setting": "参数设置",
"pdf_enhance_parse": "PDF增强解析",
"pdf_enhance_parse_price": "{{price}}积分/页",
"pdf_enhance_parse_tips": "调用 PDF 识别模型进行解析,可以将其转换成 Markdown 并保留文档中的图片,同时也可以对扫描件进行识别,识别时间较长。",
@@ -85,6 +85,7 @@
"interval.per_hour": "每小時",
"intro": "FastGPT 是一個基於大型語言模型的知識庫平臺,提供開箱即用的資料處理、向量檢索和視覺化 AI 工作流程編排等功能,讓您可以輕鬆開發和部署複雜的問答系統,而無需繁瑣的設定或設定。",
"invalid_json_format": "JSON 格式錯誤",
"keep_the_latest": "保持最新版本",
"llm_not_support_vision": "這個模型不支援圖片辨識",
"llm_use_vision": "圖片辨識",
"llm_use_vision_tip": "點選模型選擇後,可以看到模型是否支援圖片辨識以及控制是否啟用圖片辨識的功能。啟用圖片辨識後,模型會讀取檔案連結中的圖片內容,並且如果使用者問題少於 500 字,會自動解析使用者問題中的圖片。",

@@ -145,8 +145,8 @@
"code_error.outlink_error.invalid_link": "分享連結無效",
"code_error.outlink_error.link_not_exist": "分享連結不存在",
"code_error.outlink_error.un_auth_user": "身份驗證失敗",
"code_error.plugin_error.not_exist": "外掛程式不存在",
"code_error.plugin_error.un_auth": "無權操作此外掛程式",
"code_error.plugin_error.not_exist": "工具不存在",
"code_error.plugin_error.un_auth": "無權操作該工具",
"code_error.system_error.community_version_num_limit": "超出開源版數量限制,請升級商業版:https://tryfastgpt.ai",
"code_error.system_error.license_app_amount_limit": "超出系統最大應用數量",
"code_error.system_error.license_dataset_amount_limit": "超出系統最大知識庫數量",

@@ -554,7 +554,7 @@
"core.dataset.training.Agent queue": "問答訓練排隊中",
"core.dataset.training.Auto mode": "補充索引",
"core.dataset.training.Auto mode Tip": "透過子索引以及呼叫模型產生相關問題與摘要,來增加資料區塊的語意豐富度,更有利於檢索。需要消耗更多的儲存空間並增加 AI 呼叫次數。",
"core.dataset.training.Chunk mode": "直接分塊",
"core.dataset.training.Chunk mode": "分塊存儲",
"core.dataset.training.Full": "預計 20 分鐘以上",
"core.dataset.training.Leisure": "閒置",
"core.dataset.training.QA mode": "問答對提取",

@@ -14,7 +14,12 @@
"backup_dataset_tip": "可以將導出知識庫時,下載的 csv 文件重新導入。",
"backup_mode": "備份導入",
"chunk_max_tokens": "分塊上限",
"chunk_process_params": "分塊處理參數",
"chunk_size": "分塊大小",
"chunk_trigger": "分塊條件",
"chunk_trigger_force_chunk": "強制分塊",
"chunk_trigger_max_size": "原文長度大於文件處理模型最大上下文70%",
"chunk_trigger_min_size": "原文長度大於",
"close_auto_sync": "確認關閉自動同步功能?",
"collection.Create update time": "建立/更新時間",
"collection.Training type": "分段模式",

@@ -28,6 +33,7 @@
"collection_tags": "集合標籤",
"common_dataset": "通用資料集",
"common_dataset_desc": "通過導入文件、網頁鏈接或手動錄入形式構建知識庫",
"condition": "條件",
"config_sync_schedule": "設定定時同步",
"confirm_to_rebuild_embedding_tip": "確定要為資料集切換索引嗎?\n切換索引是一個重要的操作,需要對您資料集內所有資料重新建立索引,可能需要較長時間,請確保帳號內剩餘點數充足。\n\n此外,您還需要注意修改使用此資料集的應用程式,避免與其他索引模型資料集混用。",
"core.dataset.import.Adjust parameters": "調整參數",

@@ -99,6 +105,7 @@
"is_open_schedule": "啟用定時同步",
"keep_image": "保留圖片",
"loading": "加載中...",
"max_chunk_size": "最大分塊大小",
"move.hint": "移動後,所選資料集/資料夾將繼承新資料夾的權限設定,原先的權限設定將失效。",
"noChildren": "無子目錄",
"noSelectedFolder": "沒有選擇文件夾",

@@ -106,8 +113,10 @@
"noValidId": "沒有有效的 ID",
"open_auto_sync": "開啟定時同步後,系統將每天不定時嘗試同步集合,集合同步期間,會出現無法搜尋到該集合資料現象。",
"other_dataset": "第三方知識庫",
"paragraph_max_deep": "最大段落深度",
"paragraph_split": "按段落分塊",
"paragraph_split_tip": "優先按 Markdown 標題段落進行分塊,如果分塊過長,再按長度進行二次分塊",
"params_config": "設定",
"params_setting": "參數設定",
"pdf_enhance_parse": "PDF 增強解析",
"pdf_enhance_parse_price": "{{price}}積分/頁",
"pdf_enhance_parse_tips": "呼叫 PDF 識別模型進行解析,可以將其轉換成 Markdown 並保留文件中的圖片,同時也可以對掃描件進行識別,識別時間較長。",
@@ -29,6 +29,8 @@ MONGODB_LOG_URI=mongodb://username:password@0.0.0.0:27017/fastgpt?authSource=adm
PG_URL=postgresql://username:password@host:port/postgres
# OceanBase vector store connection params
OCEANBASE_URL=
# openGauss vector store connection params
OPENGAUSS_URL=
# milvus vector store connection params
MILVUS_ADDRESS=
MILVUS_TOKEN=
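openGauss speaks the PostgreSQL wire protocol, so OPENGAUSS_URL presumably follows the same shape as PG_URL above; a hypothetical example with placeholder credentials and the `gs` container name from the compose file:

OPENGAUSS_URL=postgresql://username:password@gs:5432/postgres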
2
projects/app/src/global/core/chat/api.d.ts
vendored
@@ -24,7 +24,7 @@ export type GetChatRecordsProps = OutLinkChatAuthProps & {
appId: string;
chatId?: string;
loadCustomFeedbacks?: boolean;
type: `${GetChatTypeEnum}`;
type?: `${GetChatTypeEnum}`;
};

export type InitOutLinkChatProps = {

@@ -19,7 +19,7 @@ export const useNodeTemplates = () => {
const nodeList = useContextSelector(WorkflowContext, (v) => v.nodeList);

const hasToolNode = useMemo(
() => nodeList.some((node) => node.flowNodeType === FlowNodeTypeEnum.toolSet),
() => nodeList.some((node) => node.flowNodeType === FlowNodeTypeEnum.tools),
[nodeList]
);
@@ -1,5 +1,5 @@
import React, { useCallback, useMemo, useRef } from 'react';
import { Box, Button, Flex, HStack, useDisclosure, type FlexProps } from '@chakra-ui/react';
import React, { useCallback, useMemo } from 'react';
import { Box, Button, Flex, useDisclosure, type FlexProps } from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import Avatar from '@fastgpt/web/components/common/Avatar';
import type { FlowNodeItemType } from '@fastgpt/global/core/workflow/type/node.d';

@@ -15,7 +15,7 @@ import { ToolSourceHandle, ToolTargetHandle } from './Handle/ToolHandle';
import { useEditTextarea } from '@fastgpt/web/hooks/useEditTextarea';
import { ConnectionSourceHandle, ConnectionTargetHandle } from './Handle/ConnectionHandle';
import { useDebug } from '../../hooks/useDebug';
import { getPreviewPluginNode } from '@/web/core/app/api/plugin';
import { getPreviewPluginNode, getToolVersionList } from '@/web/core/app/api/plugin';
import { storeNode2FlowNode } from '@/web/core/workflow/utils';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { useContextSelector } from 'use-context-selector';

@@ -104,12 +104,9 @@ const NodeCard = (props: Props) => {
}, [nodeList, nodeId]);
const isAppNode = node && AppNodeFlowNodeTypeMap[node?.flowNodeType];
const showVersion = useMemo(() => {
if (!isAppNode || !node?.pluginId) return false;
if (!isAppNode || !node?.pluginId || node?.pluginData?.error) return false;
if ([FlowNodeTypeEnum.tool, FlowNodeTypeEnum.toolSet].includes(node.flowNodeType)) return false;
if (node.pluginId.split('-').length > 1) {
return false;
}
return true;
return typeof node.version === 'string';
}, [isAppNode, node]);

const { data: nodeTemplate } = useRequest2(
@@ -617,11 +614,10 @@ const NodeVersion = React.memo(function NodeVersion({ node }: { node: FlowNodeIt
const { isOpen, onOpen, onClose } = useDisclosure();

// Load version list
const { ScrollData, data: versionList } = useScrollPagination(getAppVersionList, {
const { ScrollData, data: versionList } = useScrollPagination(getToolVersionList, {
pageSize: 20,
params: {
appId: node.pluginId,
isPublish: true
toolId: node.pluginId
},
refreshDeps: [node.pluginId, isOpen],
disalbed: !isOpen,

@@ -653,18 +649,23 @@ const NodeVersion = React.memo(function NodeVersion({ node }: { node: FlowNodeIt
}
);

const renderList = useCreation(
() =>
versionList.map((item) => ({
const renderVersionList = useCreation(
() => [
{
label: t('app:keep_the_latest'),
value: ''
},
...versionList.map((item) => ({
label: item.versionName,
value: item._id
})),
}))
],
[node.isLatestVersion, node.version, t, versionList]
);
const valueLabel = useMemo(() => {
return (
<Flex alignItems={'center'} gap={0.5}>
{node?.versionLabel}
{node?.version === '' ? t('app:keep_the_latest') : node?.versionLabel}
{!node.isLatestVersion && (
<MyTag type="fill" colorSchema={'adora'} fontSize={'mini'} borderRadius={'lg'}>
{t('app:not_the_newest')}

@@ -672,7 +673,7 @@ const NodeVersion = React.memo(function NodeVersion({ node }: { node: FlowNodeIt
)}
</Flex>
);
}, [node.isLatestVersion, node?.versionLabel, t]);
}, [node.isLatestVersion, node?.version, node?.versionLabel, t]);

return (
<MySelect

@@ -685,7 +686,7 @@ const NodeVersion = React.memo(function NodeVersion({ node }: { node: FlowNodeIt
placeholder={node?.versionLabel}
variant={'whitePrimaryOutline'}
size={'sm'}
list={renderList}
list={renderVersionList}
ScrollData={(props) => (
<ScrollData minH={'100px'} maxH={'40vh'}>
{props.children}
@@ -105,7 +105,6 @@ type WorkflowContextType = {

// nodes
nodeList: FlowNodeItemType[];
hasToolNode: boolean;

onUpdateNodeError: (node: string, isError: Boolean) => void;
onResetNode: (e: { id: string; node: FlowNodeTemplateType }) => void;

@@ -226,7 +225,6 @@ export const WorkflowContext = createContext<WorkflowContextType>({
},
basicNodeTemplates: [],
nodeList: [],
hasToolNode: false,
onUpdateNodeError: function (node: string, isError: Boolean): void {
throw new Error('Function not implemented.');
},

@@ -399,10 +397,6 @@ const WorkflowContextProvider = ({
[nodeListString]
);

const hasToolNode = useMemo(() => {
return !!nodeList.find((node) => node.flowNodeType === FlowNodeTypeEnum.tools);
}, [nodeList]);

const onUpdateNodeError = useMemoizedFn((nodeId: string, isError: Boolean) => {
setNodes((state) => {
return state.map((item) => {

@@ -1011,7 +1005,6 @@ const WorkflowContextProvider = ({

// node
nodeList,
hasToolNode,
onUpdateNodeError,
onResetNode,
onChangeNode,

@@ -1057,7 +1050,6 @@ const WorkflowContextProvider = ({
flowData2StoreDataAndCheck,
future,
getNodeDynamicInputs,
hasToolNode,
initData,
nodeList,
onChangeNode,
@@ -100,8 +100,6 @@ const WebsiteConfigModal = ({
paragraphChunkDeep: chunkSettings?.paragraphChunkDeep || defaultFormData.paragraphChunkDeep,
paragraphChunkMinSize:
chunkSettings?.paragraphChunkMinSize || defaultFormData.paragraphChunkMinSize,
paragraphChunkMaxSize:
chunkSettings?.paragraphChunkMaxSize || defaultFormData.paragraphChunkMaxSize,

chunkSize: chunkSettings?.chunkSize || defaultFormData.chunkSize,
@@ -17,10 +17,8 @@ import {
} from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
import type {
ChunkTriggerConfigTypeEnum,
ParagraphChunkAIModeEnum
} from '@fastgpt/global/core/dataset/constants';
import type { ParagraphChunkAIModeEnum } from '@fastgpt/global/core/dataset/constants';
import { ChunkTriggerConfigTypeEnum } from '@fastgpt/global/core/dataset/constants';
import {
DataChunkSplitModeEnum,
DatasetCollectionDataProcessModeEnum,

@@ -108,7 +106,6 @@ export type CollectionChunkFormType = {
paragraphChunkAIMode: ParagraphChunkAIModeEnum;
paragraphChunkDeep: number; // Paragraph deep
paragraphChunkMinSize: number; // Paragraph min size, if too small, it will merge
paragraphChunkMaxSize: number; // Paragraph max size, if too large, it will split
// Size split
chunkSize: number;
// Char split

@@ -130,6 +127,7 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
const { setValue, register, watch, getValues } = form;

const trainingType = watch('trainingType');
const chunkTriggerType = watch('chunkTriggerType');
const chunkSettingMode = watch('chunkSettingMode');
const chunkSplitMode = watch('chunkSplitMode');
const autoIndexes = watch('autoIndexes');

@@ -151,6 +149,14 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
}));
}, [t]);

// Chunk trigger
const chunkTriggerSelectList = [
{ label: t('dataset:chunk_trigger_min_size'), value: ChunkTriggerConfigTypeEnum.minSize },
{ label: t('dataset:chunk_trigger_max_size'), value: ChunkTriggerConfigTypeEnum.maxSize },
{ label: t('dataset:chunk_trigger_force_chunk'), value: ChunkTriggerConfigTypeEnum.forceChunk }
];

// Form max or min value
const {
maxChunkSize,
minChunkSize: minChunkSizeValue,

@@ -189,14 +195,11 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
{ label: '=====', value: '=====' },
{ label: t('dataset:split_sign_custom'), value: 'Other' }
];
const [customListSelectValue, setCustomListSelectValue] = useState(getValues('chunkSplitter'));
useEffect(() => {
if (customListSelectValue === 'Other') {
setValue('chunkSplitter', '');
} else {
setValue('chunkSplitter', customListSelectValue);
}
}, [customListSelectValue, setValue]);
const [customListSelectValue, setCustomListSelectValue] = useState(
customSplitList.some((item) => item.value === getValues('chunkSplitter'))
? getValues('chunkSplitter')
: 'Other'
);

// Index size
const indexSizeSeletorList = useMemo(() => getIndexSizeSelectList(maxIndexSize), [maxIndexSize]);
@@ -243,6 +246,41 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
gridTemplateColumns={'repeat(2, 1fr)'}
/>
</Box>

{trainingType === DatasetCollectionDataProcessModeEnum.chunk && (
<Box mt={6}>
<HStack fontSize={'sm'} mb={2} color={'myGray.600'} spacing={1}>
<Box>{t('dataset:chunk_trigger')}</Box>
<QuestionTip label={t('dataset:chunk_trigger_tips')} />
</HStack>
<HStack>
<Box flex={'1 0 0'} h={'34px'}>
<MySelect
borderRadius={'md'}
list={chunkTriggerSelectList}
value={chunkTriggerType}
onChange={(e) => {
setValue('chunkTriggerType', e);
}}
/>
</Box>
{chunkTriggerType === ChunkTriggerConfigTypeEnum.minSize && (
<Box flex={'1 0 0'}>
<MyNumberInput
h={'34px'}
bg={'white'}
min={100}
max={100000}
register={register}
name={'chunkTriggerMinSize'}
step={100}
/>
</Box>
)}
</HStack>
</Box>
)}

{trainingType === DatasetCollectionDataProcessModeEnum.chunk &&
feConfigs?.show_dataset_enhance !== false && (
<Box mt={6}>

@@ -287,7 +325,7 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
)}
<Box mt={6}>
<Box fontSize={'sm'} mb={2} color={'myGray.600'}>
{t('dataset:params_setting')}
{t('dataset:chunk_process_params')}
</Box>
<LeftRadio<ChunkSettingModeEnum>
list={[

@@ -305,6 +343,11 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
<Box>
<RadioGroup<DataChunkSplitModeEnum>
list={[
{
title: t('dataset:paragraph_split'),
value: DataChunkSplitModeEnum.paragraph,
tooltip: t('dataset:paragraph_split_tip')
},
{
title: t('dataset:split_chunk_size'),
value: DataChunkSplitModeEnum.size

@@ -321,30 +364,76 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
}}
/>

{chunkSplitMode === DataChunkSplitModeEnum.size && (
<Box
mt={1.5}
css={{
'& > span': {
display: 'block'
}
}}
>
<MyTooltip
label={t('common:core.dataset.import.Chunk Range', {
min: minChunkSizeValue,
max: maxChunkSize
})}
>
{chunkSplitMode === DataChunkSplitModeEnum.paragraph && (
<>
<Box mt={1.5}>
<Box>{t('dataset:paragraph_max_deep')}</Box>
<MyNumberInput
register={register}
name={'chunkSize'}
min={minChunkSizeValue}
max={maxChunkSize}
size={'sm'}
step={100}
bg={'myGray.50'}
register={register}
name={'paragraphChunkDeep'}
min={1}
max={8}
step={1}
h={'32px'}
/>
</MyTooltip>
</Box>
<Box mt={1.5}>
<Box>{t('dataset:max_chunk_size')}</Box>
<Box
css={{
'& > span': {
display: 'block'
}
}}
>
<MyTooltip
label={t('common:core.dataset.import.Chunk Range', {
min: minChunkSizeValue,
max: maxChunkSize
})}
>
<MyNumberInput
register={register}
name={'chunkSize'}
min={minChunkSizeValue}
max={maxChunkSize}
size={'sm'}
step={100}
/>
</MyTooltip>
</Box>
</Box>
</>
)}

{chunkSplitMode === DataChunkSplitModeEnum.size && (
<Box mt={1.5}>
<Box>{t('dataset:chunk_size')}</Box>
<Box
css={{
'& > span': {
display: 'block'
}
}}
>
<MyTooltip
label={t('common:core.dataset.import.Chunk Range', {
min: minChunkSizeValue,
max: maxChunkSize
})}
>
<MyNumberInput
register={register}
name={'chunkSize'}
min={minChunkSizeValue}
max={maxChunkSize}
size={'sm'}
step={100}
/>
</MyTooltip>
</Box>
</Box>
)}

@@ -358,6 +447,11 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
value={customListSelectValue}
h={'32px'}
onChange={(val) => {
if (val === 'Other') {
setValue('chunkSplitter', '');
} else {
setValue('chunkSplitter', val);
}
setCustomListSelectValue(val);
}}
/>
@@ -51,11 +51,10 @@ export const defaultFormData: ImportFormType = {
autoIndexes: false,

chunkSettingMode: ChunkSettingModeEnum.auto,
chunkSplitMode: DataChunkSplitModeEnum.size,
chunkSplitMode: DataChunkSplitModeEnum.paragraph,
paragraphChunkAIMode: ParagraphChunkAIModeEnum.auto,
paragraphChunkDeep: 4,
paragraphChunkDeep: 5,
paragraphChunkMinSize: 100,
paragraphChunkMaxSize: chunkAutoChunkSize,

chunkSize: chunkAutoChunkSize,
chunkSplitter: '',
@@ -8,10 +8,8 @@ import { useRouter } from 'next/router';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { getDatasetCollectionById } from '@/web/core/dataset/api';
import MyBox from '@fastgpt/web/components/common/MyBox';
import { ChunkSettingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
import { Box } from '@chakra-ui/react';
import { DataChunkSplitModeEnum } from '@fastgpt/global/core/dataset/constants';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';

const Upload = dynamic(() => import('../commonProgress/Upload'));

@@ -68,8 +66,6 @@ const ReTraining = () => {
paragraphChunkDeep: collection.paragraphChunkDeep || defaultFormData.paragraphChunkDeep,
paragraphChunkMinSize:
collection.paragraphChunkMinSize || defaultFormData.paragraphChunkMinSize,
paragraphChunkMaxSize:
collection.paragraphChunkMaxSize || defaultFormData.paragraphChunkMaxSize,

chunkSize: collection.chunkSize || defaultFormData.chunkSize,

@@ -85,11 +81,13 @@ const ReTraining = () => {

return (
<MyBox isLoading={loading} h={'100%'}>
<Box h={'100%'} overflow={'auto'}>
{activeStep === 0 && <DataProcess />}
{activeStep === 1 && <PreviewData />}
{activeStep === 2 && <Upload />}
</Box>
{!loading && (
<Box h={'100%'} overflow={'auto'}>
{activeStep === 0 && <DataProcess />}
{activeStep === 1 && <PreviewData />}
{activeStep === 2 && <Upload />}
</Box>
)}
</MyBox>
);
};
@@ -3,7 +3,6 @@ import { authApp } from '@fastgpt/service/support/permission/app/auth';
import { NextAPI } from '@/service/middleware/entry';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { checkNode } from '@/service/core/app/utils';
import { rewriteAppWorkflowToDetail } from '@fastgpt/service/core/app/utils';
/* Get app detail */
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {

@@ -23,6 +22,7 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
await rewriteAppWorkflowToDetail({
nodes: app.modules,
teamId,
ownerTmbId: app.tmbId,
isRoot
});

@@ -34,12 +34,7 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
};
}

return {
...app,
modules: await Promise.all(
app.modules.map((node) => checkNode({ node, ownerTmbId: app.tmbId }))
)
};
return app;
}

export default NextAPI(handler);
@@ -4,7 +4,7 @@
import type { NextApiResponse } from 'next';
import {
getChildAppPreviewNode,
splitCombinePluginId
splitCombineToolId
} from '@fastgpt/service/core/app/plugin/controller';
import { type FlowNodeTemplateType } from '@fastgpt/global/core/workflow/type/node.d';
import { NextAPI } from '@/service/middleware/entry';

@@ -21,7 +21,7 @@ async function handler(
): Promise<FlowNodeTemplateType> {
const { appId, versionId } = req.query;

const { source } = await splitCombinePluginId(appId);
const { source } = splitCombineToolId(appId);

if (source === PluginSourceEnum.personal) {
await authApp({ req, authToken: true, appId, per: ReadPermissionVal });
86
projects/app/src/pages/api/core/app/plugin/getVersionList.ts
Normal file
@@ -0,0 +1,86 @@
import type { NextApiResponse } from 'next';
import { NextAPI } from '@/service/middleware/entry';
import { MongoAppVersion } from '@fastgpt/service/core/app/version/schema';
import { type PaginationProps, type PaginationResponse } from '@fastgpt/web/common/fetch/type';
import { type ApiRequestProps } from '@fastgpt/service/type/next';
import { authApp } from '@fastgpt/service/support/permission/app/auth';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { parsePaginationRequest } from '@fastgpt/service/common/api/pagination';
import { splitCombineToolId } from '@fastgpt/service/core/app/plugin/controller';
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
import { getSystemPluginTemplates } from '@fastgpt/plugins/register';
import { PluginErrEnum } from '@fastgpt/global/common/error/code/plugin';
import { Types } from '@fastgpt/service/common/mongo';

export type getToolVersionListProps = PaginationProps<{
toolId?: string;
}>;

export type getToolVersionResponse = PaginationResponse<{
_id: string;
versionName: string;
}>;

async function handler(
req: ApiRequestProps<getToolVersionListProps>,
_res: NextApiResponse<any>
): Promise<getToolVersionResponse> {
const { toolId } = req.body;
const { offset, pageSize } = parsePaginationRequest(req);

if (!toolId) {
return {
total: 0,
list: []
};
}

const { source, pluginId: formatToolId } = splitCombineToolId(toolId);

// Auth
const appId = await (async () => {
if (source === PluginSourceEnum.personal) {
const { app } = await authApp({
appId: formatToolId,
req,
per: ReadPermissionVal,
authToken: true
});
return app._id;
} else {
const item = getSystemPluginTemplates().find((plugin) => plugin.id === formatToolId);
if (!item) return Promise.reject(PluginErrEnum.unAuth);
return item.associatedPluginId;
}
})();

if (!appId || !Types.ObjectId.isValid(appId)) {
return {
total: 0,
list: []
};
}

const match = {
appId,
isPublish: true
};

const [result, total] = await Promise.all([
await MongoAppVersion.find(match, 'versionName')
.sort({
time: -1
})
.skip(offset)
.limit(pageSize)
.lean(),
MongoAppVersion.countDocuments(match)
]);

return {
total,
list: result
};
}

export default NextAPI(handler);
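A minimal sketch of calling this new endpoint from the browser, via the getToolVersionList helper added to the web plugin API later in this diff (the toolId value here is a hypothetical combined id):

const res = await getToolVersionList({
  toolId: 'community-someTool', // hypothetical combined tool id, split by splitCombineToolId
  offset: 0,
  pageSize: 20
});
// res: { total: number; list: { _id: string; versionName: string }[] }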
@@ -5,7 +5,6 @@ import { authApp } from '@fastgpt/service/support/permission/app/auth';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { type AppVersionSchemaType } from '@fastgpt/global/core/app/version';
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
import { checkNode } from '@/service/core/app/utils';
import { rewriteAppWorkflowToDetail } from '@fastgpt/service/core/app/utils';

type Props = {

@@ -34,14 +33,12 @@ async function handler(
await rewriteAppWorkflowToDetail({
nodes: result.nodes,
teamId,
ownerTmbId: app.tmbId,
isRoot
});

return {
...result,
nodes: await Promise.all(
result.nodes.map((n) => checkNode({ node: n, ownerTmbId: app.tmbId }))
),
versionName: result?.versionName || formatTime2YMDHM(result?.time)
};
}
@@ -31,13 +31,16 @@ async function handler(
per: WritePermissionVal
});

const version = await getAppLatestVersion(req.query.appId, app);

await rewriteAppWorkflowToDetail({
nodes: app.modules,
nodes: version.nodes,
teamId,
isRoot
isRoot,
ownerTmbId: app.tmbId
});

return getAppLatestVersion(req.query.appId, app);
return version;
}

export default NextAPI(handler);
@@ -1,7 +1,6 @@
import {
type ChunkSettingModeEnum,
type DataChunkSplitModeEnum,
type DatasetCollectionDataProcessModeEnum
ChunkSettingModeEnum,
DatasetCollectionDataProcessModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { rawText2Chunks, readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';

@@ -16,25 +15,21 @@ import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import {
computeChunkSize,
computeChunkSplitter,
computeParagraphChunkDeep,
getLLMMaxChunkSize
} from '@fastgpt/global/core/dataset/training/utils';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { getLLMModel } from '@fastgpt/service/core/ai/model';
import type { ChunkSettingsType } from '@fastgpt/global/core/dataset/type';

export type PostPreviewFilesChunksProps = {
export type PostPreviewFilesChunksProps = ChunkSettingsType & {
datasetId: string;
type: DatasetSourceReadTypeEnum;
sourceId: string;

customPdfParse?: boolean;

trainingType: DatasetCollectionDataProcessModeEnum;

// Chunk settings
chunkSettingMode: ChunkSettingModeEnum;
chunkSplitMode: DataChunkSplitModeEnum;
chunkSize: number;
chunkSplitter?: string;
overlapRatio: number;

// Read params

@@ -57,9 +52,15 @@ async function handler(
sourceId,
customPdfParse = false,

trainingType,
chunkSettingMode,
trainingType = DatasetCollectionDataProcessModeEnum.chunk,

chunkTriggerType,
chunkTriggerMinSize,

chunkSettingMode = ChunkSettingModeEnum.auto,
chunkSplitMode,
paragraphChunkDeep,
paragraphChunkMinSize,
chunkSize,
chunkSplitter,
@@ -103,12 +104,16 @@ async function handler(
chunkSize,
llmModel: getLLMModel(dataset.agentModel)
});

chunkSplitter = computeChunkSplitter({
chunkSettingMode,
chunkSplitMode,
chunkSplitter
});
paragraphChunkDeep = computeParagraphChunkDeep({
chunkSettingMode,
chunkSplitMode,
paragraphChunkDeep
});

const { rawText } = await readDatasetSourceRawText({
teamId,

@@ -125,7 +130,11 @@ async function handler(

const chunks = rawText2Chunks({
rawText,
chunkTriggerType,
chunkTriggerMinSize,
chunkSize,
paragraphChunkDeep,
paragraphChunkMinSize,
maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)),
overlapRatio,
customReg: chunkSplitter ? [chunkSplitter] : []
@@ -24,7 +24,7 @@ import { saveChat } from '@fastgpt/service/core/chat/saveChat';
import { getAppLatestVersion } from '@fastgpt/service/core/app/version/controller';
import {
getChildAppPreviewNode,
splitCombinePluginId
splitCombineToolId
} from '@fastgpt/service/core/app/plugin/controller';
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
import { authAppByTmbId } from '@fastgpt/service/support/permission/app/auth';

@@ -137,46 +137,3 @@ export const getScheduleTriggerApp = async () => {
})
);
};

export const checkNode = async ({
node,
ownerTmbId
}: {
node: StoreNodeItemType;
ownerTmbId: string;
}): Promise<StoreNodeItemType> => {
const pluginId = node.pluginId;
if (!pluginId) return node;

try {
const { source } = await splitCombinePluginId(pluginId);

if (source === PluginSourceEnum.personal) {
await authAppByTmbId({
tmbId: ownerTmbId,
appId: pluginId,
per: ReadPermissionVal
});
}

const preview = await getChildAppPreviewNode({ appId: pluginId });
return {
...node,
pluginData: {
version: preview.version,
diagram: preview.diagram,
userGuide: preview.userGuide,
courseUrl: preview.courseUrl,
name: preview.name,
avatar: preview.avatar
}
};
} catch (error: any) {
return {
...node,
pluginData: {
error: getErrText(error)
}
};
}
};
@@ -24,6 +24,10 @@ import { type McpToolConfigType } from '@fastgpt/global/core/app/type';
import type { updateMCPToolsBody } from '@/pages/api/core/app/mcpTools/update';
import type { RunMCPToolBody } from '@/pages/api/support/mcp/client/runTool';
import type { getMCPToolsBody } from '@/pages/api/support/mcp/client/getTools';
import type {
getToolVersionListProps,
getToolVersionResponse
} from '@/pages/api/core/app/plugin/getVersionList';

/* ============ team plugin ============== */
export const getTeamPlugTemplates = (data?: ListAppBody) =>

@@ -71,6 +75,9 @@ export const getSystemPluginPaths = (data: GetPathProps) => {
export const getPreviewPluginNode = (data: GetPreviewNodeQuery) =>
GET<FlowNodeTemplateType>('/core/app/plugin/getPreviewNode', data);

export const getToolVersionList = (data: getToolVersionListProps) =>
POST<getToolVersionResponse>('/core/app/plugin/getVersionList', data);

/* ============ mcp tools ============== */
export const postCreateMCPTools = (data: createMCPToolsBody) =>
POST('/core/app/mcpTools/create', data);
@@ -51,7 +51,7 @@ const ChatRecordContextProvider = ({
params
}: {
children: ReactNode;
params: Record<string, any>;
params: Omit<getPaginationRecordsBody, 'offset' | 'pageSize'>;
}) => {
const ChatBoxRef = useContextSelector(ChatItemContext, (v) => v.ChatBoxRef);
const [isChatRecordsLoaded, setIsChatRecordsLoaded] = useState(false);

@@ -16,7 +16,6 @@ import type {
ApiDatasetCreateDatasetCollectionParams,
CreateDatasetCollectionParams,
CreateDatasetCollectionTagParams,
CsvTableCreateDatasetCollectionParams,
DatasetUpdateBody,
ExternalFileCreateDatasetCollectionParams,
FileIdCreateDatasetCollectionParams,
@@ -1,5 +1,6 @@
import { it, expect } from 'vitest'; // must be imported explicitly
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import fs from 'fs';

const simpleChunks = (chunks: string[]) => {
return chunks.map((chunk) => chunk.replace(/\s+/g, ''));
@@ -634,9 +635,83 @@ it(`Test splitText2Chunks 9`, () => {
| 10012 | 杨一 | 34 | 程序员 | 厦门 |
`,
result: [
'测试的呀,第一个表格\n\n| 序号 | 姓名 | 年龄 | 职业 | 城市 |\n| --- | --- | --- | --- | --- |\n| 1 | 张三 | 25 | 工程师 | 北京 |\n| 2 | 李四 | 30 | 教师 | 上海 |\n| 3 | 王五 | 28 | 医生 | 广州 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 4 | 赵六 | 35 | 律师 | 深圳 |\n| 5 | 孙七 | 27 | 设计师 | 杭州 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 7 | 吴九 | 29 | 销售 | 武汉 |\n| 8 | 郑十 | 31 | 记者 | 南京 |\n| 9 | 刘一 | 33 | 建筑师 | 天津 |\n| 10 | 陈二 | 26 | 程序员 | 重庆 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1001 | 杨一 | 34 | 程序员 | 厦门 |\n| 1002 | 杨二 | 34 | 程序员 | 厦门 |\n| 1003 | 杨三 | 34 | 程序员 | 厦门 |',
'| 序号 | 姓名 | 年龄 | 职业 | 城市 |\n| --- | --- | --- | --- | --- |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 1004 | 杨四 | 34 | 程序员 | 厦门 |\n| 1005 | 杨五 | 34 | 程序员 | 厦门 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 9 | 刘一 | 33 | 建筑师 | 天津 |\n| 10 | 陈二 | 26 | 程序员 | 重庆 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1001 | 杨一 | 34 | 程序员 | 厦门 |\n| 1002 | 杨二 | 34 | 程序员 | 厦门 |\n| 1003 | 杨三 | 34 | 程序员 | 厦门 |\n| 1004 | 杨四 | 34 | 程序员 | 厦门 |\n| 1005 | 杨五 | 34 | 程序员 | 厦门 |\n\n| 序号 | 姓名 | 年龄 | 职业 | 城市 |\n| --- | --- | --- | --- | --- |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |\n| 1000 | 黄末 | 28 | 作家 | 厦门 |',
'这是第二段了,第二表格\n\n| 序号 | 姓名 | 年龄 | 职业 | 城市 |\n| --- | --- | --- | --- | --- |\n| 1 | 张三 | 25 | 工程师 | 北京 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 2 | 李四 | 30 | 教师 | 上海 |\n| 3 | 王五 | 28 | 医生 | 广州 |\n| 4 | 赵六 | 35 | 律师 | 深圳 |\n| 5 | 孙七 | 27 | 设计师 | 杭州 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 7 | 吴九 | 29 | 销售 | 武汉 |\n| 8 | 郑十 | 31 | 记者 | 南京 |\n| 9 | 刘一 | 33 | 建筑师 | 天津 |\n| 10 | 陈二 | 26 | 程序员 | 重庆 |\n| 10004 | 黄末 | 28 | 作家 | 厦门 |\n| 10013 | 杨一 | 34 | 程序员 | 厦门 |\n\n\n结束了\n\n| 序号22 | 姓名 | 年龄 | 职业 | 城市 |\n| --- | --- | --- | --- | --- |\n| 1 | 张三 | 25 | 工程师 | 北京 |\n| 2 | 李四 | 30 | 教师 | 上海 |\n| 3 | 王五 | 28 | 医生 | 广州 |\n| 4 | 赵六 | 35 | 律师 | 深圳 |\n| 5 | 孙七 | 27 | 设计师 | 杭州 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 6 | 周八 | 32 | 会计 | 成都 |\n| 7 | 吴九 | 29 | 销售 | 武汉 |\n| 8 | 郑十 | 31 | 记者 | 南京 |\n| 9 | 刘一 | 33 | 建筑师 | 天津 |\n| 10 | 陈二 | 26 | 程序员 | 重庆 |\n| 10002 | 黄末 | 28 | 作家 | 厦门 |\n| 10012 | 杨一 | 34 | 程序员 | 厦门 |'
`测试的呀,第一个表格

| 序号 | 姓名 | 年龄 | 职业 | 城市 |
| --- | --- | --- | --- | --- |
| 1 | 张三 | 25 | 工程师 | 北京 |
| 2 | 李四 | 30 | 教师 | 上海 |
| 3 | 王五 | 28 | 医生 | 广州 |
| 6 | 周八 | 32 | 会计 | 成都 |
| 4 | 赵六 | 35 | 律师 | 深圳 |
| 5 | 孙七 | 27 | 设计师 | 杭州 |
| 6 | 周八 | 32 | 会计 | 成都 |
| 6 | 周八 | 32 | 会计 | 成都 |
| 7 | 吴九 | 29 | 销售 | 武汉 |
| 8 | 郑十 | 31 | 记者 | 南京 |
| 9 | 刘一 | 33 | 建筑师 | 天津 |
| 10 | 陈二 | 26 | 程序员 | 重庆 |
| 1000 | 黄末 | 28 | 作家 | 厦门 |
| 1001 | 杨一 | 34 | 程序员 | 厦门 |
| 1002 | 杨二 | 34 | 程序员 | 厦门 |
| 1003 | 杨三 | 34 | 程序员 | 厦门 |`,
`| 序号 | 姓名 | 年龄 | 职业 | 城市 |
| --- | --- | --- | --- | --- |
| 6 | 周八 | 32 | 会计 | 成都 |
| 1004 | 杨四 | 34 | 程序员 | 厦门 |
| 1005 | 杨五 | 34 | 程序员 | 厦门 |
| 1000 | 黄末 | 28 | 作家 | 厦门 |
| 1000 | 黄末 | 28 | 作家 | 厦门 |
| 1000 | 黄末 | 28 | 作家 | 厦门 |
| 9 | 刘一 | 33 | 建筑师 | 天津 |
| 10 | 陈二 | 26 | 程序员 | 重庆 |
| 1000 | 黄末 | 28 | 作家 | 厦门 |
| 1001 | 杨一 | 34 | 程序员 | 厦门 |
| 1002 | 杨二 | 34 | 程序员 | 厦门 |
| 1003 | 杨三 | 34 | 程序员 | 厦门 |
| 1004 | 杨四 | 34 | 程序员 | 厦门 |
| 1005 | 杨五 | 34 | 程序员 | 厦门 |`,
`| 序号 | 姓名 | 年龄 | 职业 | 城市 |
| --- | --- | --- | --- | --- |
| 6 | 周八 | 32 | 会计 | 成都 |
| 1000 | 黄末 | 28 | 作家 | 厦门 |
| 1000 | 黄末 | 28 | 作家 | 厦门 |
| 1000 | 黄末 | 28 | 作家 | 厦门 |

这是第二段了,第二表格

| 序号 | 姓名 | 年龄 | 职业 | 城市 |
| --- | --- | --- | --- | --- |
| 1 | 张三 | 25 | 工程师 | 北京 |
| 6 | 周八 | 32 | 会计 | 成都 |
| 2 | 李四 | 30 | 教师 | 上海 |
| 3 | 王五 | 28 | 医生 | 广州 |
| 4 | 赵六 | 35 | 律师 | 深圳 |
| 5 | 孙七 | 27 | 设计师 | 杭州 |
| 6 | 周八 | 32 | 会计 | 成都 |
| 7 | 吴九 | 29 | 销售 | 武汉 |
| 8 | 郑十 | 31 | 记者 | 南京 |
| 9 | 刘一 | 33 | 建筑师 | 天津 |
| 10 | 陈二 | 26 | 程序员 | 重庆 |
| 10004 | 黄末 | 28 | 作家 | 厦门 |
| 10013 | 杨一 | 34 | 程序员 | 厦门 |`,
`结束了

| 序号22 | 姓名 | 年龄 | 职业 | 城市 |
| --- | --- | --- | --- | --- |
| 1 | 张三 | 25 | 工程师 | 北京 |
| 2 | 李四 | 30 | 教师 | 上海 |
| 3 | 王五 | 28 | 医生 | 广州 |
| 4 | 赵六 | 35 | 律师 | 深圳 |
| 5 | 孙七 | 27 | 设计师 | 杭州 |
| 6 | 周八 | 32 | 会计 | 成都 |
| 6 | 周八 | 32 | 会计 | 成都 |
| 7 | 吴九 | 29 | 销售 | 武汉 |
| 8 | 郑十 | 31 | 记者 | 南京 |
| 9 | 刘一 | 33 | 建筑师 | 天津 |
| 10 | 陈二 | 26 | 程序员 | 重庆 |
| 10002 | 黄末 | 28 | 作家 | 厦门 |
| 10012 | 杨一 | 34 | 程序员 | 厦门 |`
]
};
@@ -644,3 +719,91 @@ it(`Test splitText2Chunks 9`, () => {

  expect(chunks).toEqual(mock.result);
});

// Paragraph-first splitting test - paragraph depth 0
it(`Test splitText2Chunks 10`, () => {
  const mock = {
    text: `# A
af da da fda a a

## B
段落 2
### D
段落 3
## E
段落 4`,
    result: [
      `# A
af da da fda a a

## B
段落 2
### D
段落 3
## E
段落 4`
    ]
  };

  const { chunks } = splitText2Chunks({ text: mock.text, chunkSize: 2000, paragraphChunkDeep: 0 });
  expect(chunks).toEqual(mock.result);
});

// Paragraph-first splitting test - paragraph depth 1
it(`Test splitText2Chunks 11`, () => {
  const mock = {
    text: `# A
af da da fda a a

## B
段落 2
### D
段落 3
## E
段落 4`,
    result: [
      `# A
af da da fda a a

## B
段落 2
### D
段落 3
## E
段落 4`
    ]
  };

  const { chunks } = splitText2Chunks({ text: mock.text, chunkSize: 2000, paragraphChunkDeep: 1 });
  expect(chunks).toEqual(mock.result);
});

// Paragraph-first splitting test - paragraph depth 2
it(`Test splitText2Chunks 12`, () => {
  const mock = {
    text: `# A
af da da fda a a

## B
段落 2
### D
段落 3
## E
段落 4`,
    result: [
      `# A
af da da fda a a`,
      `# A
## B
段落 2
### D
段落 3`,
      `# A
## E
段落 4`
    ]
  };

  const { chunks } = splitText2Chunks({ text: mock.text, chunkSize: 2000, paragraphChunkDeep: 2 });
  expect(chunks).toEqual(mock.result);
});
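Tests 10-12 fix the semantics of `paragraphChunkDeep`: at depth 0 headings are ignored entirely, at depth 1 only `#` headings are split points (here the whole document sits under a single `# A`, so it stays one chunk), and at depth 2 each `##` section becomes its own chunk, prefixed with its ancestor heading path. A rough sketch of depth-limited heading splitting consistent with those expectations - the function name and shape are illustrative, not the library's internals:

// Sketch: split markdown at headings no deeper than `deep`, re-emitting
// the ancestor heading path at the top of each chunk. Illustration only.
const splitByHeadingDepth = (text: string, deep: number): string[] => {
  if (deep <= 0) return [text]; // depth 0: never split on headings
  const path: string[] = []; // path[i] = active heading of level i + 1
  const chunks: string[][] = [];
  let current: string[] | null = null;
  for (const line of text.split('\n')) {
    const m = line.match(/^(#{1,6}) /);
    const level = m ? m[1].length : 0;
    if (level > 0 && level <= deep) {
      if (current) chunks.push(current); // close the previous chunk
      path[level - 1] = line;
      path.length = level; // drop headings deeper than this one
      current = [...path]; // chunk opens with its ancestor heading path
    } else {
      (current ??= []).push(line);
    }
  }
  if (current) chunks.push(current);
  return chunks.map((c) => c.join('\n'));
};

Running this sketch with deep = 2 over the mock text reproduces the three expected chunks, including the repeated `# A` prefix in the second and third.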
@@ -0,0 +1,380 @@
import { it, expect } from 'vitest'; // must be imported explicitly
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
import { ChunkTriggerConfigTypeEnum } from '@fastgpt/global/core/dataset/constants';

const formatChunks = (
  chunks: {
    q: string;
    a: string;
    indexes?: string[];
  }[]
) => {
  return chunks.map((chunk) => chunk.q.replace(/\s+/g, ''));
};
const formatResult = (result: string[]) => {
  return result.map((item) => item.replace(/\s+/g, ''));
};

// Max-size trigger test - below the max, no split
it(`Test splitText2Chunks 1`, () => {
  const mock = {
    text: `# A

af da da fda a a

## B

阿凡撒发生的都是发大水

### c

dsgsgfsgs22

#### D

dsgsgfsgs22

##### E

dsgsgfsgs22sddddddd
`,
    result: [
      `# A

af da da fda a a

## B

阿凡撒发生的都是发大水

### c

dsgsgfsgs22

#### D

dsgsgfsgs22

##### E

dsgsgfsgs22sddddddd`
    ]
  };

  const data = rawText2Chunks({
    rawText: mock.text,
    chunkTriggerType: ChunkTriggerConfigTypeEnum.maxSize,
    chunkTriggerMinSize: 1000,
    maxSize: 20000,
    chunkSize: 512,
    backupParse: false
  });
  expect(formatChunks(data)).toEqual(formatResult(mock.result));
});
// Max-size trigger test - above the max, split
it(`Test splitText2Chunks 2`, () => {
  const mock = {
    text: `# A

af da da fda a a

## B

阿凡撒发生的都是发大水

### c

dsgsgfsgs22

#### D

dsgsgfsgs22

##### E

dsgsgfsgs22sddddddd`,
    result: [
      `# A

af da da fda a a`,
      `# A
## B

阿凡撒发生的都是发大水`,
      `# A
## B
### c

dsgsgfsgs22`,
      `# A
## B
### c
#### D

dsgsgfsgs22`,
      `# A
## B
### c
#### D
##### E

dsgsgfsgs22sddddddd`
    ]
  };

  const data = rawText2Chunks({
    rawText: mock.text,
    chunkTriggerType: ChunkTriggerConfigTypeEnum.maxSize,
    chunkTriggerMinSize: 10,
    maxSize: 10,
    chunkSize: 512,
    backupParse: false
  });

  expect(formatChunks(data)).toEqual(formatResult(mock.result));
});
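Tests 1 and 2 pin down the `maxSize` trigger; tests 3-6 below do the same for `minSize` and `forceChunk`. Taken together, the expected results are consistent with a decision rule like the following - a reconstruction inferred from the tests, not the library's source:

// Sketch of the trigger decision implied by tests 1-6. The parameter
// names mirror the test calls; the rule itself is inferred, not documented.
const shouldChunk = (params: {
  triggerType: 'forceChunk' | 'minSize' | 'maxSize';
  textLen: number;
  chunkTriggerMinSize: number;
  maxSize: number;
}): boolean => {
  switch (params.triggerType) {
    case 'forceChunk':
      // tests 5-6: always chunk, regardless of size
      return true;
    case 'minSize':
      // tests 3-4: chunk only once the text exceeds the minimum
      return params.textLen > params.chunkTriggerMinSize;
    case 'maxSize':
      // tests 1-2: keep the text whole while it fits under both limits
      return params.textLen > params.chunkTriggerMinSize || params.textLen > params.maxSize;
  }
};

Once the trigger fires, the split presumably falls through to the heading-based chunking exercised above, which is why the split variants of each pair share the same expected heading-prefixed chunks.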

// Min-size trigger test - above the min, no split
it(`Test splitText2Chunks 3`, () => {
  const mock = {
    text: `# A

af da da fda a a

## B

阿凡撒发生的都是发大水

### c

dsgsgfsgs22

#### D

dsgsgfsgs22

##### E

dsgsgfsgs22sddddddd`,
    result: [
      `# A

af da da fda a a

## B

阿凡撒发生的都是发大水

### c

dsgsgfsgs22

#### D

dsgsgfsgs22

##### E

dsgsgfsgs22sddddddd`
    ]
  };

  const data = rawText2Chunks({
    rawText: mock.text,
    chunkTriggerType: ChunkTriggerConfigTypeEnum.minSize,
    chunkTriggerMinSize: 1000,
    maxSize: 1000,
    chunkSize: 512,
    backupParse: false
  });

  expect(formatChunks(data)).toEqual(formatResult(mock.result));
});
// Min-size trigger test - below the min, split
it(`Test splitText2Chunks 4`, () => {
  const mock = {
    text: `# A

af da da fda a a

## B

阿凡撒发生的都是发大水

### c

dsgsgfsgs22

#### D

dsgsgfsgs22

##### E

dsgsgfsgs22sddddddd`,
    result: [
      `# A

af da da fda a a`,
      `# A
## B

阿凡撒发生的都是发大水`,
      `# A
## B
### c

dsgsgfsgs22`,
      `# A
## B
### c
#### D

dsgsgfsgs22`,
      `# A
## B
### c
#### D
##### E

dsgsgfsgs22sddddddd`
    ]
  };

  const data = rawText2Chunks({
    rawText: mock.text,
    chunkTriggerType: ChunkTriggerConfigTypeEnum.minSize,
    chunkTriggerMinSize: 10,
    maxSize: 10,
    chunkSize: 512,
    backupParse: false
  });

  expect(formatChunks(data)).toEqual(formatResult(mock.result));
});

// Force-chunk test - below both the min and the max
it(`Test splitText2Chunks 5`, () => {
  const mock = {
    text: `# A

af da da fda a a

## B

阿凡撒发生的都是发大水

### c

dsgsgfsgs22

#### D

dsgsgfsgs22

##### E

dsgsgfsgs22sddddddd`,
    result: [
      `# A

af da da fda a a`,
      `# A
## B

阿凡撒发生的都是发大水`,
      `# A
## B
### c

dsgsgfsgs22`,
      `# A
## B
### c
#### D

dsgsgfsgs22`,
      `# A
## B
### c
#### D
##### E

dsgsgfsgs22sddddddd`
    ]
  };

  const data = rawText2Chunks({
    rawText: mock.text,
    chunkTriggerType: ChunkTriggerConfigTypeEnum.forceChunk,
    chunkTriggerMinSize: 1000,
    maxSize: 10000,
    chunkSize: 512,
    backupParse: false
  });

  expect(formatChunks(data)).toEqual(formatResult(mock.result));
});

// Force-chunk test - above the min
it(`Test splitText2Chunks 6`, () => {
  const mock = {
    text: `# A

af da da fda a a

## B

阿凡撒发生的都是发大水

### c

dsgsgfsgs22

#### D

dsgsgfsgs22

##### E

dsgsgfsgs22sddddddd`,
    result: [
      `# A

af da da fda a a`,
      `# A
## B

阿凡撒发生的都是发大水`,
      `# A
## B
### c

dsgsgfsgs22`,
      `# A
## B
### c
#### D

dsgsgfsgs22`,
      `# A
## B
### c
#### D
##### E

dsgsgfsgs22sddddddd`
    ]
  };

  const data = rawText2Chunks({
    rawText: mock.text,
    chunkTriggerType: ChunkTriggerConfigTypeEnum.forceChunk,
    chunkTriggerMinSize: 10,
    maxSize: 10000,
    chunkSize: 512,
    backupParse: false
  });

  expect(formatChunks(data)).toEqual(formatResult(mock.result));
});
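For reference, a usage sketch mirroring the call shape the tests use. The values are illustrative, and the inline notes on each parameter are inferred from tests 1-6 rather than from documented behaviour:

// Usage sketch (hypothetical values); chunks expose `q` as in formatChunks.
const chunks = rawText2Chunks({
  rawText: '# Title\n\nsome long markdown...',
  chunkTriggerType: ChunkTriggerConfigTypeEnum.forceChunk, // always split (tests 5-6)
  chunkTriggerMinSize: 1000, // apparently not consulted under forceChunk
  maxSize: 10000, // upper bound used by the maxSize trigger (tests 1-2)
  chunkSize: 512, // target size per chunk
  backupParse: false
});
console.log(chunks.map((c) => c.q));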