Compare commits

..

1 Commits

Author SHA1 Message Date
Dechao Sun
63028dacb2 support openGauss vector store (#4819) 2025-05-28 10:49:06 +08:00
248 changed files with 2188 additions and 6128 deletions

View File

@@ -132,15 +132,15 @@ services:
# fastgpt
sandbox:
container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # 阿里云
networks:
- fastgpt
restart: always
fastgpt-mcp-server:
container_name: fastgpt-mcp-server
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # 阿里云
ports:
- 3005:3000
networks:
@@ -150,8 +150,8 @@ services:
- FASTGPT_ENDPOINT=http://fastgpt:3000
fastgpt:
container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
image: ghcr.io/labring/fastgpt:v4.9.10 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # 阿里云
ports:
- 3000:3000
networks:

View File

@@ -109,15 +109,15 @@ services:
# fastgpt
sandbox:
container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # 阿里云
networks:
- fastgpt
restart: always
fastgpt-mcp-server:
container_name: fastgpt-mcp-server
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # 阿里云
ports:
- 3005:3000
networks:
@@ -127,8 +127,8 @@ services:
- FASTGPT_ENDPOINT=http://fastgpt:3000
fastgpt:
container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
image: ghcr.io/labring/fastgpt:v4.9.10 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # 阿里云
ports:
- 3000:3000
networks:

View File

@@ -0,0 +1,218 @@
# 数据库的默认账号和密码仅首次运行时设置有效
# 如果修改了账号密码,记得改数据库和项目连接参数,别只改一处~
# 该配置文件只是给快速启动,测试使用。正式使用,记得务必修改账号密码,以及调整合适的知识库参数,共享内存等。
# 如果无法访问 dockerhub 和 git,可以用阿里云(阿里云没有 arm 包)
version: '3.3'
services:
# db
gs:
  # openGauss database container (the vector store for this compose file).
  container_name: gs
  image: opengauss/opengauss:7.0.0-RC1 # Docker Hub
  restart: always
  # Exposing the database port is not recommended in production.
  # ports:
  #   - 5432:5432
  networks: [fastgpt]
  volumes:
    - ./opengauss/data:/var/lib/opengauss/data
  # These values only take effect on the very first start. After changing
  # them, delete the persisted data and restart for them to apply.
  environment:
    GS_USER: username
    GS_PASSWORD: password
    GS_DB: postgres
  healthcheck:
    # Healthy as soon as netstat reports at least one tcp6 listener.
    test:
      - CMD-SHELL
      - 'netstat -lntp | grep tcp6 > /dev/null 2>&1'
    interval: 10s
    timeout: 10s
    retries: 10
mongo:
# MongoDB started as replica set "rs0" with key-file authentication
# (see `command`); the init script below registers a single member, mongo:27017.
image: mongo:5.0.18 # Docker Hub
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/mongo:5.0.18 # Aliyun mirror
# image: mongo:4.4.29 # use this one when the CPU does not support AVX
container_name: mongo
restart: always
# ports:
# - 27017:27017
networks:
- fastgpt
# Arguments forwarded to docker-entrypoint.sh through "$@" by the custom entrypoint below.
command: mongod --keyFile /data/mongodb.key --replSet rs0
# Root credentials; the entrypoint script hard-codes the same user name and
# password for its `mongo` shell calls, so change both places together.
environment:
- MONGO_INITDB_ROOT_USERNAME=myusername
- MONGO_INITDB_ROOT_PASSWORD=mypassword
volumes:
- ./mongo/data:/data/db
# Custom entrypoint, in order:
#   1. generate /data/mongodb.key with openssl, restrict it to mode 400, chown it to 999:999
#   2. write /data/initReplicaSet.js, which calls rs.initiate() only when rs.status().ok is not 1
#   3. start the stock docker-entrypoint.sh in the background
#   4. poll with the `mongo` shell until a connection succeeds, then run the init script
#   5. wait on the background server process
# `$$` is the Compose escape for a literal `$`, so the shell sees "$@" and "$!".
entrypoint:
- bash
- -c
- |
openssl rand -base64 128 > /data/mongodb.key
chmod 400 /data/mongodb.key
chown 999:999 /data/mongodb.key
echo 'const isInited = rs.status().ok === 1
if(!isInited){
rs.initiate({
_id: "rs0",
members: [
{ _id: 0, host: "mongo:27017" }
]
})
}' > /data/initReplicaSet.js
# 启动MongoDB服务
exec docker-entrypoint.sh "$$@" &
# 等待MongoDB服务启动
until mongo -u myusername -p mypassword --authenticationDatabase admin --eval "print('waited for connection')"; do
echo "Waiting for MongoDB to start..."
sleep 2
done
# 执行初始化副本集的脚本
mongo -u myusername -p mypassword --authenticationDatabase admin /data/initReplicaSet.js
# 等待docker-entrypoint.sh脚本执行的MongoDB服务进程
wait $$!
redis:
  # Password-protected Redis with append-only persistence under ./redis/data.
  container_name: redis
  image: redis:7.2-alpine
  restart: always
  # ports:
  #   - 6379:6379
  networks: [fastgpt]
  volumes:
    - ./redis/data:/data
  # Folded scalar: the lines below are joined with single spaces into one command line.
  command: >-
    redis-server
    --requirepass mypassword
    --loglevel warning
    --maxclients 10000
    --appendonly yes
    --save 60 10
    --maxmemory 4gb
    --maxmemory-policy noeviction
  healthcheck:
    # The password passed to redis-cli must match --requirepass above.
    test:
      - CMD
      - redis-cli
      - '-a'
      - mypassword
      - ping
    interval: 10s
    timeout: 3s
    retries: 3
    start_period: 30s
# fastgpt
sandbox:
  # FastGPT sandbox; the fastgpt service reaches it through SANDBOX_URL.
  image: ghcr.io/labring/fastgpt-sandbox:v4.9.7-fix2 # GitHub Container Registry
  # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.7-fix2 # Aliyun mirror
  container_name: sandbox
  restart: always
  networks: [fastgpt]
fastgpt-mcp-server:
  # MCP server for FastGPT; published on host port 3005.
  image: ghcr.io/labring/fastgpt-mcp_server:v4.9.7-fix2 # GitHub Container Registry
  # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.7-fix2 # Aliyun mirror
  container_name: fastgpt-mcp-server
  restart: always
  networks: [fastgpt]
  ports:
    - '3005:3000'
  environment:
    # Address of the fastgpt service on the shared compose network.
    FASTGPT_ENDPOINT: http://fastgpt:3000
fastgpt:
  # Main FastGPT application, wired to the mongo, gs (openGauss), redis,
  # sandbox and aiproxy services defined in this file.
  container_name: fastgpt
  image: ghcr.io/labring/fastgpt:v4.9.7-fix2 # GitHub Container Registry
  # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.7-fix2 # Aliyun mirror
  # image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/ghcr.io/labring/fastgpt:v4.8.4-linuxarm64 # openGauss performs better on ARM
  ports:
    - '3000:3000'
  networks:
    - fastgpt
  depends_on:
    - mongo
    - gs
    - sandbox
  restart: always
  environment:
    # Externally reachable front-end address, used to complete file resource
    # paths, e.g. https://fastgpt.cn (do not use localhost). Optional: when
    # empty, images sent to the model use a relative path instead of a full
    # URL, and the model may make up the host.
    - FE_DOMAIN=
    # Password of the root user (user name: root). To change it, edit this
    # variable and restart.
    - DEFAULT_ROOT_PSW=1234
    # AI Proxy address; takes priority when configured.
    - AIPROXY_API_ENDPOINT=http://aiproxy:3000
    # AI Proxy admin token; must equal ADMIN_KEY of the aiproxy service.
    - AIPROXY_API_TOKEN=aiproxy
    # Maximum number of database connections.
    - DB_MAX_LINK=30
    # Secret used for login credentials.
    - TOKEN_KEY=any
    # Root key, commonly used for initialization requests during upgrades.
    - ROOT_KEY=root_key
    # File read encryption key.
    - FILE_TOKEN_KEY=filetoken
    # MongoDB connection string (user myusername, password mypassword).
    - MONGODB_URI=mongodb://myusername:mypassword@mongo:27017/fastgpt?authSource=admin
    # openGauss connection string. Kept in sync with GS_USER / GS_PASSWORD /
    # GS_DB of the gs service; 5432 is the port shown in that service's
    # commented-out mapping.
    # NOTE(review): confirm the user, port and database the openGauss image
    # actually creates on first start.
    - OPENGAUSS_URL=opengauss://username:password@gs:5432/postgres
    # Redis connection string; the password must match --requirepass of the redis service.
    - REDIS_URL=redis://default:mypassword@redis:6379
    # Sandbox address.
    - SANDBOX_URL=http://sandbox:3000
    # Log level: debug, info, warn, error.
    - LOG_LEVEL=info
    - STORE_LOG_LEVEL=warn
    # Maximum number of workflow runs.
    - WORKFLOW_MAX_RUN_TIMES=1000
    # Maximum input length for batch-execution nodes.
    - WORKFLOW_MAX_LOOP_TIMES=100
    # Custom CORS origins (comma-separated); all origins are allowed when empty.
    - ALLOWED_ORIGINS=
    # Whether to enable IP rate limiting (off by default).
    - USE_IP_LIMIT=false
    # Number of days after which chat files expire.
    - CHAT_FILE_EXPIRE_TIME=7
  volumes:
    - ./config.json:/app/data/config.json
# AI Proxy
aiproxy:
  # AI Proxy; starts only after its PostgreSQL backend reports healthy.
  container_name: aiproxy
  image: ghcr.io/labring/aiproxy:v0.1.7
  # image: registry.cn-hangzhou.aliyuncs.com/labring/aiproxy:v0.1.7 # Aliyun mirror
  restart: unless-stopped
  networks: [fastgpt]
  depends_on:
    aiproxy_pg:
      condition: service_healthy
  environment:
    # Must equal AIPROXY_API_TOKEN of the fastgpt service.
    ADMIN_KEY: aiproxy
    # How long error-log details are kept, in hours.
    LOG_DETAIL_STORAGE_HOURS: '1'
    # Database connection string.
    SQL_DSN: postgres://postgres:aiproxy@aiproxy_pg:5432/aiproxy
    # Maximum number of retries.
    RETRY_TIMES: '3'
    # Billing is not needed.
    BILLING_ENABLED: 'false'
    # Strict model checking is not needed.
    DISABLE_MODEL_CONFIG: 'true'
  healthcheck:
    test:
      - CMD
      - curl
      - '-f'
      - http://localhost:3000/api/status
    interval: 5s
    timeout: 5s
    retries: 10
aiproxy_pg:
  # PostgreSQL (pgvector image) backing the aiproxy service.
  container_name: aiproxy_pg
  image: pgvector/pgvector:0.8.0-pg15 # Docker Hub
  # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.8.0-pg15 # Aliyun mirror
  restart: unless-stopped
  networks: [fastgpt]
  volumes:
    - ./aiproxy_pg:/var/lib/postgresql/data
  # User, password and database must match SQL_DSN of the aiproxy service.
  environment:
    - TZ=Asia/Shanghai
    - POSTGRES_USER=postgres
    - POSTGRES_DB=aiproxy
    - POSTGRES_PASSWORD=aiproxy
  healthcheck:
    test:
      - CMD
      - pg_isready
      - '-U'
      - postgres
      - '-d'
      - aiproxy
    interval: 5s
    timeout: 5s
    retries: 10
networks:
fastgpt:

View File

@@ -96,15 +96,15 @@ services:
# fastgpt
sandbox:
container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # 阿里云
networks:
- fastgpt
restart: always
fastgpt-mcp-server:
container_name: fastgpt-mcp-server
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # 阿里云
ports:
- 3005:3000
networks:
@@ -114,8 +114,8 @@ services:
- FASTGPT_ENDPOINT=http://fastgpt:3000
fastgpt:
container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
image: ghcr.io/labring/fastgpt:v4.9.10 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # 阿里云
ports:
- 3000:3000
networks:

View File

@@ -72,15 +72,15 @@ services:
sandbox:
container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-sandbox:v4.9.10 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.10 # 阿里云
networks:
- fastgpt
restart: always
fastgpt-mcp-server:
container_name: fastgpt-mcp-server
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10-fix2 # 阿里云
image: ghcr.io/labring/fastgpt-mcp_server:v4.9.10 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.9.10 # 阿里云
ports:
- 3005:3000
networks:
@@ -90,8 +90,8 @@ services:
- FASTGPT_ENDPOINT=http://fastgpt:3000
fastgpt:
container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.9.10-fix2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10-fix2 # 阿里云
image: ghcr.io/labring/fastgpt:v4.9.10 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.10 # 阿里云
ports:
- 3000:3000
networks:

232
dev.md
View File

@@ -1,118 +1,114 @@
## Premise
FastGPT is managed as a monorepo, so it is recommended to install `make` before you start developing.
monorepo Project Name:
- app: main project
-......
## Dev
```sh
# Give automatic script code execution permission (on non-Linux systems, you can manually execute the postinstall.sh file content)
chmod -R +x ./scripts/
# Executing under the code root directory installs all dependencies within the root package, projects, and packages
pnpm i
# Not make cmd
cd projects/app
pnpm dev
# Make cmd
make dev name=app
```
Note: If the Node version is >= 20, you need to pass the `--no-node-snapshot` parameter to Node when running `pnpm i`
```sh
NODE_OPTIONS=--no-node-snapshot pnpm i
```
### Jest
https://fael3z0zfze.feishu.cn/docx/ZOI1dABpxoGhS7xzhkXcKPxZnDL
## I18N
### Install i18n-ally Plugin
1. Open the Extensions Marketplace in VSCode, search for and install the `i18n Ally` plugin.
### Code Optimization Examples
#### Fetch Specific Namespace Translations in `getServerSideProps`
```typescript
// pages/yourPage.tsx
export async function getServerSideProps(context: any) {
return {
props: {
currentTab: context?.query?.currentTab || TabEnum.info,
...(await serverSideTranslations(context.locale, ['publish', 'user']))
}
};
}
```
#### Use useTranslation Hook in Page
```typescript
// pages/yourPage.tsx
import { useTranslation } from 'next-i18next';
const YourComponent = () => {
const { t } = useTranslation();
return (
<Button
variant="outline"
size="sm"
mr={2}
onClick={() => setShowSelected(false)}
>
{t('common:close')}
</Button>
);
};
export default YourComponent;
```
#### Handle Static File Translations
```typescript
// utils/i18n.ts
import { i18nT } from '@fastgpt/web/i18n/utils';
const staticContent = {
id: 'simpleChat',
avatar: 'core/workflow/template/aiChat',
name: i18nT('app:template.simple_robot'),
};
export default staticContent;
```
### Standardize Translation Format
- Use the t(namespace:key) format to ensure consistent naming.
- Translation keys should use lowercase letters and underscores, e.g., common.close.
## audit
To add an audit log: add an entry to `OperationLogEventEnum` and to the operationLog/audit TypeScript code, add the matching i18n text at the corresponding position, and call the `addOpearationLog` function at the place where the log should be recorded.
## Build
```sh
# Docker cmd: Build image, not proxy
docker build -f ./projects/app/Dockerfile -t registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.1 . --build-arg name=app
# Make cmd: Build image, not proxy
make build name=app image=registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.1
# Docker cmd: Build image with proxy
docker build -f ./projects/app/Dockerfile -t registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.1 . --build-arg name=app --build-arg proxy=taobao
# Make cmd: Build image with proxy
make build name=app image=registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.1 proxy=taobao
```
## Premise
FastGPT is managed as a monorepo, so it is recommended to install `make` before you start developing.
monorepo Project Name:
- app: main project
-......
## Dev
```sh
# Give automatic script code execution permission (on non-Linux systems, you can manually execute the postinstall.sh file content)
chmod -R +x ./scripts/
# Executing under the code root directory installs all dependencies within the root package, projects, and packages
pnpm i
# Not make cmd
cd projects/app
pnpm dev
# Make cmd
make dev name=app
```
Note: If the Node version is >= 20, you need to pass the `--no-node-snapshot` parameter to Node when running `pnpm i`
```sh
NODE_OPTIONS=--no-node-snapshot pnpm i
```
### Jest
https://fael3z0zfze.feishu.cn/docx/ZOI1dABpxoGhS7xzhkXcKPxZnDL
## I18N
### Install i18n-ally Plugin
1. Open the Extensions Marketplace in VSCode, search for and install the `i18n Ally` plugin.
### Code Optimization Examples
#### Fetch Specific Namespace Translations in `getServerSideProps`
```typescript
// pages/yourPage.tsx
export async function getServerSideProps(context: any) {
return {
props: {
currentTab: context?.query?.currentTab || TabEnum.info,
...(await serverSideTranslations(context.locale, ['publish', 'user']))
}
};
}
```
#### Use useTranslation Hook in Page
```typescript
// pages/yourPage.tsx
import { useTranslation } from 'next-i18next';
const YourComponent = () => {
const { t } = useTranslation();
return (
<Button
variant="outline"
size="sm"
mr={2}
onClick={() => setShowSelected(false)}
>
{t('common:close')}
</Button>
);
};
export default YourComponent;
```
#### Handle Static File Translations
```typescript
// utils/i18n.ts
import { i18nT } from '@fastgpt/web/i18n/utils';
const staticContent = {
id: 'simpleChat',
avatar: 'core/workflow/template/aiChat',
name: i18nT('app:template.simple_robot'),
};
export default staticContent;
```
### Standardize Translation Format
- Use the t(namespace:key) format to ensure consistent naming.
- Translation keys should use lowercase letters and underscores, e.g., common.close.
## Build
```sh
# Docker cmd: Build image, not proxy
docker build -f ./projects/app/Dockerfile -t registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.1 . --build-arg name=app
# Make cmd: Build image, not proxy
make build name=app image=registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.1
# Docker cmd: Build image with proxy
docker build -f ./projects/app/Dockerfile -t registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.1 . --build-arg name=app --build-arg proxy=taobao
# Make cmd: Build image with proxy
make build name=app image=registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.1 proxy=taobao
```

Binary file not shown.

Before

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 73 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 62 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 69 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 110 KiB

View File

@@ -645,7 +645,7 @@ data 为集合的 ID。
{{< /tab >}}
{{< /tabs >}}
### 创建一个外部文件库集合(弃用)
### 创建一个外部文件库集合(商业版)
{{< tabs tabTotal="3" >}}
{{< tab tabName="请求示例" >}}

View File

@@ -1,5 +1,5 @@
---
title: 'V4.9.1(包含升级脚本)'
title: 'V4.9.1'
description: 'FastGPT V4.9.1 更新说明'
icon: 'upgrade'
draft: false

View File

@@ -15,8 +15,8 @@ weight: 790
### 2. 更新镜像 tag
- 更新 FastGPT 镜像 tag: v4.9.10-fix2
- 更新 FastGPT 商业版镜像 tag: v4.9.10-fix2
- 更新 FastGPT 镜像 tag: v4.9.10
- 更新 FastGPT 商业版镜像 tag: v4.9.10
- mcp_server 无需更新
- Sandbox 无需更新
- AIProxy 无需更新

View File

@@ -7,36 +7,15 @@ toc: true
weight: 789
---
## 执行升级脚本
该脚本仅需商业版用户执行。
从任意终端,发起 1 个 HTTP 请求。其中 {{rootkey}} 替换成环境变量里的 `rootkey`,{{host}} 替换成**FastGPT 域名**。
```bash
curl --location --request POST 'https://{{host}}/api/admin/initv4911' \
--header 'rootkey: {{rootkey}}' \
--header 'Content-Type: application/json'
```
**脚本功能**
1. 移动第三方知识库 API 配置。
## 🚀 新增内容
1. 商业版支持图片知识库
2. 工作流中增加节点搜索功能。
3. 工作流中,子流程版本控制,可选择“保持最新版本”,无需手动更新。
4. 增加更多审计操作日志。
1. 工作流中,子流程版本控制,可选择“保持最新版本”,无需手动更新
## ⚙️ 优化
1. 原文缓存改用 gridfs 存储,提高上限。
## 🐛 修复
1. 工作流中,管理员声明的全局系统工具,无法进行版本管理。
2. 工具调用节点前,有交互节点时,上下文异常。
3. 修复备份导入,小于 1000 字时,无法分块问题。
4. 自定义 PDF 解析,无法保存 base64 图片。
1. 工作流中,管理员声明的全局系统工具,无法进行版本管理。

View File

@@ -1,5 +1,5 @@
---
title: 'V4.9.4(包含升级脚本)'
title: 'V4.9.4'
description: 'FastGPT V4.9.4 更新说明'
icon: 'upgrade'
draft: false

View File

@@ -1,161 +0,0 @@
---
title: '第三方知识库开发'
description: '本节详细介绍如何在FastGPT上自己接入第三方知识库'
icon: 'language'
draft: false
toc: true
weight: 410
---
目前,互联网上拥有各种各样的文档库,例如飞书,语雀等等。 FastGPT 的不同用户可能使用的文档库不同,目前 FastGPT 内置了飞书、语雀文档库,如果需要接入其他文档库,可以参考本节内容。
## 统一的接口规范
为了实现对不同文档库的统一接入,FastGPT 对第三方文档库进行了接口的规范,共包含 4 个接口内容,可以[查看 API 文件库接口](/docs/guide/knowledge_base/api_dataset/)。
所有内置的文档库,都是基于标准的 API 文件库进行扩展。可以参考`FastGPT/packages/service/core/dataset/apiDataset/yuqueDataset/api.ts`中的代码,进行其他文档库的扩展。一共需要完成 4 个接口开发:
1. 获取文件列表
2. 获取文件内容/文件链接
3. 获取原文预览地址
4. 获取文件详情信息
## 开始一个第三方文件库
为了方便讲解,这里以添加飞书知识库为例。
### 1. 添加第三方文档库参数
首先,要进入 FastGPT 项目路径下的`FastGPT\packages\global\core\dataset\apiDataset.d.ts`文件,添加第三方文档库 Server 类型。例如,语雀文档中,需要提供`userId`、`token`两个字段作为鉴权信息。
```ts
export type YuqueServer = {
userId: string;
token?: string;
basePath?: string;
};
```
{{% alert icon="🤖 " context="success" %}}
如果文档库有`根目录`选择的功能,需要设置添加一个字段`basePath`
{{% /alert %}}
### 2. 创建 Hook 文件
每个第三方文档库都会采用 Hook 的方式来实现一套 API 接口的维护Hook 里包含 4 个函数需要完成。
- 在`FastGPT\packages\service\core\dataset\apiDataset\`下创建一个文档库的文件夹,然后在文件夹下创建一个`api.ts`文件
- 在`api.ts`文件中,需要完成 4 个函数的定义,分别是:
- `listFiles`:获取文件列表
- `getFileContent`:获取文件内容/文件链接
- `getFileDetail`:获取文件详情信息
- `getFilePreviewUrl`:获取原文预览地址
### 3. 数据库添加配置字段
- 在`packages/service/core/dataset/schema.ts` 中添加第三方文档库的配置字段,类型统一设置成`Object`
- 在`FastGPT/packages/global/core/dataset/type.d.ts`中添加第三方文档库配置字段的数据类型,类型设置为第一步创建的参数。
![](/imgs/thirddataset-7.png)
{{% alert icon="🤖 " context="success" %}}
`schema.ts`文件修改后,需要重新启动 FastGPT 项目才会生效。
{{% /alert %}}
### 4. 添加知识库类型
`projects/app/src/web/core/dataset/constants.ts`中,添加自己的知识库类型
```TS
export const datasetTypeCourseMap: Record<`${DatasetTypeEnum}`, string> = {
[DatasetTypeEnum.folder]: '',
[DatasetTypeEnum.dataset]: '',
[DatasetTypeEnum.apiDataset]: '/docs/guide/knowledge_base/api_dataset/',
[DatasetTypeEnum.websiteDataset]: '/docs/guide/knowledge_base/websync/',
[DatasetTypeEnum.feishuShare]: '/docs/guide/knowledge_base/lark_share_dataset/',
[DatasetTypeEnum.feishuKnowledge]: '/docs/guide/knowledge_base/lark_knowledge_dataset/',
[DatasetTypeEnum.yuque]: '/docs/guide/knowledge_base/yuque_dataset/',
[DatasetTypeEnum.externalFile]: ''
};
```
{{% alert icon="🤖 " context="success" %}}
在 datasetTypeCourseMap 中添加自己的知识库类型,`' '`内是相应的文档说明,如果有的话,可以添加。
文档添加在`FastGPT\docSite\content\zh-cn\docs\guide\knowledge_base\`
{{% /alert %}}
## 添加前端
`FastGPT\packages\web\i18n\zh-CN\dataset.json`,`FastGPT\packages\web\i18n\en\dataset.json``FastGPT\packages\web\i18n\zh-Hant\dataset.json`中添加自己的 I18n 翻译,以中文翻译为例,大体需要如下几个内容:
![](/imgs/thirddataset-24.png)
`FastGPT\packages\web\components\common\Icon\icons\core\dataset\`添加自己的知识库图标,一共是两个,分为`Outline``Color`,分别是有颜色的和无色的,具体看如下图片。
![](/imgs/thirddataset-10.png)
`FastGPT\packages\web\components\common\Icon\constants.ts`文件中,添加自己的图标。 `import` 是图标的存放路径。
![](/imgs/thirddataset-9.png)
`FastGPT\packages\global\core\dataset\constants.ts`文件中,添加自己的知识库类型。
![](/imgs/thirddataset-8.png)
{{% alert icon="🤖 " context="success" %}}
`label`内容是自己之前通过 i18n 翻译添加的知识库名称的
`icon`是自己之前添加的 Icon , I18n 的添加看最后清单。
{{% /alert %}}
`FastGPT\projects\app\src\pages\dataset\list\index.tsx`文件下,添加如下内容。这个文件负责的是知识库列表页的`新建`按钮点击后的菜单,只有在该文件添加知识库后,才能创建知识库。
![](/imgs/thirddataset-12.png)
`FastGPT\projects\app\src\pageComponents\dataset\detail\Info\index.tsx`文件下,添加如下内容。
![](/imgs/thirddataset-18.png)
`FastGPT\projects\app\src\pageComponents\dataset\list\CreateModal.tsx`文件下,添加如下内容。
| | |
| --- | --- |
| ![](/imgs/thirddataset-19.png) | ![](/imgs/thirddataset-20.png) |
`FastGPT\projects\app\src\pageComponents\dataset\list\SideTag.tsx`文件下,添加如下内容。
![](/imgs/thirddataset-21.png)
`FastGPT\projects\app\src\web\core\dataset\context\datasetPageContext.tsx`文件下,添加如下内容。
![](/imgs/thirddataset-23.png)
## 添加配置表单
`FastGPT\projects\app\src\pageComponents\dataset\ApiDatasetForm.tsx`文件下,添加自己如下内容。这个文件负责的是创建知识库页的字段填写。
| | | |
| --- | --- | --- |
| ![](/imgs/thirddataset-13.png) | ![](/imgs/thirddataset-14.png) | ![](/imgs/thirddataset-15.png) |
代码中添加的两个组件是对根目录选择的渲染,对应设计的 api 的 getfiledetail 方法,如果你的文件不支持,你可以不引用。
```
{renderBaseUrlSelector()} //这是对`Base URL`字段的渲染
{renderDirectoryModal()} //点击`选择`后出现的`选择根目录`窗口,见图
```
| | |
| --- | --- |
| ![](/imgs/thirddataset-16.png) | ![](/imgs/thirddataset-17.png) |
如果知识库需要支持根目录,还需要在`ApiDatasetForm`文件中添加相关内容。
## 添加杂项
最后,需要在很多文件里添加`server`类型,这里由于文件过多,且不大,不一一列举文件的清单。只提供方法:使用自己编程工具的全局搜索功能,搜索`YuqueServer``yuqueServer`。在搜索到的文件中,逐一添加自己的知识库类型。
## 提示
建议知识库创建完成后,完整测试一遍知识库的功能,以确定有无漏洞,如果你的知识库添加有问题,且无法在文档找到对应的文件解决,一定是杂项没有添加完全,建议重复一次全局搜索`YuqueServer``yuqueServer`,检查是否有地方没有加上自己的类型。

1
env.d.ts vendored
View File

@@ -15,6 +15,7 @@ declare global {
MONGODB_LOG_URI?: string;
PG_URL: string;
OCEANBASE_URL: string;
OPENGAUSS_URL: string;
MILVUS_ADDRESS: string;
MILVUS_TOKEN: string;
SANDBOX_URL: string;

View File

@@ -6,8 +6,7 @@ export const fileImgs = [
{ suffix: '(doc|docs)', src: 'file/fill/doc' },
{ suffix: 'txt', src: 'file/fill/txt' },
{ suffix: 'md', src: 'file/fill/markdown' },
{ suffix: 'html', src: 'file/fill/html' },
{ suffix: '(jpg|jpeg|png|gif|bmp|webp|svg|ico|tiff|tif)', src: 'image' }
{ suffix: 'html', src: 'file/fill/html' }
// { suffix: '.', src: '/imgs/files/file.svg' }
];

View File

@@ -2,5 +2,4 @@ export type AuthFrequencyLimitProps = {
eventId: string;
maxAmount: number;
expiredTime: Date;
num?: number;
};

View File

@@ -34,7 +34,7 @@ export const valToStr = (val: any) => {
};
// replace {{variable}} to value
export function replaceVariable(text: any, obj: Record<string, string | number | undefined>) {
export function replaceVariable(text: any, obj: Record<string, string | number>) {
if (typeof text !== 'string') return text;
for (const key in obj) {

View File

@@ -1,9 +1,4 @@
import type {
ChunkSettingsType,
DatasetDataIndexItemType,
DatasetDataFieldType,
DatasetSchemaType
} from './type';
import type { ChunkSettingsType, DatasetDataIndexItemType, DatasetSchemaType } from './type';
import type {
DatasetCollectionTypeEnum,
DatasetCollectionDataProcessModeEnum,
@@ -12,14 +7,12 @@ import type {
ChunkTriggerConfigTypeEnum,
ParagraphChunkAIModeEnum
} from './constants';
import type { ParentIdType } from '../../common/parentFolder/type';
import type { LLMModelItemType } from '../ai/model.d';
import type { ParentIdType } from 'common/parentFolder/type';
/* ================= dataset ===================== */
export type DatasetUpdateBody = {
id: string;
apiDatasetServer?: DatasetSchemaType['apiDatasetServer'];
parentId?: ParentIdType;
name?: string;
avatar?: string;
@@ -31,6 +24,9 @@ export type DatasetUpdateBody = {
websiteConfig?: DatasetSchemaType['websiteConfig'];
externalReadUrl?: DatasetSchemaType['externalReadUrl'];
defaultPermission?: DatasetSchemaType['defaultPermission'];
apiServer?: DatasetSchemaType['apiServer'];
yuqueServer?: DatasetSchemaType['yuqueServer'];
feishuServer?: DatasetSchemaType['feishuServer'];
chunkSettings?: DatasetSchemaType['chunkSettings'];
// sync schedule
@@ -104,9 +100,6 @@ export type ExternalFileCreateDatasetCollectionParams = ApiCreateDatasetCollecti
externalFileUrl: string;
filename?: string;
};
export type ImageCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
collectionName: string;
};
/* ================= tag ===================== */
export type CreateDatasetCollectionTagParams = {
@@ -131,10 +124,16 @@ export type PgSearchRawType = {
collection_id: string;
score: number;
};
export type GsSearchRawType = {
id: string;
collection_id: string;
score: number;
};
export type PushDatasetDataChunkProps = {
q?: string;
a?: string;
imageId?: string;
q: string; // embedding content
a?: string; // bonus content
chunkIndex?: number;
indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
};

View File

@@ -1,5 +1,5 @@
import { RequireOnlyOne } from '../../../common/type/utils';
import type { ParentIdType } from '../../../common/parentFolder/type';
import { RequireOnlyOne } from '../../common/type/utils';
import type { ParentIdType } from '../../common/parentFolder/type.d';
export type APIFileItem = {
id: string;
@@ -28,12 +28,6 @@ export type YuqueServer = {
basePath?: string;
};
export type ApiDatasetServerType = {
apiServer?: APIFileServer;
feishuServer?: FeishuServer;
yuqueServer?: YuqueServer;
};
// Api dataset api
export type APIFileListResponse = APIFileItem[];

View File

@@ -1,31 +0,0 @@
import type { ApiDatasetServerType } from './type';
/**
 * Build a copy of an API dataset server config with every secret blanked out.
 *
 * Identifying fields (base URL / base path, user id, app id / folder token)
 * are copied through, while `authorization`, `token` and `appSecret` are
 * always replaced by an empty string.
 *
 * @param apiDatasetServer - Config that may hold `apiServer`, `yuqueServer` and `feishuServer`.
 * @returns `undefined` when no config is passed; otherwise an object with the
 *   three server keys, each `undefined` when that server is missing or falsy.
 */
export const filterApiDatasetServerPublicData = (apiDatasetServer?: ApiDatasetServerType) => {
  if (!apiDatasetServer) {
    return undefined;
  }

  const api = apiDatasetServer.apiServer;
  const yuque = apiDatasetServer.yuqueServer;
  const feishu = apiDatasetServer.feishuServer;

  // Each public view keeps the non-secret fields and empties the credential.
  const publicApiServer = api
    ? { baseUrl: api.baseUrl, authorization: '', basePath: api.basePath }
    : undefined;
  const publicYuqueServer = yuque
    ? { userId: yuque.userId, token: '', basePath: yuque.basePath }
    : undefined;
  const publicFeishuServer = feishu
    ? { appId: feishu.appId, appSecret: '', folderToken: feishu.folderToken }
    : undefined;

  return {
    apiServer: publicApiServer,
    yuqueServer: publicYuqueServer,
    feishuServer: publicFeishuServer
  };
};

View File

@@ -6,80 +6,45 @@ export enum DatasetTypeEnum {
dataset = 'dataset',
websiteDataset = 'websiteDataset', // depp link
externalFile = 'externalFile',
apiDataset = 'apiDataset',
feishu = 'feishu',
yuque = 'yuque'
}
// @ts-ignore
export const ApiDatasetTypeMap: Record<
`${DatasetTypeEnum}`,
{
icon: string;
avatar: string;
label: any;
collectionLabel: string;
courseUrl?: string;
}
> = {
[DatasetTypeEnum.apiDataset]: {
icon: 'core/dataset/externalDatasetOutline',
avatar: 'core/dataset/externalDatasetColor',
label: i18nT('dataset:api_file'),
collectionLabel: i18nT('common:File'),
courseUrl: '/docs/guide/knowledge_base/api_dataset/'
},
[DatasetTypeEnum.feishu]: {
icon: 'core/dataset/feishuDatasetOutline',
avatar: 'core/dataset/feishuDatasetColor',
label: i18nT('dataset:feishu_dataset'),
collectionLabel: i18nT('common:File'),
courseUrl: '/docs/guide/knowledge_base/lark_dataset/'
},
[DatasetTypeEnum.yuque]: {
icon: 'core/dataset/yuqueDatasetOutline',
avatar: 'core/dataset/yuqueDatasetColor',
label: i18nT('dataset:yuque_dataset'),
collectionLabel: i18nT('common:File'),
courseUrl: '/docs/guide/knowledge_base/yuque_dataset/'
}
};
export const DatasetTypeMap: Record<
`${DatasetTypeEnum}`,
{
icon: string;
avatar: string;
label: any;
collectionLabel: string;
courseUrl?: string;
}
> = {
...ApiDatasetTypeMap,
export const DatasetTypeMap = {
[DatasetTypeEnum.folder]: {
icon: 'common/folderFill',
avatar: 'common/folderFill',
label: i18nT('dataset:folder_dataset'),
collectionLabel: i18nT('common:Folder')
},
[DatasetTypeEnum.dataset]: {
icon: 'core/dataset/commonDatasetOutline',
avatar: 'core/dataset/commonDatasetColor',
label: i18nT('dataset:common_dataset'),
collectionLabel: i18nT('common:File')
},
[DatasetTypeEnum.websiteDataset]: {
icon: 'core/dataset/websiteDatasetOutline',
avatar: 'core/dataset/websiteDatasetColor',
label: i18nT('dataset:website_dataset'),
collectionLabel: i18nT('common:Website'),
courseUrl: '/docs/guide/knowledge_base/websync/'
collectionLabel: i18nT('common:Website')
},
[DatasetTypeEnum.externalFile]: {
icon: 'core/dataset/externalDatasetOutline',
avatar: 'core/dataset/externalDatasetColor',
label: i18nT('dataset:external_file'),
collectionLabel: i18nT('common:File')
},
[DatasetTypeEnum.apiDataset]: {
icon: 'core/dataset/externalDatasetOutline',
label: i18nT('dataset:api_file'),
collectionLabel: i18nT('common:File')
},
[DatasetTypeEnum.feishu]: {
icon: 'core/dataset/feishuDatasetOutline',
label: i18nT('dataset:feishu_dataset'),
collectionLabel: i18nT('common:File')
},
[DatasetTypeEnum.yuque]: {
icon: 'core/dataset/yuqueDatasetOutline',
label: i18nT('dataset:yuque_dataset'),
collectionLabel: i18nT('common:File')
}
};
@@ -112,8 +77,7 @@ export enum DatasetCollectionTypeEnum {
file = 'file',
link = 'link', // one link
externalFile = 'externalFile',
apiFile = 'apiFile',
images = 'images'
apiFile = 'apiFile'
}
export const DatasetCollectionTypeMap = {
[DatasetCollectionTypeEnum.folder]: {
@@ -133,9 +97,6 @@ export const DatasetCollectionTypeMap = {
},
[DatasetCollectionTypeEnum.apiFile]: {
name: i18nT('common:core.dataset.apiFile')
},
[DatasetCollectionTypeEnum.images]: {
name: i18nT('dataset:core.dataset.Image collection')
}
};
@@ -159,7 +120,6 @@ export const DatasetCollectionSyncResultMap = {
export enum DatasetCollectionDataProcessModeEnum {
chunk = 'chunk',
qa = 'qa',
imageParse = 'imageParse',
backup = 'backup',
auto = 'auto' // abandon
@@ -173,10 +133,6 @@ export const DatasetCollectionDataProcessModeMap = {
label: i18nT('common:core.dataset.training.QA mode'),
tooltip: i18nT('common:core.dataset.import.QA Import Tip')
},
[DatasetCollectionDataProcessModeEnum.imageParse]: {
label: i18nT('dataset:training.Image mode'),
tooltip: i18nT('common:core.dataset.import.Chunk Split Tip')
},
[DatasetCollectionDataProcessModeEnum.backup]: {
label: i18nT('dataset:backup_mode'),
tooltip: i18nT('dataset:backup_mode')
@@ -216,16 +172,14 @@ export enum ImportDataSourceEnum {
fileCustom = 'fileCustom',
externalFile = 'externalFile',
apiDataset = 'apiDataset',
reTraining = 'reTraining',
imageDataset = 'imageDataset'
reTraining = 'reTraining'
}
export enum TrainingModeEnum {
chunk = 'chunk',
qa = 'qa',
auto = 'auto',
image = 'image',
imageParse = 'imageParse'
image = 'image'
}
/* ------------ search -------------- */

View File

@@ -8,19 +8,17 @@ export type CreateDatasetDataProps = {
chunkIndex?: number;
q: string;
a?: string;
imageId?: string;
indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
};
export type UpdateDatasetDataProps = {
dataId: string;
q: string;
q?: string;
a?: string;
indexes?: (Omit<DatasetDataIndexItemType, 'dataId'> & {
dataId?: string; // pg data id
})[];
imageId?: string;
};
export type PatchIndexesProps =

View File

@@ -1,13 +0,0 @@
/**
 * Record shape for an image that belongs to a dataset.
 *
 * The notes on individual fields are inferred from their names and types
 * only — confirm against the persistence schema that backs this type.
 */
export type DatasetImageSchema = {
_id: string;
teamId: string;
datasetId: string;
// Optional: a record may exist without being attached to a collection.
collectionId?: string;
name: string;
// presumably a MIME type — TODO confirm
contentType: string;
// presumably a size in bytes — TODO confirm
size: number;
// Free-form extra data; no structure is enforced by this type.
metadata?: Record<string, any>;
// NOTE(review): looks like a cleanup deadline for unattached images — confirm
expiredTime?: Date;
createdAt: Date;
updatedAt: Date;
};

View File

@@ -13,15 +13,9 @@ import type {
ChunkTriggerConfigTypeEnum
} from './constants';
import type { DatasetPermission } from '../../support/permission/dataset/controller';
import type {
ApiDatasetServerType,
APIFileServer,
FeishuServer,
YuqueServer
} from './apiDataset/type';
import type { APIFileServer, FeishuServer, YuqueServer } from './apiDataset';
import type { SourceMemberType } from 'support/user/type';
import type { DatasetDataIndexTypeEnum } from './data/constants';
import type { ParentIdType } from 'common/parentFolder/type';
export type ChunkSettingsType = {
trainingType?: DatasetCollectionDataProcessModeEnum;
@@ -55,7 +49,7 @@ export type ChunkSettingsType = {
export type DatasetSchemaType = {
_id: string;
parentId: ParentIdType;
parentId?: string;
userId: string;
teamId: string;
tmbId: string;
@@ -78,16 +72,14 @@ export type DatasetSchemaType = {
chunkSettings?: ChunkSettingsType;
inheritPermission: boolean;
apiDatasetServer?: ApiDatasetServerType;
apiServer?: APIFileServer;
feishuServer?: FeishuServer;
yuqueServer?: YuqueServer;
// abandon
autoSync?: boolean;
externalReadUrl?: string;
defaultPermission?: number;
apiServer?: APIFileServer;
feishuServer?: FeishuServer;
yuqueServer?: YuqueServer;
};
export type DatasetCollectionSchemaType = ChunkSettingsType & {
@@ -140,13 +132,7 @@ export type DatasetDataIndexItemType = {
dataId: string; // pg data id
text: string;
};
export type DatasetDataFieldType = {
q: string; // large chunks or question
a?: string; // answer or custom content
imageId?: string;
};
export type DatasetDataSchemaType = DatasetDataFieldType & {
export type DatasetDataSchemaType = {
_id: string;
userId: string;
teamId: string;
@@ -155,9 +141,13 @@ export type DatasetDataSchemaType = DatasetDataFieldType & {
collectionId: string;
chunkIndex: number;
updateTime: Date;
history?: (DatasetDataFieldType & {
q: string; // large chunks or question
a: string; // answer or custom content
history?: {
q: string;
a: string;
updateTime: Date;
})[];
}[];
forbid?: boolean;
fullTextToken: string;
indexes: DatasetDataIndexItemType[];
@@ -189,7 +179,6 @@ export type DatasetTrainingSchemaType = {
dataId?: string;
q: string;
a: string;
imageId?: string;
chunkIndex: number;
indexSize?: number;
weight: number;
@@ -255,18 +244,20 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & {
};
/* ================= data ===================== */
export type DatasetDataItemType = DatasetDataFieldType & {
export type DatasetDataItemType = {
id: string;
teamId: string;
datasetId: string;
imagePreivewUrl?: string;
updateTime: Date;
collectionId: string;
sourceName: string;
sourceId?: string;
q: string;
a: string;
chunkIndex: number;
indexes: DatasetDataIndexItemType[];
isOwner: boolean;
// permission: DatasetPermission;
};
/* --------------- file ---------------------- */
@@ -293,14 +284,3 @@ export type SearchDataResponseItemType = Omit<
score: { type: `${SearchScoreTypeEnum}`; value: number; index: number }[];
// score: number;
};
/**
 * A dataset data item in the form used for citations.
 *
 * `history`, `updateTime` and `index` borrow their types from
 * `DatasetDataSchemaType` (`index` is typed after its `chunkIndex` field), so
 * they stay in sync with that type automatically.
 */
export type DatasetCiteItemType = {
_id: string;
q: string;
a?: string;
// NOTE(review): "Preivew" is a misspelling of "Preview", but the key is part of
// the public shape — renaming it requires updating every producer/consumer.
imagePreivewUrl?: string;
history?: DatasetDataSchemaType['history'];
updateTime: DatasetDataSchemaType['updateTime'];
index: DatasetDataSchemaType['chunkIndex'];
// presumably set when the item changed after it was cited — TODO confirm
updated?: boolean;
};

View File

@@ -2,15 +2,10 @@ import { TrainingModeEnum, DatasetCollectionTypeEnum } from './constants';
import { getFileIcon } from '../../common/file/icon';
import { strIsLink } from '../../common/string/tools';
export function getCollectionIcon({
type = DatasetCollectionTypeEnum.file,
name = '',
sourceId
}: {
type?: DatasetCollectionTypeEnum;
name?: string;
sourceId?: string;
}) {
export function getCollectionIcon(
type: DatasetCollectionTypeEnum = DatasetCollectionTypeEnum.file,
name = ''
) {
if (type === DatasetCollectionTypeEnum.folder) {
return 'common/folderFill';
}
@@ -20,10 +15,7 @@ export function getCollectionIcon({
if (type === DatasetCollectionTypeEnum.virtual) {
return 'file/fill/manual';
}
if (type === DatasetCollectionTypeEnum.images) {
return 'core/dataset/imageFill';
}
return getSourceNameIcon({ sourceName: name, sourceId });
return getFileIcon(name);
}
export function getSourceNameIcon({
sourceName,
@@ -48,6 +40,5 @@ export function getSourceNameIcon({
/**
 * Estimates how many data entries a training job will produce from `data`.
 *
 * The input length is scaled by a per-mode multiplier: 20 for `qa`, 5 for
 * `auto`, 2 for `image`; every other mode counts each input item once.
 */
export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {
  const itemCount = data.length;

  switch (mode) {
    case TrainingModeEnum.qa:
      return itemCount * 20;
    case TrainingModeEnum.auto:
      return itemCount * 5;
    case TrainingModeEnum.image:
      return itemCount * 2;
    default:
      return itemCount;
  }
};

View File

@@ -125,7 +125,6 @@ export type FlowNodeItemType = FlowNodeTemplateType & {
nodeId: string;
parentNodeId?: string;
isError?: boolean;
searchedText?: string;
debugResult?: {
status: 'running' | 'success' | 'skipped' | 'failed';
message?: string;

View File

@@ -1,5 +1,4 @@
export enum OperationLogEventEnum {
//Team
LOGIN = 'LOGIN',
CREATE_INVITATION_LINK = 'CREATE_INVITATION_LINK',
JOIN_TEAM = 'JOIN_TEAM',
@@ -12,52 +11,5 @@ export enum OperationLogEventEnum {
RELOCATE_DEPARTMENT = 'RELOCATE_DEPARTMENT',
CREATE_GROUP = 'CREATE_GROUP',
DELETE_GROUP = 'DELETE_GROUP',
ASSIGN_PERMISSION = 'ASSIGN_PERMISSION',
//APP
CREATE_APP = 'CREATE_APP',
UPDATE_APP_INFO = 'UPDATE_APP_INFO',
MOVE_APP = 'MOVE_APP',
DELETE_APP = 'DELETE_APP',
UPDATE_APP_COLLABORATOR = 'UPDATE_APP_COLLABORATOR',
DELETE_APP_COLLABORATOR = 'DELETE_APP_COLLABORATOR',
TRANSFER_APP_OWNERSHIP = 'TRANSFER_APP_OWNERSHIP',
CREATE_APP_COPY = 'CREATE_APP_COPY',
CREATE_APP_FOLDER = 'CREATE_APP_FOLDER',
UPDATE_PUBLISH_APP = 'UPDATE_PUBLISH_APP',
CREATE_APP_PUBLISH_CHANNEL = 'CREATE_APP_PUBLISH_CHANNEL',
UPDATE_APP_PUBLISH_CHANNEL = 'UPDATE_APP_PUBLISH_CHANNEL',
DELETE_APP_PUBLISH_CHANNEL = 'DELETE_APP_PUBLISH_CHANNEL',
EXPORT_APP_CHAT_LOG = 'EXPORT_APP_CHAT_LOG',
//Dataset
CREATE_DATASET = 'CREATE_DATASET',
UPDATE_DATASET = 'UPDATE_DATASET',
DELETE_DATASET = 'DELETE_DATASET',
MOVE_DATASET = 'MOVE_DATASET',
UPDATE_DATASET_COLLABORATOR = 'UPDATE_DATASET_COLLABORATOR',
DELETE_DATASET_COLLABORATOR = 'DELETE_DATASET_COLLABORATOR',
TRANSFER_DATASET_OWNERSHIP = 'TRANSFER_DATASET_OWNERSHIP',
EXPORT_DATASET = 'EXPORT_DATASET',
CREATE_DATASET_FOLDER = 'CREATE_DATASET_FOLDER',
//Collection
CREATE_COLLECTION = 'CREATE_COLLECTION',
UPDATE_COLLECTION = 'UPDATE_COLLECTION',
DELETE_COLLECTION = 'DELETE_COLLECTION',
RETRAIN_COLLECTION = 'RETRAIN_COLLECTION',
//Data
CREATE_DATA = 'CREATE_DATA',
UPDATE_DATA = 'UPDATE_DATA',
DELETE_DATA = 'DELETE_DATA',
//SearchTest
SEARCH_TEST = 'SEARCH_TEST',
//Account
CHANGE_PASSWORD = 'CHANGE_PASSWORD',
CHANGE_NOTIFICATION_SETTINGS = 'CHANGE_NOTIFICATION_SETTINGS',
CHANGE_MEMBER_NAME_ACCOUNT = 'CHANGE_MEMBER_NAME_ACCOUNT',
PURCHASE_PLAN = 'PURCHASE_PLAN',
EXPORT_BILL_RECORDS = 'EXPORT_BILL_RECORDS',
CREATE_INVOICE = 'CREATE_INVOICE',
SET_INVOICE_HEADER = 'SET_INVOICE_HEADER',
CREATE_API_KEY = 'CREATE_API_KEY',
UPDATE_API_KEY = 'UPDATE_API_KEY',
DELETE_API_KEY = 'DELETE_API_KEY'
ASSIGN_PERMISSION = 'ASSIGN_PERMISSION'
}

View File

@@ -13,7 +13,6 @@ const staticPluginList = [
'WeWorkWebhook',
'google',
'bing',
'bocha',
'delay'
];
// Run in worker thread (Have npm packages)

View File

@@ -1,677 +0,0 @@
{
"author": "",
"name": "博查搜索",
"avatar": "core/workflow/template/bocha",
"intro": "使用博查AI搜索引擎进行网络搜索。",
"showStatus": true,
"weight": 10,
"courseUrl": "",
"isTool": true,
"templateType": "search",
"workflow": {
"nodes": [
{
"nodeId": "pluginInput",
"name": "workflow:template.plugin_start",
"intro": "workflow:intro_plugin_input",
"avatar": "core/workflow/template/workflowStart",
"flowNodeType": "pluginInput",
"showStatus": false,
"position": {
"x": 636.3048409085379,
"y": -238.61714728578016
},
"version": "481",
"inputs": [
{
"renderTypeList": [
"input"
],
"selectedTypeIndex": 0,
"valueType": "string",
"canEdit": true,
"key": "apiKey",
"label": "apiKey",
"description": "博查API密钥",
"defaultValue": "",
"required": true
},
{
"renderTypeList": [
"input",
"reference"
],
"selectedTypeIndex": 0,
"valueType": "string",
"canEdit": true,
"key": "query",
"label": "query",
"description": "搜索查询词",
"defaultValue": "",
"required": true,
"toolDescription": "搜索查询词"
},
{
"renderTypeList": [
"input",
"reference"
],
"selectedTypeIndex": 0,
"valueType": "string",
"canEdit": true,
"key": "freshness",
"label": "freshness",
"description": "搜索指定时间范围内的网页。可填值oneDay(一天内)、oneWeek(一周内)、oneMonth(一个月内)、oneYear(一年内)、noLimit(不限,默认)、YYYY-MM-DD..YYYY-MM-DD(日期范围)、YYYY-MM-DD(指定日期)",
"defaultValue": "noLimit",
"required": false,
"toolDescription": "搜索时间范围"
},
{
"renderTypeList": [
"input",
"reference"
],
"selectedTypeIndex": 0,
"valueType": "boolean",
"canEdit": true,
"key": "summary",
"label": "summary",
"description": "是否显示文本摘要。true显示false不显示(默认)",
"defaultValue": false,
"required": false,
"toolDescription": "是否显示文本摘要"
},
{
"renderTypeList": [
"input",
"reference"
],
"selectedTypeIndex": 0,
"valueType": "string",
"canEdit": true,
"key": "include",
"label": "include",
"description": "指定搜索的site范围。多个域名使用|或,分隔最多20个。例如qq.com|m.163.com",
"defaultValue": "",
"required": false,
"toolDescription": "指定搜索的site范围"
},
{
"renderTypeList": [
"input",
"reference"
],
"selectedTypeIndex": 0,
"valueType": "string",
"canEdit": true,
"key": "exclude",
"label": "exclude",
"description": "排除搜索的网站范围。多个域名使用|或,分隔最多20个。例如qq.com|m.163.com",
"defaultValue": "",
"required": false,
"toolDescription": "排除搜索的网站范围"
},
{
"renderTypeList": [
"input",
"reference"
],
"selectedTypeIndex": 0,
"valueType": "number",
"canEdit": true,
"key": "count",
"label": "count",
"description": "返回结果的条数。可填范围1-50默认为10",
"defaultValue": 10,
"required": false,
"min": 1,
"max": 50,
"toolDescription": "返回结果条数"
}
],
"outputs": [
{
"id": "apiKey",
"valueType": "string",
"key": "apiKey",
"label": "apiKey",
"type": "hidden"
},
{
"id": "query",
"valueType": "string",
"key": "query",
"label": "query",
"type": "hidden"
},
{
"id": "freshness",
"valueType": "string",
"key": "freshness",
"label": "freshness",
"type": "hidden"
},
{
"id": "summary",
"valueType": "boolean",
"key": "summary",
"label": "summary",
"type": "hidden"
},
{
"id": "include",
"valueType": "string",
"key": "include",
"label": "include",
"type": "hidden"
},
{
"id": "exclude",
"valueType": "string",
"key": "exclude",
"label": "exclude",
"type": "hidden"
},
{
"id": "count",
"valueType": "number",
"key": "count",
"label": "count",
"type": "hidden"
}
]
},
{
"nodeId": "pluginOutput",
"name": "common:core.module.template.self_output",
"intro": "workflow:intro_custom_plugin_output",
"avatar": "core/workflow/template/pluginOutput",
"flowNodeType": "pluginOutput",
"showStatus": false,
"position": {
"x": 2764.1105686698083,
"y": -30.617147285780163
},
"version": "481",
"inputs": [
{
"renderTypeList": [
"reference"
],
"valueType": "object",
"canEdit": true,
"key": "result",
"label": "result",
"isToolOutput": true,
"description": "",
"value": [
"nyA6oA8mF1iW",
"httpRawResponse"
]
}
],
"outputs": []
},
{
"nodeId": "pluginConfig",
"name": "common:core.module.template.system_config",
"intro": "",
"avatar": "core/workflow/template/systemConfig",
"flowNodeType": "pluginConfig",
"position": {
"x": 184.66337662472682,
"y": -216.05298493910115
},
"version": "4811",
"inputs": [],
"outputs": []
},
{
"nodeId": "nyA6oA8mF1iW",
"name": "HTTP 请求",
"intro": "调用博查搜索API",
"avatar": "core/workflow/template/httpRequest",
"flowNodeType": "httpRequest468",
"showStatus": true,
"position": {
"x": 1335.0647252518884,
"y": -455.9043948565971
},
"version": "481",
"inputs": [
{
"key": "system_addInputParam",
"renderTypeList": [
"addInputParam"
],
"valueType": "dynamic",
"label": "",
"required": false,
"description": "common:core.module.input.description.HTTP Dynamic Input",
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectDataset",
"selectApp"
],
"showDescription": false,
"showDefaultValue": true
},
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpMethod",
"renderTypeList": [
"custom"
],
"valueType": "string",
"label": "",
"value": "POST",
"required": true,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpTimeout",
"renderTypeList": [
"custom"
],
"valueType": "number",
"label": "",
"value": 30,
"min": 5,
"max": 600,
"required": true,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpReqUrl",
"renderTypeList": [
"hidden"
],
"valueType": "string",
"label": "",
"description": "common:core.module.input.description.Http Request Url",
"placeholder": "https://api.ai.com/getInventory",
"required": false,
"value": "https://api.bochaai.com/v1/web-search",
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpHeader",
"renderTypeList": [
"custom"
],
"valueType": "any",
"value": [
{
"key": "Authorization",
"type": "string",
"value": "Bearer {{$pluginInput.apiKey$}}"
},
{
"key": "Content-Type",
"type": "string",
"value": "application/json"
}
],
"label": "",
"description": "common:core.module.input.description.Http Request Header",
"placeholder": "common:core.module.input.description.Http Request Header",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpParams",
"renderTypeList": [
"hidden"
],
"valueType": "any",
"value": [],
"label": "",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpJsonBody",
"renderTypeList": [
"hidden"
],
"valueType": "any",
"value": "{\n \"query\": \"{{query}}\",\n \"freshness\": \"{{freshness}}\",\n \"summary\": {{summary}},\n \"include\": \"{{include}}\",\n \"exclude\": \"{{exclude}}\",\n \"count\": {{count}}\n}",
"label": "",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpFormBody",
"renderTypeList": [
"hidden"
],
"valueType": "any",
"value": [],
"label": "",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpContentType",
"renderTypeList": [
"hidden"
],
"valueType": "string",
"value": "json",
"label": "",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"valueType": "string",
"renderTypeList": [
"reference"
],
"key": "query",
"label": "query",
"toolDescription": "博查搜索检索词",
"required": true,
"canEdit": true,
"editField": {
"key": true,
"description": true
},
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"value": [
"pluginInput",
"query"
]
},
{
"valueType": "string",
"renderTypeList": [
"reference"
],
"key": "freshness",
"label": "freshness",
"toolDescription": "搜索时间范围",
"required": false,
"canEdit": true,
"editField": {
"key": true,
"description": true
},
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"value": [
"pluginInput",
"freshness"
]
},
{
"valueType": "boolean",
"renderTypeList": [
"reference"
],
"key": "summary",
"label": "summary",
"toolDescription": "是否显示文本摘要",
"required": false,
"canEdit": true,
"editField": {
"key": true,
"description": true
},
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"value": [
"pluginInput",
"summary"
]
},
{
"valueType": "string",
"renderTypeList": [
"reference"
],
"key": "include",
"label": "include",
"toolDescription": "指定搜索的site范围",
"required": false,
"canEdit": true,
"editField": {
"key": true,
"description": true
},
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"value": [
"pluginInput",
"include"
]
},
{
"valueType": "string",
"renderTypeList": [
"reference"
],
"key": "exclude",
"label": "exclude",
"toolDescription": "排除搜索的网站范围",
"required": false,
"canEdit": true,
"editField": {
"key": true,
"description": true
},
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"value": [
"pluginInput",
"exclude"
]
},
{
"valueType": "number",
"renderTypeList": [
"reference"
],
"key": "count",
"label": "count",
"toolDescription": "返回结果条数",
"required": false,
"canEdit": true,
"editField": {
"key": true,
"description": true
},
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"value": [
"pluginInput",
"count"
]
}
],
"outputs": [
{
"id": "error",
"key": "error",
"label": "workflow:request_error",
"description": "HTTP请求错误信息成功时返回空",
"valueType": "object",
"type": "static"
},
{
"id": "httpRawResponse",
"key": "httpRawResponse",
"required": true,
"label": "workflow:raw_response",
"description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。",
"valueType": "any",
"type": "static"
},
{
"id": "system_addOutputParam",
"key": "system_addOutputParam",
"type": "dynamic",
"valueType": "dynamic",
"label": "",
"editField": {
"key": true,
"valueType": true
}
}
]
}
],
"edges": [
{
"source": "pluginInput",
"target": "nyA6oA8mF1iW",
"sourceHandle": "pluginInput-source-right",
"targetHandle": "nyA6oA8mF1iW-target-left"
},
{
"source": "nyA6oA8mF1iW",
"target": "pluginOutput",
"sourceHandle": "nyA6oA8mF1iW-source-right",
"targetHandle": "pluginOutput-target-left"
}
]
},
"chatConfig": {}
}

View File

@@ -1,8 +1,5 @@
import type {
ApiDatasetDetailResponse,
FeishuServer,
YuqueServer
} from '@fastgpt/global/core/dataset/apiDataset/type';
import type { ApiDatasetDetailResponse } from '@fastgpt/global/core/dataset/apiDataset';
import { FeishuServer, YuqueServer } from '@fastgpt/global/core/dataset/apiDataset';
import type {
DeepRagSearchProps,
SearchDatasetDataResponse

View File

@@ -1,181 +0,0 @@
import { retryFn } from '@fastgpt/global/common/system/utils';
import { connectionMongo } from '../../mongo';
import { MongoRawTextBufferSchema, bucketName } from './schema';
import { addLog } from '../../system/log';
import { setCron } from '../../system/cron';
import { checkTimerLock } from '../../system/timerLock/utils';
import { TimerIdEnum } from '../../system/timerLock/constants';
/**
 * Builds a GridFSBucket named `bucketName` on the database of the current
 * mongoose connection. A new bucket object is created on every call.
 */
const getGridBucket = () => {
  const { mongo, connection } = connectionMongo;
  return new mongo.GridFSBucket(connection.db!, { bucketName });
};
/**
 * Stores `text` as a GridFS file so that it can later be looked up by
 * `metadata.sourceId`.
 *
 * This is a best-effort cache write: it never throws. It resolves with the id
 * of the created GridFS file, or with `''` when every upload attempt failed.
 *
 * @param sourceId    Lookup key; also used as the GridFS filename.
 * @param sourceName  Stored in the file metadata.
 * @param text        Content to store.
 * @param expiredTime Stored in the file metadata as `expiredTime`.
 */
export const addRawTextBuffer = async ({
  sourceId,
  sourceName,
  text,
  expiredTime
}: {
  sourceId: string;
  sourceName: string;
  text: string;
  expiredTime: Date;
}) => {
  const gridBucket = getGridBucket();
  const metadata = {
    sourceId,
    sourceName,
    expiredTime
  };

  const buffer = Buffer.from(text);
  const fileSize = buffer.length;

  // Chunk size: as large as possible, capped at 14MB, never below 128KB.
  const chunkSizeBytes = (() => {
    // Ideal size: file size / 10 target chunks, capped at 14MB per chunk.
    const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);

    // Lower bound of 128KB.
    const minChunkSize = 128 * 1024; // 128KB

    // Use whichever of the ideal size and the lower bound is larger.
    let chunkSize = Math.max(idealChunkSize, minChunkSize);

    // Round up to the next multiple of 64KB to keep sizes tidy.
    chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);

    return chunkSize;
  })();

  try {
    // NOTE(review): relies on retryFn re-invoking the callback when it rejects;
    // its implementation is not visible from this file — confirm.
    return await retryFn(async () => {
      // An upload stream is single-use, so every attempt needs its own stream.
      // Creating it outside the retry callback would make each retry call
      // end() on a stream that has already finished or errored.
      const uploadStream = gridBucket.openUploadStream(sourceId, {
        metadata,
        chunkSizeBytes
      });

      return new Promise<unknown>((resolve, reject) => {
        uploadStream.on('finish', () => {
          resolve(uploadStream.id);
        });
        // Reject (instead of resolving '') so the retry wrapper sees the failure.
        uploadStream.on('error', reject);
        uploadStream.end(buffer);
      });
    });
  } catch (error) {
    // All attempts failed: keep the original contract of logging and
    // returning '' rather than surfacing the error to the caller.
    addLog.error('addRawTextBuffer error', error);
    return '';
  }
};
/**
 * Loads the buffered text stored under `metadata.sourceId === sourceId`.
 *
 * Resolves with `{ text, sourceName }`, or with `null` when no matching file
 * exists or when reading the file content fails. Download-stream errors are
 * logged and turned into `null`, so only a failing metadata lookup can reject
 * inside the `retryFn` callback.
 */
export const getRawTextBuffer = async (sourceId: string) => {
  const gridBucket = getGridBucket();

  return retryFn(async () => {
    const fileRecord = await MongoRawTextBufferSchema.findOne(
      { 'metadata.sourceId': sourceId },
      '_id metadata'
    ).lean();

    if (!fileRecord) return null;

    // Stream the file content and collect it in memory.
    const reader = gridBucket.openDownloadStream(fileRecord._id);
    const parts: Buffer[] = [];

    return new Promise<{
      text: string;
      sourceName: string;
    } | null>((resolve) => {
      reader.on('error', (error) => {
        addLog.error('getRawTextBuffer error', error);
        resolve(null);
      });

      reader.on('data', (chunk) => {
        parts.push(chunk);
      });

      reader.on('end', () => {
        const text = Buffer.concat(parts).toString('utf8');
        const sourceName = fileRecord.metadata?.sourceName || '';
        resolve({ text, sourceName });
      });
    });
  });
};
/**
 * Deletes the buffered file stored under `metadata.sourceId === sourceId`.
 *
 * @returns `true` when a matching file was found and deleted, `false` when no
 *          matching file exists.
 */
export const deleteRawTextBuffer = async (sourceId: string): Promise<boolean> => {
  const gridBucket = getGridBucket();

  return retryFn(async () => {
    const target = await MongoRawTextBufferSchema.findOne({ 'metadata.sourceId': sourceId });

    if (target) {
      await gridBucket.delete(target._id);
      return true;
    }

    return false;
  });
};
/**
 * Sets `metadata.expiredTime` on the buffered file whose `metadata.sourceId`
 * equals `sourceId`, and resolves with the result of the `updateOne` call.
 */
export const updateRawTextBufferExpiredTime = async ({
  sourceId,
  expiredTime
}: {
  sourceId: string;
  expiredTime: Date;
}) => {
  const filter = { 'metadata.sourceId': sourceId };
  const update = { $set: { 'metadata.expiredTime': expiredTime } };

  return retryFn(async () => MongoRawTextBufferSchema.updateOne(filter, update));
};
/**
 * Registers a cron job (expression: every 10 minutes) that deletes buffered
 * files whose `metadata.expiredTime` lies in the past.
 *
 * A run only does work when `checkTimerLock` resolves truthy; failures of a
 * single delete or of a whole run are logged and do not propagate.
 */
export const clearExpiredRawTextBufferCron = async () => {
  const gridBucket = getGridBucket();

  // One cleanup pass: find everything that is expired, delete file by file.
  const clearExpiredRawTextBuffer = async () => {
    addLog.debug('Clear expired raw text buffer start');

    const expiredFiles = await MongoRawTextBufferSchema.find(
      { 'metadata.expiredTime': { $lt: new Date() } },
      '_id'
    ).lean();

    for (const { _id } of expiredFiles) {
      try {
        await gridBucket.delete(_id);
      } catch (error) {
        // Keep going: one failed delete must not block the remaining files.
        addLog.error('Delete expired raw text buffer error', error);
      }
    }

    addLog.debug('Clear expired raw text buffer end');
  };

  setCron('*/10 * * * *', async () => {
    const lockAcquired = await checkTimerLock({
      timerId: TimerIdEnum.clearExpiredRawTextBuffer,
      lockMinuted: 9
    });
    if (!lockAcquired) return;

    try {
      await clearExpiredRawTextBuffer();
    } catch (error) {
      addLog.error('clearExpiredRawTextBufferCron error', error);
    }
  });
};

View File

@@ -1,22 +1,33 @@
import { getMongoModel, type Types, Schema } from '../../mongo';
import { getMongoModel, Schema } from '../../mongo';
import { type RawTextBufferSchemaType } from './type';
export const bucketName = 'buffer_rawtext';
export const collectionName = 'buffer_rawtexts';
const RawTextBufferSchema = new Schema({
metadata: {
sourceId: { type: String, required: true },
sourceName: { type: String, required: true },
expiredTime: { type: Date, required: true }
}
sourceId: {
type: String,
required: true
},
rawText: {
type: String,
default: ''
},
createTime: {
type: Date,
default: () => new Date()
},
metadata: Object
});
RawTextBufferSchema.index({ 'metadata.sourceId': 'hashed' });
RawTextBufferSchema.index({ 'metadata.expiredTime': -1 });
export const MongoRawTextBufferSchema = getMongoModel<{
_id: Types.ObjectId;
metadata: {
sourceId: string;
sourceName: string;
expiredTime: Date;
};
}>(`${bucketName}.files`, RawTextBufferSchema);
try {
RawTextBufferSchema.index({ sourceId: 1 });
// 20 minutes
RawTextBufferSchema.index({ createTime: 1 }, { expireAfterSeconds: 20 * 60 });
} catch (error) {
console.log(error);
}
export const MongoRawTextBuffer = getMongoModel<RawTextBufferSchemaType>(
collectionName,
RawTextBufferSchema
);

View File

@@ -0,0 +1,8 @@
/**
 * Document shape of a raw-text buffer entry: parsed text stored under a
 * lookup key.
 */
export type RawTextBufferSchemaType = {
// Lookup key for the entry.
sourceId: string;
// The buffered text itself.
rawText: string;
// Creation timestamp.
// NOTE(review): the Mongoose schema for this type appears to put a 20-minute
// TTL index on this field — confirm before relying on entries living longer.
createTime: Date;
// Optional extra data; only `filename` is typed here.
metadata?: {
filename: string;
};
};

View File

@@ -6,14 +6,13 @@ import { type DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema';
import { detectFileEncoding, detectFileEncodingByPath } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { MongoRawTextBuffer } from '../../buffer/rawText/schema';
import { readRawContentByFileBuffer } from '../read/utils';
import { computeGridFsChunSize, gridFsStream2Buffer, stream2Encoding } from './utils';
import { gridFsStream2Buffer, stream2Encoding } from './utils';
import { addLog } from '../../system/log';
import { readFromSecondary } from '../../mongo/utils';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import { Readable } from 'stream';
import { addRawTextBuffer, getRawTextBuffer } from '../../buffer/rawText/controller';
import { addMinutes } from 'date-fns';
import { retryFn } from '@fastgpt/global/common/system/utils';
export function getGFSCollection(bucket: `${BucketNameEnum}`) {
MongoDatasetFileSchema;
@@ -65,7 +64,23 @@ export async function uploadFile({
// create a gridfs bucket
const bucket = getGridBucket(bucketName);
const chunkSizeBytes = computeGridFsChunSize(stats.size);
const fileSize = stats.size;
// 单块大小:尽可能大,但不超过 14MB不小于512KB
const chunkSizeBytes = (() => {
// 计算理想块大小:文件大小 ÷ 目标块数(10)。 并且每个块需要小于 14MB
const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);
// 确保块大小至少为512KB
const minChunkSize = 512 * 1024; // 512KB
// 取理想块大小和最小块大小中的较大值
let chunkSize = Math.max(idealChunkSize, minChunkSize);
// 将块大小向上取整到最接近的64KB的倍数使其更整齐
chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
return chunkSize;
})();
const stream = bucket.openUploadStream(filename, {
metadata,
@@ -158,18 +173,24 @@ export async function getFileById({
export async function delFileByFileIdList({
bucketName,
fileIdList
fileIdList,
retry = 3
}: {
bucketName: `${BucketNameEnum}`;
fileIdList: string[];
retry?: number;
}): Promise<any> {
return retryFn(async () => {
try {
const bucket = getGridBucket(bucketName);
for await (const fileId of fileIdList) {
await bucket.delete(new Types.ObjectId(fileId));
}
});
} catch (error) {
if (retry > 0) {
return delFileByFileIdList({ bucketName, fileIdList, retry: retry - 1 });
}
}
}
export async function getDownloadStream({
@@ -202,13 +223,15 @@ export const readFileContentFromMongo = async ({
rawText: string;
filename: string;
}> => {
const bufferId = `${String(fileId)}-${customPdfParse}`;
const bufferId = `${fileId}-${customPdfParse}`;
// read buffer
const fileBuffer = await getRawTextBuffer(bufferId);
const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: bufferId }, undefined, {
...readFromSecondary
}).lean();
if (fileBuffer) {
return {
rawText: fileBuffer.text,
filename: fileBuffer?.sourceName
rawText: fileBuffer.rawText,
filename: fileBuffer.metadata?.filename || ''
};
}
@@ -242,13 +265,16 @@ export const readFileContentFromMongo = async ({
}
});
// Add buffer
addRawTextBuffer({
sourceId: bufferId,
sourceName: file.filename,
text: rawText,
expiredTime: addMinutes(new Date(), 20)
});
// < 14M
if (fileBuffers.length < 14 * 1024 * 1024 && rawText.trim()) {
MongoRawTextBuffer.create({
sourceId: bufferId,
rawText,
metadata: {
filename: file.filename
}
});
}
return {
rawText,

View File

@@ -1,16 +1,16 @@
import { Schema, getMongoModel } from '../../mongo';
const DatasetFileSchema = new Schema({
metadata: Object
});
const ChatFileSchema = new Schema({
metadata: Object
});
const DatasetFileSchema = new Schema({});
const ChatFileSchema = new Schema({});
DatasetFileSchema.index({ uploadDate: -1 });
try {
DatasetFileSchema.index({ uploadDate: -1 });
ChatFileSchema.index({ uploadDate: -1 });
ChatFileSchema.index({ 'metadata.chatId': 1 });
ChatFileSchema.index({ uploadDate: -1 });
ChatFileSchema.index({ 'metadata.chatId': 1 });
} catch (error) {
console.log(error);
}
export const MongoDatasetFileSchema = getMongoModel('dataset.files', DatasetFileSchema);
export const MongoChatFileSchema = getMongoModel('chat.files', ChatFileSchema);

View File

@@ -1,57 +1,5 @@
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { PassThrough } from 'stream';
import { getGridBucket } from './controller';
import { type BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { retryFn } from '@fastgpt/global/common/system/utils';
/**
 * Writes `text` into the given GridFS bucket as a new file.
 *
 * @param bucket   Name of the GridFS bucket to write to.
 * @param filename Filename of the created GridFS file.
 * @param text     Content to store.
 * @param metadata Stored as the file's metadata.
 * @returns `{ fileId }` — the id of the created file, as a string.
 *          Rejects when the upload still fails after `retryFn` gives up.
 */
export const createFileFromText = async ({
  bucket,
  filename,
  text,
  metadata
}: {
  bucket: `${BucketNameEnum}`;
  filename: string;
  text: string;
  metadata: Record<string, any>;
}) => {
  const gridBucket = getGridBucket(bucket);

  const buffer = Buffer.from(text);
  const fileSize = buffer.length;

  // Chunk size: as large as possible, capped at 14MB, never below 128KB.
  const chunkSizeBytes = (() => {
    // Ideal size: file size / 10 target chunks, capped at 14MB per chunk.
    const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);

    // Lower bound of 128KB.
    const minChunkSize = 128 * 1024; // 128KB

    // Use whichever of the ideal size and the lower bound is larger.
    let chunkSize = Math.max(idealChunkSize, minChunkSize);

    // Round up to the next multiple of 64KB to keep sizes tidy.
    chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);

    return chunkSize;
  })();

  return retryFn(async () => {
    // An upload stream is single-use, so every attempt needs its own stream.
    // Creating it outside this callback would make each retry call end() on a
    // stream that has already errored, so the retry could never succeed.
    const uploadStream = gridBucket.openUploadStream(filename, {
      metadata,
      chunkSizeBytes
    });

    return new Promise<{ fileId: string }>((resolve, reject) => {
      uploadStream.on('finish', () => {
        resolve({ fileId: String(uploadStream.id) });
      });
      uploadStream.on('error', reject);
      uploadStream.end(buffer);
    });
  });
};
export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
return new Promise<Buffer>((resolve, reject) => {
@@ -105,20 +53,3 @@ export const stream2Encoding = async (stream: NodeJS.ReadableStream) => {
stream: copyStream
};
};
/**
 * Picks the GridFS chunk size (in bytes) for a file of `fileSize` bytes.
 *
 * The size aims at roughly 10 chunks per file, is clamped to the range
 * [512KB, 14MB], and is then rounded up to a multiple of 64KB.
 */
export const computeGridFsChunSize = (fileSize: number) => {
  const KB = 1024;
  const upperBound = 14 * 1024 * KB; // 14MB
  const lowerBound = 512 * KB; // 512KB
  const alignment = 64 * KB; // 64KB

  // One tenth of the file, clamped into [lowerBound, upperBound].
  const tenthOfFile = Math.ceil(fileSize / 10);
  const clamped = Math.max(Math.min(tenthOfFile, upperBound), lowerBound);

  // Round up to the next 64KB boundary.
  return Math.ceil(clamped / alignment) * alignment;
};

View File

@@ -22,7 +22,7 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
maxSize *= 1024 * 1024;
class UploadModel {
uploaderSingle = multer({
uploader = multer({
limits: {
fieldSize: maxSize
},
@@ -41,7 +41,8 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
}
})
}).single('file');
async getUploadFile<T = any>(
async doUpload<T = any>(
req: NextApiRequest,
res: NextApiResponse,
originBucketName?: `${BucketNameEnum}`
@@ -53,7 +54,7 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
bucketName?: `${BucketNameEnum}`;
}>((resolve, reject) => {
// @ts-ignore
this.uploaderSingle(req, res, (error) => {
this.uploader(req, res, (error) => {
if (error) {
return reject(error);
}
@@ -93,58 +94,6 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
});
});
}
// Multer middleware for multi-file uploads: accepts files sent under the form
// field 'file' and writes them to disk under a generated name (nanoid + the
// extension of the decoded original filename).
// NOTE(review): `limits.fieldSize` is multer's limit for non-file field values;
// if `maxSize` is meant to cap the uploaded file size, `fileSize` is the
// matching option — confirm intent.
// NOTE(review): the second argument of `.array()` is a maximum file COUNT, but
// the value passed is named `uploadFileMaxSize` — confirm this is intended.
uploaderMultiple = multer({
limits: {
fieldSize: maxSize
},
preservePath: true,
storage: multer.diskStorage({
// destination: (_req, _file, cb) => {
// cb(null, tmpFileDirPath);
// },
filename: (req, file, cb) => {
if (!file?.originalname) {
cb(new Error('File not found'), '');
} else {
const { ext } = path.parse(decodeURIComponent(file.originalname));
cb(null, `${getNanoid()}${ext}`);
}
}
})
}).array('file', global.feConfigs?.uploadFileMaxSize);
/**
 * Run the multi-file multer middleware against the request and collect the result.
 *
 * Resolves with the uploaded files (their `originalname` URI-decoded) and the
 * parsed `body.data` JSON payload; rejects with the multer error if the upload fails.
 */
async getUploadFiles<T = any>(req: NextApiRequest, res: NextApiResponse) {
  // body.data is an optional JSON string; a missing or malformed value yields {}.
  const parseBodyData = () => {
    if (!req.body?.data) return {};
    try {
      return JSON.parse(req.body.data);
    } catch (error) {
      return {};
    }
  };

  return new Promise<{
    files: FileType[];
    data: T;
  }>((resolve, reject) => {
    // @ts-ignore
    this.uploaderMultiple(req, res, (error) => {
      if (error) {
        console.log(error);
        return reject(error);
      }

      // @ts-ignore
      const uploaded = req.files as FileType[];
      const decodedFiles = uploaded.map((file) => ({
        ...file,
        originalname: decodeURIComponent(file.originalname)
      }));

      resolve({
        files: decodedFiles,
        data: parseBodyData()
      });
    });
  });
}
}
return new UploadModel();

View File

@@ -110,7 +110,7 @@ export const readRawContentByFileBuffer = async ({
return {
rawText: text,
formatText: text,
formatText: rawText,
imageList
};
};

View File

@@ -4,8 +4,7 @@ import { MongoFrequencyLimit } from './schema';
export const authFrequencyLimit = async ({
eventId,
maxAmount,
expiredTime,
num = 1
expiredTime
}: AuthFrequencyLimitProps) => {
try {
// 对应 eventId 的 account+1, 不存在的话,则创建一个
@@ -15,7 +14,7 @@ export const authFrequencyLimit = async ({
expiredTime: { $gte: new Date() }
},
{
$inc: { amount: num },
$inc: { amount: 1 },
// If not exist, set the expiredTime
$setOnInsert: { expiredTime }
},

View File

@@ -5,10 +5,7 @@ export enum TimerIdEnum {
clearExpiredSubPlan = 'clearExpiredSubPlan',
updateStandardPlan = 'updateStandardPlan',
scheduleTriggerApp = 'scheduleTriggerApp',
notification = 'notification',
clearExpiredRawTextBuffer = 'clearExpiredRawTextBuffer',
clearExpiredDatasetImage = 'clearExpiredDatasetImage'
notification = 'notification'
}
export enum LockNotificationEnum {

View File

@@ -3,5 +3,6 @@ export const DatasetVectorTableName = 'modeldata';
export const PG_ADDRESS = process.env.PG_URL;
export const OCEANBASE_ADDRESS = process.env.OCEANBASE_URL;
export const OPENGAUSS_ADDRESS = process.env.OPENGAUSS_URL;
export const MILVUS_ADDRESS = process.env.MILVUS_ADDRESS;
export const MILVUS_TOKEN = process.env.MILVUS_TOKEN;

View File

@@ -1,10 +1,11 @@
/* vector crud */
import { PgVectorCtrl } from './pg';
import { ObVectorCtrl } from './oceanbase';
import { GsVectorCtrl } from './opengauss';
import { getVectorsByText } from '../../core/ai/embedding';
import { type DelDatasetVectorCtrlProps, type InsertVectorProps } from './controller.d';
import { type EmbeddingModelItemType } from '@fastgpt/global/core/ai/model.d';
import { MILVUS_ADDRESS, PG_ADDRESS, OCEANBASE_ADDRESS } from './constants';
import { MILVUS_ADDRESS, PG_ADDRESS, OCEANBASE_ADDRESS, OPENGAUSS_ADDRESS } from './constants';
import { MilvusCtrl } from './milvus';
import { setRedisCache, getRedisCache, delRedisCache, CacheKeyEnum } from '../redis/cache';
import { throttle } from 'lodash';
@@ -14,6 +15,7 @@ const getVectorObj = () => {
if (PG_ADDRESS) return new PgVectorCtrl();
if (OCEANBASE_ADDRESS) return new ObVectorCtrl();
if (MILVUS_ADDRESS) return new MilvusCtrl();
if (OPENGAUSS_ADDRESS) return new GsVectorCtrl();
return new PgVectorCtrl();
};

View File

@@ -0,0 +1,188 @@
import { delay } from '@fastgpt/global/common/system/utils';
import { addLog } from '../../system/log';
import { Pool } from 'pg';
import type { QueryResultRow } from 'pg';
import { OPENGAUSS_ADDRESS } from '../constants';
/**
 * Get the process-wide openGauss connection pool, creating it on first use.
 *
 * The pool is cached on `global.gsClient`. On creation a single connection is
 * checked out to verify that the server is reachable and is then returned to the
 * pool. If that probe fails, or the pool later emits an `error` event, the pool is
 * ended, the cache is cleared and a reconnect is attempted after a 1s delay.
 *
 * @returns the shared `pg` Pool connected to OPENGAUSS_ADDRESS
 */
export const connectGs = async (): Promise<Pool> => {
  if (global.gsClient) {
    return global.gsClient;
  }

  global.gsClient = new Pool({
    connectionString: OPENGAUSS_ADDRESS,
    max: Number(process.env.DB_MAX_LINK || 20),
    min: 10,
    keepAlive: true,
    idleTimeoutMillis: 600000,
    connectionTimeoutMillis: 20000,
    query_timeout: 30000,
    statement_timeout: 40000,
    idle_in_transaction_session_timeout: 60000
  });

  global.gsClient.on('error', async (err) => {
    addLog.error(`openGauss error`, err);
    global.gsClient?.end();
    global.gsClient = null;

    await delay(1000);
    addLog.info(`Retry connect openGauss`);
    connectGs();
  });

  try {
    // pool.connect() checks a client out of the pool. It is only used as a
    // reachability probe, so hand it straight back; otherwise this client would
    // occupy one pool slot for the whole lifetime of the pool.
    const probeClient = await global.gsClient.connect();
    probeClient.release();

    console.log('openGauss connected');
    return global.gsClient;
  } catch (error) {
    addLog.error(`openGauss connect error`, error);
    global.gsClient?.end();
    global.gsClient = null;

    await delay(1000);
    addLog.info(`Retry connect openGauss`);

    // Retries indefinitely until a connection can be established.
    return connectGs();
  }
};
// WHERE clause parts. A string is emitted verbatim (e.g. 'and' or a raw condition);
// a [column, value] tuple is rendered as `column=value`.
type WhereProps = (string | [string, string | number])[];
// Options accepted by select() and count().
type GetProps = {
// Columns to read; select() falls back to `*` when missing or empty.
fields?: string[];
where?: WhereProps;
// ORDER BY entries, rendered as `field mode` and joined with commas.
order?: { field: string; mode: 'DESC' | 'ASC' | string }[];
// select() uses 10 when limit is not set (or 0).
limit?: number;
// select() uses 0 when offset is not set.
offset?: number;
};
// Options accepted by delete().
type DeleteProps = {
where: WhereProps;
};
// One row expressed as a list of column/value pairs.
type ValuesProps = { key: string; value?: string | number }[];
// Options accepted by update(): the columns to set and the rows to match.
type UpdateProps = {
values: ValuesProps;
where: WhereProps;
};
// Options accepted by insert(): one ValuesProps entry per row.
type InsertProps = {
values: ValuesProps[];
};
/**
 * Minimal SQL helper on top of the shared openGauss pool.
 *
 * Every method builds a SQL string from its arguments and runs it through the
 * pool returned by connectGs(). Table names, column names, ORDER BY entries and
 * raw string WHERE parts are interpolated verbatim, so they must come from
 * trusted code, never from user input.
 */
class GsClass {
  /**
   * Render the WHERE clause. String parts are emitted as-is; [column, value]
   * tuples become `column=value`, with string values single-quoted.
   * Returns '' when no `where` is given.
   */
  private getWhereStr(where?: WhereProps) {
    return where
      ? `WHERE ${where
          .map((item) => {
            if (typeof item === 'string') {
              return item;
            }

            // Double embedded single quotes so a value containing `'` cannot
            // terminate the SQL string literal (broken statement / injection).
            const val =
              typeof item[1] === 'number' ? item[1] : `'${String(item[1]).replace(/'/g, "''")}'`;
            return `${item[0]}=${val}`;
          })
          .join(' ')}`
      : '';
  }

  /**
   * Render the `col=val,...` list of an UPDATE statement.
   * Single quotes inside string values are replaced with double quotes.
   */
  private getUpdateValStr(values: ValuesProps) {
    return values
      .map((item) => {
        const val =
          typeof item.value === 'number'
            ? item.value
            : `'${String(item.value).replace(/\'/g, '"')}'`;

        return `${item.key}=${val}`;
      })
      .join(',');
  }

  /**
   * Render the `(v1,v2),(v1,v2)` tuple list of an INSERT statement.
   * Single quotes inside string values are replaced with double quotes.
   */
  private getInsertValStr(values: ValuesProps[]) {
    return values
      .map(
        (items) =>
          `(${items
            .map((item) =>
              typeof item.value === 'number'
                ? item.value
                : `'${String(item.value).replace(/\'/g, '"')}'`
            )
            .join(',')})`
      )
      .join(',');
  }

  /** SELECT rows from `table`. Defaults: all columns, LIMIT 10, OFFSET 0. */
  async select<T extends QueryResultRow = any>(table: string, props: GetProps) {
    const sql = `SELECT ${
      !props.fields || props.fields?.length === 0 ? '*' : props.fields?.join(',')
    }
      FROM ${table}
      ${this.getWhereStr(props.where)}
      ${
        props.order
          ? `ORDER BY ${props.order.map((item) => `${item.field} ${item.mode}`).join(',')}`
          : ''
      }
      LIMIT ${props.limit || 10} OFFSET ${props.offset || 0}
    `;

    const gs = await connectGs();
    return gs.query<T>(sql);
  }

  /** COUNT rows of `table` matching `where`; resolves to a number (0 if no row is returned). */
  async count(table: string, props: GetProps) {
    const sql = `SELECT COUNT(${props?.fields?.[0] || '*'})
      FROM ${table}
      ${this.getWhereStr(props.where)}
    `;

    const gs = await connectGs();
    return gs.query(sql).then((res) => Number(res.rows[0]?.count || 0));
  }

  /** DELETE rows of `table` matching `where`. */
  async delete(table: string, props: DeleteProps) {
    const sql = `DELETE FROM ${table} ${this.getWhereStr(props.where)}`;
    const gs = await connectGs();
    return gs.query(sql);
  }

  /** UPDATE rows of `table` matching `where`. No query is sent when there is nothing to set. */
  async update(table: string, props: UpdateProps) {
    if (props.values.length === 0) {
      return {
        rowCount: 0
      };
    }

    const sql = `UPDATE ${table} SET ${this.getUpdateValStr(props.values)} ${this.getWhereStr(
      props.where
    )}`;
    const gs = await connectGs();
    return gs.query(sql);
  }

  /**
   * INSERT one or more rows and return their ids (`RETURNING id`).
   * The column list is taken from the first row, so every row must list the
   * same keys in the same order. No query is sent for an empty row list.
   */
  async insert(table: string, props: InsertProps) {
    if (props.values.length === 0) {
      return {
        rowCount: 0,
        rows: []
      };
    }

    const fields = props.values[0].map((item) => item.key).join(',');
    const sql = `INSERT INTO ${table} (${fields}) VALUES ${this.getInsertValStr(
      props.values
    )} RETURNING id`;

    const gs = await connectGs();
    return gs.query<{ id: string }>(sql);
  }

  /** Run raw SQL; logs a warning with the statement when it takes longer than 300ms. */
  async query<T extends QueryResultRow = any>(sql: string) {
    const gs = await connectGs();
    const start = Date.now();
    return gs.query<T>(sql).then((res) => {
      const time = Date.now() - start;

      if (time > 300) {
        addLog.warn(`gs query time: ${time}ms, sql: ${sql}`);
      }

      return res;
    });
  }
}
// Shared query-helper instance used by the openGauss vector controller.
export const GsClient = new GsClass();
// Snapshot of global.gsClient taken when this module is evaluated.
// NOTE(review): connectGs() assigns global.gsClient later and may replace it on
// reconnect, so this export presumably does not track the live pool — confirm
// before relying on it; prefer connectGs().
export const Gs = global.gsClient;

View File

@@ -0,0 +1,253 @@
/* openGauss vector crud */
import { DatasetVectorTableName } from '../constants';
import { delay } from '@fastgpt/global/common/system/utils';
import { GsClient, connectGs } from './controller';
import { GsSearchRawType } from '@fastgpt/global/core/dataset/api';
import type {
DelDatasetVectorCtrlProps,
EmbeddingRecallCtrlProps,
EmbeddingRecallResponse,
InsertVectorControllerProps
} from '../controller.d';
import dayjs from 'dayjs';
import { addLog } from '../../system/log';
/**
 * Vector-store controller backed by openGauss.
 *
 * Stores one row per vector in the DatasetVectorTableName table
 * (id, vector, team_id, dataset_id, collection_id, createtime) and exposes
 * insert / delete / recall / count operations on it.
 */
export class GsVectorCtrl {
  constructor() {}

  /**
   * Create the vector extension, the table and its indexes if they do not exist.
   * Failures are logged and swallowed; nothing is thrown to the caller.
   */
  init = async () => {
    try {
      await connectGs();
      await GsClient.query(`
        CREATE EXTENSION IF NOT EXISTS vector;
        CREATE TABLE IF NOT EXISTS ${DatasetVectorTableName} (
            id BIGSERIAL PRIMARY KEY,
            vector VECTOR(1536) NOT NULL,
            team_id VARCHAR(50) NOT NULL,
            dataset_id VARCHAR(50) NOT NULL,
            collection_id VARCHAR(50) NOT NULL,
            createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );
      `);

      await GsClient.query(
        `CREATE INDEX CONCURRENTLY IF NOT EXISTS vector_index ON ${DatasetVectorTableName} USING hnsw (vector vector_ip_ops) WITH (m = 32, ef_construction = 128);`
      );
      await GsClient.query(
        `CREATE INDEX CONCURRENTLY IF NOT EXISTS team_dataset_collection_index ON ${DatasetVectorTableName} USING btree(team_id, dataset_id, collection_id);`
      );
      await GsClient.query(
        `CREATE INDEX CONCURRENTLY IF NOT EXISTS create_time_index ON ${DatasetVectorTableName} USING btree(createtime);`
      );

      addLog.info('init openGauss successful');
    } catch (error) {
      addLog.error('init openGauss error', error);
    }
  };

  /**
   * Insert one vector row and return its generated id.
   * Retried up to `retry` more times (default 3) with a 500ms pause between attempts.
   */
  insert = async (props: InsertVectorControllerProps): Promise<{ insertId: string }> => {
    const { teamId, datasetId, collectionId, vector, retry = 3 } = props;

    try {
      const { rowCount, rows } = await GsClient.insert(DatasetVectorTableName, {
        values: [
          [
            { key: 'vector', value: `[${vector}]` },
            { key: 'team_id', value: String(teamId) },
            { key: 'dataset_id', value: String(datasetId) },
            { key: 'collection_id', value: String(collectionId) }
          ]
        ]
      });

      if (rowCount === 0) {
        return Promise.reject('insertDatasetData: no insert');
      }

      return {
        insertId: rows[0].id
      };
    } catch (error) {
      if (retry <= 0) {
        return Promise.reject(error);
      }
      await delay(500);
      return this.insert({
        ...props,
        retry: retry - 1
      });
    }
  };

  /**
   * Delete vectors of a team, selected by exactly one of:
   * a single `id`, `datasetIds` (optionally narrowed by `collectionIds`), or `idList`.
   * An empty `idList` is a no-op; props matching none of the selectors reject.
   * Retried up to `retry` more times (default 2) with a 500ms pause between attempts.
   */
  delete = async (props: DelDatasetVectorCtrlProps): Promise<any> => {
    const { teamId, retry = 2 } = props;

    // Every condition is scoped to the team first.
    const teamIdWhere = `team_id='${String(teamId)}' AND`;

    const where = await (() => {
      if ('id' in props && props.id) return `${teamIdWhere} id=${props.id}`;

      if ('datasetIds' in props && props.datasetIds) {
        const datasetIdWhere = `dataset_id IN (${props.datasetIds
          .map((id) => `'${String(id)}'`)
          .join(',')})`;

        if ('collectionIds' in props && props.collectionIds) {
          return `${teamIdWhere} ${datasetIdWhere} AND collection_id IN (${props.collectionIds
            .map((id) => `'${String(id)}'`)
            .join(',')})`;
        }

        return `${teamIdWhere} ${datasetIdWhere}`;
      }

      if ('idList' in props && Array.isArray(props.idList)) {
        if (props.idList.length === 0) return;
        return `${teamIdWhere} id IN (${props.idList.map((id) => String(id)).join(',')})`;
      }
      return Promise.reject('deleteDatasetData: no where');
    })();

    // Nothing to delete (empty idList).
    if (!where) return;

    try {
      await GsClient.delete(DatasetVectorTableName, {
        where: [where]
      });
    } catch (error) {
      if (retry <= 0) {
        return Promise.reject(error);
      }
      await delay(500);
      return this.delete({
        ...props,
        retry: retry - 1
      });
    }
  };

  /**
   * Vector recall: return up to `limit` rows of the given datasets ranked by the
   * query's score, honouring the forbid / filter collection lists.
   * Retried up to `retry` more times (default 2) without delay.
   */
  embRecall = async (props: EmbeddingRecallCtrlProps): Promise<EmbeddingRecallResponse> => {
    const {
      teamId,
      datasetIds,
      vector,
      limit,
      forbidCollectionIdList,
      filterCollectionIdList,
      retry = 2
    } = props;

    // Get forbid collection
    const formatForbidCollectionIdList = (() => {
      if (!filterCollectionIdList) return forbidCollectionIdList;
      const list = forbidCollectionIdList
        .map((id) => String(id))
        .filter((id) => !filterCollectionIdList.includes(id));
      return list;
    })();
    const forbidCollectionSql =
      formatForbidCollectionIdList.length > 0
        ? `AND collection_id NOT IN (${formatForbidCollectionIdList.map((id) => `'${id}'`).join(',')})`
        : '';

    // Filter by collectionId
    const formatFilterCollectionId = (() => {
      if (!filterCollectionIdList) return;

      return filterCollectionIdList
        .map((id) => String(id))
        .filter((id) => !forbidCollectionIdList.includes(id));
    })();
    const filterCollectionIdSql = formatFilterCollectionId
      ? `AND collection_id IN (${formatFilterCollectionId.map((id) => `'${id}'`).join(',')})`
      : '';

    // Empty data
    if (formatFilterCollectionId && formatFilterCollectionId.length === 0) {
      return { results: [] };
    }

    try {
      // NOTE(review): `ob_hnsw_ef_search`, `inner_product(vector, [...])` and
      // `APPROXIMATE LIMIT` look like OceanBase syntax; confirm that openGauss
      // accepts this statement as written.
      const results: any = await GsClient.query(
        `BEGIN;
          SET ob_hnsw_ef_search = ${global.systemEnv?.hnswEfSearch || 100};
          SELECT id, collection_id, inner_product(vector, [${vector}]) AS score
            FROM ${DatasetVectorTableName}
            WHERE team_id='${teamId}'
              AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
              ${filterCollectionIdSql}
              ${forbidCollectionSql}
            ORDER BY score desc APPROXIMATE LIMIT ${limit};
        COMMIT;`
      );

      // A multi-statement query yields one result per statement, in order:
      // [0] BEGIN, [1] SET, [2] SELECT, [3] COMMIT. The rows live in the SELECT result.
      const rows = results?.[2]?.rows as GsSearchRawType[];

      if (!Array.isArray(rows)) {
        return {
          results: []
        };
      }

      return {
        results: rows.map((item) => ({
          id: String(item.id),
          collectionId: item.collection_id,
          // NOTE(review): the score is negated here; verify this matches the sign
          // of the value produced by the SELECT above.
          score: item.score * -1
        }))
      };
    } catch (error) {
      if (retry <= 0) {
        return Promise.reject(error);
      }
      return this.embRecall({
        ...props,
        retry: retry - 1
      });
    }
  };

  /** List id / team / dataset of every vector whose createtime lies in [start, end]. */
  getVectorDataByTime = async (start: Date, end: Date) => {
    const { rows } = await GsClient.query<{
      id: string;
      team_id: string;
      dataset_id: string;
    }>(`SELECT id, team_id, dataset_id
    FROM ${DatasetVectorTableName}
    WHERE createtime BETWEEN '${dayjs(start).format('YYYY-MM-DD HH:mm:ss')}' AND '${dayjs(
      end
    ).format('YYYY-MM-DD HH:mm:ss')}';
    `);

    return rows.map((item) => ({
      id: String(item.id),
      teamId: item.team_id,
      datasetId: item.dataset_id
    }));
  };

  /** Count the vectors owned by a team. */
  getVectorCountByTeamId = async (teamId: string) => {
    const total = await GsClient.count(DatasetVectorTableName, {
      where: [['team_id', String(teamId)]]
    });

    return total;
  };

  /** Count the vectors of one dataset of a team. */
  getVectorCountByDatasetId = async (teamId: string, datasetId: string) => {
    const total = await GsClient.count(DatasetVectorTableName, {
      where: [['team_id', String(teamId)], 'and', ['dataset_id', String(datasetId)]]
    });

    return total;
  };

  /** Count the vectors of one collection inside a dataset of a team. */
  getVectorCountByCollectionId = async (
    teamId: string,
    datasetId: string,
    collectionId: string
  ) => {
    const total = await GsClient.count(DatasetVectorTableName, {
      where: [
        ['team_id', String(teamId)],
        'and',
        ['dataset_id', String(datasetId)],
        'and',
        ['collection_id', String(collectionId)]
      ]
    });

    return total;
  };
}

View File

@@ -6,6 +6,7 @@ declare global {
var pgClient: Pool | null;
var obClient: MysqlPool | null;
var milvusClient: MilvusClient | null;
var gsClient: Pool | null;
}
export type EmbeddingRecallItemType = {

View File

@@ -20,10 +20,6 @@ export const getVlmModel = (model?: string) => {
?.find((item) => item.model === model || item.name === model);
};
// List every registered LLM model whose `vision` flag is truthy.
export const getVlmModelList = () => {
  const registeredModels = Array.from(global.llmModelMap.values());
  const visionModels = registeredModels?.filter((model) => model.vision);
  return visionModels || [];
};
export const getDefaultEmbeddingModel = () => global?.systemDefaultModel.embedding!;
export const getEmbeddingModel = (model?: string) => {
if (!model) return getDefaultEmbeddingModel();

View File

@@ -3,11 +3,12 @@ import type {
ApiFileReadContentResponse,
APIFileReadResponse,
ApiDatasetDetailResponse,
APIFileServer
} from '@fastgpt/global/core/dataset/apiDataset/type';
APIFileServer,
APIFileItem
} from '@fastgpt/global/core/dataset/apiDataset';
import axios, { type Method } from 'axios';
import { addLog } from '../../../../common/system/log';
import { readFileRawTextByUrl } from '../../read';
import { addLog } from '../../../common/system/log';
import { readFileRawTextByUrl } from '../read';
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import { type RequireOnlyOne } from '@fastgpt/global/common/type/utils';

View File

@@ -1,10 +1,18 @@
import { useApiDatasetRequest } from './custom/api';
import { useYuqueDatasetRequest } from './yuqueDataset/api';
import { useFeishuDatasetRequest } from './feishuDataset/api';
import type { ApiDatasetServerType } from '@fastgpt/global/core/dataset/apiDataset/type';
import type {
APIFileServer,
YuqueServer,
FeishuServer
} from '@fastgpt/global/core/dataset/apiDataset';
import { useApiDatasetRequest } from './api';
import { useYuqueDatasetRequest } from '../yuqueDataset/api';
import { useFeishuDatasetRequest } from '../feishuDataset/api';
export const getApiDatasetRequest = async (apiDatasetServer?: ApiDatasetServerType) => {
const { apiServer, yuqueServer, feishuServer } = apiDatasetServer || {};
export const getApiDatasetRequest = async (data: {
apiServer?: APIFileServer;
yuqueServer?: YuqueServer;
feishuServer?: FeishuServer;
}) => {
const { apiServer, yuqueServer, feishuServer } = data;
if (apiServer) {
return useApiDatasetRequest({ apiServer });

View File

@@ -5,10 +5,9 @@ import {
} from '@fastgpt/global/core/dataset/constants';
import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { MongoDatasetCollection } from './schema';
import type {
DatasetCollectionSchemaType,
DatasetDataFieldType,
DatasetSchemaType
import {
type DatasetCollectionSchemaType,
type DatasetSchemaType
} from '@fastgpt/global/core/dataset/type';
import { MongoDatasetTraining } from '../training/schema';
import { MongoDatasetData } from '../data/schema';
@@ -16,7 +15,7 @@ import { delImgByRelatedId } from '../../../common/file/image/controller';
import { deleteDatasetDataVector } from '../../../common/vectorDB/controller';
import { delFileByFileIdList } from '../../../common/file/gridfs/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import type { ClientSession } from '../../../common/mongo';
import { type ClientSession } from '../../../common/mongo';
import { createOrGetCollectionTags } from './utils';
import { rawText2Chunks } from '../read';
import { checkDatasetLimit } from '../../../support/permission/teamLimit';
@@ -39,25 +38,20 @@ import {
getLLMMaxChunkSize
} from '@fastgpt/global/core/dataset/training/utils';
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';
import { deleteDatasetImage } from '../image/controller';
import { clearCollectionImages, removeDatasetImageExpiredTime } from '../image/utils';
export const createCollectionAndInsertData = async ({
dataset,
rawText,
relatedId,
imageIds,
createCollectionParams,
backupParse = false,
billId,
session
}: {
dataset: DatasetSchemaType;
rawText?: string;
rawText: string;
relatedId?: string;
imageIds?: string[];
createCollectionParams: CreateOneCollectionParams;
backupParse?: boolean;
billId?: string;
@@ -75,18 +69,15 @@ export const createCollectionAndInsertData = async ({
// Set default params
const trainingType =
createCollectionParams.trainingType || DatasetCollectionDataProcessModeEnum.chunk;
const chunkSize = computeChunkSize({
...createCollectionParams,
trainingType,
llmModel: getLLMModel(dataset.agentModel)
});
const chunkSplitter = computeChunkSplitter(createCollectionParams);
const paragraphChunkDeep = computeParagraphChunkDeep(createCollectionParams);
const trainingMode = getTrainingModeByCollection({
trainingType: trainingType,
autoIndexes: createCollectionParams.autoIndexes,
imageIndex: createCollectionParams.imageIndex
});
if (
trainingType === DatasetCollectionDataProcessModeEnum.qa ||
trainingType === DatasetCollectionDataProcessModeEnum.backup
) {
if (trainingType === DatasetCollectionDataProcessModeEnum.qa) {
delete createCollectionParams.chunkTriggerType;
delete createCollectionParams.chunkTriggerMinSize;
delete createCollectionParams.dataEnhanceCollectionName;
@@ -96,60 +87,35 @@ export const createCollectionAndInsertData = async ({
delete createCollectionParams.qaPrompt;
}
// 1. split chunks or create image chunks
const {
chunks,
chunkSize
}: {
chunks: Array<{
q?: string;
a?: string; // answer or custom content
imageId?: string;
indexes?: string[];
}>;
chunkSize?: number;
} = (() => {
if (rawText) {
const chunkSize = computeChunkSize({
...createCollectionParams,
trainingType,
llmModel: getLLMModel(dataset.agentModel)
});
// Process text chunks
const chunks = rawText2Chunks({
rawText,
chunkTriggerType: createCollectionParams.chunkTriggerType,
chunkTriggerMinSize: createCollectionParams.chunkTriggerMinSize,
chunkSize,
paragraphChunkDeep,
paragraphChunkMinSize: createCollectionParams.paragraphChunkMinSize,
maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)),
overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : [],
backupParse
});
return { chunks, chunkSize };
}
if (imageIds) {
// Process image chunks
const chunks = imageIds.map((imageId: string) => ({
imageId,
indexes: []
}));
return { chunks };
}
throw new Error('Either rawText or imageIdList must be provided');
})();
// 1. split chunks
const chunks = rawText2Chunks({
rawText,
chunkTriggerType: createCollectionParams.chunkTriggerType,
chunkTriggerMinSize: createCollectionParams.chunkTriggerMinSize,
chunkSize,
paragraphChunkDeep,
paragraphChunkMinSize: createCollectionParams.paragraphChunkMinSize,
maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)),
overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : [],
backupParse
});
// 2. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingMode, chunks)
insertLen: predictDataLimitLength(
getTrainingModeByCollection({
trainingType: trainingType,
autoIndexes: createCollectionParams.autoIndexes,
imageIndex: createCollectionParams.imageIndex
}),
chunks
)
});
const fn = async (session: ClientSession) => {
// 3. Create collection
// 3. create collection
const { _id: collectionId } = await createOneCollection({
...createCollectionParams,
trainingType,
@@ -157,8 +123,8 @@ export const createCollectionAndInsertData = async ({
chunkSize,
chunkSplitter,
hashRawText: rawText ? hashStr(rawText) : undefined,
rawTextLength: rawText?.length,
hashRawText: hashStr(rawText),
rawTextLength: rawText.length,
nextSyncTime: (() => {
// ignore auto collections sync for website datasets
if (!dataset.autoSync && dataset.type === DatasetTypeEnum.websiteDataset) return undefined;
@@ -200,7 +166,11 @@ export const createCollectionAndInsertData = async ({
vectorModel: dataset.vectorModel,
vlmModel: dataset.vlmModel,
indexSize: createCollectionParams.indexSize,
mode: trainingMode,
mode: getTrainingModeByCollection({
trainingType: trainingType,
autoIndexes: createCollectionParams.autoIndexes,
imageIndex: createCollectionParams.imageIndex
}),
prompt: createCollectionParams.qaPrompt,
billId: traingBillId,
data: chunks.map((item, index) => ({
@@ -214,12 +184,7 @@ export const createCollectionAndInsertData = async ({
session
});
// 6. Remove images ttl index
await removeDatasetImageExpiredTime({
ids: imageIds,
collectionId,
session
});
// 6. remove related image ttl
if (relatedId) {
await MongoImage.updateMany(
{
@@ -239,7 +204,7 @@ export const createCollectionAndInsertData = async ({
}
return {
collectionId: String(collectionId),
collectionId,
insertResults
};
};
@@ -320,20 +285,17 @@ export const delCollectionRelatedSource = async ({
.map((item) => item?.metadata?.relatedImgId || '')
.filter(Boolean);
// Delete files and images in parallel
await Promise.all([
// Delete files
delFileByFileIdList({
bucketName: BucketNameEnum.dataset,
fileIdList
}),
// Delete images
delImgByRelatedId({
teamId,
relateIds: relatedImageIds,
session
})
]);
// Delete files
await delFileByFileIdList({
bucketName: BucketNameEnum.dataset,
fileIdList
});
// Delete images
await delImgByRelatedId({
teamId,
relateIds: relatedImageIds,
session
});
};
/**
* delete collection and it related data
@@ -378,16 +340,16 @@ export async function delCollection({
datasetId: { $in: datasetIds },
collectionId: { $in: collectionIds }
}),
// Delete dataset_images
clearCollectionImages(collectionIds),
// Delete images if needed
...(delImg
? collections
.map((item) => item?.metadata?.relatedImgId || '')
.filter(Boolean)
.map((imageId) => deleteDatasetImage(imageId))
? [
delImgByRelatedId({
teamId,
relateIds: collections
.map((item) => item?.metadata?.relatedImgId || '')
.filter(Boolean)
})
]
: []),
// Delete files if needed
...(delFile
? [
delFileByFileIdList({

View File

@@ -1,9 +1,11 @@
import { MongoDatasetCollection } from './schema';
import type { ClientSession } from '../../../common/mongo';
import { type ClientSession } from '../../../common/mongo';
import { MongoDatasetCollectionTags } from '../tag/schema';
import { readFromSecondary } from '../../../common/mongo/utils';
import type { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type';
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
import {
type CollectionWithDatasetType,
type DatasetCollectionSchemaType
} from '@fastgpt/global/core/dataset/type';
import {
DatasetCollectionDataProcessModeEnum,
DatasetCollectionSyncResultEnum,
@@ -157,7 +159,9 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => {
return {
type: DatasetSourceReadTypeEnum.apiFile,
sourceId,
apiDatasetServer: dataset.apiDatasetServer
apiServer: dataset.apiServer,
feishuServer: dataset.feishuServer,
yuqueServer: dataset.yuqueServer
};
})();
@@ -229,37 +233,18 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => {
QA: 独立进程
Chunk: Image Index -> Auto index -> chunk index
*/
export const getTrainingModeByCollection = ({
trainingType,
autoIndexes,
imageIndex
}: {
trainingType: DatasetCollectionDataProcessModeEnum;
autoIndexes?: boolean;
imageIndex?: boolean;
export const getTrainingModeByCollection = (collection: {
trainingType: DatasetCollectionSchemaType['trainingType'];
autoIndexes?: DatasetCollectionSchemaType['autoIndexes'];
imageIndex?: DatasetCollectionSchemaType['imageIndex'];
}) => {
if (
trainingType === DatasetCollectionDataProcessModeEnum.imageParse &&
global.feConfigs?.isPlus
) {
return TrainingModeEnum.imageParse;
}
if (trainingType === DatasetCollectionDataProcessModeEnum.qa) {
if (collection.trainingType === DatasetCollectionDataProcessModeEnum.qa) {
return TrainingModeEnum.qa;
}
if (
trainingType === DatasetCollectionDataProcessModeEnum.chunk &&
imageIndex &&
global.feConfigs?.isPlus
) {
if (collection.imageIndex && global.feConfigs?.isPlus) {
return TrainingModeEnum.image;
}
if (
trainingType === DatasetCollectionDataProcessModeEnum.chunk &&
autoIndexes &&
global.feConfigs?.isPlus
) {
if (collection.autoIndexes && global.feConfigs?.isPlus) {
return TrainingModeEnum.auto;
}
return TrainingModeEnum.chunk;

View File

@@ -9,7 +9,6 @@ import { deleteDatasetDataVector } from '../../common/vectorDB/controller';
import { MongoDatasetDataText } from './data/dataTextSchema';
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
import { retryFn } from '@fastgpt/global/common/system/utils';
import { clearDatasetImages } from './image/utils';
/* ============= dataset ========== */
/* find all datasetId by top datasetId */
@@ -103,10 +102,8 @@ export async function delDatasetRelevantData({
}),
//delete dataset_datas
MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } }),
// Delete collection image and file
// Delete Image and file
delCollectionRelatedSource({ collections }),
// Delete dataset Image
clearDatasetImages(datasetIds),
// Delete vector data
deleteDatasetDataVector({ teamId, datasetIds })
]);

View File

@@ -1,56 +0,0 @@
import { getDatasetImagePreviewUrl } from '../image/utils';
import type { DatasetCiteItemType, DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type';
/**
 * Build the display value of a dataset data item.
 *
 * Without an imageId, q and a are passed through untouched. With an imageId, q is
 * turned into a markdown image whose alt text is the original q (newlines escaped)
 * and whose target is a preview URL requested with a 7-day expiry; the same URL is
 * also returned as `imagePreivewUrl`.
 */
export const formatDatasetDataValue = ({
  q,
  a,
  imageId,
  teamId,
  datasetId
}: {
  q: string;
  a?: string;
  imageId?: string;
  teamId: string;
  datasetId: string;
}): {
  q: string;
  a?: string;
  imagePreivewUrl?: string;
} => {
  if (imageId) {
    const SEVEN_DAYS_IN_MINUTES = 60 * 24 * 7;
    const imageUrl = getDatasetImagePreviewUrl({
      imageId,
      teamId,
      datasetId,
      expiredMinutes: SEVEN_DAYS_IN_MINUTES
    });

    // Keep the markdown alt text on a single line.
    const altText = q.replaceAll('\n', '\\n');

    return {
      q: `![${altText}](${imageUrl})`,
      a,
      imagePreivewUrl: imageUrl
    };
  }

  // Plain text item: nothing to rewrite.
  return { q, a };
};
// Convert stored dataset data records into cite items: q / a / image preview come
// from formatDatasetDataValue, and chunkIndex is exposed as `index`.
export const getFormatDatasetCiteList = (list: DatasetDataSchemaType[]) => {
  return list.map<DatasetCiteItemType>((data) => {
    const { _id, teamId, datasetId, q, a, imageId, history, updateTime, chunkIndex } = data;
    const displayValue = formatDatasetDataValue({ teamId, datasetId, q, a, imageId });

    return {
      _id,
      ...displayValue,
      history,
      updateTime,
      index: chunkIndex
    };
  });
};

View File

@@ -37,7 +37,8 @@ const DatasetDataSchema = new Schema({
required: true
},
a: {
type: String
type: String,
default: ''
},
history: {
type: [
@@ -73,9 +74,6 @@ const DatasetDataSchema = new Schema({
default: []
},
imageId: {
type: String
},
updateTime: {
type: Date,
default: () => new Date()

View File

@@ -3,10 +3,10 @@ import type {
ApiFileReadContentResponse,
ApiDatasetDetailResponse,
FeishuServer
} from '@fastgpt/global/core/dataset/apiDataset/type';
} from '@fastgpt/global/core/dataset/apiDataset';
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import axios, { type Method } from 'axios';
import { addLog } from '../../../../common/system/log';
import { addLog } from '../../../common/system/log';
type ResponseDataType = {
success: boolean;

View File

@@ -1,166 +0,0 @@
import { addMinutes } from 'date-fns';
import { bucketName, MongoDatasetImageSchema } from './schema';
import { connectionMongo, Types } from '../../../common/mongo';
import fs from 'fs';
import type { FileType } from '../../../common/file/multer';
import fsp from 'fs/promises';
import { computeGridFsChunSize } from '../../../common/file/gridfs/utils';
import { setCron } from '../../../common/system/cron';
import { checkTimerLock } from '../../../common/system/timerLock/utils';
import { TimerIdEnum } from '../../../common/system/timerLock/constants';
import { addLog } from '../../../common/system/log';
// Open a GridFS bucket named `bucketName` on the current mongoose connection.
const getGridBucket = () => {
  const { GridFSBucket } = connectionMongo.mongo;
  const database = connectionMongo.connection.db!;
  return new GridFSBucket(database, { bucketName });
};
/**
 * Store an uploaded image file in the dataset image GridFS bucket.
 *
 * @param teamId      owner team, saved in the file metadata
 * @param datasetId   owner dataset, saved in the file metadata
 * @param file        uploaded file descriptor; its `path` is read from disk
 * @param expiredTime saved in the file metadata; defaults to 30 minutes from now
 * @returns the GridFS file id as `imageId`; `previewUrl` is always an empty string here
 *
 * Rejects when `file.path` is not a regular file, or when reading the file or
 * writing to GridFS fails.
 */
export const createDatasetImage = async ({
  teamId,
  datasetId,
  file,
  expiredTime = addMinutes(new Date(), 30)
}: {
  teamId: string;
  datasetId: string;
  file: FileType;
  expiredTime?: Date;
}): Promise<{ imageId: string; previewUrl: string }> => {
  const path = file.path;
  const gridBucket = getGridBucket();
  const metadata = {
    teamId: String(teamId),
    datasetId: String(datasetId),
    expiredTime
  };

  const stats = await fsp.stat(path);
  if (!stats.isFile()) return Promise.reject(`${path} is not a file`);

  const readStream = fs.createReadStream(path, {
    highWaterMark: 256 * 1024
  });
  const chunkSizeBytes = computeGridFsChunSize(stats.size);

  const stream = gridBucket.openUploadStream(file.originalname, {
    metadata,
    contentType: file.mimetype,
    chunkSizeBytes
  });

  // save to gridfs
  await new Promise((resolve, reject) => {
    // pipe() does not forward errors of the source stream to the destination,
    // so listen on the reader too; otherwise a read failure would leave this
    // promise pending forever.
    readStream.on('error', reject);

    readStream
      .pipe(stream as any)
      .on('finish', resolve)
      .on('error', reject);
  });

  return {
    imageId: String(stream.id),
    previewUrl: ''
  };
};
/**
 * Look up a dataset image and open a download stream for it.
 * Rejects with 'Image not found' when no file document exists for the id.
 */
export const getDatasetImageReadData = async (imageId: string) => {
  // The file document carries the metadata (e.g. contentType) of the image.
  const fileQuery = { _id: new Types.ObjectId(imageId) };
  const fileInfo = await MongoDatasetImageSchema.findOne(fileQuery).lean();

  if (!fileInfo) {
    return Promise.reject('Image not found');
  }

  const bucket = getGridBucket();
  const stream = bucket.openDownloadStream(new Types.ObjectId(imageId));

  return { stream, fileInfo };
};
/**
 * Read a dataset image from GridFS and return it as a base64 data URL.
 *
 * The MIME type comes from the stored file's contentType, falling back to
 * 'image/jpeg'. Rejects with 'Image not found' when the image does not exist,
 * or with the stream error when the download fails.
 */
export const getDatasetImageBase64 = async (imageId: string) => {
  // getDatasetImageReadData already loads the file metadata (and rejects when the
  // image is missing), so reuse it instead of querying the same document twice.
  const { stream, fileInfo } = await getDatasetImageReadData(imageId);

  // Convert stream to buffer
  const chunks: Buffer[] = [];

  return new Promise<string>((resolve, reject) => {
    stream.on('data', (chunk: Buffer) => {
      chunks.push(chunk);
    });

    stream.on('end', () => {
      // Combine all chunks into a single buffer
      const buffer = Buffer.concat(chunks);
      // Convert buffer to base64 string
      const base64 = buffer.toString('base64');
      const dataUrl = `data:${fileInfo.contentType || 'image/jpeg'};base64,${base64}`;
      resolve(dataUrl);
    });

    stream.on('error', reject);
  });
};
/**
 * Delete a dataset image from GridFS.
 *
 * A 'File not found' failure is treated as already deleted: it is logged as a
 * warning and the call resolves. Any other failure is rejected to the caller.
 */
export const deleteDatasetImage = async (imageId: string) => {
  const gridBucket = getGridBucket();

  try {
    await gridBucket.delete(new Types.ObjectId(imageId));
  } catch (error: any) {
    const msg = error?.message;
    // Guard the type: a thrown value without a string `message` must be passed
    // on as-is instead of failing here with a TypeError on `.includes`.
    if (typeof msg === 'string' && msg.includes('File not found')) {
      addLog.warn('Delete dataset image error', error);
      return;
    } else {
      return Promise.reject(error);
    }
  }
};
/**
 * Register a cron job (every 10 minutes) that deletes dataset images whose
 * `metadata.expiredTime` is in the past. A timer lock (9 minutes) is checked
 * before each run; a run is skipped when the lock is not obtained.
 */
export const clearExpiredDatasetImageCron = async () => {
  const bucket = getGridBucket();

  // Delete every expired image; a failure on one file does not stop the others.
  const removeExpiredImages = async () => {
    addLog.debug('Clear expired dataset image start');

    const expiredFilter = { 'metadata.expiredTime': { $lt: new Date() } };
    const expiredFiles = await MongoDatasetImageSchema.find(expiredFilter, '_id').lean();

    for (const expiredFile of expiredFiles) {
      try {
        await bucket.delete(expiredFile._id);
      } catch (error) {
        addLog.error('Delete expired dataset image error', error);
      }
    }

    addLog.debug('Clear expired dataset image end');
  };

  setCron('*/10 * * * *', async () => {
    const lockAcquired = await checkTimerLock({
      timerId: TimerIdEnum.clearExpiredDatasetImage,
      lockMinuted: 9
    });
    if (!lockAcquired) return;

    try {
      await removeExpiredImages();
    } catch (error) {
      addLog.error('clearExpiredDatasetImageCron error', error);
    }
  });
};

View File

@@ -1,36 +0,0 @@
import type { Types } from '../../../common/mongo';
import { getMongoModel, Schema } from '../../../common/mongo';
// Bucket name; the model below is bound to the `<bucketName>.files` collection
// (presumably the GridFS files collection for this bucket — confirm against getGridBucket).
export const bucketName = 'dataset_image';

// Typed shape of one document in the files collection.
type DatasetImageFileType = {
  _id: Types.ObjectId;
  length: number;
  chunkSize: number;
  uploadDate: Date;
  filename: string;
  contentType: string;
  metadata: {
    teamId: string;
    datasetId: string;
    collectionId: string;
    expiredTime: Date;
  };
};

const DatasetImageFileSchema = new Schema({
  length: { type: Number, required: true },
  chunkSize: { type: Number, required: true },
  uploadDate: { type: Date, required: true },
  filename: { type: String, required: true },
  contentType: { type: String, required: true },
  metadata: {
    teamId: { type: String, required: true },
    datasetId: { type: String, required: true },
    collectionId: { type: String },
    expiredTime: { type: Date, required: true }
  }
});

// Indexes on the metadata fields this schema declares for lookup and expiry.
DatasetImageFileSchema.index({ 'metadata.datasetId': 'hashed' });
DatasetImageFileSchema.index({ 'metadata.collectionId': 'hashed' });
DatasetImageFileSchema.index({ 'metadata.expiredTime': -1 });

export const MongoDatasetImageSchema = getMongoModel<DatasetImageFileType>(
  `${bucketName}.files`,
  DatasetImageFileSchema
);

View File

@@ -1,101 +0,0 @@
import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
import { Types, type ClientSession } from '../../../common/mongo';
import { deleteDatasetImage } from './controller';
import { MongoDatasetImageSchema } from './schema';
import { addMinutes } from 'date-fns';
import jwt from 'jsonwebtoken';
/**
 * Remove `metadata.expiredTime` from the given images and bind them to a collection.
 *
 * @param ids - Image ids to update; entries that are not valid ObjectIds are ignored.
 *   When the list is empty the function returns `undefined` without touching the database.
 * @param collectionId - Stored (as a string) in `metadata.collectionId`.
 * @param session - Optional Mongo session passed through to the update.
 * @returns The `updateMany` result, or `undefined` for an empty id list.
 */
export const removeDatasetImageExpiredTime = async ({
  ids = [],
  collectionId,
  session
}: {
  ids?: string[];
  collectionId: string;
  session?: ClientSession;
}) => {
  if (ids.length === 0) return;

  // Keep only valid ids and normalise string ids to ObjectId instances.
  const objectIds: Types.ObjectId[] = [];
  for (const id of ids) {
    if (!Types.ObjectId.isValid(id)) continue;
    objectIds.push(typeof id === 'string' ? new Types.ObjectId(id) : id);
  }

  return MongoDatasetImageSchema.updateMany(
    { _id: { $in: objectIds } },
    {
      $unset: { 'metadata.expiredTime': '' },
      $set: { 'metadata.collectionId': String(collectionId) }
    },
    { session }
  );
};
/**
 * Build a relative preview URL for a dataset image, carrying a signed JWT.
 *
 * The token payload holds `teamId`, `datasetId` and an `exp` claim; it is
 * signed with `process.env.FILE_TOKEN_KEY`, falling back to 'filetoken'.
 *
 * @param imageId - Image id placed in the URL path.
 * @param teamId - Team id embedded in the token.
 * @param datasetId - Dataset id embedded in the token.
 * @param expiredMinutes - Token lifetime, in minutes from now.
 * @returns `/api/core/dataset/image/<imageId>?token=<jwt>`.
 */
export const getDatasetImagePreviewUrl = ({
  imageId,
  teamId,
  datasetId,
  expiredMinutes
}: {
  imageId: string;
  teamId: string;
  datasetId: string;
  expiredMinutes: number;
}) => {
  // The `exp` claim is expressed in whole seconds, not milliseconds.
  const expiresAt = addMinutes(new Date(), expiredMinutes);
  const exp = Math.floor(expiresAt.getTime() / 1000);

  const secret = (process.env.FILE_TOKEN_KEY as string) ?? 'filetoken';
  const payload = {
    teamId: String(teamId),
    datasetId: String(datasetId),
    exp
  };
  const token = jwt.sign(payload, secret);

  return `/api/core/dataset/image/${imageId}?token=${token}`;
};
/**
 * Verify a preview-URL token and extract the team and dataset it was issued for.
 *
 * @param token - JWT taken from the preview URL's `token` query parameter.
 * @returns Resolves with `{ teamId, datasetId }` from the token payload.
 *   Rejects with `ERROR_ENUM.unAuthFile` when the token is missing, fails
 *   verification, or lacks `teamId` / `datasetId`.
 */
export const authDatasetImagePreviewUrl = (token?: string) => {
  return new Promise<{
    teamId: string;
    datasetId: string;
  }>((resolve, reject) => {
    if (!token) {
      reject(ERROR_ENUM.unAuthFile);
      return;
    }

    // Must match the key used when the token was signed.
    const secret = (process.env.FILE_TOKEN_KEY as string) ?? 'filetoken';

    jwt.verify(token, secret, (err, decoded: any) => {
      const isValid = !err && decoded?.teamId && decoded?.datasetId;
      if (!isValid) {
        reject(ERROR_ENUM.unAuthFile);
        return;
      }

      const { teamId, datasetId } = decoded;
      resolve({ teamId, datasetId });
    });
  });
};
/**
 * Delete every image whose `metadata.datasetId` is one of the given datasets.
 *
 * @param datasetIds - Dataset ids; each is converted to a string before matching.
 * @returns Resolves once all deletions finish; rejects if any deletion rejects.
 */
export const clearDatasetImages = async (datasetIds: string[]) => {
  const targetIds = datasetIds.map((id) => String(id));

  // Only the _id is needed to delete each file.
  const imageDocs = await MongoDatasetImageSchema.find(
    { 'metadata.datasetId': { $in: targetIds } },
    '_id'
  ).lean();

  const deletions = imageDocs.map(({ _id }) => deleteDatasetImage(String(_id)));
  await Promise.all(deletions);
};
/**
 * Delete every image whose `metadata.collectionId` is one of the given collections.
 *
 * @param collectionIds - Collection ids; each is converted to a string before matching.
 * @returns Resolves once all deletions finish; rejects if any deletion rejects.
 */
export const clearCollectionImages = async (collectionIds: string[]) => {
  const targetIds = collectionIds.map((id) => String(id));

  // Only the _id is needed to delete each file.
  const imageDocs = await MongoDatasetImageSchema.find(
    { 'metadata.collectionId': { $in: targetIds } },
    '_id'
  ).lean();

  const deletions = imageDocs.map(({ _id }) => deleteDatasetImage(String(_id)));
  await Promise.all(deletions);
};

View File

@@ -9,9 +9,13 @@ import { type TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/st
import axios from 'axios';
import { readRawContentByFileBuffer } from '../../common/file/read/utils';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import {
type APIFileServer,
type FeishuServer,
type YuqueServer
} from '@fastgpt/global/core/dataset/apiDataset';
import { getApiDatasetRequest } from './apiDataset';
import Papa from 'papaparse';
import type { ApiDatasetServerType } from '@fastgpt/global/core/dataset/apiDataset/type';
export const readFileRawTextByUrl = async ({
teamId,
@@ -65,7 +69,9 @@ export const readDatasetSourceRawText = async ({
sourceId,
selector,
externalFileId,
apiDatasetServer,
apiServer,
feishuServer,
yuqueServer,
customPdfParse,
getFormatText
}: {
@@ -78,7 +84,9 @@ export const readDatasetSourceRawText = async ({
selector?: string; // link selector
externalFileId?: string; // external file dataset
apiDatasetServer?: ApiDatasetServerType; // api dataset
apiServer?: APIFileServer; // api dataset
feishuServer?: FeishuServer; // feishu dataset
yuqueServer?: YuqueServer; // yuque dataset
}): Promise<{
title?: string;
rawText: string;
@@ -120,7 +128,9 @@ export const readDatasetSourceRawText = async ({
};
} else if (type === DatasetSourceReadTypeEnum.apiFile) {
const { title, rawText } = await readApiServerFileContent({
apiDatasetServer,
apiServer,
feishuServer,
yuqueServer,
apiFileId: sourceId,
teamId,
tmbId
@@ -137,13 +147,17 @@ export const readDatasetSourceRawText = async ({
};
export const readApiServerFileContent = async ({
apiDatasetServer,
apiServer,
feishuServer,
yuqueServer,
apiFileId,
teamId,
tmbId,
customPdfParse
}: {
apiDatasetServer?: ApiDatasetServerType;
apiServer?: APIFileServer;
feishuServer?: FeishuServer;
yuqueServer?: YuqueServer;
apiFileId: string;
teamId: string;
tmbId: string;
@@ -152,7 +166,13 @@ export const readApiServerFileContent = async ({
title?: string;
rawText: string;
}> => {
return (await getApiDatasetRequest(apiDatasetServer)).getFileContent({
return (
await getApiDatasetRequest({
apiServer,
yuqueServer,
feishuServer
})
).getFileContent({
teamId,
tmbId,
apiFileId,
@@ -166,11 +186,9 @@ export const rawText2Chunks = ({
chunkTriggerMinSize = 1000,
backupParse,
chunkSize = 512,
imageIdList,
...splitProps
}: {
rawText: string;
imageIdList?: string[];
chunkTriggerType?: ChunkTriggerConfigTypeEnum;
chunkTriggerMinSize?: number; // maxSize from agent model, not store
@@ -181,7 +199,6 @@ export const rawText2Chunks = ({
q: string;
a: string;
indexes?: string[];
imageIdList?: string[];
}[] => {
const parseDatasetBackup2Chunks = (rawText: string) => {
const csvArr = Papa.parse(rawText).data as string[][];
@@ -192,8 +209,7 @@ export const rawText2Chunks = ({
.map((item) => ({
q: item[0] || '',
a: item[1] || '',
indexes: item.slice(2),
imageIdList
indexes: item.slice(2)
}))
.filter((item) => item.q || item.a);
@@ -202,10 +218,6 @@ export const rawText2Chunks = ({
};
};
if (backupParse) {
return parseDatasetBackup2Chunks(rawText).chunks;
}
// Chunk condition
// 1. 选择最大值条件,只有超过了最大值(默认为模型的最大值*0.7),才会触发分块
if (chunkTriggerType === ChunkTriggerConfigTypeEnum.maxSize) {
@@ -215,8 +227,7 @@ export const rawText2Chunks = ({
return [
{
q: rawText,
a: '',
imageIdList
a: ''
}
];
}
@@ -225,10 +236,14 @@ export const rawText2Chunks = ({
if (chunkTriggerType !== ChunkTriggerConfigTypeEnum.forceChunk) {
const textLength = rawText.trim().length;
if (textLength < chunkTriggerMinSize) {
return [{ q: rawText, a: '', imageIdList }];
return [{ q: rawText, a: '' }];
}
}
if (backupParse) {
return parseDatasetBackup2Chunks(rawText).chunks;
}
const { chunks } = splitText2Chunks({
text: rawText,
chunkSize,
@@ -238,7 +253,6 @@ export const rawText2Chunks = ({
return chunks.map((item) => ({
q: item,
a: '',
indexes: [],
imageIdList
indexes: []
}));
};

View File

@@ -127,16 +127,14 @@ const DatasetSchema = new Schema({
type: Boolean,
default: true
},
apiDatasetServer: Object,
apiServer: Object,
feishuServer: Object,
yuqueServer: Object,
// abandoned
autoSync: Boolean,
externalReadUrl: String,
defaultPermission: Number,
apiServer: Object,
feishuServer: Object,
yuqueServer: Object
defaultPermission: Number
});
try {

View File

@@ -28,7 +28,6 @@ import type { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
import { datasetSearchQueryExtension } from './utils';
import type { RerankModelItemType } from '@fastgpt/global/core/ai/model.d';
import { addLog } from '../../../common/system/log';
import { formatDatasetDataValue } from '../data/controller';
export type SearchDatasetDataProps = {
histories: ChatItemType[];
@@ -176,12 +175,6 @@ export async function searchDatasetData(
collectionFilterMatch
} = props;
// Constants data
const datasetDataSelectField =
'_id datasetId collectionId updateTime q a imageId chunkIndex indexes';
const datsaetCollectionSelectField =
'_id name fileId rawLink apiFileId externalFileId externalFileUrl';
/* init params */
searchMode = DatasetSearchModeMap[searchMode] ? searchMode : DatasetSearchModeEnum.embedding;
usingReRank = usingReRank && !!getDefaultRerankModel();
@@ -470,14 +463,14 @@ export async function searchDatasetData(
collectionId: { $in: collectionIdList },
'indexes.dataId': { $in: results.map((item) => item.id?.trim()) }
},
datasetDataSelectField,
'_id datasetId collectionId updateTime q a chunkIndex indexes',
{ ...readFromSecondary }
).lean(),
MongoDatasetCollection.find(
{
_id: { $in: collectionIdList }
},
datsaetCollectionSelectField,
'_id name fileId rawLink apiFileId externalFileId externalFileUrl',
{ ...readFromSecondary }
).lean()
]);
@@ -501,13 +494,8 @@ export async function searchDatasetData(
const result: SearchDataResponseItemType = {
id: String(data._id),
updateTime: data.updateTime,
...formatDatasetDataValue({
teamId,
datasetId: data.datasetId,
q: data.q,
a: data.a,
imageId: data.imageId
}),
q: data.q,
a: data.a,
chunkIndex: data.chunkIndex,
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
@@ -609,14 +597,14 @@ export async function searchDatasetData(
{
_id: { $in: searchResults.map((item) => item.dataId) }
},
datasetDataSelectField,
'_id datasetId collectionId updateTime q a chunkIndex indexes',
{ ...readFromSecondary }
).lean(),
MongoDatasetCollection.find(
{
_id: { $in: searchResults.map((item) => item.collectionId) }
},
datsaetCollectionSelectField,
'_id name fileId rawLink apiFileId externalFileId externalFileUrl',
{ ...readFromSecondary }
).lean()
]);
@@ -642,13 +630,8 @@ export async function searchDatasetData(
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
updateTime: data.updateTime,
...formatDatasetDataValue({
teamId,
datasetId: data.datasetId,
q: data.q,
a: data.a,
imageId: data.imageId
}),
q: data.q,
a: data.a,
chunkIndex: data.chunkIndex,
indexes: data.indexes,
...getCollectionSourceData(collection),

View File

@@ -12,7 +12,10 @@ import { getCollectionWithDataset } from '../controller';
import { mongoSessionRun } from '../../../common/mongo/sessionRun';
import { type PushDataToTrainingQueueProps } from '@fastgpt/global/core/dataset/training/type';
import { i18nT } from '../../../../web/i18n/utils';
import { getLLMMaxChunkSize } from '../../../../global/core/dataset/training/utils';
import {
getLLMDefaultChunkSize,
getLLMMaxChunkSize
} from '../../../../global/core/dataset/training/utils';
export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> => {
try {
@@ -62,7 +65,7 @@ export async function pushDataListToTrainingQueue({
const getImageChunkMode = (data: PushDatasetDataChunkProps, mode: TrainingModeEnum) => {
if (mode !== TrainingModeEnum.image) return mode;
// 检查内容中,是否包含 ![](xxx) 的图片格式
const text = (data.q || '') + (data.a || '');
const text = data.q + data.a || '';
const regex = /!\[\]\((.*?)\)/g;
const match = text.match(regex);
if (match) {
@@ -79,6 +82,9 @@ export async function pushDataListToTrainingQueue({
if (!agentModelData) {
return Promise.reject(i18nT('common:error_llm_not_config'));
}
if (mode === TrainingModeEnum.chunk || mode === TrainingModeEnum.auto) {
prompt = undefined;
}
const { model, maxToken, weight } = await (async () => {
if (mode === TrainingModeEnum.chunk) {
@@ -95,7 +101,7 @@ export async function pushDataListToTrainingQueue({
weight: 0
};
}
if (mode === TrainingModeEnum.image || mode === TrainingModeEnum.imageParse) {
if (mode === TrainingModeEnum.image) {
const vllmModelData = getVlmModel(vlmModel);
if (!vllmModelData) {
return Promise.reject(i18nT('common:error_vlm_not_config'));
@@ -111,9 +117,11 @@ export async function pushDataListToTrainingQueue({
})();
// filter repeat or equal content
const set = new Set();
const filterResult: Record<string, PushDatasetDataChunkProps[]> = {
success: [],
overToken: [],
repeat: [],
error: []
};
@@ -132,7 +140,7 @@ export async function pushDataListToTrainingQueue({
.filter(Boolean);
// filter repeat content
if (!item.imageId && !item.q) {
if (!item.q) {
filterResult.error.push(item);
return;
}
@@ -145,26 +153,32 @@ export async function pushDataListToTrainingQueue({
return;
}
filterResult.success.push(item);
if (set.has(text)) {
filterResult.repeat.push(item);
} else {
filterResult.success.push(item);
set.add(text);
}
});
// insert data to db
const insertLen = filterResult.success.length;
const failedDocuments: PushDatasetDataChunkProps[] = [];
// 使用 insertMany 批量插入
const batchSize = 500;
const batchSize = 200;
const insertData = async (startIndex: number, session: ClientSession) => {
const list = filterResult.success.slice(startIndex, startIndex + batchSize);
if (list.length === 0) return;
try {
const result = await MongoDatasetTraining.insertMany(
await MongoDatasetTraining.insertMany(
list.map((item) => ({
teamId,
tmbId,
datasetId: datasetId,
collectionId: collectionId,
datasetId,
collectionId,
billId,
mode: getImageChunkMode(item, mode),
prompt,
@@ -175,25 +189,25 @@ export async function pushDataListToTrainingQueue({
indexSize,
weight: weight ?? 0,
indexes: item.indexes,
retryCount: 5,
...(item.imageId ? { imageId: item.imageId } : {})
retryCount: 5
})),
{
session,
ordered: false,
rawResult: true,
includeResultMetadata: false // 进一步减少返回数据
ordered: true
}
);
if (result.insertedCount !== list.length) {
return Promise.reject(`Insert data error, ${JSON.stringify(result)}`);
}
} catch (error: any) {
addLog.error(`Insert error`, error);
return Promise.reject(error);
// 如果有错误,将失败的文档添加到失败列表中
error.writeErrors?.forEach((writeError: any) => {
failedDocuments.push(data[writeError.index]);
});
console.log('failed', failedDocuments);
}
// 对于失败的文档,尝试单独插入
await MongoDatasetTraining.create(failedDocuments, { session });
return insertData(startIndex + batchSize, session);
};
@@ -208,6 +222,7 @@ export async function pushDataListToTrainingQueue({
delete filterResult.success;
return {
insertLen
insertLen,
...filterResult
};
}

View File

@@ -99,9 +99,6 @@ const TrainingDataSchema = new Schema({
],
default: []
},
imageId: {
type: String
},
errorMsg: String
});

View File

@@ -3,9 +3,9 @@ import type {
ApiFileReadContentResponse,
YuqueServer,
ApiDatasetDetailResponse
} from '@fastgpt/global/core/dataset/apiDataset/type';
} from '@fastgpt/global/core/dataset/apiDataset';
import axios, { type Method } from 'axios';
import { addLog } from '../../../../common/system/log';
import { addLog } from '../../../common/system/log';
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
type ResponseDataType = {
@@ -105,6 +105,7 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
if (!parentId) {
if (yuqueServer.basePath) parentId = yuqueServer.basePath;
}
let files: APIFileItem[] = [];
if (!parentId) {

View File

@@ -86,6 +86,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
});
// Check interactive entry
const interactiveResponse = lastInteractive;
props.node.isEntry = false;
const hasReadFilesTool = toolNodes.some(
(item) => item.flowNodeType === FlowNodeTypeEnum.readFiles
@@ -142,7 +143,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
})
}
];
if (lastInteractive && isEntry) {
if (interactiveResponse) {
return value.slice(0, -2);
}
return value;
@@ -182,7 +183,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
toolModel,
maxRunToolTimes: 30,
messages: adaptMessages,
interactiveEntryToolParams: lastInteractive?.toolParams
interactiveEntryToolParams: interactiveResponse?.toolParams
});
}
if (toolModel.functionCall) {
@@ -193,7 +194,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
toolNodes,
toolModel,
messages: adaptMessages,
interactiveEntryToolParams: lastInteractive?.toolParams
interactiveEntryToolParams: interactiveResponse?.toolParams
});
}
@@ -223,7 +224,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
toolNodes,
toolModel,
messages: adaptMessages,
interactiveEntryToolParams: lastInteractive?.toolParams
interactiveEntryToolParams: interactiveResponse?.toolParams
});
})();

View File

@@ -358,7 +358,7 @@ async function filterDatasetQuote({
return replaceVariable(quoteTemplate, {
id: item.id,
q: item.q,
a: item.a || '',
a: item.a,
updateTime: formatTime2YMDHM(item.updateTime),
source: item.sourceName,
sourceId: String(item.sourceId || ''),

View File

@@ -11,6 +11,7 @@ import type {
SystemVariablesType
} from '@fastgpt/global/core/workflow/runtime/type';
import type { RuntimeNodeItemType } from '@fastgpt/global/core/workflow/runtime/type.d';
import type { FlowNodeOutputItemType } from '@fastgpt/global/core/workflow/type/io.d';
import type {
AIChatItemValueItemType,
ChatHistoryItemResType,

View File

@@ -17,7 +17,6 @@ import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt';
import { getPluginRunUserQuery } from '@fastgpt/global/core/workflow/utils';
import { getPluginInputsFromStoreNodes } from '@fastgpt/global/core/app/plugin/utils';
import type { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
import { getUserChatInfoAndAuthTeamPoints } from '../../../../support/permission/auth/team';
type RunPluginProps = ModuleDispatchProps<{
[NodeInputKeyEnum.forbidStream]?: boolean;
@@ -74,11 +73,9 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
};
});
const { externalProvider } = await getUserChatInfoAndAuthTeamPoints(runningAppInfo.tmbId);
const runtimeVariables = {
...filterSystemVariables(props.variables),
appId: String(plugin.id),
...(externalProvider ? externalProvider.externalWorkflowVariables : {})
appId: String(plugin.id)
};
const { flowResponses, flowUsages, assistantResponses, runTimes } = await dispatchWorkFlow({
...props,

View File

@@ -20,7 +20,6 @@ import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { getAppVersionById } from '../../../app/version/controller';
import { parseUrlToFileType } from '@fastgpt/global/common/file/tools';
import { type ChildrenInteractive } from '@fastgpt/global/core/workflow/template/system/interactive/type';
import { getUserChatInfoAndAuthTeamPoints } from '../../../../support/permission/auth/team';
type Props = ModuleDispatchProps<{
[NodeInputKeyEnum.userChatInput]: string;
@@ -98,13 +97,11 @@ export const dispatchRunAppNode = async (props: Props): Promise<Response> => {
// Rewrite children app variables
const systemVariables = filterSystemVariables(variables);
const { externalProvider } = await getUserChatInfoAndAuthTeamPoints(appData.tmbId);
const childrenRunVariables = {
...systemVariables,
...childrenAppVariables,
histories: chatHistories,
appId: String(appData._id),
...(externalProvider ? externalProvider.externalWorkflowVariables : {})
appId: String(appData._id)
};
const childrenInteractive =

View File

@@ -5,6 +5,8 @@ import { NodeOutputKeyEnum } from '@fastgpt/global/core/workflow/constants';
import { type DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type';
import axios from 'axios';
import { serverRequestBaseUrl } from '../../../../common/api/serverRequest';
import { MongoRawTextBuffer } from '../../../../common/buffer/rawText/schema';
import { readFromSecondary } from '../../../../common/mongo/utils';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { detectFileEncoding, parseUrlToFileType } from '@fastgpt/global/common/file/tools';
import { readRawContentByFileBuffer } from '../../../../common/file/read/utils';
@@ -12,8 +14,6 @@ import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { type ChatItemType, type UserChatItemValueItemType } from '@fastgpt/global/core/chat/type';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import { addLog } from '../../../../common/system/log';
import { addRawTextBuffer, getRawTextBuffer } from '../../../../common/buffer/rawText/controller';
import { addMinutes } from 'date-fns';
type Props = ModuleDispatchProps<{
[NodeInputKeyEnum.fileUrlList]: string[];
@@ -158,12 +158,14 @@ export const getFileContentFromLinks = async ({
parseUrlList
.map(async (url) => {
// Get from buffer
const fileBuffer = await getRawTextBuffer(url);
const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: url }, undefined, {
...readFromSecondary
}).lean();
if (fileBuffer) {
return formatResponseObject({
filename: fileBuffer.sourceName || url,
filename: fileBuffer.metadata?.filename || url,
url,
content: fileBuffer.text
content: fileBuffer.rawText
});
}
@@ -218,12 +220,17 @@ export const getFileContentFromLinks = async ({
});
// Add to buffer
addRawTextBuffer({
sourceId: url,
sourceName: filename,
text: rawText,
expiredTime: addMinutes(new Date(), 20)
});
try {
if (buffer.length < 14 * 1024 * 1024 && rawText.trim()) {
MongoRawTextBuffer.create({
sourceId: url,
rawText,
metadata: {
filename: filename
}
});
}
} catch (error) {}
return formatResponseObject({ filename, url, content: rawText });
} catch (error) {

View File

@@ -2,7 +2,6 @@ import { OperationLogEventEnum } from '@fastgpt/global/support/operationLog/cons
import { i18nT } from '../../../web/i18n/utils';
export const operationLogMap = {
//Team
[OperationLogEventEnum.LOGIN]: {
content: i18nT('account_team:log_login'),
typeLabel: i18nT('account_team:login'),
@@ -67,309 +66,6 @@ export const operationLogMap = {
content: i18nT('account_team:log_assign_permission'),
typeLabel: i18nT('account_team:assign_permission'),
params: {} as { name?: string; objectName: string; permission: string }
},
//APP
[OperationLogEventEnum.CREATE_APP]: {
content: i18nT('account_team:log_create_app'),
typeLabel: i18nT('account_team:create_app'),
params: {} as { name?: string; appName: string; appType: string }
},
[OperationLogEventEnum.UPDATE_APP_INFO]: {
content: i18nT('account_team:log_update_app_info'),
typeLabel: i18nT('account_team:update_app_info'),
params: {} as {
name?: string;
appName: string;
newItemNames: string[];
newItemValues: string[];
appType: string;
}
},
[OperationLogEventEnum.MOVE_APP]: {
content: i18nT('account_team:log_move_app'),
typeLabel: i18nT('account_team:move_app'),
params: {} as { name?: string; appName: string; targetFolderName: string; appType: string }
},
[OperationLogEventEnum.DELETE_APP]: {
content: i18nT('account_team:log_delete_app'),
typeLabel: i18nT('account_team:delete_app'),
params: {} as { name?: string; appName: string; appType: string }
},
[OperationLogEventEnum.UPDATE_APP_COLLABORATOR]: {
content: i18nT('account_team:log_update_app_collaborator'),
typeLabel: i18nT('account_team:update_app_collaborator'),
params: {} as {
name?: string;
appName: string;
appType: string;
tmbList: string[];
groupList: string[];
orgList: string[];
permission: string;
}
},
[OperationLogEventEnum.DELETE_APP_COLLABORATOR]: {
content: i18nT('account_team:log_delete_app_collaborator'),
typeLabel: i18nT('account_team:delete_app_collaborator'),
params: {} as {
name?: string;
appName: string;
appType: string;
itemName: string;
itemValueName: string;
}
},
[OperationLogEventEnum.TRANSFER_APP_OWNERSHIP]: {
content: i18nT('account_team:log_transfer_app_ownership'),
typeLabel: i18nT('account_team:transfer_app_ownership'),
params: {} as {
name?: string;
appName: string;
appType: string;
oldOwnerName: string;
newOwnerName: string;
}
},
[OperationLogEventEnum.CREATE_APP_COPY]: {
content: i18nT('account_team:log_create_app_copy'),
typeLabel: i18nT('account_team:create_app_copy'),
params: {} as { name?: string; appName: string; appType: string }
},
[OperationLogEventEnum.CREATE_APP_FOLDER]: {
content: i18nT('account_team:log_create_app_folder'),
typeLabel: i18nT('account_team:create_app_folder'),
params: {} as { name?: string; folderName: string }
},
[OperationLogEventEnum.UPDATE_PUBLISH_APP]: {
content: i18nT('account_team:log_update_publish_app'),
typeLabel: i18nT('account_team:update_publish_app'),
params: {} as {
name?: string;
operationName: string;
appName: string;
appId: string;
appType: string;
}
},
[OperationLogEventEnum.CREATE_APP_PUBLISH_CHANNEL]: {
content: i18nT('account_team:log_create_app_publish_channel'),
typeLabel: i18nT('account_team:create_app_publish_channel'),
params: {} as { name?: string; appName: string; channelName: string; appType: string }
},
[OperationLogEventEnum.UPDATE_APP_PUBLISH_CHANNEL]: {
content: i18nT('account_team:log_update_app_publish_channel'),
typeLabel: i18nT('account_team:update_app_publish_channel'),
params: {} as { name?: string; appName: string; channelName: string; appType: string }
},
[OperationLogEventEnum.DELETE_APP_PUBLISH_CHANNEL]: {
content: i18nT('account_team:log_delete_app_publish_channel'),
typeLabel: i18nT('account_team:delete_app_publish_channel'),
params: {} as { name?: string; appName: string; channelName: string; appType: string }
},
[OperationLogEventEnum.EXPORT_APP_CHAT_LOG]: {
content: i18nT('account_team:log_export_app_chat_log'),
typeLabel: i18nT('account_team:export_app_chat_log'),
params: {} as { name?: string; appName: string; appType: string }
},
//Dataset
[OperationLogEventEnum.CREATE_DATASET]: {
content: i18nT('account_team:log_create_dataset'),
typeLabel: i18nT('account_team:create_dataset'),
params: {} as { name?: string; datasetName: string; datasetType: string }
},
[OperationLogEventEnum.UPDATE_DATASET]: {
content: i18nT('account_team:log_update_dataset'),
typeLabel: i18nT('account_team:update_dataset'),
params: {} as { name?: string; datasetName: string; datasetType: string }
},
[OperationLogEventEnum.DELETE_DATASET]: {
content: i18nT('account_team:log_delete_dataset'),
typeLabel: i18nT('account_team:delete_dataset'),
params: {} as { name?: string; datasetName: string; datasetType: string }
},
[OperationLogEventEnum.MOVE_DATASET]: {
content: i18nT('account_team:log_move_dataset'),
typeLabel: i18nT('account_team:move_dataset'),
params: {} as {
name?: string;
datasetName: string;
targetFolderName: string;
datasetType: string;
}
},
[OperationLogEventEnum.UPDATE_DATASET_COLLABORATOR]: {
content: i18nT('account_team:log_update_dataset_collaborator'),
typeLabel: i18nT('account_team:update_dataset_collaborator'),
params: {} as {
name?: string;
datasetName: string;
datasetType: string;
tmbList: string[];
groupList: string[];
orgList: string[];
permission: string;
}
},
[OperationLogEventEnum.DELETE_DATASET_COLLABORATOR]: {
content: i18nT('account_team:log_delete_dataset_collaborator'),
typeLabel: i18nT('account_team:delete_dataset_collaborator'),
params: {} as {
name?: string;
datasetName: string;
datasetType: string;
itemName: string;
itemValueName: string;
}
},
[OperationLogEventEnum.TRANSFER_DATASET_OWNERSHIP]: {
content: i18nT('account_team:log_transfer_dataset_ownership'),
typeLabel: i18nT('account_team:transfer_dataset_ownership'),
params: {} as {
name?: string;
datasetName: string;
datasetType: string;
oldOwnerName: string;
newOwnerName: string;
}
},
[OperationLogEventEnum.EXPORT_DATASET]: {
content: i18nT('account_team:log_export_dataset'),
typeLabel: i18nT('account_team:export_dataset'),
params: {} as { name?: string; datasetName: string; datasetType: string }
},
[OperationLogEventEnum.CREATE_DATASET_FOLDER]: {
content: i18nT('account_team:log_create_dataset_folder'),
typeLabel: i18nT('account_team:create_dataset_folder'),
params: {} as { name?: string; folderName: string }
},
//Collection
[OperationLogEventEnum.CREATE_COLLECTION]: {
content: i18nT('account_team:log_create_collection'),
typeLabel: i18nT('account_team:create_collection'),
params: {} as {
name?: string;
collectionName: string;
datasetName: string;
datasetType: string;
}
},
[OperationLogEventEnum.UPDATE_COLLECTION]: {
content: i18nT('account_team:log_update_collection'),
typeLabel: i18nT('account_team:update_collection'),
params: {} as {
name?: string;
collectionName: string;
datasetName: string;
datasetType: string;
}
},
[OperationLogEventEnum.DELETE_COLLECTION]: {
content: i18nT('account_team:log_delete_collection'),
typeLabel: i18nT('account_team:delete_collection'),
params: {} as {
name?: string;
collectionName: string;
datasetName: string;
datasetType: string;
}
},
[OperationLogEventEnum.RETRAIN_COLLECTION]: {
content: i18nT('account_team:log_retrain_collection'),
typeLabel: i18nT('account_team:retrain_collection'),
params: {} as {
name?: string;
collectionName: string;
datasetName: string;
datasetType: string;
}
},
//Data
[OperationLogEventEnum.CREATE_DATA]: {
content: i18nT('account_team:log_create_data'),
typeLabel: i18nT('account_team:create_data'),
params: {} as {
name?: string;
collectionName: string;
datasetName: string;
datasetType: string;
}
},
[OperationLogEventEnum.UPDATE_DATA]: {
content: i18nT('account_team:log_update_data'),
typeLabel: i18nT('account_team:update_data'),
params: {} as {
name?: string;
collectionName: string;
datasetName: string;
datasetType: string;
}
},
[OperationLogEventEnum.DELETE_DATA]: {
content: i18nT('account_team:log_delete_data'),
typeLabel: i18nT('account_team:delete_data'),
params: {} as {
name?: string;
collectionName: string;
datasetName: string;
datasetType: string;
}
},
//SearchTest
[OperationLogEventEnum.SEARCH_TEST]: {
content: i18nT('account_team:log_search_test'),
typeLabel: i18nT('account_team:search_test'),
params: {} as { name?: string; datasetName: string; datasetType: string }
},
//Account
[OperationLogEventEnum.CHANGE_PASSWORD]: {
content: i18nT('account_team:log_change_password'),
typeLabel: i18nT('account_team:change_password'),
params: {} as { name?: string }
},
[OperationLogEventEnum.CHANGE_NOTIFICATION_SETTINGS]: {
content: i18nT('account_team:log_change_notification_settings'),
typeLabel: i18nT('account_team:change_notification_settings'),
params: {} as { name?: string }
},
[OperationLogEventEnum.CHANGE_MEMBER_NAME_ACCOUNT]: {
content: i18nT('account_team:log_change_member_name_self'),
typeLabel: i18nT('account_team:change_member_name_self'),
params: {} as { name?: string; oldName: string; newName: string }
},
[OperationLogEventEnum.PURCHASE_PLAN]: {
content: i18nT('account_team:log_purchase_plan'),
typeLabel: i18nT('account_team:purchase_plan'),
params: {} as { name?: string }
},
[OperationLogEventEnum.EXPORT_BILL_RECORDS]: {
content: i18nT('account_team:log_export_bill_records'),
typeLabel: i18nT('account_team:export_bill_records'),
params: {} as { name?: string }
},
[OperationLogEventEnum.CREATE_INVOICE]: {
content: i18nT('account_team:log_create_invoice'),
typeLabel: i18nT('account_team:create_invoice'),
params: {} as { name?: string }
},
[OperationLogEventEnum.SET_INVOICE_HEADER]: {
content: i18nT('account_team:log_set_invoice_header'),
typeLabel: i18nT('account_team:set_invoice_header'),
params: {} as { name?: string }
},
[OperationLogEventEnum.CREATE_API_KEY]: {
content: i18nT('account_team:log_create_api_key'),
typeLabel: i18nT('account_team:create_api_key'),
params: {} as { name?: string; keyName: string }
},
[OperationLogEventEnum.UPDATE_API_KEY]: {
content: i18nT('account_team:log_update_api_key'),
typeLabel: i18nT('account_team:update_api_key'),
params: {} as { name?: string; keyName: string }
},
[OperationLogEventEnum.DELETE_API_KEY]: {
content: i18nT('account_team:log_delete_api_key'),
typeLabel: i18nT('account_team:delete_api_key'),
params: {} as { name?: string; keyName: string }
}
} as const;

View File

@@ -1,36 +0,0 @@
import { AppTypeEnum } from '@fastgpt/global/core/app/constants';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { i18nT } from '../../../web/i18n/utils';
/**
 * Resolve the i18n label entry for an app type.
 *
 * @param type - App type to look up.
 * @returns Whatever `i18nT` yields for the matching `account_team:type.*` key,
 *   or for `common:UnKnow` when `type` matches none of the listed cases.
 */
export function getI18nAppType(type: AppTypeEnum): string {
  // `switch` compares with strict equality, the same as a chain of `===` checks.
  switch (type) {
    case AppTypeEnum.folder:
      return i18nT('account_team:type.Folder');
    case AppTypeEnum.simple:
      return i18nT('account_team:type.Simple bot');
    case AppTypeEnum.workflow:
      return i18nT('account_team:type.Workflow bot');
    case AppTypeEnum.plugin:
      return i18nT('account_team:type.Plugin');
    case AppTypeEnum.httpPlugin:
      return i18nT('account_team:type.Http plugin');
    case AppTypeEnum.toolSet:
      return i18nT('account_team:type.Tool set');
    case AppTypeEnum.tool:
      return i18nT('account_team:type.Tool');
    default:
      // Fallback for any type not listed above.
      return i18nT('common:UnKnow');
  }
}
/**
 * Resolve the i18n label entry describing which kind of collaborator an item is.
 *
 * The identifiers are checked in priority order: `tmbId`, then `groupId`, then
 * `orgId`. The first truthy one decides the result, so an empty string counts
 * as "not provided".
 *
 * @param tmbId - Member identifier; selects `account_team:member` when truthy.
 * @param groupId - Group identifier; selects `account_team:group` when truthy.
 * @param orgId - Org identifier; selects `account_team:department` when truthy.
 * @returns Whatever `i18nT` yields for the selected key, or for `common:UnKnow`
 *   when none of the identifiers is truthy.
 */
export function getI18nCollaboratorItemType(
  tmbId: string | undefined,
  groupId: string | undefined,
  orgId: string | undefined
): string {
  // Only the branch that is actually selected calls `i18nT`.
  return tmbId
    ? i18nT('account_team:member')
    : groupId
      ? i18nT('account_team:group')
      : orgId
        ? i18nT('account_team:department')
        : i18nT('common:UnKnow');
}
/**
 * Resolve the i18n label entry for a dataset type.
 *
 * @param type - Dataset type to look up; arbitrary strings are accepted.
 * @returns Whatever `i18nT` yields for the matching `account_team:dataset.*`
 *   key, or for `common:UnKnow` when `type` matches none of the listed cases.
 */
export function getI18nDatasetType(type: DatasetTypeEnum | string): string {
  // `switch` compares with strict equality, the same as a chain of `===` checks.
  switch (type) {
    case DatasetTypeEnum.folder:
      return i18nT('account_team:dataset.folder_dataset');
    case DatasetTypeEnum.dataset:
      return i18nT('account_team:dataset.common_dataset');
    case DatasetTypeEnum.websiteDataset:
      return i18nT('account_team:dataset.website_dataset');
    case DatasetTypeEnum.externalFile:
      return i18nT('account_team:dataset.external_file');
    case DatasetTypeEnum.apiDataset:
      return i18nT('account_team:dataset.api_file');
    case DatasetTypeEnum.feishu:
      return i18nT('account_team:dataset.feishu_dataset');
    case DatasetTypeEnum.yuque:
      return i18nT('account_team:dataset.yuque_dataset');
    default:
      // Fallback for unrecognised strings and any type not listed above.
      return i18nT('common:UnKnow');
  }
}

View File

@@ -16,7 +16,6 @@ import { type AuthModeType, type AuthResponseType } from '../type';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import { DatasetDefaultPermissionVal } from '@fastgpt/global/support/permission/dataset/constant';
import { getDatasetImagePreviewUrl } from '../../../core/dataset/image/utils';
export const authDatasetByTmbId = async ({
tmbId,
@@ -268,15 +267,6 @@ export async function authDatasetData({
updateTime: datasetData.updateTime,
q: datasetData.q,
a: datasetData.a,
imageId: datasetData.imageId,
imagePreivewUrl: datasetData.imageId
? getDatasetImagePreviewUrl({
imageId: datasetData.imageId,
teamId: datasetData.teamId,
datasetId: datasetData.datasetId,
expiredMinutes: 30
})
: undefined,
chunkIndex: datasetData.chunkIndex,
indexes: datasetData.indexes,
datasetId: String(datasetData.datasetId),

View File

@@ -1,7 +1,7 @@
import { getWorkerController, WorkerNameEnum } from './utils';
export const preLoadWorker = async () => {
const max = Math.min(Number(global.systemEnv?.tokenWorkers || 30), 100);
const max = Number(global.systemEnv?.tokenWorkers || 30);
const workerController = getWorkerController({
name: WorkerNameEnum.countGptMessagesTokens,
maxReservedThreads: max

View File

@@ -220,11 +220,9 @@ export const iconPaths = {
import('./icons/core/dataset/feishuDatasetOutline.svg'),
'core/dataset/fileCollection': () => import('./icons/core/dataset/fileCollection.svg'),
'core/dataset/fullTextRecall': () => import('./icons/core/dataset/fullTextRecall.svg'),
'core/dataset/imageFill': () => import('./icons/core/dataset/imageFill.svg'),
'core/dataset/manualCollection': () => import('./icons/core/dataset/manualCollection.svg'),
'core/dataset/mixedRecall': () => import('./icons/core/dataset/mixedRecall.svg'),
'core/dataset/modeEmbedding': () => import('./icons/core/dataset/modeEmbedding.svg'),
'core/dataset/otherDataset': () => import('./icons/core/dataset/otherDataset.svg'),
'core/dataset/questionExtension': () => import('./icons/core/dataset/questionExtension.svg'),
'core/dataset/rerank': () => import('./icons/core/dataset/rerank.svg'),
'core/dataset/searchfilter': () => import('./icons/core/dataset/searchfilter.svg'),
@@ -232,6 +230,7 @@ export const iconPaths = {
'core/dataset/tableCollection': () => import('./icons/core/dataset/tableCollection.svg'),
'core/dataset/tag': () => import('./icons/core/dataset/tag.svg'),
'core/dataset/websiteDataset': () => import('./icons/core/dataset/websiteDataset.svg'),
'core/dataset/otherDataset': () => import('./icons/core/dataset/otherDataset.svg'),
'core/dataset/websiteDatasetColor': () => import('./icons/core/dataset/websiteDatasetColor.svg'),
'core/dataset/websiteDatasetOutline': () =>
import('./icons/core/dataset/websiteDatasetOutline.svg'),
@@ -288,7 +287,6 @@ export const iconPaths = {
'core/workflow/template/aiChat': () => import('./icons/core/workflow/template/aiChat.svg'),
'core/workflow/template/baseChart': () => import('./icons/core/workflow/template/baseChart.svg'),
'core/workflow/template/bing': () => import('./icons/core/workflow/template/bing.svg'),
'core/workflow/template/bocha': () => import('./icons/core/workflow/template/bocha.svg'),
'core/workflow/template/codeRun': () => import('./icons/core/workflow/template/codeRun.svg'),
'core/workflow/template/customFeedback': () =>
import('./icons/core/workflow/template/customFeedback.svg'),
@@ -380,12 +378,10 @@ export const iconPaths = {
fullScreen: () => import('./icons/fullScreen.svg'),
help: () => import('./icons/help.svg'),
history: () => import('./icons/history.svg'),
image: () => import('./icons/image.svg'),
infoRounded: () => import('./icons/infoRounded.svg'),
kbTest: () => import('./icons/kbTest.svg'),
key: () => import('./icons/key.svg'),
keyPrimary: () => import('./icons/keyPrimary.svg'),
loading: () => import('./icons/loading.svg'),
menu: () => import('./icons/menu.svg'),
minus: () => import('./icons/minus.svg'),
'modal/AddClb': () => import('./icons/modal/AddClb.svg'),

View File

@@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 21 20" >
<path fill-rule="evenodd" clip-rule="evenodd" d="M2.24348 4.15292C1.9165 4.79466 1.9165 5.63474 1.9165 7.31489V12.6852C1.9165 14.3654 1.9165 15.2054 2.24348 15.8472C2.5311 16.4117 2.99005 16.8706 3.55453 17.1582C4.19627 17.4852 5.03635 17.4852 6.7165 17.4852H13.7832C15.4633 17.4852 16.3034 17.4852 16.9451 17.1582C17.5096 16.8706 17.9686 16.4117 18.2562 15.8472C18.5832 15.2054 18.5832 14.3654 18.5832 12.6852V7.31489C18.5832 5.63473 18.5832 4.79466 18.2562 4.15292C17.9686 3.58843 17.5096 3.12949 16.9451 2.84187C16.3034 2.51489 15.4633 2.51489 13.7832 2.51489H6.7165C5.03635 2.51489 4.19627 2.51489 3.55453 2.84187C2.99005 3.12949 2.5311 3.58843 2.24348 4.15292ZM7.88951 6.75656C7.88951 7.67703 7.14331 8.42322 6.22284 8.42322C5.30236 8.42322 4.55617 7.67703 4.55617 6.75656C4.55617 5.83608 5.30236 5.08989 6.22284 5.08989C7.14331 5.08989 7.88951 5.83608 7.88951 6.75656ZM12.8631 8.65525C12.5376 8.32981 12.01 8.32981 11.6845 8.65525L5.92965 14.4101C5.40468 14.9351 5.77648 15.8327 6.5189 15.8327L15.5062 15.8327C16.4267 15.8327 17.1729 15.0865 17.1729 14.1661V13.3103C17.1729 13.0892 17.0851 12.8773 16.9288 12.721L12.8631 8.65525Z" fill="#3370FF"/>
</svg>

Before

Width:  |  Height:  |  Size: 1.2 KiB

View File

@@ -1,5 +0,0 @@
<svg width="113" height="97" viewBox="0 0 113 97" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M0 31.7259C1.80046 29.9255 3.82784 28.3872 5.96621 27.1988C8.10469 26.0103 10.3126 25.1947 12.4634 24.7992C14.6143 24.4037 16.6664 24.4361 18.5022 24.8938C20.2678 25.334 21.7994 26.1604 23.0183 27.3272L23.021 27.3245L47.189 51.4924L33.4778 65.2037L0 31.7259Z" fill="#C4DEFE"/>
<path d="M9.15662 11.5625C11.3617 10.2893 13.7181 9.32825 16.0912 8.73374C18.4645 8.13923 20.8082 7.92284 22.9882 8.09751C25.1681 8.27217 27.1419 8.83457 28.7966 9.75182C30.3881 10.6341 31.6537 11.8287 32.529 13.2712L32.5316 13.2697L32.6082 13.4025C32.6162 13.4162 32.6251 13.4297 32.633 13.4435L49.886 43.3286L33.0941 53.0234L9.15662 11.5625Z" fill="#A6CBFF"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M31.1377 0C33.6839 4.40811e-05 36.2052 0.345872 38.5576 1.01758C40.9099 1.68929 43.0472 2.67394 44.8477 3.91504C46.6482 5.15627 48.0773 6.63021 49.0518 8.25195C49.9888 9.81168 50.4867 11.4792 50.5234 13.166H50.5273V21.4072C56.6623 17.6586 63.874 15.498 71.5898 15.498C93.9304 15.4984 112.042 33.6087 112.042 55.9492C112.042 78.29 93.9305 96.401 71.5898 96.4014C49.3907 96.4014 31.3704 78.5193 31.1426 56.374H31.1377V0ZM71.9473 35.0439C60.1187 35.0441 50.5295 44.6334 50.5293 56.4619C50.5293 63.5338 53.9569 69.8057 59.2412 73.7061C66.4989 79.0625 76.5515 75.3841 85.3955 77.1592C92.613 78.608 97.2369 82.6827 98.3652 83.7686C97.3562 82.731 93.791 78.7138 92.2715 72.3291C89.8011 61.9479 94.8744 49.6043 87.5771 41.8184C83.6695 37.6493 78.1122 35.0441 71.9473 35.0439Z" fill="#006EFF"/>
</svg>

Before

Width:  |  Height:  |  Size: 1.6 KiB

View File

@@ -1,4 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 17 16" >
<path d="M5.50794 6.8195C6.06022 6.8195 6.50794 6.37178 6.50794 5.8195C6.50794 5.26721 6.06022 4.8195 5.50794 4.8195C4.95565 4.8195 4.50794 5.26721 4.50794 5.8195C4.50794 6.37178 4.95565 6.8195 5.50794 6.8195Z" />
<path fill-rule="evenodd" clip-rule="evenodd" d="M1.55029 5.85187C1.55029 4.50775 1.55029 3.83568 1.81188 3.32229C2.04197 2.87071 2.40913 2.50355 2.86072 2.27346C3.3741 2.01187 4.04617 2.01187 5.39029 2.01187H11.0436C12.3878 2.01187 13.0598 2.01187 13.5732 2.27346C14.0248 2.50355 14.3919 2.87071 14.622 3.32229C14.8836 3.83568 14.8836 4.50775 14.8836 5.85187V10.1481C14.8836 11.4922 14.8836 12.1643 14.622 12.6777C14.3919 13.1293 14.0248 13.4964 13.5732 13.7265C13.0598 13.9881 12.3878 13.9881 11.0436 13.9881H5.39029C4.04617 13.9881 3.3741 13.9881 2.86072 13.7265C2.40913 13.4964 2.04197 13.1293 1.81188 12.6777C1.55029 12.1643 1.55029 11.4922 1.55029 10.1481V5.85187ZM5.39029 3.3452H11.0436C11.7377 3.3452 12.1781 3.34624 12.5114 3.37347C12.8291 3.39944 12.9305 3.44241 12.9679 3.46146C13.1686 3.56373 13.3318 3.72691 13.434 3.92761C13.4531 3.96502 13.4961 4.06638 13.522 4.38413C13.5493 4.71745 13.5503 5.15781 13.5503 5.85187V10.1481C13.5503 10.1562 13.5503 10.1641 13.5503 10.1721L10.3165 6.93829C10.0561 6.67794 9.634 6.67794 9.37365 6.93829L3.70938 12.6026C3.5547 12.5791 3.49333 12.5524 3.46604 12.5385C3.26533 12.4363 3.10215 12.2731 2.99989 12.0724C2.98083 12.035 2.93786 11.9336 2.9119 11.6159C2.88466 11.2825 2.88363 10.8422 2.88363 10.1481V5.85187C2.88363 5.15781 2.88466 4.71745 2.9119 4.38413C2.93786 4.06638 2.98083 3.96502 2.99989 3.92761C3.10215 3.72691 3.26533 3.56373 3.46604 3.46146C3.50344 3.44241 3.6048 3.39944 3.92255 3.37347C4.25587 3.34624 4.69623 3.3452 5.39029 3.3452ZM9.84506 8.3525L5.54277 12.6548H11.0436C11.7377 12.6548 12.1781 12.6538 12.5114 12.6265C12.8291 12.6006 12.9305 12.5576 12.9679 12.5385C13.1686 12.4363 13.3318 12.2731 13.434 12.0724C13.4422 12.0563 13.4549 12.0283 13.4687 11.9762L9.84506 8.3525Z" />
</svg>

Before

Width:  |  Height:  |  Size: 2.0 KiB

View File

@@ -1,4 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="48" height="48" viewBox="0 0 48 48" >
<path d="M47.3337 24C47.3337 36.8866 36.887 47.3333 24.0003 47.3333C11.1137 47.3333 0.666992 36.8866 0.666992 24C0.666992 11.1133 11.1137 0.666626 24.0003 0.666626C36.887 0.666626 47.3337 11.1133 47.3337 24ZM5.33366 24C5.33366 34.3093 13.691 42.6666 24.0003 42.6666C34.3096 42.6666 42.667 34.3093 42.667 24C42.667 13.6906 34.3096 5.33329 24.0003 5.33329C13.691 5.33329 5.33366 13.6906 5.33366 24Z" />
<path d="M24.0003 2.99996C24.0003 1.71129 25.0476 0.654541 26.3298 0.783194C29.1026 1.06141 31.8097 1.83481 34.3204 3.07293C37.5303 4.6559 40.3331 6.95608 42.5119 9.79553C44.6907 12.635 46.1871 15.9376 46.8853 19.4479C47.4314 22.1934 47.4778 25.0084 47.0289 27.7588C46.8213 29.0306 45.5295 29.7687 44.2848 29.4352C43.04 29.1016 42.3169 27.8222 42.4926 26.5456C42.7752 24.4926 42.7147 22.4014 42.3083 20.3583C41.7497 17.5501 40.5526 14.908 38.8096 12.6364C37.0666 10.3649 34.8243 8.52471 32.2564 7.25833C30.3881 6.33698 28.3838 5.73731 26.3276 5.47894C25.049 5.31827 24.0003 4.28862 24.0003 2.99996Z" />
</svg>

Before

Width:  |  Height:  |  Size: 1.1 KiB

Some files were not shown because too many files have changed in this diff Show More