Compare commits
38 Commits
v4.8.13-fi
...
v4.8.15-al
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1aebe5f185 | ||
|
|
b188544386 | ||
|
|
7faa427e84 | ||
|
|
cb56d1e53e | ||
|
|
f28b7e41a8 | ||
|
|
c506442993 | ||
|
|
1cef206c13 | ||
|
|
d0e8c9c62e | ||
|
|
a4a8b7909c | ||
|
|
6c77134e82 | ||
|
|
93e302b7e8 | ||
|
|
78f52a7836 | ||
|
|
0d30b7ba73 | ||
|
|
31f7cf6cc5 | ||
|
|
b09e972c20 | ||
|
|
5fa2e3c5ac | ||
|
|
4723a08d98 | ||
|
|
3d9c1d5472 | ||
|
|
a75036b626 | ||
|
|
cf1a90c596 | ||
|
|
8aa6b53760 | ||
|
|
7e1d31b5a9 | ||
|
|
bc79d46d4b | ||
|
|
dfd2fed033 | ||
|
|
51559bc821 | ||
|
|
c7681342f7 | ||
|
|
0f4870a7a5 | ||
|
|
763b41b385 | ||
|
|
019bf67e2d | ||
|
|
9b2c3b242a | ||
|
|
4f55025906 | ||
|
|
489bb076a3 | ||
|
|
a9db5b57c5 | ||
|
|
fdb3720b41 | ||
|
|
00641a8652 | ||
|
|
5c56b375c7 | ||
|
|
d8d9b936c4 | ||
|
|
b237a3ec55 |
2
.github/ISSUE_TEMPLATE/bugs.md
vendored
@@ -21,7 +21,7 @@ assignees: ''
|
||||
- [ ] 公有云版本
|
||||
- [ ] 私有部署版本, 具体版本号:
|
||||
|
||||
**问题描述, 日志截图**
|
||||
**问题描述, 日志截图,配置文件等**
|
||||
|
||||
**复现步骤**
|
||||
|
||||
|
||||
4
.vscode/settings.json
vendored
@@ -16,8 +16,8 @@
|
||||
"i18n-ally.keystyle": "flat",
|
||||
"i18n-ally.sortKeys": true,
|
||||
"i18n-ally.keepFulfilled": false,
|
||||
"i18n-ally.sourceLanguage": "zh", // 根据此语言文件翻译其他语言文件的变量和内容
|
||||
"i18n-ally.displayLanguage": "zh", // 显示语言
|
||||
"i18n-ally.sourceLanguage": "zh-CN", // 根据此语言文件翻译其他语言文件的变量和内容
|
||||
"i18n-ally.displayLanguage": "zh-CN", // 显示语言
|
||||
"i18n-ally.namespace": true,
|
||||
"i18n-ally.pathMatcher": "{locale}/{namespaces}.json",
|
||||
"i18n-ally.extract.targetPickingStrategy": "most-similar-by-key",
|
||||
|
||||
17
README.md
@@ -105,12 +105,12 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
|
||||
[点击查看 Sealos 一键部署 FastGPT 教程](https://doc.tryfastgpt.ai/docs/development/sealos/)
|
||||
|
||||
* [快速开始本地开发](https://doc.tryfastgpt.ai/docs/development/intro/)
|
||||
* [部署 FastGPT](https://doc.tryfastgpt.ai/docs/development/sealos)
|
||||
* [部署 FastGPT](https://doc.tryfastgpt.ai/docs/development/sealos/)
|
||||
* [系统配置文件说明](https://doc.tryfastgpt.ai/docs/development/configuration/)
|
||||
* [多模型配置](https://doc.tryfastgpt.ai/docs/development/one-api/)
|
||||
* [版本更新/升级介绍](https://doc.tryfastgpt.ai/docs/development/upgrading)
|
||||
* [版本更新/升级介绍](https://doc.tryfastgpt.ai/docs/development/upgrading/)
|
||||
* [OpenAPI API 文档](https://doc.tryfastgpt.ai/docs/development/openapi/)
|
||||
* [知识库结构详解](https://doc.tryfastgpt.ai/docs/course/dataset_engine/)
|
||||
* [知识库结构详解](https://doc.tryfastgpt.ai/docs/guide/knowledge_base/rag/)
|
||||
|
||||
<a href="#readme">
|
||||
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">
|
||||
@@ -118,7 +118,7 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
|
||||
|
||||
## 🏘️ 社区交流群
|
||||
|
||||
扫码加入飞书话题群 (新开,逐渐弃用微信群):
|
||||
扫码加入飞书话题群:
|
||||
|
||||

|
||||
|
||||
@@ -126,6 +126,11 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
|
||||
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">
|
||||
</a>
|
||||
|
||||
## 🏘️ 加入我们
|
||||
|
||||
我们正在寻找志同道合的小伙伴,加速 FastGPT 的发展。你可以通过 [FastGPT 2025 招聘](https://fael3z0zfze.feishu.cn/wiki/P7FOwEmPziVcaYkvVaacnVX1nvg)了解 FastGPT 的招聘信息。
|
||||
|
||||
|
||||
## 💪 相关项目
|
||||
|
||||
- [Laf:3 分钟快速接入三方应用](https://github.com/labring/laf)
|
||||
@@ -149,7 +154,7 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
|
||||
|
||||
## 🌿 第三方生态
|
||||
|
||||
- [OnWeChat 个人微信/企微机器人](https://doc.tryfastgpt.ai/docs/use-cases/onwechat/)
|
||||
- [COW 个人微信/企微机器人](https://doc.tryfastgpt.ai/docs/use-cases/external-integration/onwechat/)
|
||||
|
||||
<a href="#readme">
|
||||
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">
|
||||
@@ -171,7 +176,7 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://next.ossinsight.io/widgets/official/compose-org-active-contributors/thumbnail.png?activity=active&period=past_28_days&owner_id=102226726&repo_ids=605673387&image_size=2x3&color_scheme=dark">
|
||||
<img alt="Active participants of labring - past 28 days" src="https://next.ossinsight.io/widgets/official/compose-org-active-contributors/thumbnail.png?activity=active&period=past_28_days&owner_id=102226726&repo_ids=605673387&image_size=2x3&color_scheme=light">
|
||||
</picture>
|
||||
</picture>
|
||||
</td>
|
||||
<td rowspan="2">
|
||||
<picture>
|
||||
|
||||
BIN
docSite/assets/imgs/api-dataset-1.png
Normal file
|
After Width: | Height: | Size: 97 KiB |
BIN
docSite/assets/imgs/htmlRendering1.png
Normal file
|
After Width: | Height: | Size: 71 KiB |
BIN
docSite/assets/imgs/htmlRendering2.png
Normal file
|
After Width: | Height: | Size: 92 KiB |
BIN
docSite/assets/imgs/htmlRendering3.png
Normal file
|
After Width: | Height: | Size: 58 KiB |
BIN
docSite/assets/imgs/image-10.png
Normal file
|
After Width: | Height: | Size: 82 KiB |
BIN
docSite/assets/imgs/image-11.png
Normal file
|
After Width: | Height: | Size: 819 KiB |
BIN
docSite/assets/imgs/image-12.png
Normal file
|
After Width: | Height: | Size: 179 KiB |
BIN
docSite/assets/imgs/image-13.png
Normal file
|
After Width: | Height: | Size: 172 KiB |
BIN
docSite/assets/imgs/image-14.png
Normal file
|
After Width: | Height: | Size: 599 KiB |
BIN
docSite/assets/imgs/image-15.png
Normal file
|
After Width: | Height: | Size: 258 KiB |
BIN
docSite/assets/imgs/image-16.png
Normal file
|
After Width: | Height: | Size: 139 KiB |
BIN
docSite/assets/imgs/image-17.png
Normal file
|
After Width: | Height: | Size: 222 KiB |
BIN
docSite/assets/imgs/image-18.png
Normal file
|
After Width: | Height: | Size: 158 KiB |
BIN
docSite/assets/imgs/image-19.png
Normal file
|
After Width: | Height: | Size: 167 KiB |
BIN
docSite/assets/imgs/image-8.png
Normal file
|
After Width: | Height: | Size: 117 KiB |
BIN
docSite/assets/imgs/image-9.png
Normal file
|
After Width: | Height: | Size: 83 KiB |
@@ -23,6 +23,7 @@ weight: 708
|
||||
"systemEnv": {
|
||||
"vectorMaxProcess": 15,
|
||||
"qaMaxProcess": 15,
|
||||
"tokenWorkers": 50, // Token 计算线程保持数,会持续占用内存,不能设置太大。
|
||||
"pgHNSWEfSearch": 100 // 向量搜索参数。越大,搜索越精确,但是速度越慢。设置为100,有99%+精度。
|
||||
},
|
||||
"llmModels": [
|
||||
@@ -42,7 +43,7 @@ weight: 708
|
||||
"usedInExtractFields": true, // 是否用于内容提取(务必保证至少有一个为true)
|
||||
"usedInToolCall": true, // 是否用于工具调用(务必保证至少有一个为true)
|
||||
"usedInQueryExtension": true, // 是否用于问题优化(务必保证至少有一个为true)
|
||||
"toolChoice": true, // 是否支持工具选择(分类,内容提取,工具调用会用到。目前只有gpt支持)
|
||||
"toolChoice": true, // 是否支持工具选择(分类,内容提取,工具调用会用到。)
|
||||
"functionCall": false, // 是否支持函数调用(分类,内容提取,工具调用会用到。会优先使用 toolChoice,如果为false,则使用 functionCall,如果仍为 false,则使用提示词模式)
|
||||
"customCQPrompt": "", // 自定义文本分类提示词(不支持工具和函数调用的模型
|
||||
"customExtractPrompt": "", // 自定义内容提取提示词
|
||||
@@ -94,9 +95,7 @@ weight: 708
|
||||
"customExtractPrompt": "",
|
||||
"defaultSystemChatPrompt": "",
|
||||
"defaultConfig": {
|
||||
"temperature": 1,
|
||||
"max_tokens": null,
|
||||
"stream": false
|
||||
"temperature": 1
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -121,9 +120,7 @@ weight: 708
|
||||
"customExtractPrompt": "",
|
||||
"defaultSystemChatPrompt": "",
|
||||
"defaultConfig": {
|
||||
"temperature": 1,
|
||||
"max_tokens": null,
|
||||
"stream": false
|
||||
"temperature": 1
|
||||
}
|
||||
}
|
||||
],
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
---
|
||||
title: '接入 Marker PDF 文档解析'
|
||||
description: '使用 Marker 解析 PDF 文档,可实现图片提取和布局识别'
|
||||
icon: 'api'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 909
|
||||
---
|
||||
|
||||
## 背景
|
||||
|
||||
PDF 是一个相对复杂的文件格式,在 FastGPT 内置的 pdf 解析器中,依赖的是 pdfjs 库解析,该库基于逻辑解析,无法有效地理解复杂的 pdf 文件。所以我们在解析 pdf 的时候,如果遇到图片、表格、公式等非简单文本内容,会发现解析效果不佳。
|
||||
|
||||
市面上目前有多种解析 PDF 的方法,比如使用 [Marker](https://github.com/VikParuchuri/marker),该项目使用了 Surya 模型,基于视觉解析,可以有效提取图片、表格、公式等复杂内容。为了可以让 Marker 快速接入 FastGPT,我们做了一个自定义解析的拓展 Demo。
|
||||
|
||||
在 FastGPT 4.8.15 版本中,你可以通过增加一个环境变量,来替换掉 FastGPT 系统内置解析器,实现自定义的文档解析服务。该功能只是 Demo 阶段,后期配置模式和交互规则会发生改动。
|
||||
|
||||
## 使用教程
|
||||
|
||||
### 1. 安装 Marker
|
||||
|
||||
参考文档 [Marker 安装教程](https://github.com/labring/FastGPT/tree/main/python/pdf-marker),安装 Marker 模型。封装的 API 已经适配了 FastGPT 自定义解析服务。
|
||||
|
||||
这里介绍快速 Docker 安装的方法:
|
||||
|
||||
```
|
||||
```
|
||||
|
||||
### 2. 添加 FastGPT 环境变量
|
||||
|
||||
```
|
||||
CUSTOM_READ_FILE_URL=http://xxxx.com/v1/parse/file
|
||||
CUSTOM_READ_FILE_EXTENSION=pdf
|
||||
```
|
||||
|
||||
* CUSTOM_READ_FILE_URL - 自定义解析服务的地址, host改成解析服务的访问地址,path 不能变动。
|
||||
* CUSTOM_READ_FILE_EXTENSION - 支持的文件后缀,多个文件类型,可用逗号隔开。
|
||||
|
||||
### 3. 测试效果
|
||||
|
||||
通过知识库上传一个 pdf 文件,并确认上传,可以在日志中看到 LOG (LOG_LEVEL需要设置 info 或者 debug):
|
||||
|
||||
```
|
||||
[Info] 2024-12-05 15:04:42 Parsing files from an external service
|
||||
[Info] 2024-12-05 15:07:08 Custom file parsing is complete, time: 1316ms
|
||||
```
|
||||
|
||||
然后你就可以发现,通过 Marker 解析出来的 pdf 会携带图片链接:
|
||||
|
||||

|
||||
|
||||
|
||||
## 效果展示
|
||||
|
||||
以清华的 [ChatDev Communicative Agents for Software Develop.pdf](https://arxiv.org/abs/2307.07924) 为例,展示 Marker 解析的效果:
|
||||
|
||||
| | | |
|
||||
| --- | --- | --- |
|
||||
|  |  |  |
|
||||
|  |  |  |
|
||||
|
||||
上图是分块后的结果,下图是 pdf 原文。整体图片、公式、表格都可以提取出来,效果还是杠杠的。
|
||||
|
||||
不过要注意的是,[Marker](https://github.com/VikParuchuri/marker) 的协议是`GPL-3.0 license`,请在遵守协议的前提下使用。
|
||||
@@ -145,7 +145,7 @@ curl --location --request POST 'https://<oneapi_url>/v1/chat/completions' \
|
||||
"usedInExtractFields": true, // 是否用于内容提取(务必保证至少有一个为true)
|
||||
"usedInToolCall": true, // 是否用于工具调用(务必保证至少有一个为true)
|
||||
"usedInQueryExtension": true, // 是否用于问题优化(务必保证至少有一个为true)
|
||||
"toolChoice": true, // 是否支持工具选择(分类,内容提取,工具调用会用到。目前只有gpt支持)
|
||||
"toolChoice": true, // 是否支持工具选择(分类,内容提取,工具调用会用到。)
|
||||
"functionCall": false, // 是否支持函数调用(分类,内容提取,工具调用会用到。会优先使用 toolChoice,如果为false,则使用 functionCall,如果仍为 false,则使用提示词模式)
|
||||
"customCQPrompt": "", // 自定义文本分类提示词(不支持工具和函数调用的模型
|
||||
"customExtractPrompt": "", // 自定义内容提取提示词
|
||||
|
||||
@@ -35,9 +35,7 @@ weight: 707
|
||||
|
||||
### Milvus版本
|
||||
|
||||
暂不推荐,部分系统存在精度丢失,等待修复。
|
||||
|
||||
对于千万级以上向量性能更优秀。
|
||||
生产部署首选,对于千万级以上向量性能更优秀。
|
||||
|
||||
[点击查看 Milvus 官方推荐配置](https://milvus.io/docs/prerequisite-docker.md)
|
||||
|
||||
@@ -51,9 +49,7 @@ weight: 707
|
||||
|
||||
### zilliz cloud版本
|
||||
|
||||
暂不推荐,部分系统存在精度丢失,等待修复。
|
||||
|
||||
亿级以上向量首选。
|
||||
Milvus 的全托管服务,性能优于 Milvus 并提供 SLA,点击使用 [Zilliz Cloud](https://zilliz.com.cn/)。
|
||||
|
||||
由于向量库使用了 Cloud,无需占用本地资源,无需太关注。
|
||||
|
||||
@@ -138,14 +134,16 @@ curl -o docker-compose.yml https://raw.githubusercontent.com/labring/FastGPT/mai
|
||||
# curl -o docker-compose.yml https://raw.githubusercontent.com/labring/FastGPT/main/files/docker/docker-compose-zilliz.yml
|
||||
```
|
||||
|
||||
### 2. 修改 docker-compose.yml 环境变量
|
||||
### 2. 修改环境变量
|
||||
|
||||
找到 yml 文件中,fastgpt 容器的环境变量进行下面操作:
|
||||
|
||||
{{< tabs tabTotal="3" >}}
|
||||
{{< tab tabName="PgVector版本" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```
|
||||
无需操作
|
||||
FE_DOMAIN=你的前端访问地址,例如 http://192.168.0.1:3000;https://cloud.fastgpt.cn
|
||||
```
|
||||
|
||||
{{< /markdownify >}}
|
||||
@@ -154,7 +152,7 @@ curl -o docker-compose.yml https://raw.githubusercontent.com/labring/FastGPT/mai
|
||||
{{< markdownify >}}
|
||||
|
||||
```
|
||||
无需操作
|
||||
FE_DOMAIN=你的前端访问地址,例如 http://192.168.0.1:3000;https://cloud.fastgpt.cn
|
||||
```
|
||||
|
||||
{{< /markdownify >}}
|
||||
@@ -162,11 +160,14 @@ curl -o docker-compose.yml https://raw.githubusercontent.com/labring/FastGPT/mai
|
||||
{{< tab tabName="Zilliz版本" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
打开 [Zilliz Cloud](https://zilliz.com.cn/), 创建实例并获取相关秘钥。
|
||||
|
||||

|
||||
|
||||
{{% alert icon="🤖" context="success" %}}
|
||||
|
||||
修改`MILVUS_ADDRESS`和`MILVUS_TOKEN`链接参数,分别对应 `zilliz` 的 `Public Endpoint` 和 `Api key`,记得把自己ip加入白名单。
|
||||
1. 修改`MILVUS_ADDRESS`和`MILVUS_TOKEN`链接参数,分别对应 `zilliz` 的 `Public Endpoint` 和 `Api key`,记得把自己ip加入白名单。
|
||||
2. 修改 `FE_DOMAIN`=你的前端访问地址,例如 http://192.168.0.1:3000;https://cloud.fastgpt.cn
|
||||
|
||||
{{% /alert %}}
|
||||
|
||||
|
||||
@@ -132,7 +132,11 @@ OneAPI 的 API Key 配置错误,需要修改`OPENAI_API_KEY`环境变量,并
|
||||
|
||||
4.8.10 版本新增了错误日志,报错时,会在日志中打印出实际发送的 Body 参数,可以复制该参数后,通过 curl 向 oneapi 发起请求测试。
|
||||
|
||||
由于 oneapi 在 stream 模式下,无法正确捕获错误,可以设置成 `stream=false` 后进行测试。
|
||||
由于 oneapi 在 stream 模式下,无法正确捕获错误,可以设置成 `stream=false` 后进行测试。可能的问题:
|
||||
|
||||
1. 国内模型命中风控
|
||||
2. 不支持的模型参数:只保留 messages 和必要参数来测试,删除其他参数测试。
|
||||
3. 参数不符合模型要求:例如有的模型 temperature 不支持 0,有些不支持两位小数。max_tokens 超出,上下文超长等。
|
||||
|
||||
### 如何测试模型是否支持工具调用
|
||||
|
||||
@@ -265,4 +269,4 @@ curl --location --request POST 'https://oneapi.xxxx/v1/chat/completions' \
|
||||
],
|
||||
"tool_choice": "auto"
|
||||
}'
|
||||
```
|
||||
```
|
||||
|
||||
@@ -25,7 +25,7 @@ FastGPT 的 API Key **有 2 类**,一类是全局通用的 key (无法直接
|
||||
|
||||
| 通用key | 应用特定 key |
|
||||
| --------------------- | --------------------- |
|
||||
|  |  |
|
||||
|  |  |
|
||||
|
||||
## 基本配置
|
||||
|
||||
|
||||
@@ -35,9 +35,10 @@ curl --location --request POST 'http://localhost:3000/api/v1/chat/completions' \
|
||||
--header 'Authorization: Bearer fastgpt-xxxxxx' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"chatId": "abcd",
|
||||
"chatId": "my_chatId",
|
||||
"stream": false,
|
||||
"detail": false,
|
||||
"responseChatItemId": "my_responseChatItemId",
|
||||
"variables": {
|
||||
"uid": "asdfadsfasfd2323",
|
||||
"name": "张三"
|
||||
@@ -104,6 +105,7 @@ curl --location --request POST 'http://localhost:3000/api/v1/chat/completions' \
|
||||
- 为 `undefined` 时(不传入),不使用 FastGPT 提供的上下文功能,完全通过传入的 messages 构建上下文。 不会将你的记录存储到数据库中,你也无法在记录汇总中查阅到。
|
||||
- 为`非空字符串`时,意味着使用 chatId 进行对话,自动从 FastGPT 数据库取历史记录,并使用 messages 数组最后一个内容作为用户问题。请自行确保 chatId 唯一,长度小于250,通常可以是自己系统的对话框ID。
|
||||
- messages: 结构与 [GPT接口](https://platform.openai.com/docs/api-reference/chat/object) chat模式一致。
|
||||
- responseChatItemId: string | undefined 。如果传入,则会将该值作为本次对话的响应消息的 ID,FastGPT 会自动将该 ID 存入数据库。请确保,在当前`chatId`下,`responseChatItemId`是唯一的。
|
||||
- detail: 是否返回中间值(模块状态,响应的完整结果等),`stream模式`下会通过`event`进行区分,`非stream模式`结果保存在`responseData`中。
|
||||
- variables: 模块变量,一个对象,会替换模块中,输入框内容里的`{{key}}`
|
||||
{{% /alert %}}
|
||||
|
||||
@@ -31,17 +31,6 @@ curl --location --request POST 'http://localhost:3000/api/support/wallet/usage/c
|
||||
}'
|
||||
```
|
||||
|
||||
**x例子**
|
||||
|
||||
```bash
|
||||
curl --location --request POST 'http://localhost:3000/api/support/wallet/bill/createTrainingBill' \
|
||||
--header 'Authorization: Bearer {{apikey}}' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"name": "可选,自定义订单名称,例如:文档训练-fastgpt.docx"
|
||||
}'
|
||||
```
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
|
||||
@@ -418,9 +407,7 @@ curl --location --request POST 'http://localhost:3000/api/core/dataset/collectio
|
||||
- parentId: 父级ID,不填则默认为根目录
|
||||
- name: 集合名称(必填)
|
||||
- metadata: 元数据(暂时没啥用)
|
||||
- trainingType:(必填)
|
||||
- chunk: 按文本长度进行分割
|
||||
- qa: QA拆分
|
||||
- trainingType: 训练模式(必填)
|
||||
- chunkSize: 每个 chunk 的长度(可选). chunk模式:100~3000; qa模式: 4000~模型最大token(16k模型通常建议不超过10000)
|
||||
- chunkSplitter: 自定义最高优先分割符号(可选)
|
||||
- qaPrompt: qa拆分自定义提示词(可选)
|
||||
@@ -494,9 +481,7 @@ curl --location --request POST 'http://localhost:3000/api/core/dataset/collectio
|
||||
- datasetId: 知识库的ID(必填)
|
||||
- parentId: 父级ID,不填则默认为根目录
|
||||
- metadata.webPageSelector: 网页选择器,用于指定网页中的哪个元素作为文本(可选)
|
||||
- trainingType:(必填)
|
||||
- chunk: 按文本长度进行分割
|
||||
- qa: QA拆分
|
||||
- trainingType:训练模式(必填)
|
||||
- chunkSize: 每个 chunk 的长度(可选). chunk模式:100~3000; qa模式: 4000~模型最大token(16k模型通常建议不超过10000)
|
||||
- chunkSplitter: 自定义最高优先分割符号(可选)
|
||||
- qaPrompt: qa拆分自定义提示词(可选)
|
||||
@@ -516,7 +501,13 @@ data 为集合的 ID。
|
||||
"statusText": "",
|
||||
"message": "",
|
||||
"data": {
|
||||
"collectionId": "65abd0ad9d1448617cba6031"
|
||||
"collectionId": "65abd0ad9d1448617cba6031",
|
||||
"results": {
|
||||
"insertLen": 1,
|
||||
"overToken": [],
|
||||
"repeat": [],
|
||||
"error": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -555,9 +546,7 @@ curl --location --request POST 'http://localhost:3000/api/core/dataset/collectio
|
||||
- data: 知识库相关信息(json序列化后传入)
|
||||
- datasetId: 知识库的ID(必填)
|
||||
- parentId: 父级ID,不填则默认为根目录
|
||||
- trainingType:(必填)
|
||||
- chunk: 按文本长度进行分割
|
||||
- qa: QA拆分
|
||||
- trainingType:训练模式(必填)
|
||||
- chunkSize: 每个 chunk 的长度(可选). chunk模式:100~3000; qa模式: 4000~模型最大token(16k模型通常建议不超过10000)
|
||||
- chunkSplitter: 自定义最高优先分割符号(可选)
|
||||
- qaPrompt: qa拆分自定义提示词(可选)
|
||||
@@ -592,6 +581,82 @@ data 为集合的 ID。
|
||||
{{< /tab >}}
|
||||
{{< /tabs >}}
|
||||
|
||||
### 创建一个API集合
|
||||
|
||||
传入一个文件的 id,创建一个集合,会读取文件内容进行分割。目前支持:pdf, docx, md, txt, html, csv。
|
||||
|
||||
{{< tabs tabTotal="3" >}}
|
||||
{{< tab tabName="请求示例" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
使用代码上传时,请注意中文 filename 需要进行 encode 处理,否则容易乱码。
|
||||
|
||||
```bash
|
||||
curl --location --request POST 'http://localhost:3000/api/core/dataset/collection/create/apiCollection' \
|
||||
--header 'Authorization: Bearer fastgpt-xxx' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"name": "A Quick Guide to Building a Discord Bot.pdf",
|
||||
"apiFileId":"A Quick Guide to Building a Discord Bot.pdf",
|
||||
|
||||
"datasetId": "674e9e479c3503c385495027",
|
||||
"parentId": null,
|
||||
|
||||
"trainingType": "chunk",
|
||||
"chunkSize":512,
|
||||
"chunkSplitter":"",
|
||||
"qaPrompt":""
|
||||
}'
|
||||
```
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
|
||||
{{< tab tabName="参数说明" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
需要使用 POST JSON 的格式请求(`Content-Type: application/json`),请求体包含以下字段。
|
||||
|
||||
{{% alert icon=" " context="success" %}}
|
||||
- name: 集合名,建议就用文件名,必填。
|
||||
- apiFileId: 文件的ID,必填。
|
||||
- datasetId: 知识库的ID(必填)
|
||||
- parentId: 父级ID,不填则默认为根目录
|
||||
- trainingType:训练模式(必填)
|
||||
- chunkSize: 每个 chunk 的长度(可选). chunk模式:100~3000; qa模式: 4000~模型最大token(16k模型通常建议不超过10000)
|
||||
- chunkSplitter: 自定义最高优先分割符号(可选)
|
||||
- qaPrompt: qa拆分自定义提示词(可选)
|
||||
{{% /alert %}}
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
|
||||
{{< tab tabName="响应示例" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
data 为集合的 ID。
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 200,
|
||||
"statusText": "",
|
||||
"message": "",
|
||||
"data": {
|
||||
"collectionId": "65abc044e4704bac793fbd81",
|
||||
"results": {
|
||||
"insertLen": 1,
|
||||
"overToken": [],
|
||||
"repeat": [],
|
||||
"error": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
{{< /tabs >}}
|
||||
|
||||
### 创建一个外部文件库集合(商业版)
|
||||
|
||||
{{< tabs tabTotal="3" >}}
|
||||
@@ -648,7 +713,12 @@ data 为集合的 ID。
|
||||
"message": "",
|
||||
"data": {
|
||||
"collectionId": "6646fcedfabd823cdc6de746",
|
||||
"insertLen": 3
|
||||
"results": {
|
||||
"insertLen": 1,
|
||||
"overToken": [],
|
||||
"repeat": [],
|
||||
"error": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -1028,9 +1098,7 @@ curl --location --request POST 'https://api.fastgpt.in/api/core/dataset/data/pus
|
||||
|
||||
{{% alert icon=" " context="success" %}}
|
||||
- collectionId: 集合ID(必填)
|
||||
- trainingType:(必填)
|
||||
- chunk: 按文本长度进行分割
|
||||
- qa: QA拆分
|
||||
- trainingType:训练模式(必填)
|
||||
- prompt: 自定义 QA 拆分提示词,需严格按照模板,建议不要传入。(选填)
|
||||
- data:(具体数据)
|
||||
- q: 主要数据(必填)
|
||||
|
||||
@@ -38,11 +38,7 @@ weight: 813
|
||||
"customExtractPrompt": "",
|
||||
"defaultSystemChatPrompt": "",
|
||||
"defaultConfig": {
|
||||
"temperature": 1,
|
||||
"stream": false
|
||||
},
|
||||
"fieldMap": {
|
||||
"max_tokens": "max_completion_tokens"
|
||||
"temperature": 1
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -67,11 +63,7 @@ weight: 813
|
||||
"customExtractPrompt": "",
|
||||
"defaultSystemChatPrompt": "",
|
||||
"defaultConfig": {
|
||||
"temperature": 1,
|
||||
"stream": false
|
||||
},
|
||||
"fieldMap": {
|
||||
"max_tokens": "max_completion_tokens"
|
||||
"temperature": 1
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -19,11 +19,11 @@ weight: 811
|
||||
|
||||
### 3. 添加环境变量
|
||||
|
||||
- 给 fastgpt 和 fastgpt-pro 镜像添加环境变量:`FE_DOMAIN=http://xx.com`,值为 fastgpt 前端访问地址,注意后面不要加`/`。如果没加到话,图片识别可能会有问题。
|
||||
- 给 fastgpt 和 fastgpt-pro 镜像添加环境变量:`FE_DOMAIN=http://xx.com`,值为 fastgpt 前端访问地址,注意后面不要加`/`。可以自动补齐相对文件地址的前缀。
|
||||
|
||||
### 4. 调整文件上传编排
|
||||
|
||||
虽然依然兼容旧版的文件上传编排,但是未来两个版本内将会去除兼容代码,请尽快调整编排,以适应最新的文件上传逻辑。尤其是嵌套应用的文件传递,未来将不会自动传递,必须手动指定传递的文件。
|
||||
虽然依然兼容旧版的文件上传编排,但是未来两个版本内将会去除兼容代码,请尽快调整编排,以适应最新的文件上传逻辑。尤其是嵌套应用的文件传递,未来将不会自动传递,必须手动指定传递的文件。具体内容可参考: [文件上传变更](/docs/guide/course/fileinput/#4813%E7%89%88%E6%9C%AC%E8%B5%B7%E5%85%B3%E4%BA%8E%E6%96%87%E4%BB%B6%E4%B8%8A%E4%BC%A0%E7%9A%84%E6%9B%B4%E6%96%B0)
|
||||
|
||||
## 更新说明
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: 'V4.8.14(进行中)'
|
||||
title: 'V4.8.14'
|
||||
description: 'FastGPT V4.8.14 更新说明'
|
||||
icon: 'upgrade'
|
||||
draft: false
|
||||
@@ -7,7 +7,48 @@ toc: true
|
||||
weight: 810
|
||||
---
|
||||
|
||||
## 更新预告
|
||||
## 更新指南
|
||||
|
||||
1.
|
||||
2. 新增 - 工作流支持进入聊天框/点击开始对话后,自动触发一轮对话。
|
||||
### 1. 做好数据备份
|
||||
|
||||
### 2. 修改镜像
|
||||
|
||||
- 更新 FastGPT 镜像 tag: v4.8.14-fix
|
||||
- 更新 FastGPT 商业版镜像 tag: v4.8.14 (fastgpt-pro镜像)
|
||||
- Sandbox 镜像,可以不更新
|
||||
|
||||
milvus版本使用:v4.8.14-milvus-fix 镜像。
|
||||
|
||||
## 新功能预览
|
||||
|
||||
### 自动触发工作流
|
||||
|
||||
可以允许你配置用户加载对话时,自动触发一次工作流。可以用于一些 CRM 系统,可以快速的引导用户使用,无需等待用户主动触发。
|
||||
|
||||
| | |
|
||||
| --- | --- |
|
||||
|  |  |
|
||||
|
||||
|
||||
## 完整更新内容
|
||||
|
||||
1. 新增 - 工作流支持进入聊天框/点击开始对话后,自动触发一轮对话。
|
||||
2. 新增 - 重写 chatContext,对话测试也会有日志,并且刷新后不会丢失对话。
|
||||
3. 新增 - 分享链接支持配置是否允许查看原文。
|
||||
4. 新增 - 新的 doc2x 插件。
|
||||
5. 新增 - 繁体中文-台湾。
|
||||
6. 新增 - 分享链接和 chat api 支持传入自定义 uid。
|
||||
7. 商业版新增 - 微软 oauth 登录
|
||||
8. 优化 - 工作流 ui 细节。
|
||||
9. 优化 - 应用编辑记录采用 diff 存储,避免浏览器溢出。
|
||||
10. 优化 - 代码入口,增加 register 入口,无需等待首次访问才执行。
|
||||
11. 优化 - 工作流检查,增加更多缺失值检查。
|
||||
12. 优化 - 增加知识库训练最大重试次数限制。
|
||||
13. 优化 - 图片路径问题和示意图任务
|
||||
14. 优化 - Milvus description
|
||||
15. 修复 - 分块策略,四级标题会被丢失。 同时新增了五级标题的支持。
|
||||
16. 修复 - MongoDB 知识库集合唯一索引。
|
||||
17. 修复 - 反选知识库引用后可能会报错。
|
||||
18. 修复 - 简易模式转工作流,不是使用最新编辑记录进行转移。
|
||||
19. 修复 - 表单输入的说明文字不显示。
|
||||
20. 修复 - API 无法使用 base64 图片。
|
||||
|
||||
27
docSite/content/zh-cn/docs/development/upgrading/4815.md
Normal file
@@ -0,0 +1,27 @@
|
||||
---
|
||||
title: 'V4.8.15(进行中)'
|
||||
description: 'FastGPT V4.8.15 更新说明'
|
||||
icon: 'upgrade'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 809
|
||||
---
|
||||
|
||||
|
||||
## 完整更新内容
|
||||
|
||||
1. 新增 - API 知识库, 见 [API 知识库介绍](/docs/guide/knowledge_base/api_dataset/),外部文件库会被弃用。
|
||||
2. 新增 - 工具箱页面,展示所有可用的系统资源。商业版后台可更便捷的配置系统插件和自定义分类。
|
||||
3. 新增 - Markdown 中,HTML代码会被额外渲染,可以选择预览模式,会限制所有 script 脚本,仅做展示。
|
||||
4. 新增 - 自定义系统级文件解析服务, 见 [接入 Marker PDF 文档解析](/docs/development/custom-models/marker/)
|
||||
5. 新增 - 集合直接重新调整参数,无需删除再导入。
|
||||
6. 新增 - 商业版后台支持配置侧边栏跳转链接。
|
||||
7. 优化 - base64 图片截取判断。
|
||||
8. 优化 - i18n cookie 判断。
|
||||
9. 优化 - 支持 Markdown 文本分割时,只有标题,无内容。
|
||||
10. 优化 - 字符串变量替换,未赋值的变量会转成 undefined,而不是保留原来 id 串。
|
||||
11. 优化 - 全局变量默认值在 API 生效,并且自定义变量支持默认值。
|
||||
12. 修复 - 分享链接点赞鉴权问题。
|
||||
13. 修复 - 对话页面切换自动执行应用时,会误触发非自动执行应用。
|
||||
14. 修复 - 语音播放鉴权问题。
|
||||
15. 修复 - 插件应用知识库引用上限始终为 3000
|
||||
9
docSite/content/zh-cn/docs/guide/DialogBoxes/_index.md
Normal file
@@ -0,0 +1,9 @@
|
||||
---
|
||||
weight: 470
|
||||
title: '对话框'
|
||||
description: '对话框组件,支持多种交互方式,提升用户在应用中的交互体验。'
|
||||
icon: 'chat_bubble'
|
||||
draft: false
|
||||
images: []
|
||||
---
|
||||
<!-- 470 ~ 500 -->
|
||||
@@ -0,0 +1,57 @@
|
||||
---
|
||||
title: "对话框与HTML渲染"
|
||||
description: "如何在FastGPT中通过Markdown嵌入HTML代码块,并提供全屏、源代码切换等交互功能"
|
||||
icon: "group"
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 470
|
||||
---
|
||||
|
||||
| 源码模式 | 预览模式 | 全屏模式 |
|
||||
| --- | --- | --- |
|
||||
|  |  |  |
|
||||
|
||||
|
||||
### 1. **设计背景**
|
||||
|
||||
尽管Markdown本身支持嵌入HTML标签,但由于安全问题,许多平台和环境对HTML的渲染进行了限制,特别是在渲染动态内容、交互式元素以及外部资源时。这些限制大大降低了用户在撰写和展示复杂文档时的灵活性,尤其是当需要嵌入外部HTML内容时。为了应对这一问题,我们通过使用 `iframe` 来嵌入和渲染HTML内容,并结合 `sandbox` 属性,保障了外部HTML的安全渲染。
|
||||
|
||||
### 2. 功能简介
|
||||
|
||||
该功能模块的主要目的是扩展FastGPT在Markdown渲染中的能力,支持嵌入和渲染HTML内容。由于是利用 Iframe 渲染,所以无法确认内容的高度,FastGPT 中会给 Iframe 设置一个固定高度来进行渲染。并且不支持 HTML 中执行 js 脚本。
|
||||
|
||||
### 3. 技术实现
|
||||
|
||||
本模块通过以下方式实现了HTML渲染和互动功能:
|
||||
|
||||
- **组件设计**:该模块通过渲染 `iframe` 类型的代码块展示HTML内容。使用自定义的 `IframeBlock` 组件,结合 `sandbox` 属性来保障嵌入内容的安全性。`sandbox` 限制了外部HTML中的行为,如禁用脚本执行、限制表单提交等,确保HTML内容的安全性。通过辅助函数与渲染Markdown内容的部分结合,处理 `iframe` 嵌入的HTML内容。
|
||||
- **安全机制**:通过 `iframe` 的 `sandbox` 属性和 `referrerPolicy` 来防止潜在的安全风险。`sandbox` 属性提供了细粒度的控制,允许特定的功能(如脚本、表单、弹出窗口等)在受限的环境中执行,以确保渲染的HTML内容不会对系统造成威胁。
|
||||
- **展示与互动功能**:用户可以通过不同的展示模式(如全屏、预览、源代码模式)自由切换,以便更灵活地查看和控制嵌入的HTML内容。嵌入的 `iframe` 自适应父容器的宽度,同时保证 `iframe`嵌入的内容能够适当显示。
|
||||
|
||||
### 4. 如何使用
|
||||
|
||||
你只需要通过 Markdown 代码块格式,并标记语言为 `html` 即可。例如:
|
||||
|
||||
```md
|
||||
```html
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<meta http-equiv="X-UA-Compatible" content="ie=edge">
|
||||
<title>欢迎使用FastGPT</title>
|
||||
</head>
|
||||
<body>
|
||||
<nav>
|
||||
<ul>
|
||||
<li><a href="#home">首页</a></li>
|
||||
<li><a href="#about">关于我们</a></li>
|
||||
<li><a href="#contact">联系我们</a></li>
|
||||
<li><a href="#gallery">图库</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
```
|
||||
183
docSite/content/zh-cn/docs/guide/knowledge_base/api_dataset.md
Normal file
@@ -0,0 +1,183 @@
|
||||
---
|
||||
title: 'API 文件库'
|
||||
description: 'FastGPT API 文件库功能介绍和使用方式'
|
||||
icon: 'language'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 405
|
||||
---
|
||||
|
||||
| | |
|
||||
| --- | --- |
|
||||
|  |  |
|
||||
|
||||
## 背景
|
||||
|
||||
目前 FastGPT 支持本地文件导入,但是很多时候,用户自身已经有了一套文档库,如果把文件重复导入一遍,会造成二次存储,并且不方便管理。因此 FastGPT 提供了一个 API 文件库的概念,可以通过简单的 API 接口,去拉取已有的文档库,并且可以灵活配置是否导入。
|
||||
|
||||
API 文件库能够让用户轻松对接已有的文档库,只需要按照 FastGPT 的 API 文件库规范,提供相应文件接口,然后将服务接口的 baseURL 和 token 填入知识库创建参数中,就能直接在页面上拿到文件库的内容,并选择性导入。
|
||||
|
||||
## 如何使用 API 文件库
|
||||
|
||||
创建知识库时,选择 API 文件库类型,然后需要配置两个关键参数:文件服务接口的 baseURL 和用于身份验证的请求头信息。只要提供的接口规范符合 FastGPT 的要求,系统就能自动获取并展示完整的文件列表,可以根据需要选择性地将文件导入到知识库中。
|
||||
|
||||
你需要提供两个参数:
|
||||
- baseURL: 文件服务接口的 baseURL
|
||||
- authorization: 用于身份验证的请求头信息,实际请求格式为 `Authorization: Bearer <token>`
|
||||
|
||||
## 接口规范
|
||||
|
||||
接口响应格式:
|
||||
|
||||
```ts
|
||||
type ResponseType = {
|
||||
success: boolean;
|
||||
message: string;
|
||||
data: any;
|
||||
}
|
||||
```
|
||||
|
||||
数据类型:
|
||||
|
||||
```ts
|
||||
// 文件列表中,单项的文件类型
|
||||
type FileListItem = {
|
||||
id: string;
|
||||
parentId: string | null;
|
||||
name: string;
|
||||
type: 'file' | 'folder';
|
||||
updateTime: Date;
|
||||
createTime: Date;
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
### 1. 获取文件树
|
||||
|
||||
{{< tabs tabTotal="2" >}}
|
||||
{{< tab tabName="请求示例" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
{{% alert icon=" " context="success" %}}
|
||||
- parentId - 父级 id,可选,或者 null。
|
||||
- searchKey - 检索词,可选
|
||||
{{% /alert %}}
|
||||
|
||||
```bash
|
||||
curl --location --request POST '{{baseURL}}/v1/file/list' \
|
||||
--header 'Authorization: Bearer {{authorization}}' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"parentId": null,
|
||||
"searchKey": ""
|
||||
}'
|
||||
```
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
|
||||
{{< tab tabName="响应示例" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 200,
|
||||
"success": true,
|
||||
"message": "",
|
||||
"data": [
|
||||
{
|
||||
"id": "xxxx",
|
||||
"parentId": "xxxx",
|
||||
"type": "file", // file | folder
|
||||
"name":"test.json",
|
||||
"updateTime":"2024-11-26T03:05:24.759Z",
|
||||
"createTime":"2024-11-26T03:05:24.759Z"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
{{< /tabs >}}
|
||||
|
||||
### 2. 获取单个文件内容(文本内容或访问链接)
|
||||
|
||||
{{< tabs tabTotal="3" >}}
|
||||
{{< tab tabName="请求示例" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```bash
|
||||
curl --location --request GET '{{baseURL}}/v1/file/content?id=xx' \
|
||||
--header 'Authorization: Bearer {{authorization}}'
|
||||
```
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
|
||||
{{< tab tabName="响应示例" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 200,
|
||||
"success": true,
|
||||
"message": "",
|
||||
"data": {
|
||||
"content": "FastGPT 是一个基于 LLM 大语言模型的知识库问答系统,提供开箱即用的数据处理、模型调用等能力。同时可以通过 Flow 可视化进行工作流编排,从而实现复杂的问答场景!\n",
|
||||
"previewUrl": "xxxx"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
{{% alert icon=" " context="success" %}}
|
||||
二选一返回,如果同时返回则 content 优先级更高。
|
||||
|
||||
- content - 文件内容,直接拿来用。
|
||||
- previewUrl - 文件链接,系统会请求该地址获取文件内容。
|
||||
{{% /alert %}}
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
{{< /tabs >}}
|
||||
|
||||
|
||||
### 3. 获取文件阅读链接(用于查看原文)
|
||||
|
||||
{{< tabs tabTotal="2" >}}
|
||||
{{< tab tabName="请求示例" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
id 为文件的 id。
|
||||
|
||||
```bash
|
||||
curl --location --request GET '{{baseURL}}/v1/file/read?id=xx' \
|
||||
--header 'Authorization: Bearer {{authorization}}'
|
||||
```
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
|
||||
{{< tab tabName="响应示例" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 200,
|
||||
"success": true,
|
||||
"message": "",
|
||||
"data": {
|
||||
"url": "xxxx"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
{{% alert icon=" " context="success" %}}
|
||||
- url - 文件访问链接,拿到后会自动打开。
|
||||
{{% /alert %}}
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
{{< /tabs >}}
|
||||
|
||||
|
||||
@@ -23,4 +23,17 @@ weight: 408
|
||||
- 文件阅读ID:通常情况下,文件访问URL是临时的。如果希望永久可以访问,你需要使用该文件阅读ID,并配合上“外部预览地址”,跳转至新的阅读地址进行原文件访问。
|
||||
- 文件名:默认会自动解析文件访问URL上的文件名。如果你手动填写,将会以手动填写的值为准。
|
||||
|
||||
[点击查看API导入文档](/docs/development/openapi/dataset/#创建一个外部文件库集合商业版)
|
||||
[点击查看API导入文档](/docs/development/openapi/dataset/#创建一个外部文件库集合商业版)
|
||||
|
||||
## API 文件库替代方案
|
||||
|
||||
4.8.15 提供了新的知识库类型 - API 文件库,对外部文件知识库做了进一步的拓展
|
||||
|
||||
通过对接口进行简单的调整,就能使用 API 文件库代替外部文件知识库的功能
|
||||
|
||||
你可以直接将外部文件知识库中的外部预览地址,作为 API 文件库接口规范中获取文件阅读链接的接口返回
|
||||
|
||||
然后再以相同的 baseURL 实现获取文件列表和获取单个文件内容这两个接口
|
||||
|
||||
这样就能轻松地使用 API 文件库替代原有的外部文件知识库,更多详细的内容见 API 文件库的文档
|
||||
|
||||
|
||||
@@ -85,16 +85,15 @@ FastGPT 商业版软件根据不同的部署方式,分为 3 类收费模式。
|
||||
|
||||
## QA
|
||||
|
||||
1. 如何交付?
|
||||
### 如何交付?
|
||||
|
||||
完整版应用 = 开源版镜像 + 商业版镜像
|
||||
|
||||
我们会提供一个商业版镜像给你使用,该镜像需要一个 License 启动。
|
||||
|
||||
2. 二次开发如何操作?
|
||||
|
||||
可自行修改开源版代码进行二次开发,不支持修改商业版镜像。
|
||||
### 二次开发如何操作?
|
||||
|
||||
可以修改开源版部分代码,不支持修改商业版镜像。完整版本=开源版+商业版镜像,所以是可以修改部分内容的。但是如果二开了,后续则需要自己进行代码合并升级。
|
||||
|
||||
## Sealos 费用
|
||||
|
||||
|
||||
@@ -37,40 +37,55 @@ weight: 506
|
||||
私有部署的用户可自行查阅自己的 IP 地址。
|
||||
|
||||
海外版用户(cloud.tryfastgpt.ai)可以填写下面的 IP 白名单:
|
||||
如果仍无响应,可输入命令: `nslookup cloud.sealos.io | awk '/^Address: [0-9]/ {print $2}'` 获取最新 IP
|
||||
|
||||
```
|
||||
34.143.240.160
|
||||
35.197.149.75
|
||||
34.87.173.252
|
||||
34.87.20.189
|
||||
34.87.44.74
|
||||
34.124.189.116
|
||||
34.126.163.205
|
||||
35.247.161.35
|
||||
34.87.110.152
|
||||
34.87.51.146
|
||||
34.87.102.86
|
||||
35.247.163.68
|
||||
35.240.227.100
|
||||
34.142.157.52
|
||||
34.87.152.33
|
||||
34.124.237.188
|
||||
34.143.149.171
|
||||
34.143.240.160
|
||||
34.87.51.146
|
||||
34.87.79.202
|
||||
34.87.180.104
|
||||
35.247.163.68
|
||||
34.87.102.86
|
||||
35.198.192.104
|
||||
34.126.163.205
|
||||
34.124.189.116
|
||||
34.143.149.171
|
||||
34.87.173.252
|
||||
34.142.157.52
|
||||
34.87.180.104
|
||||
34.87.20.189
|
||||
34.87.110.152
|
||||
34.87.44.74
|
||||
34.87.152.33
|
||||
35.197.149.75
|
||||
35.247.161.35
|
||||
```
|
||||
|
||||
国内版用户(fastgpt.cn)可以填写下面的 IP 白名单:
|
||||
如果仍无响应,可输入命令: `nslookup hzh.sealos.run | awk '/^Address: [0-9]/ {print $2}'` 获取最新 IP
|
||||
|
||||
```
|
||||
47.98.36.227
|
||||
118.31.58.217
|
||||
121.40.213.28
|
||||
120.26.162.94
|
||||
223.4.211.186
|
||||
47.97.1.240
|
||||
121.43.105.217
|
||||
121.41.178.7
|
||||
121.40.65.187
|
||||
47.97.59.172
|
||||
101.37.205.32
|
||||
120.55.195.90
|
||||
120.26.229.115
|
||||
120.55.193.112
|
||||
47.98.190.173
|
||||
112.124.41.79
|
||||
121.196.235.183
|
||||
121.41.75.88
|
||||
121.43.108.48
|
||||
112.124.12.6
|
||||
121.43.52.222
|
||||
121.199.162.43
|
||||
121.199.162.102
|
||||
120.55.94.163
|
||||
47.99.59.223
|
||||
112.124.46.5
|
||||
121.40.46.247
|
||||
```
|
||||
|
||||
## 4. 获取AES Key,选择加密方式
|
||||
|
||||
@@ -114,15 +114,15 @@ services:
|
||||
# fastgpt
|
||||
sandbox:
|
||||
container_name: sandbox
|
||||
image: ghcr.io/labring/fastgpt-sandbox:v4.8.11 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.8.11 # 阿里云
|
||||
image: ghcr.io/labring/fastgpt-sandbox:v4.8.14 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.8.14 # 阿里云
|
||||
networks:
|
||||
- fastgpt
|
||||
restart: always
|
||||
fastgpt:
|
||||
container_name: fastgpt
|
||||
image: ghcr.io/labring/fastgpt:v4.8.11 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.11 # 阿里云
|
||||
image: ghcr.io/labring/fastgpt:v4.8.14-milvus-fix # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.14-milvus-fix # 阿里云
|
||||
ports:
|
||||
- 3000:3000
|
||||
networks:
|
||||
@@ -133,6 +133,8 @@ services:
|
||||
- sandbox
|
||||
restart: always
|
||||
environment:
|
||||
# 前端访问地址: http://localhost:3000
|
||||
- FE_DOMAIN=
|
||||
# root 密码,用户名为: root。如果需要修改 root 密码,直接修改这个环境变量,并重启即可。
|
||||
- DEFAULT_ROOT_PSW=1234
|
||||
# AI模型的API地址哦。务必加 /v1。这里默认填写了OneApi的访问地址。
|
||||
@@ -154,8 +156,6 @@ services:
|
||||
- MILVUS_TOKEN=none
|
||||
# sandbox 地址
|
||||
- SANDBOX_URL=http://sandbox:3000
|
||||
# 前端地址: http://localhost:3000
|
||||
- FE_DOMAIN=
|
||||
# 日志等级: debug, info, warn, error
|
||||
- LOG_LEVEL=info
|
||||
- STORE_LOG_LEVEL=warn
|
||||
|
||||
@@ -72,15 +72,15 @@ services:
|
||||
# fastgpt
|
||||
sandbox:
|
||||
container_name: sandbox
|
||||
image: ghcr.io/labring/fastgpt-sandbox:v4.8.11 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.8.11 # 阿里云
|
||||
image: ghcr.io/labring/fastgpt-sandbox:v4.8.13 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.8.13 # 阿里云
|
||||
networks:
|
||||
- fastgpt
|
||||
restart: always
|
||||
fastgpt:
|
||||
container_name: fastgpt
|
||||
image: ghcr.io/labring/fastgpt:v4.8.11 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.11 # 阿里云
|
||||
image: ghcr.io/labring/fastgpt:v4.8.14 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.14 # 阿里云
|
||||
ports:
|
||||
- 3000:3000
|
||||
networks:
|
||||
@@ -91,6 +91,8 @@ services:
|
||||
- sandbox
|
||||
restart: always
|
||||
environment:
|
||||
# 前端访问地址: http://localhost:3000
|
||||
- FE_DOMAIN=
|
||||
# root 密码,用户名为: root。如果需要修改 root 密码,直接修改这个环境变量,并重启即可。
|
||||
- DEFAULT_ROOT_PSW=1234
|
||||
# AI模型的API地址哦。务必加 /v1。这里默认填写了OneApi的访问地址。
|
||||
@@ -111,8 +113,6 @@ services:
|
||||
- PG_URL=postgresql://username:password@pg:5432/postgres
|
||||
# sandbox 地址
|
||||
- SANDBOX_URL=http://sandbox:3000
|
||||
# 前端地址: http://localhost:3000
|
||||
- FE_DOMAIN=
|
||||
# 日志等级: debug, info, warn, error
|
||||
- LOG_LEVEL=info
|
||||
- STORE_LOG_LEVEL=warn
|
||||
|
||||
@@ -53,15 +53,15 @@ services:
|
||||
wait $$!
|
||||
sandbox:
|
||||
container_name: sandbox
|
||||
image: ghcr.io/labring/fastgpt-sandbox:v4.8.11 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.8.11 # 阿里云
|
||||
image: ghcr.io/labring/fastgpt-sandbox:v4.8.14 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.8.14 # 阿里云
|
||||
networks:
|
||||
- fastgpt
|
||||
restart: always
|
||||
fastgpt:
|
||||
container_name: fastgpt
|
||||
image: ghcr.io/labring/fastgpt:v4.8.11 # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.11 # 阿里云
|
||||
image: ghcr.io/labring/fastgpt:v4.8.14-milvus-fix # git
|
||||
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.14-milvus-fix # 阿里云
|
||||
ports:
|
||||
- 3000:3000
|
||||
networks:
|
||||
@@ -71,6 +71,8 @@ services:
|
||||
- sandbox
|
||||
restart: always
|
||||
environment:
|
||||
# 前端访问地址: http://localhost:3000
|
||||
- FE_DOMAIN=
|
||||
# root 密码,用户名为: root。如果需要修改 root 密码,直接修改这个环境变量,并重启即可。
|
||||
- DEFAULT_ROOT_PSW=1234
|
||||
# AI模型的API地址哦。务必加 /v1。这里默认填写了OneApi的访问地址。
|
||||
@@ -92,8 +94,6 @@ services:
|
||||
- MILVUS_TOKEN=zilliz_cloud_token
|
||||
# sandbox 地址
|
||||
- SANDBOX_URL=http://sandbox:3000
|
||||
# 前端地址: http://localhost:3000
|
||||
- FE_DOMAIN=
|
||||
# 日志等级: debug, info, warn, error
|
||||
- LOG_LEVEL=info
|
||||
- STORE_LOG_LEVEL=warn
|
||||
|
||||
@@ -10,7 +10,8 @@
|
||||
"postinstall": "sh ./scripts/postinstall.sh",
|
||||
"initIcon": "node ./scripts/icon/init.js",
|
||||
"previewIcon": "node ./scripts/icon/index.js",
|
||||
"api:gen": "tsc ./scripts/openapi/index.ts && node ./scripts/openapi/index.js && npx @redocly/cli build-docs ./scripts/openapi/openapi.json -o ./projects/app/public/openapi/index.html"
|
||||
"api:gen": "tsc ./scripts/openapi/index.ts && node ./scripts/openapi/index.js && npx @redocly/cli build-docs ./scripts/openapi/openapi.json -o ./projects/app/public/openapi/index.html",
|
||||
"create:i18n": "node ./scripts/i18n/index.js"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@chakra-ui/cli": "^2.4.1",
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { i18nT } from '../../../../web/i18n/utils';
|
||||
import { ErrType } from '../errorCode';
|
||||
|
||||
/* dataset: 501000 */
|
||||
@@ -9,9 +10,19 @@ export enum DatasetErrEnum {
|
||||
unAuthDatasetData = 'unAuthDatasetData',
|
||||
unAuthDatasetFile = 'unAuthDatasetFile',
|
||||
unLinkCollection = 'unLinkCollection',
|
||||
invalidVectorModelOrQAModel = 'invalidVectorModelOrQAModel'
|
||||
invalidVectorModelOrQAModel = 'invalidVectorModelOrQAModel',
|
||||
notSupportSync = 'notSupportSync',
|
||||
sameApiCollection = 'sameApiCollection'
|
||||
}
|
||||
const datasetErr = [
|
||||
{
|
||||
statusText: DatasetErrEnum.sameApiCollection,
|
||||
message: i18nT('dataset:same_api_collection')
|
||||
},
|
||||
{
|
||||
statusText: DatasetErrEnum.notSupportSync,
|
||||
message: i18nT('dataset:collection_not_support_sync')
|
||||
},
|
||||
{
|
||||
statusText: DatasetErrEnum.unExist,
|
||||
message: 'core.dataset.error.unExistDataset'
|
||||
|
||||
@@ -5,7 +5,8 @@ export enum UserErrEnum {
|
||||
unAuthUser = 'unAuthUser',
|
||||
unAuthRole = 'unAuthRole',
|
||||
binVisitor = 'binVisitor',
|
||||
balanceNotEnough = 'balanceNotEnough'
|
||||
balanceNotEnough = 'balanceNotEnough',
|
||||
unAuthSso = 'unAuthSso'
|
||||
}
|
||||
const errList = [
|
||||
{
|
||||
@@ -23,6 +24,10 @@ const errList = [
|
||||
{
|
||||
statusText: UserErrEnum.balanceNotEnough,
|
||||
message: i18nT('common:code_error.user_error.balance_not_enough')
|
||||
},
|
||||
{
|
||||
statusText: UserErrEnum.unAuthSso,
|
||||
message: i18nT('user:sso_auth_failed')
|
||||
}
|
||||
];
|
||||
export default errList.reduce((acc, cur, index) => {
|
||||
|
||||
@@ -12,5 +12,7 @@ export const fileImgs = [
|
||||
];
|
||||
|
||||
export function getFileIcon(name = '', defaultImg = 'file/fill/file') {
|
||||
return fileImgs.find((item) => new RegExp(item.suffix, 'gi').test(name))?.src || defaultImg;
|
||||
return (
|
||||
fileImgs.find((item) => new RegExp(`\.${item.suffix}`, 'gi').test(name))?.src || defaultImg
|
||||
);
|
||||
}
|
||||
|
||||
@@ -23,6 +23,11 @@ export const parseUrlToFileType = (url: string): UserChatItemValueItemType['file
|
||||
const parseUrl = new URL(url, 'https://locaohost:3000');
|
||||
|
||||
const filename = (() => {
|
||||
// Check base64 image
|
||||
if (url.startsWith('data:image/')) {
|
||||
const mime = url.split(',')[0].split(':')[1].split(';')[0];
|
||||
return `image.${mime.split('/')[1]}`;
|
||||
}
|
||||
// Old version file url: https://xxx.com/file/read?filename=xxx.pdf
|
||||
const filenameQuery = parseUrl.searchParams.get('filename');
|
||||
if (filenameQuery) return filenameQuery;
|
||||
|
||||
3
packages/global/common/file/type.d.ts
vendored
@@ -3,6 +3,7 @@ import { BucketNameEnum } from './constants';
|
||||
export type FileTokenQuery = {
|
||||
bucketName: `${BucketNameEnum}`;
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
uid: string; // tmbId/ share uid/ teamChat uid
|
||||
fileId: string;
|
||||
customExpireMinutes?: number;
|
||||
};
|
||||
|
||||
@@ -95,20 +95,23 @@ export const markdownProcess = async ({
|
||||
};
|
||||
|
||||
export const matchMdImgTextAndUpload = (text: string) => {
|
||||
const base64Regex = /"(data:image\/[^;]+;base64[^"]+)"/g;
|
||||
const base64Regex = /!\[([^\]]*)\]\((data:image\/[^;]+;base64[^)]+)\)/g;
|
||||
const imageList: ImageType[] = [];
|
||||
const images = Array.from(text.match(base64Regex) || []);
|
||||
for (const image of images) {
|
||||
|
||||
text = text.replace(base64Regex, (match, altText, base64Url) => {
|
||||
const uuid = `IMAGE_${getNanoid(12)}_IMAGE`;
|
||||
const mime = image.split(';')[0].split(':')[1];
|
||||
const base64 = image.split(',')[1];
|
||||
text = text.replace(image, uuid);
|
||||
const mime = base64Url.split(';')[0].split(':')[1];
|
||||
const base64 = base64Url.split(',')[1];
|
||||
|
||||
imageList.push({
|
||||
uuid,
|
||||
base64,
|
||||
mime
|
||||
});
|
||||
}
|
||||
|
||||
// 保持原有的 alt 文本,只替换 base64 部分
|
||||
return ``;
|
||||
});
|
||||
|
||||
return {
|
||||
text,
|
||||
|
||||
@@ -99,7 +99,7 @@ ${mdSplitString}
|
||||
5. 标点分割:重叠
|
||||
*/
|
||||
const commonSplit = (props: SplitProps): SplitResponse => {
|
||||
let { text = '', chunkLen, overlapRatio = 0.2, customReg = [] } = props;
|
||||
let { text = '', chunkLen, overlapRatio = 0.15, customReg = [] } = props;
|
||||
|
||||
const splitMarker = 'SPLIT_HERE_SPLIT_HERE';
|
||||
const codeBlockMarker = 'CODE_BLOCK_LINE_MARKER';
|
||||
@@ -113,6 +113,8 @@ const commonSplit = (props: SplitProps): SplitResponse => {
|
||||
text = text.replace(/(\r?\n|\r){3,}/g, '\n\n\n');
|
||||
|
||||
// The larger maxLen is, the next sentence is less likely to trigger splitting
|
||||
const markdownIndex = 4;
|
||||
const forbidOverlapIndex = 8;
|
||||
const stepReges: { reg: RegExp; maxLen: number }[] = [
|
||||
...customReg.map((text) => ({
|
||||
reg: new RegExp(`(${replaceRegChars(text)})`, 'g'),
|
||||
@@ -122,9 +124,11 @@ const commonSplit = (props: SplitProps): SplitResponse => {
|
||||
{ reg: /^(##\s[^\n]+\n)/gm, maxLen: chunkLen * 1.4 },
|
||||
{ reg: /^(###\s[^\n]+\n)/gm, maxLen: chunkLen * 1.6 },
|
||||
{ reg: /^(####\s[^\n]+\n)/gm, maxLen: chunkLen * 1.8 },
|
||||
{ reg: /^(#####\s[^\n]+\n)/gm, maxLen: chunkLen * 1.8 },
|
||||
|
||||
{ reg: /([\n]([`~]))/g, maxLen: chunkLen * 4 }, // code block
|
||||
{ reg: /([\n](?!\s*[\*\-|>0-9]))/g, maxLen: chunkLen * 2 }, // 增大块,尽可能保证它是一个完整的段落。 (?![\*\-|>`0-9]): markdown special char
|
||||
{ reg: /([\n](?=\s*[0-9]+\.))/g, maxLen: chunkLen * 2 }, // 增大块,尽可能保证它是一个完整的段落。 (?![\*\-|>`0-9]): markdown special char
|
||||
{ reg: /(\n{2,})/g, maxLen: chunkLen * 1.6 },
|
||||
{ reg: /([\n])/g, maxLen: chunkLen * 1.2 },
|
||||
// ------ There's no overlap on the top
|
||||
{ reg: /([。]|([a-zA-Z])\.\s)/g, maxLen: chunkLen * 1.2 },
|
||||
@@ -136,8 +140,10 @@ const commonSplit = (props: SplitProps): SplitResponse => {
|
||||
|
||||
const customRegLen = customReg.length;
|
||||
const checkIsCustomStep = (step: number) => step < customRegLen;
|
||||
const checkIsMarkdownSplit = (step: number) => step >= customRegLen && step <= 3 + customRegLen;
|
||||
const checkForbidOverlap = (step: number) => step <= 6 + customRegLen;
|
||||
const checkIsMarkdownSplit = (step: number) =>
|
||||
step >= customRegLen && step <= markdownIndex + customRegLen;
|
||||
+customReg.length;
|
||||
const checkForbidOverlap = (step: number) => step <= forbidOverlapIndex + customRegLen;
|
||||
|
||||
// if use markdown title split, Separate record title
|
||||
const getSplitTexts = ({ text, step }: { text: string; step: number }) => {
|
||||
@@ -176,7 +182,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
|
||||
title: matchTitle
|
||||
};
|
||||
})
|
||||
.filter((item) => item.text?.trim());
|
||||
.filter((item) => !!item.title || !!item.text?.trim());
|
||||
};
|
||||
|
||||
/* Gets the overlap at the end of a text as the beginning of the next block */
|
||||
@@ -231,7 +237,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
|
||||
// use slice-chunkLen to split text
|
||||
const chunks: string[] = [];
|
||||
for (let i = 0; i < text.length; i += chunkLen - overlapLen) {
|
||||
chunks.push(`${parentTitle}${text.slice(i, i + chunkLen)}`);
|
||||
chunks.push(text.slice(i, i + chunkLen));
|
||||
}
|
||||
return chunks;
|
||||
}
|
||||
@@ -241,7 +247,6 @@ const commonSplit = (props: SplitProps): SplitResponse => {
|
||||
|
||||
const maxLen = splitTexts.length > 1 ? stepReges[step].maxLen : chunkLen;
|
||||
const minChunkLen = chunkLen * 0.7;
|
||||
// console.log(splitTexts, stepReges[step].reg);
|
||||
|
||||
const chunks: string[] = [];
|
||||
for (let i = 0; i < splitTexts.length; i++) {
|
||||
@@ -249,12 +254,36 @@ const commonSplit = (props: SplitProps): SplitResponse => {
|
||||
|
||||
const lastTextLen = lastText.length;
|
||||
const currentText = item.text;
|
||||
const currentTextLen = currentText.length;
|
||||
const newText = lastText + currentText;
|
||||
const newTextLen = lastTextLen + currentTextLen;
|
||||
const newTextLen = newText.length;
|
||||
|
||||
// Markdown 模式下,会强制向下拆分最小块,并在最后一个标题时,给小块都补充上所有标题(包含父级标题)
|
||||
if (isMarkdownStep) {
|
||||
// split new Text, split chunks must will greater 1 (small lastText)
|
||||
const innerChunks = splitTextRecursively({
|
||||
text: newText,
|
||||
step: step + 1,
|
||||
lastText: '',
|
||||
parentTitle: parentTitle + item.title
|
||||
});
|
||||
|
||||
if (innerChunks.length === 0) {
|
||||
chunks.push(`${parentTitle}${item.title}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
chunks.push(
|
||||
...innerChunks.map(
|
||||
(chunk) =>
|
||||
step === markdownIndex + customRegLen ? `${parentTitle}${item.title}${chunk}` : chunk // 合并进 Markdown 分块时,需要补标题
|
||||
)
|
||||
);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// newText is too large(now, The lastText must be smaller than chunkLen)
|
||||
if (newTextLen > maxLen || isMarkdownStep) {
|
||||
if (newTextLen > maxLen) {
|
||||
// lastText greater minChunkLen, direct push it to chunks, not add to next chunk. (large lastText)
|
||||
if (lastTextLen > minChunkLen) {
|
||||
chunks.push(lastText);
|
||||
@@ -278,15 +307,6 @@ const commonSplit = (props: SplitProps): SplitResponse => {
|
||||
|
||||
if (!lastChunk) continue;
|
||||
|
||||
if (forbidConcat) {
|
||||
chunks.push(
|
||||
...innerChunks.map(
|
||||
(chunk) => (step === 3 + customRegLen ? `${parentTitle}${chunk}` : chunk) // 合并进 Markdown 分块时,需要补标题
|
||||
)
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
// last chunk is too small, concat it to lastText(next chunk start)
|
||||
if (lastChunk.length < minChunkLen) {
|
||||
chunks.push(...innerChunks.slice(0, -1));
|
||||
@@ -304,11 +324,11 @@ const commonSplit = (props: SplitProps): SplitResponse => {
|
||||
continue;
|
||||
}
|
||||
|
||||
// new text is small
|
||||
// New text is small
|
||||
|
||||
// Not overlap
|
||||
if (forbidConcat) {
|
||||
chunks.push(`${parentTitle}${item.title}${item.text}`);
|
||||
chunks.push(item.text);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
16
packages/global/common/system/types/index.d.ts
vendored
@@ -10,6 +10,14 @@ import type {
|
||||
} from '../../../core/ai/model.d';
|
||||
import { SubTypeEnum } from '../../../support/wallet/sub/constants';
|
||||
|
||||
export type NavbarItemType = {
|
||||
id: string;
|
||||
name: string;
|
||||
avatar: string;
|
||||
url: string;
|
||||
isActive: boolean;
|
||||
};
|
||||
|
||||
/* fastgpt main */
|
||||
export type FastGPTConfigFileType = {
|
||||
feConfigs: FastGPTFeConfigsType;
|
||||
@@ -38,7 +46,6 @@ export type FastGPTFeConfigsType = {
|
||||
concatMd?: string;
|
||||
|
||||
docUrl?: string;
|
||||
chatbotUrl?: string;
|
||||
openAPIDocUrl?: string;
|
||||
systemPluginCourseUrl?: string;
|
||||
appTemplateCourse?: string;
|
||||
@@ -56,7 +63,11 @@ export type FastGPTFeConfigsType = {
|
||||
github?: string;
|
||||
google?: string;
|
||||
wechat?: string;
|
||||
microsoft?: string;
|
||||
microsoft?: {
|
||||
clientId?: string;
|
||||
tenantId?: string;
|
||||
customButton?: string;
|
||||
};
|
||||
};
|
||||
limit?: {
|
||||
exportDatasetLimitMinutes?: number;
|
||||
@@ -70,6 +81,7 @@ export type FastGPTFeConfigsType = {
|
||||
uploadFileMaxAmount?: number;
|
||||
uploadFileMaxSize?: number;
|
||||
lafEnv?: string;
|
||||
navbarItems?: NavbarItemType[];
|
||||
};
|
||||
|
||||
export type SystemEnvType = {
|
||||
|
||||
1
packages/global/core/ai/type.d.ts
vendored
@@ -49,6 +49,7 @@ export type ChatCompletionMessageParam = (
|
||||
| CustomChatCompletionAssistantMessageParam
|
||||
) & {
|
||||
dataId?: string;
|
||||
hideInUI?: boolean;
|
||||
};
|
||||
export type SdkChatCompletionMessageParam = SdkChatCompletionMessageParam;
|
||||
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
import { AppTTSConfigType, AppFileSelectConfigType, AppWhisperConfigType } from './type';
|
||||
import {
|
||||
AppTTSConfigType,
|
||||
AppFileSelectConfigType,
|
||||
AppWhisperConfigType,
|
||||
AppAutoExecuteConfigType
|
||||
} from './type';
|
||||
|
||||
export enum AppTypeEnum {
|
||||
folder = 'folder',
|
||||
@@ -12,6 +17,11 @@ export const AppFolderTypeList = [AppTypeEnum.folder, AppTypeEnum.httpPlugin];
|
||||
|
||||
export const defaultTTSConfig: AppTTSConfigType = { type: 'web' };
|
||||
|
||||
export const defaultAutoExecuteConfig: AppAutoExecuteConfigType = {
|
||||
open: false,
|
||||
defaultPrompt: ''
|
||||
};
|
||||
|
||||
export const defaultWhisperConfig: AppWhisperConfigType = {
|
||||
open: false,
|
||||
autoSend: false,
|
||||
@@ -38,3 +48,5 @@ export enum AppTemplateTypeEnum {
|
||||
roleplay = 'roleplay',
|
||||
officeServices = 'office-services'
|
||||
}
|
||||
|
||||
export const defaultDatasetMaxTokens = 16000;
|
||||
|
||||
12
packages/global/core/app/type.d.ts
vendored
@@ -96,6 +96,7 @@ export type AppSimpleEditFormType = {
|
||||
export type AppChatConfigType = {
|
||||
welcomeText?: string;
|
||||
variables?: VariableItemType[];
|
||||
autoExecute?: AppAutoExecuteConfigType;
|
||||
questionGuide?: boolean;
|
||||
ttsConfig?: AppTTSConfigType;
|
||||
whisperConfig?: AppWhisperConfigType;
|
||||
@@ -158,9 +159,20 @@ export type AppScheduledTriggerConfigType = {
|
||||
timezone: string;
|
||||
defaultPrompt: string;
|
||||
};
|
||||
// auto execute
|
||||
export type AppAutoExecuteConfigType = {
|
||||
open: boolean;
|
||||
defaultPrompt: string;
|
||||
};
|
||||
// File
|
||||
export type AppFileSelectConfigType = {
|
||||
canSelectFile: boolean;
|
||||
canSelectImg: boolean;
|
||||
maxFiles: number;
|
||||
};
|
||||
|
||||
export type SystemPluginListItemType = {
|
||||
_id: string;
|
||||
name: string;
|
||||
avatar: string;
|
||||
};
|
||||
|
||||
@@ -76,6 +76,7 @@ export const chats2GPTMessages = ({
|
||||
|
||||
results.push({
|
||||
dataId,
|
||||
hideInUI: item.hideInUI,
|
||||
role: ChatCompletionRequestMessageRoleEnum.User,
|
||||
content: simpleUserContentPart(value)
|
||||
});
|
||||
@@ -318,6 +319,7 @@ export const GPTMessages2Chats = (
|
||||
return {
|
||||
dataId: item.dataId,
|
||||
obj,
|
||||
hideInUI: item.hideInUI,
|
||||
value
|
||||
} as ChatItemType;
|
||||
})
|
||||
|
||||
8
packages/global/core/chat/api.d.ts
vendored
@@ -1,11 +1,9 @@
|
||||
export type UpdateChatFeedbackProps = {
|
||||
import { OutLinkChatAuthProps } from '../../support/permission/chat';
|
||||
|
||||
export type UpdateChatFeedbackProps = OutLinkChatAuthProps & {
|
||||
appId: string;
|
||||
chatId: string;
|
||||
dataId: string;
|
||||
shareId?: string;
|
||||
teamId?: string;
|
||||
teamToken?: string;
|
||||
outLinkUid?: string;
|
||||
userBadFeedback?: string;
|
||||
userGoodFeedback?: string;
|
||||
};
|
||||
|
||||
1
packages/global/core/chat/type.d.ts
vendored
@@ -56,6 +56,7 @@ export type UserChatItemValueItemType = {
|
||||
export type UserChatItemType = {
|
||||
obj: ChatRoleEnum.Human;
|
||||
value: UserChatItemValueItemType[];
|
||||
hideInUI?: boolean;
|
||||
};
|
||||
export type SystemChatItemValueItemType = {
|
||||
type: ChatItemValueTypeEnum.text;
|
||||
|
||||
@@ -101,7 +101,7 @@ export const filterPublicNodeResponseData = ({
|
||||
for (let key in item) {
|
||||
if (key === 'toolDetail' || key === 'pluginDetail') {
|
||||
// @ts-ignore
|
||||
obj[key] = filterPublicNodeResponseData({ flowResponses: item[key] });
|
||||
obj[key] = filterPublicNodeResponseData({ flowResponses: item[key], responseDetail });
|
||||
} else if (filedList.includes(key)) {
|
||||
// @ts-ignore
|
||||
obj[key] = item[key];
|
||||
|
||||
18
packages/global/core/dataset/api.d.ts
vendored
@@ -16,6 +16,7 @@ export type DatasetUpdateBody = {
|
||||
websiteConfig?: DatasetSchemaType['websiteConfig'];
|
||||
externalReadUrl?: DatasetSchemaType['externalReadUrl'];
|
||||
defaultPermission?: DatasetSchemaType['defaultPermission'];
|
||||
apiServer?: DatasetSchemaType['apiServer'];
|
||||
};
|
||||
|
||||
/* ================= collection ===================== */
|
||||
@@ -34,15 +35,18 @@ export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType &
|
||||
name: string;
|
||||
type: DatasetCollectionTypeEnum;
|
||||
|
||||
tags?: string[];
|
||||
|
||||
fileId?: string;
|
||||
rawLink?: string;
|
||||
externalFileId?: string;
|
||||
|
||||
externalFileUrl?: string;
|
||||
apiFileId?: string;
|
||||
|
||||
rawTextLength?: number;
|
||||
hashRawText?: string;
|
||||
|
||||
tags?: string[];
|
||||
|
||||
createTime?: Date;
|
||||
};
|
||||
|
||||
export type ApiCreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
|
||||
@@ -56,9 +60,17 @@ export type TextCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams
|
||||
export type LinkCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
|
||||
link: string;
|
||||
};
|
||||
export type ApiDatasetCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
|
||||
name: string;
|
||||
apiFileId: string;
|
||||
};
|
||||
export type FileIdCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
|
||||
fileId: string;
|
||||
};
|
||||
export type reTrainingDatasetFileCollectionParams = DatasetCollectionChunkMetadataType & {
|
||||
datasetId: string;
|
||||
collectionId: string;
|
||||
};
|
||||
export type FileCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
|
||||
fileMetadata?: Record<string, any>;
|
||||
collectionMetadata?: Record<string, any>;
|
||||
|
||||
24
packages/global/core/dataset/apiDataset.d.ts
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
export type APIFileItem = {
|
||||
id: string;
|
||||
parentId: string | null;
|
||||
name: string;
|
||||
type: 'file' | 'folder';
|
||||
updateTime: Date;
|
||||
createTime: Date;
|
||||
};
|
||||
|
||||
export type APIFileServer = {
|
||||
baseUrl: string;
|
||||
authorization: string;
|
||||
};
|
||||
|
||||
export type APIFileListResponse = APIFileItem[];
|
||||
|
||||
export type APIFileContentResponse = {
|
||||
content?: string;
|
||||
previewUrl?: string;
|
||||
};
|
||||
|
||||
export type APIFileReadResponse = {
|
||||
url: string;
|
||||
};
|
||||
@@ -9,7 +9,8 @@ export const getCollectionSourceData = (
|
||||
collection?.fileId ||
|
||||
collection?.rawLink ||
|
||||
collection?.externalFileId ||
|
||||
collection?.externalFileUrl,
|
||||
collection?.externalFileUrl ||
|
||||
collection?.apiFileId,
|
||||
sourceName: collection?.name || ''
|
||||
};
|
||||
};
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
import { i18nT } from '../../../web/i18n/utils';
|
||||
|
||||
/* ------------ dataset -------------- */
|
||||
export enum DatasetTypeEnum {
|
||||
folder = 'folder',
|
||||
dataset = 'dataset',
|
||||
websiteDataset = 'websiteDataset', // depp link
|
||||
externalFile = 'externalFile'
|
||||
externalFile = 'externalFile',
|
||||
apiDataset = 'apiDataset'
|
||||
}
|
||||
export const DatasetTypeMap = {
|
||||
[DatasetTypeEnum.folder]: {
|
||||
@@ -25,6 +28,11 @@ export const DatasetTypeMap = {
|
||||
icon: 'core/dataset/externalDatasetOutline',
|
||||
label: 'external_file',
|
||||
collectionLabel: 'common.File'
|
||||
},
|
||||
[DatasetTypeEnum.apiDataset]: {
|
||||
icon: 'core/dataset/externalDatasetOutline',
|
||||
label: 'api_file',
|
||||
collectionLabel: 'common.File'
|
||||
}
|
||||
};
|
||||
|
||||
@@ -34,10 +42,10 @@ export enum DatasetStatusEnum {
|
||||
}
|
||||
export const DatasetStatusMap = {
|
||||
[DatasetStatusEnum.active]: {
|
||||
label: 'core.dataset.status.active'
|
||||
label: i18nT('common:core.dataset.status.active')
|
||||
},
|
||||
[DatasetStatusEnum.syncing]: {
|
||||
label: 'core.dataset.status.syncing'
|
||||
label: i18nT('common:core.dataset.status.syncing')
|
||||
}
|
||||
};
|
||||
|
||||
@@ -48,23 +56,27 @@ export enum DatasetCollectionTypeEnum {
|
||||
|
||||
file = 'file',
|
||||
link = 'link', // one link
|
||||
externalFile = 'externalFile'
|
||||
externalFile = 'externalFile',
|
||||
apiFile = 'apiFile'
|
||||
}
|
||||
export const DatasetCollectionTypeMap = {
|
||||
[DatasetCollectionTypeEnum.folder]: {
|
||||
name: 'core.dataset.folder'
|
||||
name: i18nT('common:core.dataset.folder')
|
||||
},
|
||||
[DatasetCollectionTypeEnum.file]: {
|
||||
name: 'core.dataset.file'
|
||||
name: i18nT('common:core.dataset.file')
|
||||
},
|
||||
[DatasetCollectionTypeEnum.externalFile]: {
|
||||
name: 'core.dataset.externalFile'
|
||||
name: i18nT('common:core.dataset.externalFile')
|
||||
},
|
||||
[DatasetCollectionTypeEnum.link]: {
|
||||
name: 'core.dataset.link'
|
||||
name: i18nT('common:core.dataset.link')
|
||||
},
|
||||
[DatasetCollectionTypeEnum.virtual]: {
|
||||
name: 'core.dataset.Manual collection'
|
||||
name: i18nT('common:core.dataset.Manual collection')
|
||||
},
|
||||
[DatasetCollectionTypeEnum.apiFile]: {
|
||||
name: i18nT('common:core.dataset.apiFile')
|
||||
}
|
||||
};
|
||||
|
||||
@@ -74,10 +86,10 @@ export enum DatasetCollectionSyncResultEnum {
|
||||
}
|
||||
export const DatasetCollectionSyncResultMap = {
|
||||
[DatasetCollectionSyncResultEnum.sameRaw]: {
|
||||
label: 'core.dataset.collection.sync.result.sameRaw'
|
||||
label: i18nT('common:core.dataset.collection.sync.result.sameRaw')
|
||||
},
|
||||
[DatasetCollectionSyncResultEnum.success]: {
|
||||
label: 'core.dataset.collection.sync.result.success'
|
||||
label: i18nT('common:core.dataset.collection.sync.result.success')
|
||||
}
|
||||
};
|
||||
|
||||
@@ -89,7 +101,9 @@ export enum ImportDataSourceEnum {
|
||||
fileLink = 'fileLink',
|
||||
fileCustom = 'fileCustom',
|
||||
csvTable = 'csvTable',
|
||||
externalFile = 'externalFile'
|
||||
externalFile = 'externalFile',
|
||||
apiDataset = 'apiDataset',
|
||||
reTraining = 'reTraining'
|
||||
}
|
||||
|
||||
export enum TrainingModeEnum {
|
||||
@@ -100,18 +114,18 @@ export enum TrainingModeEnum {
|
||||
|
||||
export const TrainingTypeMap = {
|
||||
[TrainingModeEnum.chunk]: {
|
||||
label: 'core.dataset.training.Chunk mode',
|
||||
tooltip: 'core.dataset.import.Chunk Split Tip',
|
||||
label: i18nT('common:core.dataset.training.Chunk mode'),
|
||||
tooltip: i18nT('common:core.dataset.import.Chunk Split Tip'),
|
||||
openSource: true
|
||||
},
|
||||
[TrainingModeEnum.auto]: {
|
||||
label: 'core.dataset.training.Auto mode',
|
||||
tooltip: 'core.dataset.training.Auto mode Tip',
|
||||
label: i18nT('common:core.dataset.training.Auto mode'),
|
||||
tooltip: i18nT('common:core.dataset.training.Auto mode Tip'),
|
||||
openSource: false
|
||||
},
|
||||
[TrainingModeEnum.qa]: {
|
||||
label: 'core.dataset.training.QA mode',
|
||||
tooltip: 'core.dataset.import.QA Import Tip',
|
||||
label: i18nT('common:core.dataset.training.QA mode'),
|
||||
tooltip: i18nT('common:core.dataset.import.QA Import Tip'),
|
||||
openSource: true
|
||||
}
|
||||
};
|
||||
@@ -126,20 +140,20 @@ export enum DatasetSearchModeEnum {
|
||||
export const DatasetSearchModeMap = {
|
||||
[DatasetSearchModeEnum.embedding]: {
|
||||
icon: 'core/dataset/modeEmbedding',
|
||||
title: 'core.dataset.search.mode.embedding',
|
||||
desc: 'core.dataset.search.mode.embedding desc',
|
||||
title: i18nT('common:core.dataset.search.mode.embedding'),
|
||||
desc: i18nT('common:core.dataset.search.mode.embedding desc'),
|
||||
value: DatasetSearchModeEnum.embedding
|
||||
},
|
||||
[DatasetSearchModeEnum.fullTextRecall]: {
|
||||
icon: 'core/dataset/fullTextRecall',
|
||||
title: 'core.dataset.search.mode.fullTextRecall',
|
||||
desc: 'core.dataset.search.mode.fullTextRecall desc',
|
||||
title: i18nT('common:core.dataset.search.mode.fullTextRecall'),
|
||||
desc: i18nT('common:core.dataset.search.mode.fullTextRecall desc'),
|
||||
value: DatasetSearchModeEnum.fullTextRecall
|
||||
},
|
||||
[DatasetSearchModeEnum.mixedRecall]: {
|
||||
icon: 'core/dataset/mixedRecall',
|
||||
title: 'core.dataset.search.mode.mixedRecall',
|
||||
desc: 'core.dataset.search.mode.mixedRecall desc',
|
||||
title: i18nT('common:core.dataset.search.mode.mixedRecall'),
|
||||
desc: i18nT('common:core.dataset.search.mode.mixedRecall desc'),
|
||||
value: DatasetSearchModeEnum.mixedRecall
|
||||
}
|
||||
};
|
||||
@@ -152,23 +166,23 @@ export enum SearchScoreTypeEnum {
|
||||
}
|
||||
export const SearchScoreTypeMap = {
|
||||
[SearchScoreTypeEnum.embedding]: {
|
||||
label: 'core.dataset.search.score.embedding',
|
||||
desc: 'core.dataset.search.score.embedding desc',
|
||||
label: i18nT('common:core.dataset.search.score.embedding'),
|
||||
desc: i18nT('common:core.dataset.search.score.embedding desc'),
|
||||
showScore: true
|
||||
},
|
||||
[SearchScoreTypeEnum.fullText]: {
|
||||
label: 'core.dataset.search.score.fullText',
|
||||
desc: 'core.dataset.search.score.fullText desc',
|
||||
label: i18nT('common:core.dataset.search.score.fullText'),
|
||||
desc: i18nT('common:core.dataset.search.score.fullText desc'),
|
||||
showScore: false
|
||||
},
|
||||
[SearchScoreTypeEnum.reRank]: {
|
||||
label: 'core.dataset.search.score.reRank',
|
||||
desc: 'core.dataset.search.score.reRank desc',
|
||||
label: i18nT('common:core.dataset.search.score.reRank'),
|
||||
desc: i18nT('common:core.dataset.search.score.reRank desc'),
|
||||
showScore: true
|
||||
},
|
||||
[SearchScoreTypeEnum.rrf]: {
|
||||
label: 'core.dataset.search.score.rrf',
|
||||
desc: 'core.dataset.search.score.rrf desc',
|
||||
label: i18nT('common:core.dataset.search.score.rrf'),
|
||||
desc: i18nT('common:core.dataset.search.score.rrf desc'),
|
||||
showScore: false
|
||||
}
|
||||
};
|
||||
@@ -180,5 +194,7 @@ export const LinkCollectionIcon = 'common/linkBlue';
|
||||
export enum DatasetSourceReadTypeEnum {
|
||||
fileLocal = 'fileLocal',
|
||||
link = 'link',
|
||||
externalFile = 'externalFile'
|
||||
externalFile = 'externalFile',
|
||||
apiFile = 'apiFile',
|
||||
reTraining = 'reTraining'
|
||||
}
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
import { DatasetSourceReadTypeEnum, ImportDataSourceEnum } from './constants';
|
||||
|
||||
export const importType2ReadType = (type: ImportDataSourceEnum) => {
|
||||
if (type === ImportDataSourceEnum.csvTable || type === ImportDataSourceEnum.fileLocal) {
|
||||
return DatasetSourceReadTypeEnum.fileLocal;
|
||||
}
|
||||
if (type === ImportDataSourceEnum.fileLink) {
|
||||
return DatasetSourceReadTypeEnum.link;
|
||||
}
|
||||
if (type === ImportDataSourceEnum.externalFile) {
|
||||
return DatasetSourceReadTypeEnum.externalFile;
|
||||
}
|
||||
return DatasetSourceReadTypeEnum.link;
|
||||
};
|
||||
8
packages/global/core/dataset/type.d.ts
vendored
@@ -10,6 +10,7 @@ import {
|
||||
} from './constants';
|
||||
import { DatasetPermission } from '../../support/permission/dataset/controller';
|
||||
import { Permission } from '../../support/permission/controller';
|
||||
import { APIFileServer } from './apiDataset';
|
||||
|
||||
export type DatasetSchemaType = {
|
||||
_id: string;
|
||||
@@ -30,10 +31,11 @@ export type DatasetSchemaType = {
|
||||
url: string;
|
||||
selector: string;
|
||||
};
|
||||
externalReadUrl?: string;
|
||||
inheritPermission: boolean;
|
||||
apiServer?: APIFileServer;
|
||||
|
||||
// abandon
|
||||
externalReadUrl?: string;
|
||||
defaultPermission?: number;
|
||||
};
|
||||
|
||||
@@ -64,6 +66,7 @@ export type DatasetCollectionSchemaType = {
|
||||
rawTextLength?: number;
|
||||
hashRawText?: string;
|
||||
externalFileUrl?: string; // external import url
|
||||
apiFileId?: string; // api file id
|
||||
metadata?: {
|
||||
webPageSelector?: string;
|
||||
relatedImgId?: string; // The id of the associated image collections
|
||||
@@ -204,7 +207,8 @@ export type DatasetFileSchema = {
|
||||
contentType: string;
|
||||
metadata: {
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
tmbId?: string;
|
||||
uid: string;
|
||||
encoding?: string;
|
||||
};
|
||||
};
|
||||
|
||||
3
packages/global/core/plugin/type.d.ts
vendored
@@ -39,6 +39,7 @@ export type PluginTemplateType = PluginRuntimeType & {
|
||||
};
|
||||
|
||||
export type PluginRuntimeType = {
|
||||
id: string;
|
||||
teamId?: string;
|
||||
name: string;
|
||||
avatar: string;
|
||||
@@ -46,4 +47,6 @@ export type PluginRuntimeType = {
|
||||
isTool?: boolean;
|
||||
nodes: StoreNodeItemType[];
|
||||
edges: StoreEdgeItemType[];
|
||||
currentCost?: number;
|
||||
hasTokenFee?: boolean;
|
||||
};
|
||||
|
||||
@@ -106,6 +106,7 @@ export enum NodeInputKeyEnum {
|
||||
variables = 'variables',
|
||||
scheduleTrigger = 'scheduleTrigger',
|
||||
chatInputGuide = 'chatInputGuide',
|
||||
autoExecute = 'autoExecute',
|
||||
|
||||
// plugin config
|
||||
instruction = 'instruction',
|
||||
|
||||
11
packages/global/core/workflow/runtime/type.d.ts
vendored
@@ -82,17 +82,6 @@ export type RuntimeNodeItemType = {
|
||||
version: string;
|
||||
};
|
||||
|
||||
export type PluginRuntimeType = {
|
||||
id: string;
|
||||
teamId?: string;
|
||||
name: string;
|
||||
avatar: string;
|
||||
showStatus?: boolean;
|
||||
currentCost?: number;
|
||||
nodes: StoreNodeItemType[];
|
||||
edges: StoreEdgeItemType[];
|
||||
};
|
||||
|
||||
export type RuntimeEdgeItemType = StoreEdgeItemType & {
|
||||
status: 'waiting' | 'active' | 'skipped';
|
||||
};
|
||||
|
||||
@@ -283,68 +283,47 @@ export const getReferenceVariableValue = ({
|
||||
export function replaceEditorVariable({
|
||||
text,
|
||||
nodes,
|
||||
variables,
|
||||
runningNode
|
||||
variables
|
||||
}: {
|
||||
text: any;
|
||||
nodes: RuntimeNodeItemType[];
|
||||
variables: Record<string, any>; // global variables
|
||||
runningNode: RuntimeNodeItemType;
|
||||
}) {
|
||||
if (typeof text !== 'string') return text;
|
||||
|
||||
const globalVariables = Object.keys(variables).map((key) => {
|
||||
return {
|
||||
nodeId: VARIABLE_NODE_ID,
|
||||
id: key,
|
||||
value: variables[key]
|
||||
};
|
||||
});
|
||||
const variablePattern = /\{\{\$([^.]+)\.([^$]+)\$\}\}/g;
|
||||
const matches = [...text.matchAll(variablePattern)];
|
||||
if (matches.length === 0) return text;
|
||||
|
||||
// Upstream node outputs
|
||||
const nodeVariables = nodes
|
||||
.map((node) => {
|
||||
return node.outputs.map((output) => {
|
||||
return {
|
||||
nodeId: node.nodeId,
|
||||
id: output.id,
|
||||
value: output.value
|
||||
};
|
||||
});
|
||||
})
|
||||
.flat();
|
||||
matches.forEach((match) => {
|
||||
const nodeId = match[1];
|
||||
const id = match[2];
|
||||
|
||||
// Get runningNode inputs(Will be replaced with reference)
|
||||
const customInputs = runningNode.inputs.flatMap((item) => {
|
||||
return [
|
||||
{
|
||||
id: item.key,
|
||||
value: getReferenceVariableValue({
|
||||
value: item.value,
|
||||
nodes,
|
||||
variables
|
||||
}),
|
||||
nodeId: runningNode.nodeId
|
||||
const variableVal = (() => {
|
||||
if (nodeId === VARIABLE_NODE_ID) {
|
||||
return variables[id];
|
||||
}
|
||||
];
|
||||
});
|
||||
// Find upstream node input/output
|
||||
const node = nodes.find((node) => node.nodeId === nodeId);
|
||||
if (!node) return;
|
||||
|
||||
const allVariables = [...globalVariables, ...nodeVariables, ...customInputs];
|
||||
const output = node.outputs.find((output) => output.id === id);
|
||||
if (output) return output.value;
|
||||
|
||||
// Replace {{$xxx.xxx$}} to value
|
||||
for (const key in allVariables) {
|
||||
const variable = allVariables[key];
|
||||
const val = variable.value;
|
||||
const formatVal = (() => {
|
||||
if (val === undefined) return '';
|
||||
if (val === null) return 'null';
|
||||
|
||||
return typeof val === 'object' ? JSON.stringify(val) : String(val);
|
||||
const input = node.inputs.find((input) => input.key === id);
|
||||
if (input) return getReferenceVariableValue({ value: input.value, nodes, variables });
|
||||
})();
|
||||
|
||||
const regex = new RegExp(`\\{\\{\\$(${variable.nodeId}\\.${variable.id})\\$\\}\\}`, 'g');
|
||||
const formatVal = (() => {
|
||||
if (variableVal === undefined) return 'undefined';
|
||||
if (variableVal === null) return 'null';
|
||||
return typeof variableVal === 'object' ? JSON.stringify(variableVal) : String(variableVal);
|
||||
})();
|
||||
|
||||
const regex = new RegExp(`\\{\\{\\$(${nodeId}\\.${id})\\$\\}\\}`, 'g');
|
||||
text = text.replace(regex, formatVal);
|
||||
}
|
||||
});
|
||||
|
||||
return text || '';
|
||||
}
|
||||
|
||||
|
||||
@@ -63,15 +63,20 @@ export type TemplateMarketListItemType = {
|
||||
// system plugin
|
||||
export type SystemPluginTemplateItemType = WorkflowTemplateType & {
|
||||
customWorkflow?: string;
|
||||
associatedPluginId?: string;
|
||||
userGuide?: string;
|
||||
|
||||
templateType: FlowNodeTemplateTypeEnum;
|
||||
templateType: string;
|
||||
isTool?: boolean;
|
||||
|
||||
// commercial plugin config
|
||||
originCost: number; // n points/one time
|
||||
currentCost: number;
|
||||
hasTokenFee: boolean;
|
||||
pluginOrder: number;
|
||||
|
||||
isActive?: boolean;
|
||||
isOfficial?: boolean;
|
||||
inputConfig?: {
|
||||
// Render config input form. Find the corresponding node and replace the variable directly
|
||||
key: string;
|
||||
|
||||
8
packages/global/core/workflow/type/node.d.ts
vendored
@@ -54,7 +54,7 @@ type HandleType = {
|
||||
// system template
|
||||
export type FlowNodeTemplateType = FlowNodeCommonType & {
|
||||
id: string; // node id, unique
|
||||
templateType: FlowNodeTemplateTypeEnum;
|
||||
templateType: string;
|
||||
|
||||
// show handle
|
||||
sourceHandle?: HandleType;
|
||||
@@ -76,7 +76,7 @@ export type NodeTemplateListItemType = {
|
||||
flowNodeType: FlowNodeTypeEnum; // render node card
|
||||
parentId?: ParentIdType;
|
||||
isFolder?: boolean;
|
||||
templateType: FlowNodeTemplateTypeEnum;
|
||||
templateType: string;
|
||||
avatar?: string;
|
||||
name: string;
|
||||
intro?: string; // template list intro
|
||||
@@ -85,10 +85,12 @@ export type NodeTemplateListItemType = {
|
||||
author?: string;
|
||||
unique?: boolean; // 唯一的
|
||||
currentCost?: number; // 当前积分消耗
|
||||
hasTokenFee?: boolean; // 是否配置积分
|
||||
instructions?: string; // 使用说明
|
||||
};
|
||||
|
||||
export type NodeTemplateListType = {
|
||||
type: FlowNodeTemplateTypeEnum;
|
||||
type: string;
|
||||
label: string;
|
||||
list: NodeTemplateListItemType[];
|
||||
}[];
|
||||
|
||||
@@ -25,10 +25,12 @@ import type {
|
||||
AppWhisperConfigType,
|
||||
AppScheduledTriggerConfigType,
|
||||
ChatInputGuideConfigType,
|
||||
AppChatConfigType
|
||||
AppChatConfigType,
|
||||
AppAutoExecuteConfigType
|
||||
} from '../app/type';
|
||||
import { EditorVariablePickerType } from '../../../web/components/common/Textarea/PromptEditor/type';
|
||||
import {
|
||||
defaultAutoExecuteConfig,
|
||||
defaultChatInputGuideConfig,
|
||||
defaultTTSConfig,
|
||||
defaultWhisperConfig
|
||||
@@ -69,34 +71,37 @@ export const getGuideModule = (modules: StoreNodeItemType[]) =>
|
||||
);
|
||||
export const splitGuideModule = (guideModules?: StoreNodeItemType) => {
|
||||
const welcomeText: string =
|
||||
guideModules?.inputs?.find((item) => item.key === NodeInputKeyEnum.welcomeText)?.value || '';
|
||||
guideModules?.inputs?.find((item) => item.key === NodeInputKeyEnum.welcomeText)?.value ?? '';
|
||||
|
||||
const variables: VariableItemType[] =
|
||||
guideModules?.inputs.find((item) => item.key === NodeInputKeyEnum.variables)?.value || [];
|
||||
guideModules?.inputs.find((item) => item.key === NodeInputKeyEnum.variables)?.value ?? [];
|
||||
|
||||
const questionGuide: boolean =
|
||||
!!guideModules?.inputs?.find((item) => item.key === NodeInputKeyEnum.questionGuide)?.value ||
|
||||
!!guideModules?.inputs?.find((item) => item.key === NodeInputKeyEnum.questionGuide)?.value ??
|
||||
false;
|
||||
|
||||
const ttsConfig: AppTTSConfigType =
|
||||
guideModules?.inputs?.find((item) => item.key === NodeInputKeyEnum.tts)?.value ||
|
||||
guideModules?.inputs?.find((item) => item.key === NodeInputKeyEnum.tts)?.value ??
|
||||
defaultTTSConfig;
|
||||
|
||||
const whisperConfig: AppWhisperConfigType =
|
||||
guideModules?.inputs?.find((item) => item.key === NodeInputKeyEnum.whisper)?.value ||
|
||||
guideModules?.inputs?.find((item) => item.key === NodeInputKeyEnum.whisper)?.value ??
|
||||
defaultWhisperConfig;
|
||||
|
||||
const scheduledTriggerConfig: AppScheduledTriggerConfigType = guideModules?.inputs?.find(
|
||||
(item) => item.key === NodeInputKeyEnum.scheduleTrigger
|
||||
)?.value;
|
||||
const scheduledTriggerConfig: AppScheduledTriggerConfigType =
|
||||
guideModules?.inputs?.find((item) => item.key === NodeInputKeyEnum.scheduleTrigger)?.value ??
|
||||
undefined;
|
||||
|
||||
const chatInputGuide: ChatInputGuideConfigType =
|
||||
guideModules?.inputs?.find((item) => item.key === NodeInputKeyEnum.chatInputGuide)?.value ||
|
||||
guideModules?.inputs?.find((item) => item.key === NodeInputKeyEnum.chatInputGuide)?.value ??
|
||||
defaultChatInputGuideConfig;
|
||||
|
||||
// plugin
|
||||
const instruction: string =
|
||||
guideModules?.inputs?.find((item) => item.key === NodeInputKeyEnum.instruction)?.value || '';
|
||||
guideModules?.inputs?.find((item) => item.key === NodeInputKeyEnum.instruction)?.value ?? '';
|
||||
|
||||
const autoExecute: AppAutoExecuteConfigType =
|
||||
guideModules?.inputs?.find((item) => item.key === NodeInputKeyEnum.autoExecute)?.value ??
|
||||
defaultAutoExecuteConfig;
|
||||
|
||||
return {
|
||||
welcomeText,
|
||||
@@ -106,7 +111,8 @@ export const splitGuideModule = (guideModules?: StoreNodeItemType) => {
|
||||
whisperConfig,
|
||||
scheduledTriggerConfig,
|
||||
chatInputGuide,
|
||||
instruction
|
||||
instruction,
|
||||
autoExecute
|
||||
};
|
||||
};
|
||||
|
||||
@@ -132,7 +138,8 @@ export const getAppChatConfig = ({
|
||||
whisperConfig,
|
||||
scheduledTriggerConfig,
|
||||
chatInputGuide,
|
||||
instruction
|
||||
instruction,
|
||||
autoExecute
|
||||
} = splitGuideModule(systemConfigNode);
|
||||
|
||||
const config: AppChatConfigType = {
|
||||
@@ -142,6 +149,7 @@ export const getAppChatConfig = ({
|
||||
scheduledTriggerConfig,
|
||||
chatInputGuide,
|
||||
instruction,
|
||||
autoExecute,
|
||||
...chatConfig,
|
||||
variables: storeVariables ?? chatConfig?.variables ?? variables,
|
||||
welcomeText: storeWelcomeText ?? chatConfig?.welcomeText ?? welcomeText
|
||||
|
||||
@@ -15,5 +15,6 @@ export enum OAuthEnum {
|
||||
github = 'github',
|
||||
google = 'google',
|
||||
wechat = 'wechat',
|
||||
microsoft = 'microsoft'
|
||||
microsoft = 'microsoft',
|
||||
sso = 'sso'
|
||||
}
|
||||
|
||||
1
packages/global/support/wallet/sub/api.d.ts
vendored
@@ -12,6 +12,7 @@ export type StandardSubPlanUpdateResponse = {
|
||||
payPrice?: number;
|
||||
planPrice: number;
|
||||
planPointPrice: number;
|
||||
name?: string;
|
||||
|
||||
currentMode: `${SubModeEnum}`;
|
||||
nextMode: `${SubModeEnum}`;
|
||||
|
||||
2
packages/global/support/wallet/sub/type.d.ts
vendored
@@ -2,6 +2,7 @@ import { StandardSubLevelEnum, SubModeEnum, SubTypeEnum } from './constants';
|
||||
|
||||
// Content of plan
|
||||
export type TeamStandardSubPlanItemType = {
|
||||
name?: string;
|
||||
price: number; // read price / month
|
||||
pointPrice: number; // read price/ one thousand
|
||||
totalPoints: number; // n
|
||||
@@ -24,6 +25,7 @@ export type StandSubPlanLevelMapType = Record<
|
||||
|
||||
export type SubPlanType = {
|
||||
[SubTypeEnum.standard]: StandSubPlanLevelMapType;
|
||||
planDescriptionUrl?: string;
|
||||
[SubTypeEnum.extraDatasetSize]: {
|
||||
price: number;
|
||||
};
|
||||
|
||||
@@ -40,7 +40,8 @@ export const getCommunityPlugins = () => {
|
||||
id: `${PluginSourceEnum.community}-${name}`,
|
||||
isFolder,
|
||||
parentId,
|
||||
isActive: true
|
||||
isActive: true,
|
||||
isOfficial: true
|
||||
};
|
||||
});
|
||||
};
|
||||
|
||||
@@ -4,6 +4,7 @@ import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
|
||||
type Props = {
|
||||
apikey: string;
|
||||
HTMLtable: boolean;
|
||||
files: string[];
|
||||
};
|
||||
|
||||
@@ -14,7 +15,88 @@ type Response = Promise<{
|
||||
error?: Record<string, any>;
|
||||
}>;
|
||||
|
||||
const main = async ({ apikey, files }: Props): Response => {
|
||||
/**
 * Post-process parsed document content, optionally converting HTML tables to Markdown.
 *
 * When `HTMLtable` is true the content is returned untouched. Otherwise every
 * `<table>…</table>` span is rewritten as a Markdown table:
 * - the first `<tr>` becomes the header row, followed by a `---` separator row;
 * - cells covered by a `colspan`/`rowspan` (other than the top-left one) are rendered blank;
 * - every row, including the header, is padded to the widest row of the table so the
 *   header, separator and body always have the same number of columns.
 *
 * A table that cannot be converted (no `<tr>` rows, no `<td>` cells, or an unexpected
 * error while parsing) is left in the output as its original HTML.
 *
 * @param content   Document text that may contain `<table>` blocks.
 * @param HTMLtable Keep tables as HTML (true) or convert them to Markdown (false).
 * @returns The processed text.
 */
function processContent(content: string, HTMLtable: boolean): string {
  if (HTMLtable) {
    return content;
  }

  // Marker stored in grid positions that are covered by a spanning cell.
  const SPAN_PLACEHOLDER = '^^';

  return content.replace(/<table>[\s\S]*?<\/table>/g, (htmlTable) => {
    try {
      // Clean up whitespace and newlines so each row/cell sits on a single line
      const cleanHtml = htmlTable.replace(/\n\s*/g, '');
      const rows = cleanHtml.match(/<tr>(.*?)<\/tr>/g);
      if (!rows) return htmlTable;

      // Parse table data into a (possibly ragged) grid
      const tableData: string[][] = [];
      let maxColumns = 0;

      rows.forEach((row, rowIndex) => {
        if (!tableData[rowIndex]) {
          tableData[rowIndex] = [];
        }
        let colIndex = 0;
        const cells = row.match(/<td.*?>(.*?)<\/td>/g) || [];

        cells.forEach((cell) => {
          // Skip positions already occupied by a rowspan coming from a previous row
          while (tableData[rowIndex][colIndex]) {
            colIndex++;
          }
          const colspan = parseInt(cell.match(/colspan="(\d+)"/)?.[1] || '1', 10);
          const rowspan = parseInt(cell.match(/rowspan="(\d+)"/)?.[1] || '1', 10);
          const cellContent = cell.replace(/<td.*?>|<\/td>/g, '').trim();

          for (let i = 0; i < rowspan; i++) {
            for (let j = 0; j < colspan; j++) {
              if (!tableData[rowIndex + i]) {
                tableData[rowIndex + i] = [];
              }
              tableData[rowIndex + i][colIndex + j] =
                i === 0 && j === 0 ? cellContent : SPAN_PLACEHOLDER;
            }
          }
          colIndex += colspan;
          maxColumns = Math.max(maxColumns, colIndex);
        });
      });

      // Nothing parseable (e.g. the table only uses <th>): keep the HTML rather than
      // emitting an empty Markdown grid.
      if (maxColumns === 0) return htmlTable;

      // Render one grid row with exactly `maxColumns` cells. Padding happens here, once the
      // final width is known, so a header narrower than a later row is still padded, and
      // holes in sparse rows are rendered as blanks.
      const toMarkdownRow = (row: string[]): string => {
        const cells = Array.from({ length: maxColumns }, (_, i) => {
          const cell = row[i];
          return !cell || cell === SPAN_PLACEHOLDER ? ' ' : cell;
        });
        return '| ' + cells.join(' | ') + ' |';
      };

      const [headerRow, ...bodyRows] = tableData;
      const separator = '| ' + Array(maxColumns).fill('---').join(' | ') + ' |';

      return [toMarkdownRow(headerRow), separator, ...bodyRows.map(toMarkdownRow)].join('\n');
    } catch (error) {
      // Best effort: fall back to the original HTML if conversion fails
      return htmlTable;
    }
  });
}
|
||||
|
||||
const main = async ({ apikey, files, HTMLtable }: Props): Response => {
|
||||
// Check the apikey
|
||||
if (!apikey) {
|
||||
return Promise.reject(`API key is required`);
|
||||
@@ -30,77 +112,88 @@ const main = async ({ apikey, files }: Props): Response => {
|
||||
for await (const url of files) {
|
||||
try {
|
||||
//Fetch the pdf and check its content type
|
||||
const PDFResponse = await axiosInstance.get(url, { responseType: 'arraybuffer' });
|
||||
const PDFResponse = await axios
|
||||
.get(url, {
|
||||
responseType: 'arraybuffer',
|
||||
proxy: false,
|
||||
timeout: 20000
|
||||
})
|
||||
.catch((error) => {
|
||||
throw new Error(`[Fetch PDF Error] Failed to fetch PDF: ${getErrText(error)}`);
|
||||
});
|
||||
|
||||
if (PDFResponse.status !== 200) {
|
||||
throw new Error(
|
||||
`File:${url} \n<Content>\nFailed to fetch PDF from URL: ${PDFResponse.statusText}\n</Content>`
|
||||
`[Fetch PDF Error] Failed with status ${PDFResponse.status}: ${PDFResponse.data}`
|
||||
);
|
||||
}
|
||||
|
||||
const contentType = PDFResponse.headers['content-type'];
|
||||
const file_name = url.match(/read\/([^?]+)/)?.[1] || 'unknown.pdf';
|
||||
if (!contentType || !contentType.startsWith('application/pdf')) {
|
||||
throw new Error(
|
||||
`File:${file_name}\n<Content>\nThe provided file does not point to a PDF: ${contentType}\n</Content>`
|
||||
);
|
||||
throw new Error(`The provided file does not point to a PDF: ${contentType}`);
|
||||
}
|
||||
|
||||
const blob = new Blob([PDFResponse.data], { type: 'application/pdf' });
|
||||
// Get pre-upload URL first
|
||||
const preupload_response = await axiosInstance.post(
|
||||
'https://v2.doc2x.noedgeai.com/api/v2/parse/preupload',
|
||||
null,
|
||||
{
|
||||
const preupload_response = await axiosInstance
|
||||
.post('https://v2.doc2x.noedgeai.com/api/v2/parse/preupload', null, {
|
||||
headers: {
|
||||
Authorization: `Bearer ${apikey}`
|
||||
}
|
||||
}
|
||||
);
|
||||
})
|
||||
.catch((error) => {
|
||||
throw new Error(`[Pre-upload Error] Failed to get pre-upload URL: ${getErrText(error)}`);
|
||||
});
|
||||
|
||||
if (preupload_response.status !== 200) {
|
||||
throw new Error(
|
||||
`File:${file_name}\n<Content>\nFailed to get pre-upload URL: ${preupload_response.statusText}\n</Content>`
|
||||
);
|
||||
throw new Error(`Failed to get pre-upload URL: ${preupload_response.data}`);
|
||||
}
|
||||
|
||||
const preupload_data = preupload_response.data;
|
||||
if (preupload_data.code !== 'success') {
|
||||
throw new Error(
|
||||
`File:${file_name}\n<Content>\nFailed to get pre-upload URL: ${JSON.stringify(preupload_data)}\n</Content>`
|
||||
);
|
||||
throw new Error(`Failed to get pre-upload URL: ${JSON.stringify(preupload_data)}`);
|
||||
}
|
||||
|
||||
const upload_url = preupload_data.data.url;
|
||||
const uid = preupload_data.data.uid;
|
||||
// Upload file to pre-signed URL with binary stream
|
||||
|
||||
const response = await axiosInstance.put(upload_url, blob, {
|
||||
headers: {
|
||||
'Content-Type': 'application/pdf'
|
||||
}
|
||||
});
|
||||
const response = await axiosInstance
|
||||
.put(upload_url, blob, {
|
||||
headers: {
|
||||
'Content-Type': 'application/pdf'
|
||||
}
|
||||
})
|
||||
.catch((error) => {
|
||||
throw new Error(`[Upload Error] Failed to upload file: ${getErrText(error)}`);
|
||||
});
|
||||
|
||||
if (response.status !== 200) {
|
||||
throw new Error(`Upload failed with status ${response.status}: ${response.statusText}`);
|
||||
}
|
||||
|
||||
// Get the result by uid
|
||||
|
||||
// Wait for the result, at most 90s
|
||||
const checkResult = async (retry = 30) => {
|
||||
// Wait for the result
|
||||
const checkResult = async (retry = 20) => {
|
||||
if (retry <= 0)
|
||||
return Promise.reject(
|
||||
`File:${file_name}\n<Content>\nFailed to get result (uid: ${uid}): Get result timeout\n</Content>`
|
||||
`File:${file_name}\n<Content>\n[Parse Timeout Error] Failed to get result (uid: ${uid}): Process timeout\n</Content>`
|
||||
);
|
||||
|
||||
try {
|
||||
const result_response = await axiosInstance.get(
|
||||
`https://v2.doc2x.noedgeai.com/api/v2/parse/status?uid=${uid}`,
|
||||
{
|
||||
const result_response = await axiosInstance
|
||||
.get(`https://v2.doc2x.noedgeai.com/api/v2/parse/status?uid=${uid}`, {
|
||||
headers: {
|
||||
Authorization: `Bearer ${apikey}`
|
||||
}
|
||||
}
|
||||
);
|
||||
})
|
||||
.catch((error) => {
|
||||
throw new Error(
|
||||
`[Parse Status Error] Failed to get parse status: ${getErrText(error)}`
|
||||
);
|
||||
});
|
||||
|
||||
const result_data = result_response.data;
|
||||
if (!['ok', 'success'].includes(result_data.code)) {
|
||||
@@ -110,29 +203,35 @@ const main = async ({ apikey, files }: Props): Response => {
|
||||
}
|
||||
|
||||
if (['ready', 'processing'].includes(result_data.data.status)) {
|
||||
await delay(3000);
|
||||
await delay(4000);
|
||||
return checkResult(retry - 1);
|
||||
}
|
||||
|
||||
if (result_data.data.status === 'success') {
|
||||
const result = (
|
||||
const result = processContent(
|
||||
await Promise.all(
|
||||
result_data.data.result.pages.map((page: { md: any }) => page.md)
|
||||
).then((pages) => pages.join('\n'))
|
||||
).then((pages) => pages.join('\n')),
|
||||
HTMLtable
|
||||
)
|
||||
// Do some post-processing
|
||||
.replace(/\\[\(\)]/g, '$')
|
||||
.replace(/\\[\[\]]/g, '$$')
|
||||
.replace(/<img\s+src="([^"]+)"(?:\s*\?[^>]*)?(?:\s*\/>|>)/g, '');
|
||||
.replace(/<img\s+src="([^"]+)"(?:\s*\?[^>]*)?(?:\s*\/>|>)/g, '')
|
||||
.replace(/<!-- Media -->/g, '')
|
||||
.replace(/<!-- Footnote -->/g, '')
|
||||
.replace(/\$(.+?)\s+\\tag\{(.+?)\}\$/g, '$$$1 \\qquad \\qquad ($2)$$')
|
||||
.replace(/\\text\{([^}]*?)(\b\w+)_(\w+\b)([^}]*?)\}/g, '\\text{$1$2\\_$3$4}');
|
||||
|
||||
return `File:${file_name}\n<Content>\n${result}\n</Content>`;
|
||||
}
|
||||
|
||||
await delay(100);
|
||||
return checkResult(retry - 1);
|
||||
} catch (error) {
|
||||
await delay(100);
|
||||
return checkResult(retry - 1);
|
||||
if (retry > 1) {
|
||||
await delay(100);
|
||||
return checkResult(retry - 1);
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -140,7 +239,7 @@ const main = async ({ apikey, files }: Props): Response => {
|
||||
successResult.push(result);
|
||||
} catch (error) {
|
||||
failedResult.push(
|
||||
`File:${url} \n<Content>\nFailed to fetch image from URL: ${getErrText(error)}\n</Content>`
|
||||
`File:${url} \n<Content>\nFailed to fetch file from URL: ${getErrText(error)}\n</Content>`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,10 +7,8 @@
|
||||
"courseUrl": "https://fael3z0zfze.feishu.cn/wiki/Rkc5witXWiJoi5kORd2cofh6nDg?fromScene=spaceOverview",
|
||||
"showStatus": true,
|
||||
"weight": 10,
|
||||
|
||||
"isTool": true,
|
||||
"templateType": "tools",
|
||||
|
||||
"workflow": {
|
||||
"nodes": [
|
||||
{
|
||||
@@ -52,6 +50,26 @@
|
||||
"canSelectImg": false,
|
||||
"maxFiles": 14,
|
||||
"defaultValue": ""
|
||||
},
|
||||
{
|
||||
"renderTypeList": ["switch", "reference"],
|
||||
"selectedTypeIndex": 0,
|
||||
"valueType": "boolean",
|
||||
"canEdit": true,
|
||||
"key": "HTMLtable",
|
||||
"label": "HTMLtable",
|
||||
"description": "是否以HTML格式输出表格。如果需要精确地输出表格,请打开此开关以使用HTML格式。关闭后,表格将转换为Markdown形式输出,但这可能会损失一些表格特性,如合并单元格。",
|
||||
"defaultValue": false,
|
||||
"list": [
|
||||
{
|
||||
"label": "",
|
||||
"value": ""
|
||||
}
|
||||
],
|
||||
"maxFiles": 5,
|
||||
"canSelectFile": true,
|
||||
"canSelectImg": true,
|
||||
"required": true
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
@@ -68,6 +86,13 @@
|
||||
"key": "files",
|
||||
"label": "files",
|
||||
"type": "hidden"
|
||||
},
|
||||
{
|
||||
"id": "htmltable",
|
||||
"valueType": "boolean",
|
||||
"key": "HTMLtable",
|
||||
"label": "HTMLtable",
|
||||
"type": "hidden"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -220,7 +245,7 @@
|
||||
"key": "system_httpJsonBody",
|
||||
"renderTypeList": ["hidden"],
|
||||
"valueType": "any",
|
||||
"value": "{\n \"apikey\": \"{{apikey}}\",\n \"files\": {{files}}\n}",
|
||||
"value": "{\n \"apikey\": \"{{apikey}}\",\n \"HTMLtable\": {{HTMLtable}},\n \"files\": {{files}}\n}",
|
||||
"label": "",
|
||||
"required": false,
|
||||
"debugLabel": "",
|
||||
@@ -305,6 +330,36 @@
|
||||
},
|
||||
"required": true,
|
||||
"value": [["pluginInput", "url"]]
|
||||
},
|
||||
{
|
||||
"renderTypeList": ["reference"],
|
||||
"valueType": "boolean",
|
||||
"canEdit": true,
|
||||
"key": "HTMLtable",
|
||||
"label": "HTMLtable",
|
||||
"customInputConfig": {
|
||||
"selectValueTypeList": [
|
||||
"string",
|
||||
"number",
|
||||
"boolean",
|
||||
"object",
|
||||
"arrayString",
|
||||
"arrayNumber",
|
||||
"arrayBoolean",
|
||||
"arrayObject",
|
||||
"arrayAny",
|
||||
"any",
|
||||
"chatHistory",
|
||||
"datasetQuote",
|
||||
"dynamic",
|
||||
"selectApp",
|
||||
"selectDataset"
|
||||
],
|
||||
"showDescription": false,
|
||||
"showDefaultValue": true
|
||||
},
|
||||
"required": true,
|
||||
"value": ["pluginInput", "htmltable"]
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
|
||||
2
packages/plugins/type.d.ts
vendored
@@ -1,6 +1,7 @@
|
||||
import { PluginTemplateType } from '@fastgpt/global/core/plugin/type.d';
|
||||
import { systemPluginResponseEnum } from '@fastgpt/global/core/workflow/runtime/constants';
|
||||
import { SystemPluginTemplateItemType } from '@fastgpt/global/core/workflow/type';
|
||||
import { PluginGroupSchemaType } from '@fastgpt/service/core/app/plugin/type';
|
||||
|
||||
export type SystemPluginResponseType = Promise<Record<string, any>>;
|
||||
export type SystemPluginSpecialResponse = {
|
||||
@@ -10,6 +11,7 @@ export type SystemPluginSpecialResponse = {
|
||||
};
|
||||
|
||||
declare global {
|
||||
var pluginGroups: PluginGroupSchemaType[];
|
||||
var systemPlugins: SystemPluginTemplateItemType[];
|
||||
var systemPluginCb: Record<string, (e: any) => SystemPluginResponseType>;
|
||||
}
|
||||
|
||||
@@ -83,8 +83,8 @@ export function request(url: string, data: any, config: ConfigType, method: Meth
|
||||
baseURL: serverRequestBaseUrl,
|
||||
url,
|
||||
method,
|
||||
data: ['POST', 'PUT'].includes(method) ? data : null,
|
||||
params: !['POST', 'PUT'].includes(method) ? data : null,
|
||||
data: ['POST', 'PUT'].includes(method) ? data : undefined,
|
||||
params: !['POST', 'PUT'].includes(method) ? data : undefined,
|
||||
...config // custom config
|
||||
})
|
||||
.then((res) => checkRes(res.data))
|
||||
|
||||
@@ -32,7 +32,7 @@ export function getGridBucket(bucket: `${BucketNameEnum}`) {
|
||||
export async function uploadFile({
|
||||
bucketName,
|
||||
teamId,
|
||||
tmbId,
|
||||
uid,
|
||||
path,
|
||||
filename,
|
||||
contentType,
|
||||
@@ -41,7 +41,7 @@ export async function uploadFile({
|
||||
}: {
|
||||
bucketName: `${BucketNameEnum}`;
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
uid: string; // tmbId / outLinkUId
|
||||
path: string;
|
||||
filename: string;
|
||||
contentType?: string;
|
||||
@@ -58,7 +58,7 @@ export async function uploadFile({
|
||||
|
||||
// Add default metadata
|
||||
metadata.teamId = teamId;
|
||||
metadata.tmbId = tmbId;
|
||||
metadata.uid = uid;
|
||||
metadata.encoding = encoding;
|
||||
|
||||
// create a gridfs bucket
|
||||
|
||||
@@ -4,7 +4,7 @@ import path from 'path';
|
||||
import { BucketNameEnum, bucketNameMap } from '@fastgpt/global/common/file/constants';
|
||||
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
||||
|
||||
type FileType = {
|
||||
export type FileType = {
|
||||
fieldname: string;
|
||||
originalname: string;
|
||||
encoding: string;
|
||||
@@ -41,7 +41,7 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
|
||||
})
|
||||
}).single('file');
|
||||
|
||||
async doUpload<T = Record<string, any>>(
|
||||
async doUpload<T = any>(
|
||||
req: NextApiRequest,
|
||||
res: NextApiResponse,
|
||||
originBucketName?: `${BucketNameEnum}`
|
||||
|
||||
@@ -66,6 +66,7 @@ export const readRawContentByFileBuffer = async ({
|
||||
return;
|
||||
|
||||
const start = Date.now();
|
||||
addLog.info('Parsing files from an external service');
|
||||
|
||||
const data = new FormData();
|
||||
data.append('file', buffer, {
|
||||
@@ -88,7 +89,7 @@ export const readRawContentByFileBuffer = async ({
|
||||
}
|
||||
});
|
||||
|
||||
addLog.info(`Use custom read file service, time: ${Date.now() - start}ms`);
|
||||
addLog.info(`Custom file parsing is complete, time: ${Date.now() - start}ms`);
|
||||
|
||||
const rawText = response.data.markdown;
|
||||
const { text, imageList } = matchMdImgTextAndUpload(rawText);
|
||||
|
||||
@@ -31,6 +31,8 @@ export const jsonRes = <T = any>(
|
||||
clearCookie(res);
|
||||
}
|
||||
|
||||
addLog.error(`Api response error: ${url}`, ERROR_RESPONSE[errResponseKey]);
|
||||
|
||||
return res.json(ERROR_RESPONSE[errResponseKey]);
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ import {
|
||||
} from '@fastgpt/global/core/ai/type';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
import { addLog } from '../../common/system/log';
|
||||
import { i18nT } from '../../../web/i18n/utils';
|
||||
|
||||
export const openaiBaseUrl = process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1';
|
||||
|
||||
@@ -62,6 +63,7 @@ export const createChatCompletion = async <T extends CompletionsBodyType>({
|
||||
}): Promise<{
|
||||
response: InferResponseType<T>;
|
||||
isStreamResponse: boolean;
|
||||
getEmptyResponseTip: () => string;
|
||||
}> => {
|
||||
try {
|
||||
const formatTimeout = timeout ? timeout : body.stream ? 60000 : 600000;
|
||||
@@ -76,9 +78,21 @@ export const createChatCompletion = async <T extends CompletionsBodyType>({
|
||||
response !== null &&
|
||||
('iterator' in response || 'controller' in response);
|
||||
|
||||
const getEmptyResponseTip = () => {
|
||||
addLog.warn(`LLM response empty`, {
|
||||
baseUrl: userKey?.baseUrl,
|
||||
requestBody: body
|
||||
});
|
||||
if (userKey?.baseUrl) {
|
||||
return `您的 OpenAI key 没有响应: ${JSON.stringify(body)}`;
|
||||
}
|
||||
return i18nT('chat:LLM_model_response_empty');
|
||||
};
|
||||
|
||||
return {
|
||||
response: response as InferResponseType<T>,
|
||||
isStreamResponse
|
||||
isStreamResponse,
|
||||
getEmptyResponseTip
|
||||
};
|
||||
} catch (error) {
|
||||
addLog.error(`LLM response error`, error);
|
||||
|
||||
@@ -5,6 +5,7 @@ import { countGptMessagesTokens } from '../../../common/string/tiktoken/index';
|
||||
import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt';
|
||||
import { getLLMModel } from '../model';
|
||||
import { llmCompletionsBodyFormat } from '../utils';
|
||||
import { addLog } from '../../../common/system/log';
|
||||
|
||||
/*
|
||||
query extension - 问题扩展
|
||||
@@ -183,7 +184,7 @@ A: ${chatBg}
|
||||
tokens: await countGptMessagesTokens(messages)
|
||||
};
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
addLog.error(`Query extension error`, error);
|
||||
return {
|
||||
rawQuery: query,
|
||||
extensionQueries: [],
|
||||
|
||||
@@ -51,7 +51,6 @@ export function reRankRecall({
|
||||
}));
|
||||
})
|
||||
.catch((err) => {
|
||||
console.log(err);
|
||||
addLog.error('rerank error', err);
|
||||
|
||||
return [];
|
||||
|
||||
@@ -5,39 +5,44 @@ import { getLLMModel } from '../ai/model';
|
||||
import { MongoApp } from './schema';
|
||||
|
||||
export const beforeUpdateAppFormat = <T extends AppSchema['modules'] | undefined>({
|
||||
nodes
|
||||
nodes,
|
||||
isPlugin
|
||||
}: {
|
||||
nodes: T;
|
||||
isPlugin: boolean;
|
||||
}) => {
|
||||
if (nodes) {
|
||||
let maxTokens = 3000;
|
||||
// Check dataset maxTokens
|
||||
if (isPlugin) {
|
||||
let maxTokens = 16000;
|
||||
|
||||
nodes.forEach((item) => {
|
||||
if (
|
||||
item.flowNodeType === FlowNodeTypeEnum.chatNode ||
|
||||
item.flowNodeType === FlowNodeTypeEnum.tools
|
||||
) {
|
||||
const model =
|
||||
item.inputs.find((item) => item.key === NodeInputKeyEnum.aiModel)?.value || '';
|
||||
const chatModel = getLLMModel(model);
|
||||
const quoteMaxToken = chatModel.quoteMaxToken || 3000;
|
||||
nodes.forEach((item) => {
|
||||
if (
|
||||
item.flowNodeType === FlowNodeTypeEnum.chatNode ||
|
||||
item.flowNodeType === FlowNodeTypeEnum.tools
|
||||
) {
|
||||
const model =
|
||||
item.inputs.find((item) => item.key === NodeInputKeyEnum.aiModel)?.value || '';
|
||||
const chatModel = getLLMModel(model);
|
||||
const quoteMaxToken = chatModel.quoteMaxToken || 16000;
|
||||
|
||||
maxTokens = Math.max(maxTokens, quoteMaxToken);
|
||||
}
|
||||
});
|
||||
maxTokens = Math.max(maxTokens, quoteMaxToken);
|
||||
}
|
||||
});
|
||||
|
||||
nodes.forEach((item) => {
|
||||
if (item.flowNodeType === FlowNodeTypeEnum.datasetSearchNode) {
|
||||
item.inputs.forEach((input) => {
|
||||
if (input.key === NodeInputKeyEnum.datasetMaxTokens) {
|
||||
const val = input.value as number;
|
||||
if (val > maxTokens) {
|
||||
input.value = maxTokens;
|
||||
nodes.forEach((item) => {
|
||||
if (item.flowNodeType === FlowNodeTypeEnum.datasetSearchNode) {
|
||||
item.inputs.forEach((input) => {
|
||||
if (input.key === NodeInputKeyEnum.datasetMaxTokens) {
|
||||
const val = input.value as number;
|
||||
if (val > maxTokens) {
|
||||
input.value = maxTokens;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
|
||||
@@ -2,7 +2,6 @@ import { FlowNodeTemplateType } from '@fastgpt/global/core/workflow/type/node.d'
|
||||
import { FlowNodeTypeEnum, defaultNodeVersion } from '@fastgpt/global/core/workflow/node/constant';
|
||||
import { appData2FlowNodeIO, pluginData2FlowNodeIO } from '@fastgpt/global/core/workflow/utils';
|
||||
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
|
||||
import type { PluginRuntimeType } from '@fastgpt/global/core/workflow/runtime/type';
|
||||
import { FlowNodeTemplateTypeEnum } from '@fastgpt/global/core/workflow/constants';
|
||||
import { getHandleConfig } from '@fastgpt/global/core/workflow/template/utils';
|
||||
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
||||
@@ -11,6 +10,9 @@ import { MongoApp } from '../schema';
|
||||
import { SystemPluginTemplateItemType } from '@fastgpt/global/core/workflow/type';
|
||||
import { getSystemPluginTemplates } from '../../../../plugins/register';
|
||||
import { getAppLatestVersion, getAppVersionById } from '../version/controller';
|
||||
import { PluginRuntimeType } from '@fastgpt/global/core/plugin/type';
|
||||
import { MongoSystemPlugin } from './systemPluginSchema';
|
||||
import { PluginErrEnum } from '@fastgpt/global/common/error/code/plugin';
|
||||
|
||||
/*
|
||||
plugin id rule:
|
||||
@@ -37,15 +39,45 @@ export async function splitCombinePluginId(id: string) {
|
||||
|
||||
type ChildAppType = SystemPluginTemplateItemType & { teamId?: string };
|
||||
const getSystemPluginTemplateById = async (
|
||||
pluginId: string
|
||||
pluginId: string,
|
||||
versionId?: string
|
||||
): Promise<SystemPluginTemplateItemType> => {
|
||||
const item = getSystemPluginTemplates().find((plugin) => plugin.id === pluginId);
|
||||
if (!item) return Promise.reject('plugin not found');
|
||||
if (!item) return Promise.reject(PluginErrEnum.unAuth);
|
||||
|
||||
return cloneDeep(item);
|
||||
const plugin = cloneDeep(item);
|
||||
|
||||
if (plugin.associatedPluginId) {
|
||||
// The verification plugin is set as a system plugin
|
||||
const systemPlugin = await MongoSystemPlugin.findOne(
|
||||
{ pluginId: plugin.id, 'customConfig.associatedPluginId': plugin.associatedPluginId },
|
||||
'associatedPluginId'
|
||||
).lean();
|
||||
if (!systemPlugin) return Promise.reject(PluginErrEnum.unAuth);
|
||||
|
||||
const app = await MongoApp.findById(plugin.associatedPluginId).lean();
|
||||
if (!app) return Promise.reject(PluginErrEnum.unAuth);
|
||||
|
||||
const version = versionId
|
||||
? await getAppVersionById({
|
||||
appId: plugin.associatedPluginId,
|
||||
versionId,
|
||||
app
|
||||
})
|
||||
: await getAppLatestVersion(plugin.associatedPluginId, app);
|
||||
if (!version.versionId) return Promise.reject('App version not found');
|
||||
|
||||
plugin.workflow = {
|
||||
nodes: version.nodes,
|
||||
edges: version.edges,
|
||||
chatConfig: version.chatConfig
|
||||
};
|
||||
plugin.version = versionId || String(version.versionId);
|
||||
}
|
||||
return plugin;
|
||||
};
|
||||
|
||||
/* format plugin modules to plugin preview module */
|
||||
/* Format plugin to workflow preview node data */
|
||||
export async function getChildAppPreviewNode({
|
||||
id
|
||||
}: {
|
||||
@@ -77,7 +109,9 @@ export async function getChildAppPreviewNode({
|
||||
templateType: FlowNodeTemplateTypeEnum.teamApp,
|
||||
version: version.versionId,
|
||||
originCost: 0,
|
||||
currentCost: 0
|
||||
currentCost: 0,
|
||||
hasTokenFee: false,
|
||||
pluginOrder: 0
|
||||
};
|
||||
} else {
|
||||
return getSystemPluginTemplateById(pluginId);
|
||||
@@ -147,10 +181,12 @@ export async function getChildAppRuntimeById(
|
||||
// 用不到
|
||||
version: item?.pluginData?.nodeVersion || defaultNodeVersion,
|
||||
originCost: 0,
|
||||
currentCost: 0
|
||||
currentCost: 0,
|
||||
hasTokenFee: false,
|
||||
pluginOrder: 0
|
||||
};
|
||||
} else {
|
||||
return getSystemPluginTemplateById(pluginId);
|
||||
return getSystemPluginTemplateById(pluginId, versionId);
|
||||
}
|
||||
})();
|
||||
|
||||
@@ -162,6 +198,7 @@ export async function getChildAppRuntimeById(
|
||||
showStatus: app.showStatus,
|
||||
currentCost: app.currentCost,
|
||||
nodes: app.workflow.nodes,
|
||||
edges: app.workflow.edges
|
||||
edges: app.workflow.edges,
|
||||
hasTokenFee: app.hasTokenFee
|
||||
};
|
||||
}
|
||||
|
||||
35
packages/service/core/app/plugin/pluginGroupSchema.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
import { connectionMongo, getMongoModel } from '../../../common/mongo/index';
|
||||
import { PluginGroupSchemaType, TGroupType } from './type';
|
||||
const { Schema } = connectionMongo;
|
||||
|
||||
/** Name of the MongoDB collection backing the plugin-group model. */
export const collectionName = 'app_plugin_groups';

/**
 * Mongoose schema for a plugin group.
 * `groupId` and `groupName` are mandatory; the remaining fields fall back to
 * an empty avatar, an empty type list and an order of 0.
 */
const PluginGroupSchema = new Schema({
  groupId: { type: String, required: true },
  groupAvatar: { type: String, default: '' },
  groupName: { type: String, required: true },
  groupTypes: { type: Array<TGroupType>, default: [] },
  groupOrder: { type: Number, default: 0 }
});

// Each groupId may appear only once in the collection.
PluginGroupSchema.index({ groupId: 1 }, { unique: true });

export const MongoPluginGroups = getMongoModel<PluginGroupSchemaType>(
  collectionName,
  PluginGroupSchema
);
|
||||
@@ -25,12 +25,20 @@ const SystemPluginSchema = new Schema({
|
||||
type: Number,
|
||||
default: 0
|
||||
},
|
||||
hasTokenFee: {
|
||||
type: Boolean,
|
||||
default: false
|
||||
},
|
||||
pluginOrder: {
|
||||
type: Number,
|
||||
default: 0
|
||||
},
|
||||
customConfig: Object
|
||||
});
|
||||
|
||||
SystemPluginSchema.index({ pluginId: 1 });
|
||||
|
||||
export const MongoSystemPluginSchema = getMongoModel<SystemPluginConfigSchemaType>(
|
||||
export const MongoSystemPlugin = getMongoModel<SystemPluginConfigSchemaType>(
|
||||
collectionName,
|
||||
SystemPluginSchema
|
||||
);
|
||||
|
||||
20
packages/service/core/app/plugin/type.d.ts
vendored
@@ -1,3 +1,4 @@
|
||||
import { SystemPluginListItemType } from '@fastgpt/global/core/app/type';
|
||||
import { FlowNodeTemplateTypeEnum } from '@fastgpt/global/core/workflow/constants';
|
||||
import {
|
||||
SystemPluginTemplateItemType,
|
||||
@@ -9,7 +10,9 @@ export type SystemPluginConfigSchemaType = {
|
||||
|
||||
originCost: number; // n points/one time
|
||||
currentCost: number;
|
||||
hasTokenFee: boolean;
|
||||
isActive: boolean;
|
||||
pluginOrder: number;
|
||||
inputConfig: SystemPluginTemplateItemType['inputConfig'];
|
||||
|
||||
customConfig?: {
|
||||
@@ -19,6 +22,21 @@ export type SystemPluginConfigSchemaType = {
|
||||
version: string;
|
||||
weight?: number;
|
||||
workflow: WorkflowTemplateBasicType;
|
||||
templateType: FlowNodeTemplateTypeEnum;
|
||||
templateType: string;
|
||||
associatedPluginId: string;
|
||||
userGuide: string;
|
||||
};
|
||||
};
|
||||
|
||||
/** A single type entry listed inside a plugin group. */
export type TGroupType = {
  /** Name of the type. */
  typeName: string;
  /** Identifier of the type. */
  typeId: string;
};

/** Fields of a plugin-group document. */
export type PluginGroupSchemaType = {
  /** Identifier of the group. */
  groupId: string;
  /** Avatar of the group. */
  groupAvatar: string;
  /** Name of the group. */
  groupName: string;
  /** Type entries that belong to the group. */
  groupTypes: Array<TGroupType>;
  /** Numeric order value of the group. */
  groupOrder: number;
};
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
import { PluginRuntimeType } from '@fastgpt/global/core/workflow/runtime/type';
|
||||
import { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
|
||||
import { splitCombinePluginId } from './controller';
|
||||
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
|
||||
import { PluginRuntimeType } from '@fastgpt/global/core/plugin/type';
|
||||
|
||||
/*
|
||||
1. Commercial plugin: n points per times
|
||||
2. Other plugin: sum of children points
|
||||
Plugin points calculation:
|
||||
1. Return 0 if error
|
||||
2. Add configured points if commercial plugin
|
||||
3. Add sum of child nodes points
|
||||
*/
|
||||
export const computedPluginUsage = async ({
|
||||
plugin,
|
||||
@@ -16,13 +16,13 @@ export const computedPluginUsage = async ({
|
||||
childrenUsage: ChatNodeUsageType[];
|
||||
error?: boolean;
|
||||
}) => {
|
||||
const { source } = await splitCombinePluginId(plugin.id);
|
||||
|
||||
// Commercial plugin: n points per times
|
||||
if (source === PluginSourceEnum.commercial) {
|
||||
if (error) return 0;
|
||||
return plugin.currentCost ?? 0;
|
||||
if (error) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return childrenUsage.reduce((sum, item) => sum + (item.totalPoints || 0), 0);
|
||||
const childrenIUsages = childrenUsage.reduce((sum, item) => sum + (item.totalPoints || 0), 0);
|
||||
|
||||
const pluginCurrentCose = plugin.currentCost ?? 0;
|
||||
|
||||
return plugin.hasTokenFee ? pluginCurrentCose + childrenIUsages : pluginCurrentCose;
|
||||
};
|
||||
|
||||
@@ -17,7 +17,8 @@ export const chatConfigType = {
|
||||
scheduledTriggerConfig: Object,
|
||||
chatInputGuide: Object,
|
||||
fileSelectConfig: Object,
|
||||
instruction: String
|
||||
instruction: String,
|
||||
autoExecute: Object
|
||||
};
|
||||
|
||||
// schema
|
||||
|
||||
@@ -46,6 +46,10 @@ const ChatItemSchema = new Schema({
|
||||
type: Date,
|
||||
default: () => new Date()
|
||||
},
|
||||
hideInUI: {
|
||||
type: Boolean,
|
||||
default: false
|
||||
},
|
||||
obj: {
|
||||
// chat role
|
||||
type: String,
|
||||
|
||||
@@ -1,15 +1,6 @@
|
||||
import type {
|
||||
AIChatItemType,
|
||||
ChatItemType,
|
||||
UserChatItemType
|
||||
} from '@fastgpt/global/core/chat/type.d';
|
||||
import axios from 'axios';
|
||||
import type { AIChatItemType, UserChatItemType } from '@fastgpt/global/core/chat/type.d';
|
||||
import { MongoApp } from '../app/schema';
|
||||
import {
|
||||
ChatItemValueTypeEnum,
|
||||
ChatRoleEnum,
|
||||
ChatSourceEnum
|
||||
} from '@fastgpt/global/core/chat/constants';
|
||||
import { ChatItemValueTypeEnum, ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
|
||||
import { MongoChatItem } from './chatItemSchema';
|
||||
import { MongoChat } from './chatSchema';
|
||||
import { addLog } from '../../common/system/log';
|
||||
@@ -133,21 +124,15 @@ export async function saveChat({
|
||||
export const updateInteractiveChat = async ({
|
||||
chatId,
|
||||
appId,
|
||||
teamId,
|
||||
tmbId,
|
||||
userInteractiveVal,
|
||||
aiResponse,
|
||||
newVariables,
|
||||
newTitle
|
||||
newVariables
|
||||
}: {
|
||||
chatId: string;
|
||||
appId: string;
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
userInteractiveVal: string;
|
||||
aiResponse: AIChatItemType & { dataId?: string };
|
||||
newVariables?: Record<string, any>;
|
||||
newTitle: string;
|
||||
}) => {
|
||||
if (!chatId) return;
|
||||
|
||||
@@ -232,7 +217,6 @@ export const updateInteractiveChat = async ({
|
||||
{
|
||||
$set: {
|
||||
variables: newVariables,
|
||||
title: newTitle,
|
||||
updateTime: new Date()
|
||||
}
|
||||
},
|
||||
|
||||
@@ -118,6 +118,11 @@ export const loadRequestMessages = async ({
|
||||
return item.image_url.url;
|
||||
})();
|
||||
|
||||
// base64 image
|
||||
if (imgUrl.startsWith('data:image/')) {
|
||||
return item;
|
||||
}
|
||||
|
||||
try {
|
||||
// If imgUrl is a local path, load image from local, and set url to base64
|
||||
if (imgUrl.startsWith('/')) {
|
||||
|
||||
143
packages/service/core/dataset/apiDataset/api.ts
Normal file
@@ -0,0 +1,143 @@
|
||||
import type {
|
||||
APIFileContentResponse,
|
||||
APIFileListResponse,
|
||||
APIFileReadResponse,
|
||||
APIFileServer
|
||||
} from '@fastgpt/global/core/dataset/apiDataset';
|
||||
import axios, { Method } from 'axios';
|
||||
import { addLog } from '../../../common/system/log';
|
||||
import { readFileRawTextByUrl } from '../read';
|
||||
import { ParentIdType } from '@fastgpt/global/common/parentFolder/type';
|
||||
|
||||
type ResponseDataType = {
|
||||
success: boolean;
|
||||
message: string;
|
||||
data: any;
|
||||
};
|
||||
|
||||
/**
 * Build a request client bound to one API dataset file server.
 *
 * All calls go through a dedicated axios instance that targets `apiServer.baseUrl`
 * and sends `apiServer.authorization` as a Bearer token. Responses are expected to be
 * a `{ success, message, data }` envelope; the helpers unwrap `data` and reject when
 * the envelope is missing, reports failure, or carries an unusable payload.
 *
 * @param apiServer - Base URL and authorization value of the file server.
 * @returns The `getFileContent`, `listFiles` and `getFilePreviewUrl` helpers.
 */
export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }) => {
  const instance = axios.create({
    baseURL: apiServer.baseUrl,
    timeout: 60000, // Request timeout in milliseconds
    headers: {
      'content-type': 'application/json',
      Authorization: `Bearer ${apiServer.authorization}`
    }
  });

  /**
   * Validate the response envelope and unwrap its `data` field.
   * Rejects when the body is absent or `success` is falsy.
   */
  const checkRes = (data: ResponseDataType) => {
    // `== null` covers both a missing body and a literal JSON `null`
    if (data == null) {
      addLog.info('Api dataset data is empty');
      return Promise.reject('服务器异常');
    } else if (!data.success) {
      return Promise.reject(data);
    }
    return data.data;
  };

  /**
   * Turn any failure into a rejected promise, preferring a `{ message }` object.
   * Because `.catch` is chained after `.then(checkRes)` below, rejections produced by
   * `checkRes` pass through here as well.
   */
  const responseError = (err: any) => {
    // Use the shared logger instead of console output
    addLog.error('Api dataset request error', err);

    if (!err) {
      return Promise.reject({ message: '未知错误' });
    }
    if (typeof err === 'string') {
      return Promise.reject({ message: err });
    }
    if (typeof err.message === 'string') {
      return Promise.reject({ message: err.message });
    }
    if (typeof err.data === 'string') {
      return Promise.reject({ message: err.data });
    }
    if (err?.response?.data) {
      return Promise.reject(err?.response?.data);
    }
    return Promise.reject(err);
  };

  /**
   * Send one request. POST/PUT send `data` as the body; every other method
   * sends it as query parameters.
   */
  const request = <T>(url: string, data: any, method: Method): Promise<T> => {
    // Remove undefined fields so they are not sent
    for (const key in data) {
      if (data[key] === undefined) {
        delete data[key];
      }
    }

    // axios' `Method` type also permits lower-case verbs, so compare case-insensitively
    const sendAsBody = ['POST', 'PUT'].includes(String(method).toUpperCase());

    return instance
      .request({
        url,
        method,
        data: sendAsBody ? data : undefined,
        params: sendAsBody ? undefined : data
      })
      .then((res) => checkRes(res.data))
      .catch((err) => responseError(err));
  };

  /**
   * List files from `/v1/file/list`, optionally filtered by `searchKey` / `parentId`.
   * Rejects unless the payload is an array whose entries all have `id`, `name` and `type`.
   */
  const listFiles = async ({
    searchKey,
    parentId
  }: {
    searchKey?: string;
    parentId?: ParentIdType;
  }) => {
    const files = await request<APIFileListResponse>(
      `/v1/file/list`,
      {
        searchKey,
        parentId
      },
      'POST'
    );

    if (!Array.isArray(files)) {
      return Promise.reject('Invalid file list format');
    }
    // A null/undefined entry is treated as malformed data rather than crashing the check
    if (files.some((file) => !file || !file.id || !file.name || typeof file.type === 'undefined')) {
      return Promise.reject('Invalid file data format');
    }
    return files;
  };

  /**
   * Fetch the text of a file from `/v1/file/content`.
   * Returns `content` when present; otherwise reads the raw text from `previewUrl`.
   * Rejects when the response provides neither.
   */
  const getFileContent = async ({ teamId, apiFileId }: { teamId: string; apiFileId: string }) => {
    const data = await request<APIFileContentResponse>(
      `/v1/file/content`,
      { id: apiFileId },
      'GET'
    );
    // The payload may be missing even when the envelope reports success
    const content = data?.content;
    const previewUrl = data?.previewUrl;

    if (content) {
      return content;
    }
    if (previewUrl) {
      const rawText = await readFileRawTextByUrl({
        teamId,
        url: previewUrl,
        relatedId: apiFileId
      });
      return rawText;
    }
    return Promise.reject('Invalid content type: content or previewUrl is required');
  };

  /**
   * Fetch the preview URL of a file from `/v1/file/read`.
   * Rejects when the response does not contain a non-empty string `url`.
   */
  const getFilePreviewUrl = async ({ apiFileId }: { apiFileId: string }) => {
    const res = await request<APIFileReadResponse>(`/v1/file/read`, { id: apiFileId }, 'GET');
    // Guard against a missing payload instead of destructuring it directly
    const url = res?.url;

    if (!url || typeof url !== 'string') {
      return Promise.reject('Invalid response url');
    }

    return url;
  };

  return {
    getFileContent,
    listFiles,
    getFilePreviewUrl
  };
};
|
||||
@@ -3,7 +3,8 @@ import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset
|
||||
import { MongoDatasetCollection } from './schema';
|
||||
import {
|
||||
CollectionWithDatasetType,
|
||||
DatasetCollectionSchemaType
|
||||
DatasetCollectionSchemaType,
|
||||
DatasetSchemaType
|
||||
} from '@fastgpt/global/core/dataset/type';
|
||||
import { MongoDatasetTraining } from '../training/schema';
|
||||
import { MongoDatasetData } from '../data/schema';
|
||||
@@ -13,7 +14,132 @@ import { delFileByFileIdList } from '../../../common/file/gridfs/controller';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { ClientSession } from '../../../common/mongo';
|
||||
import { createOrGetCollectionTags } from './utils';
|
||||
import { rawText2Chunks } from '../read';
|
||||
import { checkDatasetLimit } from '../../../support/permission/teamLimit';
|
||||
import { predictDataLimitLength } from '../../../../global/core/dataset/utils';
|
||||
import { mongoSessionRun } from '../../../common/mongo/sessionRun';
|
||||
import { createTrainingUsage } from '../../../support/wallet/usage/controller';
|
||||
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
|
||||
import { getLLMModel, getVectorModel } from '../../ai/model';
|
||||
import { pushDataListToTrainingQueue } from '../training/controller';
|
||||
import { MongoImage } from '../../../common/file/image/schema';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
|
||||
/**
 * Create a dataset collection from raw text and push its chunks to the training queue.
 *
 * Flow: split `rawText` into chunks, check the team's dataset limit, then inside a Mongo
 * session create the collection, create the training usage record, push the chunks to the
 * training queue and, when `relatedId` is given, unset `expiredTime` on the related images.
 * The work runs in `session` when one is passed; otherwise `mongoSessionRun` supplies one.
 *
 * @returns The created `collectionId` and the result of `pushDataListToTrainingQueue`.
 */
export const createCollectionAndInsertData = async ({
  dataset,
  rawText,
  relatedId,
  createCollectionParams,
  isQAImport = false,
  session
}: {
  dataset: DatasetSchemaType;
  rawText: string;
  relatedId?: string;
  createCollectionParams: CreateOneCollectionParams;

  isQAImport?: boolean;
  session?: ClientSession;
}) => {
  // Chunk split params
  const {
    teamId,
    tmbId,
    chunkSize,
    chunkSplitter,
    qaPrompt,
    name: usageName
  } = createCollectionParams;
  const trainingType = createCollectionParams.trainingType || TrainingModeEnum.chunk;

  // 1. split chunks (overlap is only applied in chunk mode)
  const overlapRatio = trainingType === TrainingModeEnum.chunk ? 0.2 : 0;
  const customReg = chunkSplitter ? [chunkSplitter] : [];
  const chunks = rawText2Chunks({
    rawText,
    chunkLen: chunkSize,
    overlapRatio,
    customReg,
    isQAImport
  });

  // 2. auth limit
  await checkDatasetLimit({
    teamId,
    insertLen: predictDataLimitLength(trainingType, chunks)
  });

  const runInSession = async (activeSession: ClientSession) => {
    // 3. create collection
    const createdCollection = await createOneCollection({
      ...createCollectionParams,

      hashRawText: hashStr(rawText),
      rawTextLength: rawText.length,
      session: activeSession
    });
    const collectionId = createdCollection._id;

    // 4. create training bill
    const { billId } = await createTrainingUsage({
      teamId,
      tmbId,
      appName: usageName,
      billSource: UsageSourceEnum.training,
      vectorModel: getVectorModel(dataset.vectorModel)?.name,
      agentModel: getLLMModel(dataset.agentModel)?.name,
      session: activeSession
    });

    // 5. insert to training queue, tagging every chunk with its position
    const insertResults = await pushDataListToTrainingQueue({
      teamId,
      tmbId,
      datasetId: dataset._id,
      collectionId,
      agentModel: dataset.agentModel,
      vectorModel: dataset.vectorModel,
      trainingMode: trainingType,
      prompt: qaPrompt,
      billId,
      data: chunks.map((chunk, chunkIndex) => ({ ...chunk, chunkIndex })),
      session: activeSession
    });

    // 6. remove related image ttl
    if (relatedId) {
      await MongoImage.updateMany(
        { teamId, 'metadata.relatedId': relatedId },
        // Remove expiredTime to avoid ttl expiration
        { $unset: { expiredTime: 1 } },
        { session: activeSession }
      );
    }

    return { collectionId, insertResults };
  };

  // Reuse the caller's session when given; otherwise open a new one
  return session ? runInSession(session) : mongoSessionRun(runInSession);
};
|
||||
|
||||
/**
 * Arguments accepted by `createOneCollection`: the collection creation params
 * plus the owning team/member ids and an optional Mongo session.
 */
export type CreateOneCollectionParams = CreateDatasetCollectionParams & {
  /** Team id stored on the collection. */
  teamId: string;
  /** Team member id stored on the collection. */
  tmbId: string;
  /** Mongo session to run the writes in, when provided. */
  session?: ClientSession;
};
|
||||
export async function createOneCollection({
|
||||
teamId,
|
||||
tmbId,
|
||||
@@ -33,18 +159,15 @@ export async function createOneCollection({
|
||||
externalFileId,
|
||||
externalFileUrl,
|
||||
|
||||
apiFileId,
|
||||
|
||||
hashRawText,
|
||||
rawTextLength,
|
||||
metadata = {},
|
||||
session,
|
||||
tags,
|
||||
...props
|
||||
}: CreateDatasetCollectionParams & {
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
[key: string]: any;
|
||||
session?: ClientSession;
|
||||
}) {
|
||||
createTime
|
||||
}: CreateOneCollectionParams) {
|
||||
// Create collection tags
|
||||
const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session });
|
||||
|
||||
@@ -52,7 +175,6 @@ export async function createOneCollection({
|
||||
const [collection] = await MongoDatasetCollection.create(
|
||||
[
|
||||
{
|
||||
...props,
|
||||
teamId,
|
||||
tmbId,
|
||||
parentId: parentId || null,
|
||||
@@ -64,16 +186,18 @@ export async function createOneCollection({
|
||||
chunkSize,
|
||||
chunkSplitter,
|
||||
qaPrompt,
|
||||
metadata,
|
||||
|
||||
fileId,
|
||||
rawLink,
|
||||
externalFileId,
|
||||
externalFileUrl,
|
||||
...(fileId ? { fileId } : {}),
|
||||
...(rawLink ? { rawLink } : {}),
|
||||
...(externalFileId ? { externalFileId } : {}),
|
||||
...(externalFileUrl ? { externalFileUrl } : {}),
|
||||
...(apiFileId ? { apiFileId } : {}),
|
||||
|
||||
rawTextLength,
|
||||
hashRawText,
|
||||
metadata,
|
||||
tags: collectionTags
|
||||
tags: collectionTags,
|
||||
createTime
|
||||
}
|
||||
],
|
||||
{ session }
|
||||
@@ -116,7 +240,68 @@ export const delCollectionRelatedSource = async ({
|
||||
/**
|
||||
* delete collection and its related data
|
||||
*/
|
||||
export async function delCollectionAndRelatedSources({
|
||||
/**
 * Delete the given collections together with their training data, dataset data and
 * vector data; related files/images are removed as well when `delRelatedSource` is true.
 *
 * Does nothing for an empty list. Rejects when the first collection has no `teamId`;
 * that `teamId` is used for every delete below.
 */
export async function delCollection({
  collections,
  session,
  delRelatedSource
}: {
  collections: (CollectionWithDatasetType | DatasetCollectionSchemaType)[];
  session: ClientSession;
  delRelatedSource: boolean;
}) {
  if (collections.length === 0) return;

  const teamId = collections[0].teamId;
  if (!teamId) return Promise.reject('teamId is not exist');

  // datasetId is either a plain id string or an object carrying `_id`
  const rawDatasetIds = collections.map((item) =>
    typeof item.datasetId === 'string' ? String(item.datasetId) : String(item.datasetId._id)
  );
  const datasetIds = Array.from(new Set(rawDatasetIds));
  const collectionIds = collections.map((item) => String(item._id));

  // NOTE(review): the filter key below is `datasetIds` (plural) while the rest of this file
  // uses `datasetId` - confirm against the training/data schemas that this key is intended.
  const relatedDataFilter = {
    teamId,
    datasetIds: { $in: datasetIds },
    collectionId: { $in: collectionIds }
  };

  // delete training data (this call is not passed the session)
  await MongoDatasetTraining.deleteMany({ ...relatedDataFilter });

  /* file and imgs */
  if (delRelatedSource) {
    await delCollectionRelatedSource({ collections, session });
  }

  // delete dataset.datas
  await MongoDatasetData.deleteMany({ ...relatedDataFilter }, { session });

  // delete collections
  await MongoDatasetCollection.deleteMany({ teamId, _id: { $in: collectionIds } }, { session });

  // no session delete: delete files, vector data
  await deleteDatasetDataVector({ teamId, datasetIds, collectionIds });
}
|
||||
|
||||
/**
|
||||
* delete delOnlyCollection
|
||||
*/
|
||||
export async function delOnlyCollection({
|
||||
collections,
|
||||
session
|
||||
}: {
|
||||
@@ -148,9 +333,6 @@ export async function delCollectionAndRelatedSources({
|
||||
collectionId: { $in: collectionIds }
|
||||
});
|
||||
|
||||
/* file and imgs */
|
||||
await delCollectionRelatedSource({ collections, session });
|
||||
|
||||
// delete dataset.datas
|
||||
await MongoDatasetData.deleteMany(
|
||||
{ teamId, datasetIds: { $in: datasetIds }, collectionId: { $in: collectionIds } },
|
||||
|
||||
@@ -10,90 +10,100 @@ import {
|
||||
|
||||
export const DatasetColCollectionName = 'dataset_collections';
|
||||
|
||||
const DatasetCollectionSchema = new Schema({
|
||||
parentId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: DatasetColCollectionName,
|
||||
default: null
|
||||
},
|
||||
teamId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: TeamCollectionName,
|
||||
required: true
|
||||
},
|
||||
tmbId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: TeamMemberCollectionName,
|
||||
required: true
|
||||
},
|
||||
datasetId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: DatasetCollectionName,
|
||||
required: true
|
||||
},
|
||||
type: {
|
||||
type: String,
|
||||
enum: Object.keys(DatasetCollectionTypeMap),
|
||||
required: true
|
||||
},
|
||||
name: {
|
||||
type: String,
|
||||
required: true
|
||||
},
|
||||
createTime: {
|
||||
type: Date,
|
||||
default: () => new Date()
|
||||
},
|
||||
updateTime: {
|
||||
type: Date,
|
||||
default: () => new Date()
|
||||
},
|
||||
forbid: {
|
||||
type: Boolean,
|
||||
default: false
|
||||
},
|
||||
const DatasetCollectionSchema = new Schema(
|
||||
{
|
||||
parentId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: DatasetColCollectionName,
|
||||
default: null
|
||||
},
|
||||
teamId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: TeamCollectionName,
|
||||
required: true
|
||||
},
|
||||
tmbId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: TeamMemberCollectionName,
|
||||
required: true
|
||||
},
|
||||
datasetId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: DatasetCollectionName,
|
||||
required: true
|
||||
},
|
||||
type: {
|
||||
type: String,
|
||||
enum: Object.keys(DatasetCollectionTypeMap),
|
||||
required: true
|
||||
},
|
||||
name: {
|
||||
type: String,
|
||||
required: true
|
||||
},
|
||||
createTime: {
|
||||
type: Date,
|
||||
default: () => new Date()
|
||||
},
|
||||
updateTime: {
|
||||
type: Date,
|
||||
default: () => new Date()
|
||||
},
|
||||
forbid: {
|
||||
type: Boolean,
|
||||
default: false
|
||||
},
|
||||
|
||||
// chunk field
|
||||
trainingType: {
|
||||
type: String,
|
||||
enum: Object.keys(TrainingTypeMap)
|
||||
},
|
||||
chunkSize: {
|
||||
type: Number,
|
||||
required: true
|
||||
},
|
||||
chunkSplitter: {
|
||||
type: String
|
||||
},
|
||||
qaPrompt: {
|
||||
type: String
|
||||
},
|
||||
ocrParse: Boolean,
|
||||
// chunk field
|
||||
trainingType: {
|
||||
type: String,
|
||||
enum: Object.keys(TrainingTypeMap)
|
||||
},
|
||||
chunkSize: {
|
||||
type: Number,
|
||||
required: true
|
||||
},
|
||||
chunkSplitter: {
|
||||
type: String
|
||||
},
|
||||
qaPrompt: {
|
||||
type: String
|
||||
},
|
||||
ocrParse: Boolean,
|
||||
|
||||
tags: {
|
||||
type: [String],
|
||||
default: []
|
||||
},
|
||||
tags: {
|
||||
type: [String],
|
||||
default: []
|
||||
},
|
||||
|
||||
// local file collection
|
||||
fileId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: 'dataset.files'
|
||||
},
|
||||
// web link collection
|
||||
rawLink: String,
|
||||
// external collection
|
||||
externalFileId: String,
|
||||
// local file collection
|
||||
fileId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: 'dataset.files'
|
||||
},
|
||||
// web link collection
|
||||
rawLink: String,
|
||||
// api collection
|
||||
apiFileId: String,
|
||||
// external collection
|
||||
externalFileId: String,
|
||||
externalFileUrl: String, // external import url
|
||||
|
||||
// metadata
|
||||
rawTextLength: Number,
|
||||
hashRawText: String,
|
||||
externalFileUrl: String, // external import url
|
||||
metadata: {
|
||||
type: Object,
|
||||
default: {}
|
||||
// metadata
|
||||
rawTextLength: Number,
|
||||
hashRawText: String,
|
||||
metadata: {
|
||||
type: Object,
|
||||
default: {}
|
||||
}
|
||||
},
|
||||
{
|
||||
// Auto update updateTime
|
||||
timestamps: {
|
||||
updatedAt: 'updateTime'
|
||||
}
|
||||
}
|
||||
});
|
||||
);
|
||||
|
||||
try {
|
||||
// auth file
|
||||
|
||||
@@ -1,17 +1,19 @@
|
||||
import type { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type.d';
|
||||
import { MongoDatasetCollection } from './schema';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { MongoDatasetTraining } from '../training/schema';
|
||||
import { urlsFetch } from '../../../common/string/cheerio';
|
||||
import {
|
||||
DatasetCollectionTypeEnum,
|
||||
TrainingModeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { ClientSession } from '../../../common/mongo';
|
||||
import { PushDatasetDataResponse } from '@fastgpt/global/core/dataset/api';
|
||||
import { MongoDatasetCollectionTags } from '../tag/schema';
|
||||
import { readFromSecondary } from '../../../common/mongo/utils';
|
||||
import { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type';
|
||||
import {
|
||||
DatasetCollectionSyncResultEnum,
|
||||
DatasetCollectionTypeEnum,
|
||||
DatasetSourceReadTypeEnum,
|
||||
DatasetTypeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
|
||||
import { readDatasetSourceRawText } from '../read';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { mongoSessionRun } from '../../../common/mongo/sessionRun';
|
||||
import { createCollectionAndInsertData, delCollection } from './controller';
|
||||
|
||||
/**
|
||||
* get all collection by top collectionId
|
||||
@@ -61,148 +63,6 @@ export function getCollectionUpdateTime({ name, time }: { time?: Date; name: str
|
||||
return new Date();
|
||||
}
|
||||
|
||||
/**
 * Resolve a collection and the raw text that currently belongs to it.
 *
 * The collection is taken from `collection` when given, otherwise loaded by
 * `collectionId` with its `datasetId` populated. The text comes from, in order:
 *  1. `newRawText`, when non-empty (title is then '');
 *  2. a fresh fetch of `rawLink`, for link collections that have one;
 *  3. otherwise an empty title and empty text.
 *
 * @returns the collection, the title, the raw text, and `isSameRawText`, which is
 *          truthy only when the text is non-empty and its hash equals the stored
 *          `hashRawText`.
 * Rejects with 'Collection not found' when neither input yields a collection.
 */
export const getCollectionAndRawText = async ({
  collectionId,
  collection,
  newRawText
}: {
  collectionId?: string;
  collection?: CollectionWithDatasetType;
  newRawText?: string;
}) => {
  // A collection passed in directly wins over a lookup by id.
  let found: CollectionWithDatasetType | null = null;
  if (collection) {
    found = collection;
  } else if (collectionId) {
    found = (await MongoDatasetCollection.findById(collectionId).populate(
      'datasetId'
    )) as CollectionWithDatasetType;
  }

  if (!found) {
    return Promise.reject('Collection not found');
  }
  const col = found;

  const loadTitleAndText = async () => {
    // Caller-supplied text is used as-is and carries no title.
    if (newRawText) {
      return { title: '', rawText: newRawText };
    }

    // Link collection: crawl the page again.
    if (col.type === DatasetCollectionTypeEnum.link && col.rawLink) {
      // The dataset-level selector takes precedence over the collection's own one.
      const selector = col.datasetId?.websiteConfig?.selector || col?.metadata?.webPageSelector;
      const pages = await urlsFetch({
        urlList: [col.rawLink],
        selector
      });
      const firstPage = pages[0];

      // NOTE(review): if the fetch yields no result, both values are undefined here and
      // are hashed below as-is — confirm `hashStr` tolerates that.
      return {
        title: firstPage?.title,
        rawText: firstPage?.content
      };
    }

    // File and every other type: nothing is read here.
    return { title: '', rawText: '' };
  };

  const { title, rawText } = await loadTitleAndText();

  // The hash is always computed; the comparison only counts for non-empty text.
  const hashRawText = hashStr(rawText);
  const isSameRawText = rawText && col.hashRawText === hashRawText;

  return {
    collection: col,
    title,
    rawText,
    isSameRawText
  };
};
|
||||
|
||||
/**
 * Reload a collection's data: split its raw text into chunks, push the chunks to the
 * training queue, and record the new text length and hash on the collection.
 *
 * @param collection - collection (with dataset) to reload
 * @param tmbId - stored on every queued training item
 * @param billId - optional, stored on every queued training item
 * @param rawText - optional text to use instead of re-reading the source
 * @param session - passed to both the queue insert and the collection update
 * @returns `{ insertLen }`: number of queued items, or 0 when the text is unchanged
 * Rejects with 'Training model error' when `trainingType` is neither chunk nor qa.
 */
export const reloadCollectionChunks = async ({
  collection,
  tmbId,
  billId,
  rawText,
  session
}: {
  collection: CollectionWithDatasetType;
  tmbId: string;
  billId?: string;
  rawText?: string;
  session: ClientSession;
}): Promise<PushDatasetDataResponse> => {
  const loaded = await getCollectionAndRawText({
    collection,
    newRawText: rawText
  });

  // Nothing to do when the text hash matches the stored one.
  if (loaded.isSameRawText) {
    return { insertLen: 0 };
  }

  const { title, rawText: newRawText, collection: col } = loaded;

  // Split the text; chunk size falls back to 512 when the collection has none.
  const customReg = col.chunkSplitter ? [col.chunkSplitter] : [];
  const { chunks } = splitText2Chunks({
    text: newRawText,
    chunkLen: col.chunkSize || 512,
    customReg
  });

  // Pick the model by training mode: vector model for chunk, agent model for qa.
  const { trainingType } = col;
  if (trainingType !== TrainingModeEnum.chunk && trainingType !== TrainingModeEnum.qa) {
    return Promise.reject('Training model error');
  }
  const model =
    trainingType === TrainingModeEnum.chunk ? col.datasetId.vectorModel : col.datasetId.agentModel;

  // One training item per chunk, in order.
  const trainingItems = chunks.map((chunkText, chunkIndex) => ({
    teamId: col.teamId,
    tmbId,
    datasetId: col.datasetId._id,
    collectionId: col._id,
    billId,
    mode: col.trainingType,
    prompt: '',
    model,
    q: chunkText,
    a: '',
    chunkIndex
  }));
  const inserted = await MongoDatasetTraining.insertMany(trainingItems, { session });

  // Store the new text fingerprint; the name is only replaced when a title was found.
  const collectionPatch = {
    ...(title ? { name: title } : {}),
    rawTextLength: newRawText.length,
    hashRawText: hashStr(newRawText)
  };
  await MongoDatasetCollection.findByIdAndUpdate(col._id, collectionPatch, { session });

  return {
    insertLen: inserted.length
  };
};
|
||||
|
||||
export const createOrGetCollectionTags = async ({
|
||||
tags,
|
||||
datasetId,
|
||||
@@ -268,3 +128,88 @@ export const collectionTagsToTagLabel = async ({
|
||||
})
|
||||
.filter(Boolean);
|
||||
};
|
||||
|
||||
/**
 * Re-read the source of a collection and, when its text changed, replace the collection
 * with a newly created one that carries the same settings.
 *
 * Only link collections and collections of an API dataset can be synced; anything else
 * rejects with `DatasetErrEnum.notSupportSync`. Missing `rawLink`, `apiFileId` or
 * `apiServer` reject with a descriptive string.
 *
 * @returns `DatasetCollectionSyncResultEnum.sameRaw` when the stored hash equals the hash
 *          of the newly read text, otherwise `DatasetCollectionSyncResultEnum.success`.
 */
export const syncCollection = async (collection: CollectionWithDatasetType) => {
  const dataset = collection.datasetId;
  const isLinkCollection = collection.type === DatasetCollectionTypeEnum.link;

  if (!isLinkCollection && dataset.type !== DatasetTypeEnum.apiDataset) {
    return Promise.reject(DatasetErrEnum.notSupportSync);
  }

  // Read the current text from the link or from the API file server.
  let rawText: string;
  if (isLinkCollection) {
    if (!collection.rawLink) return Promise.reject('rawLink is missing');

    rawText = await readDatasetSourceRawText({
      teamId: collection.teamId,
      type: DatasetSourceReadTypeEnum.link,
      sourceId: collection.rawLink,
      selector: collection.metadata?.webPageSelector
    });
  } else {
    if (!collection.apiFileId) return Promise.reject('apiFileId is missing');
    if (!dataset.apiServer) return Promise.reject('apiServer not found');

    rawText = await readDatasetSourceRawText({
      teamId: collection.teamId,
      type: DatasetSourceReadTypeEnum.apiFile,
      sourceId: collection.apiFileId,
      apiServer: dataset.apiServer
    });
  }

  // Unchanged text: skip. A collection without a stored hash is always re-synced.
  const hashRawText = hashStr(rawText);
  if (collection.hashRawText && hashRawText === collection.hashRawText) {
    return DatasetCollectionSyncResultEnum.sameRaw;
  }

  // The replacement keeps identity, source and training settings; only the text
  // fingerprint (length and hash) is new.
  const createCollectionParams = {
    teamId: collection.teamId,
    tmbId: collection.tmbId,
    datasetId: collection.datasetId._id,
    name: collection.name,
    type: collection.type,

    fileId: collection.fileId,
    rawLink: collection.rawLink,
    externalFileId: collection.externalFileId,
    externalFileUrl: collection.externalFileUrl,
    apiFileId: collection.apiFileId,

    rawTextLength: rawText.length,
    hashRawText,

    tags: collection.tags,
    createTime: collection.createTime,

    parentId: collection.parentId,
    trainingType: collection.trainingType,
    chunkSize: collection.chunkSize,
    chunkSplitter: collection.chunkSplitter,
    qaPrompt: collection.qaPrompt,
    metadata: collection.metadata
  };

  // Create the new collection, then delete the old one, sharing one session.
  await mongoSessionRun(async (session) => {
    await createCollectionAndInsertData({
      session,
      dataset,
      rawText: rawText,
      createCollectionParams
    });

    // delRelatedSource is false: the new collection still points at the same source.
    await delCollection({
      collections: [collection],
      delRelatedSource: false,
      session
    });
  });

  return DatasetCollectionSyncResultEnum.success;
};
|
||||
|
||||
@@ -7,6 +7,8 @@ import { TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/string/
|
||||
import axios from 'axios';
|
||||
import { readRawContentByFileBuffer } from '../../common/file/read/utils';
|
||||
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
|
||||
import { APIFileServer } from '@fastgpt/global/core/dataset/apiDataset';
|
||||
import { useApiDatasetRequest } from './apiDataset/api';
|
||||
|
||||
export const readFileRawTextByUrl = async ({
|
||||
teamId,
|
||||
@@ -15,7 +17,7 @@ export const readFileRawTextByUrl = async ({
|
||||
}: {
|
||||
teamId: string;
|
||||
url: string;
|
||||
relatedId?: string;
|
||||
relatedId: string; // externalFileId / apiFileId
|
||||
}) => {
|
||||
const response = await axios({
|
||||
method: 'get',
|
||||
@@ -40,9 +42,9 @@ export const readFileRawTextByUrl = async ({
|
||||
};
|
||||
|
||||
/*
|
||||
fileId - local file, read from mongo
|
||||
link - request
|
||||
externalFile = request read
|
||||
fileId - local file, read from mongo
|
||||
link - request
|
||||
externalFile/apiFile = request read
|
||||
*/
|
||||
export const readDatasetSourceRawText = async ({
|
||||
teamId,
|
||||
@@ -50,14 +52,17 @@ export const readDatasetSourceRawText = async ({
|
||||
sourceId,
|
||||
isQAImport,
|
||||
selector,
|
||||
relatedId
|
||||
externalFileId,
|
||||
apiServer
|
||||
}: {
|
||||
teamId: string;
|
||||
type: DatasetSourceReadTypeEnum;
|
||||
sourceId: string;
|
||||
isQAImport?: boolean;
|
||||
selector?: string;
|
||||
relatedId?: string;
|
||||
|
||||
isQAImport?: boolean; // csv data
|
||||
selector?: string; // link selector
|
||||
externalFileId?: string; // external file dataset
|
||||
apiServer?: APIFileServer; // api dataset
|
||||
}): Promise<string> => {
|
||||
if (type === DatasetSourceReadTypeEnum.fileLocal) {
|
||||
const { rawText } = await readFileContentFromMongo({
|
||||
@@ -75,10 +80,19 @@ export const readDatasetSourceRawText = async ({
|
||||
|
||||
return result[0]?.content || '';
|
||||
} else if (type === DatasetSourceReadTypeEnum.externalFile) {
|
||||
if (!externalFileId) return Promise.reject('FileId not found');
|
||||
const rawText = await readFileRawTextByUrl({
|
||||
teamId,
|
||||
url: sourceId,
|
||||
relatedId
|
||||
relatedId: externalFileId
|
||||
});
|
||||
return rawText;
|
||||
} else if (type === DatasetSourceReadTypeEnum.apiFile) {
|
||||
if (!apiServer) return Promise.reject('apiServer not found');
|
||||
const rawText = await readApiServerFileContent({
|
||||
apiServer,
|
||||
apiFileId: sourceId,
|
||||
teamId
|
||||
});
|
||||
return rawText;
|
||||
}
|
||||
@@ -86,6 +100,18 @@ export const readDatasetSourceRawText = async ({
|
||||
return '';
|
||||
};
|
||||
|
||||
/**
 * Read the content of one file of an API dataset.
 *
 * Builds the request helpers for `apiServer` via `useApiDatasetRequest` and returns
 * whatever their `getFileContent` yields for the given team and file id.
 */
export const readApiServerFileContent = async ({
  apiServer,
  apiFileId,
  teamId
}: {
  apiServer: APIFileServer;
  apiFileId: string;
  teamId: string;
}) => {
  const apiDatasetRequest = useApiDatasetRequest({ apiServer });

  // Called on the helper object so any `this` binding of getFileContent is kept.
  return apiDatasetRequest.getFileContent({ teamId, apiFileId });
};
|
||||
|
||||
export const rawText2Chunks = ({
|
||||
rawText,
|
||||
isQAImport,
|
||||
|
||||
@@ -83,15 +83,18 @@ const DatasetSchema = new Schema({
|
||||
}
|
||||
}
|
||||
},
|
||||
externalReadUrl: {
|
||||
type: String
|
||||
},
|
||||
inheritPermission: {
|
||||
type: Boolean,
|
||||
default: true
|
||||
},
|
||||
apiServer: {
|
||||
type: Object
|
||||
},
|
||||
|
||||
// abandoned
|
||||
externalReadUrl: {
|
||||
type: String
|
||||
},
|
||||
defaultPermission: Number
|
||||
});
|
||||
|
||||
|
||||