Compare commits

...

64 Commits

Author SHA1 Message Date
Archer
608e58ba41 4.8.13 test (#3107)
* perf: select file

* perf: drop files

* fix: imple mode adapt files
2024-11-09 15:07:24 +08:00
Archer
044b0c57f7 4.8.13 test (#3106)
* perf: select file

* perf: drop files

* perf: env template
2024-11-09 14:46:14 +08:00
a.e.
7d7454ef3b feat: source id prefix env (#3103) 2024-11-09 14:44:10 +08:00
Archer
0d658c0114 fix: plugin select files and ai response check (#3104)
* fix: plugin select files and ai response check

* perf: text editor selector;tool call tip;remove invalid image url;

* perf: select file

* perf: drop files
2024-11-09 14:43:15 +08:00
Archer
d58cf44778 4.8.13 test (#3102)
* fix: loop index;edge parent check

* perf: reference invalid check

* fix: ts
2024-11-08 20:53:58 +08:00
Archer
7537330a3b feat: loop start add index (#3101)
* feat: loop start add index

* update doc
2024-11-08 17:21:19 +08:00
heheer
a7f881fc5e array reference check & node ui (#3100) 2024-11-08 17:19:05 +08:00
Archer
fc7304d3cd 4.8.13 test (#3098)
* perf: loop node refresh

* rename context

* comment

* fix: ts

* perf: push chat log
2024-11-08 16:02:33 +08:00
a.e.
aa50174066 feat: support push chat log (#3093)
* feat: custom uid/metadata

* to: custom info

* fix: chat push latest

* feat: add chat log envs

* refactor: move timer to pushChatLog

* fix: using precise log

---------

Co-authored-by: Finley Ge <m13203533462@163.com>
2024-11-08 15:35:27 +08:00
heheer
5b2cc097b0 loop node dynamic height (#3092)
* loop node dynamic height

* fix

* fix
2024-11-08 12:10:15 +08:00
Archer
7a933f73b6 fix: http tool response (#3097) 2024-11-08 11:56:18 +08:00
Archer
3e5d7d0d7a fix: workflow file upload refresh (#3088) 2024-11-07 15:04:46 +08:00
Archer
d15ec1ae69 4.8.13 test (#3087)
* fix: image expired

* fix: datacard navbar ui

* perf: build action
2024-11-07 14:01:00 +08:00
heheer
3b82ed0aa1 feat: support sub route config (#3071)
* feat: support sub route config

* dockerfile

* fix upload

* delete unused code
2024-11-07 13:53:23 +08:00
Archer
dc95ab1dc1 4.8.13 test (#3085)
* perf: workflow node ui

* chat iframe url
2024-11-07 12:03:21 +08:00
Archer
fa2fbc1ddd perf: workflow context split (#3083)
* perf: workflow context split

* perf: context
2024-11-07 10:05:03 +08:00
heheer
10421d73f4 add dispatch try catch (#3075) 2024-11-07 10:05:03 +08:00
Archer
a9ee6e6a5e feat: View will move when workflow check error;fix: ui refresh error when continuous file upload (#3077)
* fix: plugin output check

* fix: ui refresh error when continuous file upload

* feat: View will move when workflow check error
2024-11-07 10:05:03 +08:00
heheer
0f1932aadc node pluginoutput check (#3074) 2024-11-07 10:05:02 +08:00
Archer
65a39e80b8 feat: iframe code block;perf: workflow selector type (#3076)
* feat: iframe code block

* perf: workflow selector type
2024-11-07 10:05:02 +08:00
heheer
0db0cbf376 feat: support array reference multi-select (#3041)
* feat: support array reference multi-select

* fix build

* fix

* fix loop multi-select

* adjust condition

* fix get value

* array and non-array conversion

* fix plugin input

* merge func
2024-11-07 10:05:02 +08:00
heheer
f4dbe7c021 fix ui (#3065)
* fix ui

* fix
2024-11-07 10:05:02 +08:00
Archer
07b3a0a35d perf: dockerfile proxy (#3067) 2024-11-07 10:05:01 +08:00
Archer
fd49ad1342 Adapt findLast api;perf: markdown zh format. (#3066)
* perf: context code

* fix: adapt findLast api

* perf: commercial plugin run error

* perf: markdown zh format
2024-11-07 10:05:01 +08:00
Finley Ge
f90803c558 pref: slow query of full text search (#3044) 2024-11-07 10:05:01 +08:00
papapatrick
49cd2d7a3c add chatType (#3060) 2024-11-07 10:05:01 +08:00
papapatrick
727bd7144c feat: add chat history time label (#3024)
* feat:add chat and logs time

* feat: add chat history time label

* code perf

* code perf

---------

Co-authored-by: 勤劳上班的卑微小张 <jiazhan.zhang@ggimage.com>
2024-11-07 10:05:01 +08:00
Archer
469858877e New file upload (#3058)
* feat: toolNode aiNode readFileNode adapt new version

* update docker-compose

* update tip

* feat: adapt new file version

* perf: file input

* fix: ts
2024-11-07 10:05:01 +08:00
heheer
7a929db0a5 chore(ui): login page & workflow page (#3046)
* login page & number input & multirow select & llm select

* workflow

* adjust nodes
2024-11-07 10:04:58 +08:00
Carson Yang
0645b274da Docs: add docs for loop node (#3069) 2024-11-05 10:11:59 +08:00
heheer
cf8786b194 fix: node version update flicker (#3052) 2024-11-01 15:54:12 +08:00
tzq84
be6269688b feat(voice): add fallback for browsers without MediaSource support (#3043)
- Add MediaSource support detection function
- Implement fallback solution for browsers that don't support MediaSource:
  - For single audio: Read full stream before playing
  - For segmented audio: Wait until all text is received then play as one audio
- Improve code robustness and browser compatibility
2024-11-01 14:50:49 +08:00
Archer
912b264a47 perf: forbid image to base64 (#3038)
* perf: forbid image to base64

* update file upload path

* feat: support promptCall use image

* fix: echarts load

* update doc
2024-11-01 14:29:20 +08:00
Archer
7ef1821557 Update 4812.md (#3051) 2024-11-01 11:16:27 +08:00
Archer
4061b11922 fix: dataset select check (#3048) 2024-10-31 17:25:02 +08:00
Finley Ge
bc171db945 fix: alldataset get dataset without folders. omit the permission check (#3047) 2024-10-31 17:06:56 +08:00
Jiangween
eb365fef44 Update i18n files and Upload component (#3040)
* Update i18n files and Upload component

* 完善 i18n 和优化 Upload.tsx 文件

* 修改Upload.tsx 文件的问题...
2024-10-31 15:25:00 +08:00
Archer
2e7047cb3b Update 4812.md (#3036) 2024-10-31 00:48:36 +08:00
Archer
89a817d1c9 fix: dataset select cannot refresh (#3032)
* fix: dataset select cannot refresh

* update cors
2024-10-30 19:30:31 +08:00
Finley Ge
e788bcaabe fix: only owner or team owner can change app's owner (#3033) 2024-10-30 19:11:54 +08:00
heheer
9219903341 add goole tag manager (#3028) 2024-10-30 19:04:26 +08:00
Finley Ge
6939899baa fix: allDataset (#3031) 2024-10-30 19:02:08 +08:00
papapatrick
732b6d7780 add more market template (#3030)
* add more market template

* delete long translate avatar
2024-10-30 18:42:46 +08:00
heheer
e361279208 feat: add baidu conversion tracking (#3016)
* feat: add baidu conversion tracking

* chore
2024-10-30 14:40:49 +08:00
Archer
946fda0843 4.8.12 test (#3026)
* perf: app list permission

* perf: create dataset tip

* fix: create dataset
2024-10-30 14:23:37 +08:00
Archer
97216eec59 perf: app list permission (#3023)
* perf: app list permission

* perf: create dataset tip
2024-10-30 12:38:16 +08:00
Finley Ge
9f4aa3160e fix: app/dataset auth (#3021) 2024-10-30 11:50:25 +08:00
Archer
8e4084f7ee update text splitter (#3020) 2024-10-30 01:10:35 +08:00
Archer
ee718750e2 perf: dataset data auth (#3015) 2024-10-29 15:44:46 +08:00
Archer
1e02544c3a feat: reset milvus (#3013) 2024-10-29 14:55:27 +08:00
heheer
98771284e4 app list open-api & completion with appid & get history source (#3011)
* app list open-api & completion with appid & get history source

* change default value position
2024-10-29 14:27:29 +08:00
Finley Ge
efc4e860b7 fix/group (#3012)
* fix: app/dataset list private resource bug

* fix: auth owner
2024-10-29 11:22:21 +08:00
Archer
e06d72e86e 4.8.12 test (#3006)
* perf: oneapi error tip

* fix: qps limit condition error

* perf: Plan tip

* fix: permission modal ui

* perf: dataset slider ui

* perf: api key auth tmbId problem

* perf: dataset upload i18n

* fix: http json path check
2024-10-28 22:47:45 +08:00
Finley Ge
b712a821f8 fix: upload file (#2992)
* fix: upload file

* chore: remove wasm, support html image parse

* chore: adjust

* chore: move base64match function into htmlstr2md
2024-10-28 21:44:50 +08:00
Finley Ge
4e3d817b63 fix: milvus (#3004) 2024-10-28 16:06:08 +08:00
heheer
78a85bf847 fix: optimize tool node check in workflow (#3002)
* fix: optimize tool node check in workflow

* comment
2024-10-28 15:37:00 +08:00
ainuoyan
a5b913f1b1 Update chat.md (#2996) 2024-10-27 18:44:52 +08:00
Jiangween
7ee1a340e6 Re-add RAG documentation and images (#2995) 2024-10-27 09:15:44 +08:00
Archer
c722ced68d 4.8.12 test (#2994)
* perf: run loop code

* doc

* fix: mulity loop node will error; loop node variables cannot inherit

* back save tip position

* fix: child workflow runtime

* stream connection
2024-10-25 23:13:53 +08:00
Archer
f89452acdd Group role (#2993)
* feat: app/dataset support group (#2898)

* pref: member-group (#2862)

* feat: group list ordered by updateTime

* fix: transfer ownership of group when deleting member

* fix: i18n fix

* feat: can not set member as admin/owner when user is not active

* fix: GroupInfoModal hover input do not change color

* fix(fe): searchinput do not scroll

* feat: app collaborator with group, remove default permission

* feat: dataset collaborator with group, remove default permission

* chore(test): pref mock

* chore: remove useless code

* chore: adjust

* fix: add self as collaborator when creating folder

* fix(fe): folder manage menu do not show when user has write permission
only

* fix: dataset folder create

* feat: Add code comment

* Pref: app move (#2952)

* perf: app schema

* doc

---------

Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
2024-10-25 19:39:11 +08:00
Archer
74d58d562b 4.8.12 test fix (#2988)
* perf: qps limit

* perf: http response data

* perf: json path check

* fix: ts

* loop support reference parent variable
2024-10-25 16:34:26 +08:00
heheer
165fe077bc fix: http raw response undefined (#2981) 2024-10-25 11:53:32 +08:00
Finley Ge
75494f8d01 feat: QPS Limit middleware (#2956)
* feat: QPS Limit middleware

* chore: use request-ip to get client ip

* feat: frequencyLimit schema
2024-10-25 10:08:59 +08:00
papapatrick
bb727b0710 add bing search plugins (#2970) 2024-10-23 22:45:06 +08:00
366 changed files with 14390 additions and 5176 deletions

View File

@@ -90,3 +90,45 @@ jobs:
-t ${Docker_Hub_Tag} \
-t ${Docker_Hub_Latest} \
.
build-fastgpt-images-child-route:
runs-on: ubuntu-20.04
steps:
# Set tag
- name: Set image name and tag
run: |
if [[ "${{ github.ref_name }}" == "main" ]]; then
echo "Git_Tag=ghcr.io/${{ github.repository_owner }}/fastgpt-child-route:latest" >> $GITHUB_ENV
echo "Git_Latest=ghcr.io/${{ github.repository_owner }}/fastgpt-child-route:latest" >> $GITHUB_ENV
echo "Ali_Tag=${{ secrets.ALI_IMAGE_NAME }}/fastgpt-child-route:latest" >> $GITHUB_ENV
echo "Ali_Latest=${{ secrets.ALI_IMAGE_NAME }}/fastgpt-child-route:latest" >> $GITHUB_ENV
echo "Docker_Hub_Tag=${{ secrets.DOCKER_IMAGE_NAME }}/fastgpt-child-route:latest" >> $GITHUB_ENV
echo "Docker_Hub_Latest=${{ secrets.DOCKER_IMAGE_NAME }}/fastgpt-child-route:latest" >> $GITHUB_ENV
else
echo "Git_Tag=ghcr.io/${{ github.repository_owner }}/fastgpt-child-route:${{ github.ref_name }}" >> $GITHUB_ENV
echo "Git_Latest=ghcr.io/${{ github.repository_owner }}/fastgpt-child-route:latest" >> $GITHUB_ENV
echo "Ali_Tag=${{ secrets.ALI_IMAGE_NAME }}/fastgpt-child-route:${{ github.ref_name }}" >> $GITHUB_ENV
echo "Ali_Latest=${{ secrets.ALI_IMAGE_NAME }}/fastgpt-child-route:latest" >> $GITHUB_ENV
echo "Docker_Hub_Tag=${{ secrets.DOCKER_IMAGE_NAME }}/fastgpt-child-route:${{ github.ref_name }}" >> $GITHUB_ENV
echo "Docker_Hub_Latest=${{ secrets.DOCKER_IMAGE_NAME }}/fastgpt-child-route:latest" >> $GITHUB_ENV
fi
- name: Build and publish image for main branch or tag push event
env:
DOCKER_REPO_TAGGED: ${{ env.DOCKER_REPO_TAGGED }}
run: |
docker buildx build \
-f projects/app/Dockerfile \
--platform linux/amd64,linux/arm64 \
--build-arg base_url=fastai \
--label "org.opencontainers.image.source=https://github.com/${{ github.repository_owner }}/FastGPT" \
--label "org.opencontainers.image.description=fastgpt image" \
--push \
--cache-from=type=local,src=/tmp/.buildx-cache \
--cache-to=type=local,dest=/tmp/.buildx-cache \
-t ${Git_Tag} \
-t ${Git_Latest} \
-t ${Ali_Tag} \
-t ${Ali_Latest} \
-t ${Docker_Hub_Tag} \
-t ${Docker_Hub_Latest} \
.

Binary file not shown.

After

Width:  |  Height:  |  Size: 381 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 369 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 249 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 62 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 145 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 288 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 162 KiB

View File

@@ -0,0 +1,331 @@
---
title: '知识库基础原理介绍'
description: '本节详细介绍RAG模型的核心机制、应用场景及其在生成任务中的优势与局限性。'
icon: 'language'
draft: false
toc: true
weight: 106
---
[RAG文档](https://huggingface.co/docs/transformers/model_doc/rag)
# 1. 引言
随着自然语言处理（NLP）技术的迅猛发展，生成式语言模型（如GPT、BART等）在多种文本生成任务中表现卓越，尤其在语言生成和上下文理解方面。然而，纯生成模型在处理事实类任务时存在一些固有的局限性。例如，由于这些模型依赖于固定的预训练数据，它们在回答需要最新或实时信息的问题时，可能会出现“编造”信息的现象，导致生成结果不准确或缺乏事实依据。此外，生成模型在面对长尾问题和复杂推理任务时，常因缺乏特定领域的外部知识支持而表现不佳，难以提供足够的深度和准确性。
与此同时，检索模型（Retriever）能够通过在海量文档中快速找到相关信息，解决事实查询的问题。然而，传统检索模型（如BM25）在面对模糊查询或跨域问题时，往往只能返回孤立的结果，无法生成连贯的自然语言回答。由于缺乏上下文推理能力，检索模型生成的答案通常不够连贯和完整。
为了解决这两类模型的不足，检索增强生成模型（Retrieval-Augmented Generation，RAG）应运而生。RAG通过结合生成模型和检索模型的优势，实时从外部知识库中获取相关信息，并将其融入生成任务中，确保生成的文本既具备上下文连贯性，又包含准确的知识。这种混合架构在智能问答、信息检索与推理、以及领域特定的内容生成等场景中表现尤为出色。
## 1.1 RAG的定义
RAG是一种将信息检索与生成模型相结合的混合架构。首先，检索器从外部知识库或文档集中获取与用户查询相关的内容片段；然后，生成器基于这些检索到的内容生成自然语言输出，确保生成的内容既信息丰富，又具备高度的相关性和准确性。
# 2. RAG模型的核心机制
RAG 模型由两个主要模块构成：检索器（Retriever）与生成器（Generator）。这两个模块相互配合，确保生成的文本既包含外部的相关知识，又具备自然流畅的语言表达。
## 2.1 检索器（Retriever）
检索器的主要任务是从一个外部知识库或文档集中获取与输入查询最相关的内容。在RAG中，常用的技术包括：
- 向量检索：如BERT向量等，它通过将文档和查询转化为向量空间中的表示，并使用相似度计算来进行匹配。向量检索的优势在于能够更好地捕捉语义相似性，而不仅仅是依赖于词汇匹配。
- 传统检索算法：如BM25，主要基于词频和逆文档频率（TF-IDF）的加权搜索模型来对文档进行排序和检索。BM25适用于处理较为简单的匹配任务，尤其是当查询和文档中的关键词有直接匹配时。
RAG中检索器的作用是为生成器提供一个上下文背景，使生成器能够基于这些检索到的文档片段生成更为相关的答案。
## 2.2 生成器（Generator）
生成器负责生成最终的自然语言输出。在RAG系统中，常用的生成器包括：
- BART：BART是一种序列到序列的生成模型，专注于文本生成任务，可以通过不同层次的噪声处理来提升生成的质量。
- GPT系列：GPT是一个典型的预训练语言模型，擅长生成流畅自然的文本。它通过大规模数据训练，能够生成相对准确的回答，尤其在任务-生成任务中表现尤为突出。
生成器在接收来自检索器的文档片段后,会利用这些片段作为上下文,并结合输入的查询,生成相关且自然的文本回答。这确保了模型的生成结果不仅仅基于已有的知识,还能够结合外部最新的信息。
## 2.3 RAG的工作流程
RAG模型的工作流程可以总结为以下几个步骤
1. 输入查询:用户输入问题,系统将其转化为向量表示。
2. 文档检索检索器从知识库中提取与查询最相关的文档片段通常使用向量检索技术或BM25等传统技术进行。
3. 生成答案:生成器接收检索器提供的片段,并基于这些片段生成自然语言答案。生成器不仅基于原始的用户查询,还会利用检索到的片段提供更加丰富、上下文相关的答案。
4. 输出结果:生成的答案反馈给用户,这个过程确保了用户能够获得基于最新和相关信息的准确回答。
# 3. RAG模型的工作原理
## 3.1 检索阶段
在RAG模型中用户的查询首先被转化为向量表示然后在知识库中执行向量检索。通常检索器采用诸如BERT等预训练模型生成查询和文档片段的向量表示并通过相似度计算如余弦相似度匹配最相关的文档片段。RAG的检索器不仅仅依赖简单的关键词匹配而是采用语义级别的向量表示从而在面对复杂问题或模糊查询时能够更加准确地找到相关知识。这一步骤对于最终生成的回答至关重要因为检索的效率和质量直接决定了生成器可利用的上下文信息 。
## 3.2 生成阶段
生成阶段是RAG模型的核心部分生成器负责基于检索到的内容生成连贯且自然的文本回答。RAG中的生成器如BART或GPT等模型结合用户输入的查询和检索到的文档片段生成更加精准且丰富的答案。与传统生成模型相比RAG的生成器不仅能够生成语言流畅的回答还可以根据外部知识库中的实际信息提供更具事实依据的内容从而提高了生成的准确性 。
## 3.3 多轮交互与反馈机制
RAG模型在对话系统中能够有效支持多轮交互。每一轮的查询和生成结果会作为下一轮的输入系统通过分析和学习用户的反馈逐步优化后续查询的上下文。通过这种循环反馈机制RAG能够更好地调整其检索和生成策略使得在多轮对话中生成的答案越来越符合用户的期望。此外多轮交互还增强了RAG在复杂对话场景中的适应性使其能够处理跨多轮的知识整合和复杂推理 。
# 4. RAG的优势与局限
## 4.1 优势
- 信息完整性RAG 模型结合了检索与生成技术使得生成的文本不仅语言自然流畅还能够准确利用外部知识库提供的实时信息。这种方法能够显著提升生成任务的准确性特别是在知识密集型场景下如医疗问答或法律意见生成。通过从知识库中检索相关文档RAG 模型避免了生成模型“编造”信息的风险,确保输出更具真实性 。
- 知识推理能力RAG 能够利用大规模的外部知识库进行高效检索并结合这些真实数据进行推理生成基于事实的答案。相比传统生成模型RAG 能处理更为复杂的任务特别是涉及跨领域或跨文档的推理任务。例如法律领域的复杂判例推理或金融领域的分析报告生成都可以通过RAG的推理能力得到优化 。
- 领域适应性强RAG 具有良好的跨领域适应性能够根据不同领域的知识库进行特定领域内的高效检索和生成。例如在医疗、法律、金融等需要实时更新和高度准确性的领域RAG 模型的表现优于仅依赖预训练的生成模型 。
## 4.2 局限
RAG检索增强生成模型通过结合检索器和生成器实现了在多种任务中知识密集型内容生成的突破性进展。然而尽管其具有较强的应用潜力和跨领域适应能力但在实际应用中仍然面临着一些关键局限限制了其在大规模系统中的部署和优化。以下是RAG模型的几个主要局限性
#### 4.2.1 检索器的依赖性与质量问题
RAG模型的性能很大程度上取决于检索器返回的文档质量。由于生成器主要依赖检索器提供的上下文信息如果检索到的文档片段不相关、不准确生成的文本可能出现偏差甚至产生误导性的结果。尤其在多模糊查询或跨领域检索的情况下检索器可能无法找到合适的片段这将直接影响生成内容的连贯性和准确性。
- 挑战当知识库庞大且内容多样时如何提高检索器在复杂问题下的精确度是一大挑战。当前的方法如BM25等在特定任务上有局限尤其是在面对语义模糊的查询时传统的关键词匹配方式可能无法提供语义上相关的内容。
- 解决途径引入混合检索技术如结合稀疏检索BM25与密集检索如向量检索。例如[Faiss](https://fael3z0zfze.feishu.cn/wiki/LULawsUufitGvWkDjx3cKJqHnle?from=from_copylink)的底层实现允许通过BERT等模型生成密集向量表示显著提升语义级别的匹配效果。通过这种方式检索器可以捕捉深层次的语义相似性减少无关文档对生成器的负面影响。
#### 4.2.2 生成器的计算复杂度与性能瓶颈
RAG模型将检索和生成模块结合尽管生成结果更加准确但也大大增加了模型的计算复杂度。尤其在处理大规模数据集或长文本时生成器需要处理来自多个文档片段的信息导致生成时间明显增加推理速度下降。对于实时问答系统或其他需要快速响应的应用场景这种高计算复杂度是一个主要瓶颈。
- 挑战当知识库规模扩大时检索过程中的计算开销以及生成器在多片段上的整合能力都会显著影响系统的效率。同时生成器也面临着资源消耗的问题尤其是在多轮对话或复杂生成任务中GPU和内存的消耗会成倍增加。
- 解决途径:使用模型压缩技术和知识蒸馏来减少生成器的复杂度和推理时间。此外,分布式计算与模型并行化技术的引入,如[DeepSpeed](https://www.deepspeed.ai/)和模型压缩工具,可以有效应对生成任务的高计算复杂度,提升大规模应用场景中的推理效率。
#### 4.2.3 知识库的更新与维护
RAG模型通常依赖于一个预先建立的外部知识库该知识库可能包含文档、论文、法律条款等各类信息。然而知识库内容的时效性和准确性直接影响到RAG生成结果的可信度。随着时间推移知识库中的内容可能过时导致生成的回答不能反映最新的信息。这对于需要实时信息的场景如医疗、金融尤其明显。
- 挑战:知识库需要频繁更新,但手动更新知识库既耗时又容易出错。如何在不影响系统性能的情况下实现知识库的持续自动更新是当前的一大挑战。
- 解决途径利用自动化爬虫和信息提取系统可以实现对知识库的自动化更新例如Scrapy等爬虫框架可以自动抓取网页数据并更新知识库。结合[动态索引技术](https://arxiv.org/pdf/2102.03315),可以帮助检索器实时更新索引,确保知识库反映最新信息。同时,结合增量学习技术,生成器可以逐步吸收新增的信息,避免生成过时答案。此外,动态索引技术也可以帮助检索器实时更新索引,确保知识库检索到的文档反映最新的内容。
#### 4.2.4 生成内容的可控性与透明度
RAG模型结合了检索与生成模块在生成内容的可控性和透明度上存在一定问题。特别是在复杂任务或多义性较强的用户输入情况下生成器可能会基于不准确的文档片段生成错误的推理导致生成的答案偏离实际问题。此外由于RAG模型的“黑箱”特性用户难以理解生成器如何利用检索到的文档信息这在高敏感领域如法律或医疗中尤为突出可能导致用户对生成内容产生不信任感。
- 挑战:模型透明度不足使得用户难以验证生成答案的来源和可信度。对于需要高可解释性的任务(如医疗问诊、法律咨询等),无法追溯生成答案的知识来源会导致用户不信任模型的决策。
- 解决途径：为提高透明度，可以引入可解释性AI（XAI）技术，如LIME或SHAP（[链接](https://github.com/marcotcr/lime)），为每个生成答案提供详细的溯源信息，展示所引用的知识片段。这种方法能够帮助用户理解模型的推理过程，从而增强对模型输出的信任。此外，针对生成内容的控制，可以通过加入规则约束或用户反馈机制，逐步优化生成器的输出，确保生成内容更加可信。
# 5. RAG整体改进方向
RAG模型的整体性能依赖于知识库的准确性和检索的效率因此在数据采集、内容分块、精准检索和回答生成等环节进行优化是提升模型效果的关键。通过加强数据来源、改进内容管理、优化检索策略及提升回答生成的准确性RAG模型能够更加适应复杂且动态的实际应用需求。
## 5.1 数据采集与知识库构建
RAG模型的核心依赖在于知识库的数据质量和广度知识库在某种程度上充当着“外部记忆”的角色。因此高质量的知识库不仅应包含广泛领域的内容更要确保数据来源的权威性、可靠性以及时效性。知识库的数据源应涵盖多种可信的渠道例如科学文献数据库如PubMed、IEEE Xplore、权威新闻媒体、行业标准和报告等这样才能提供足够的背景信息支持RAG在不同任务中的应用。此外为了确保RAG模型能够提供最新的回答知识库需要具备自动化更新的能力以避免数据内容老旧导致回答失准或缺乏现实参考。
- 挑战:
- 尽管数据采集是构建知识库的基础,但在实际操作中仍存在以下几方面的不足:
- 数据采集来源单一或覆盖不全
1. RAG模型依赖多领域数据的支持然而某些知识库过度依赖单一或有限的数据源通常集中在某些领域导致在多任务需求下覆盖不足。例如依赖医学领域数据而缺乏法律和金融数据会使RAG模型在跨领域问答中表现不佳。这种局限性削弱了RAG模型在处理不同主题或多样化查询时的准确性使得系统在应对复杂或跨领域任务时能力欠缺。
- 数据质量参差不齐
1. 数据源的质量差异直接影响知识库的可靠性。一些数据可能来源于非权威或低质量渠道存在偏见、片面或不准确的内容。这些数据若未经筛选录入知识库会导致RAG模型生成偏差或不准确的回答。例如在医学领域中如果引入未经验证的健康信息可能导致模型给出误导性回答产生负面影响。数据质量不一致的知识库会大大降低模型输出的可信度和适用性。
- 缺乏定期更新机制
1. 许多知识库缺乏自动化和频繁的更新机制特别是在信息变动频繁的领域如法律、金融和科技。若知识库长期未更新则RAG模型无法提供最新信息生成的回答可能过时或不具备实时参考价值。对于用户而言特别是在需要实时信息的场景下滞后的知识库会显著影响RAG模型的可信度和用户体验。
- 数据处理耗时且易出错
1. 数据的采集、清洗、分类和结构化处理是一项繁琐而复杂的任务尤其是当数据量巨大且涉及多种格式时。通常大量数据需要人工参与清洗和结构化而自动化处理流程也存在缺陷可能会产生错误或遗漏关键信息。低效和易出错的数据处理流程会导致知识库内容不准确、不完整进而影响RAG模型生成的答案的准确性和连贯性。
- 数据敏感性和隐私问题
1. 一些特定领域的数据(如医疗、法律、金融)包含敏感信息,未经适当的隐私保护直接引入知识库可能带来隐私泄露的风险。此外,某些敏感数据需要严格的授权和安全存储,以确保在知识库使用中避免违规或隐私泄漏。若未能妥善处理数据隐私问题,不仅会影响系统的合规性,还可能对用户造成严重后果。
- 改进:
- 针对以上不足,可以从以下几个方面进行改进,以提高数据采集和知识库构建的有效性:
- 扩大数据源覆盖范围,增加数据的多样性
1. 具体实施将知识库的数据源扩展至多个重要领域确保包含医疗、法律、金融等关键领域的专业数据库如PubMed、LexisNexis和金融数据库。使用具有开放许可的开源数据库和经过认证的数据确保来源多样化且权威性强。
2. 目的与效果通过跨领域数据覆盖知识库的广度和深度得以增强确保RAG模型能够在多任务场景下提供可靠回答。借助多领域合作机构的数据支持在应对多样化需求时将更具优势。
- 构建数据质量审查与过滤机制
1. 具体实施:采用自动化数据质量检测算法,如文本相似度检查、情感偏差检测等工具,结合人工审查过滤不符合标准的数据。为数据打分并构建“数据可信度评分”,基于来源可信度、内容完整性等指标筛选数据。
2. 目的与效果减少低质量、偏见数据的干扰确保知识库内容的可靠性。此方法保障了RAG模型输出的权威性特别在回答复杂或专业问题时用户能够获得更加精准且中立的答案。
- 实现知识库的自动化更新
1. 具体实施:引入自动化数据更新系统,如网络爬虫,定期爬取可信站点、行业数据库的最新数据,并利用变化检测算法筛选出与已有知识库重复或已失效的数据。更新机制可以结合智能筛选算法,仅采纳与用户查询高相关性或时效性强的数据。
2. 目的与效果:知识库保持及时更新,确保模型在快速变化的领域(如金融、政策、科技)中提供最新信息。用户体验将因此大幅提升,特别是在需要动态或最新信息的领域,输出的内容将更具时效性。
- 采用高效的数据清洗与分类流程
1. 具体实施使用自然语言处理技术如BERT等模型进行数据分类、实体识别和文本去噪结合去重算法清理重复内容。采用自动化的数据标注和分类算法将不同数据类型分领域存储。
2. 目的与效果数据清洗和分领域管理可以大幅提高数据处理的准确性减少低质量数据的干扰。此改进确保RAG模型的回答生成更流畅、上下文更连贯提升用户对生成内容的理解和信赖。
- 强化数据安全与隐私保护措施
1. 具体实施:针对医疗、法律等敏感数据,采用去标识化处理技术(如数据脱敏、匿名化等),并结合差分隐私保护。建立数据权限管理和加密存储机制,对敏感信息进行严格管控。
2. 目的与效果:在保护用户隐私的前提下,确保使用的数据合规、安全,适用于涉及个人或敏感数据的应用场景。此措施进一步保证了系统的法律合规性,并有效防止隐私泄露风险。
- 优化数据格式与结构的标准化
1. 具体实施建立统一的数据格式与标准编码格式例如使用JSON、XML或知识图谱形式组织结构化数据以便于检索系统在查询时高效利用。同时使用知识图谱等结构化工具将复杂数据间的关系进行系统化存储。
2. 目的与效果提高数据检索效率确保模型在生成回答时能够高效使用数据的关键信息。标准化的数据结构支持高效的跨领域检索并提高了RAG模型的内容准确性和知识关系的透明度。
- 用户反馈机制
1. 具体实施:通过用户反馈系统,记录用户对回答的满意度、反馈意见及改进建议。使用机器学习算法从反馈中识别知识库中的盲区与信息误差,反馈至数据管理流程中进行更新和优化。
2. 目的与效果利用用户反馈作为数据质量的调整依据帮助知识库持续优化内容。此方法不仅提升了RAG模型的实际效用还使知识库更贴合用户需求确保输出内容始终符合用户期望。
## 5.2 数据分块与内容管理
RAG模型的数据分块与内容管理是优化检索与生成流程的关键。合理的分块策略能够帮助模型高效定位目标信息并在回答生成时提供清晰的上下文支持。通常情况下将数据按段落、章节或主题进行分块不仅有助于检索效率的提升还能避免冗余数据对生成内容造成干扰。尤其在复杂、长文本中适当的分块策略可保证模型生成的答案具备连贯性、精确性避免出现内容跳跃或上下文断裂的问题。
- 挑战:
- 在实际操作中,数据分块与内容管理环节存在以下问题:
- 分块不合理导致的信息断裂
1. 部分文本过度切割或分块策略不合理,可能导致信息链条被打断,使得模型在回答生成时缺乏必要的上下文支持。这会使生成内容显得零散,不具备连贯性,影响用户对答案的理解。例如,将法律文本或技术文档随意切割成小段落会导致重要的上下文关系丢失,降低模型的回答质量。
- 冗余数据导致生成内容重复或信息过载
1. 数据集中往往包含重复信息,若不去重或优化整合,冗余数据可能导致生成内容的重复或信息过载。这不仅影响用户体验,还会浪费计算资源。例如,在新闻数据或社交媒体内容中,热点事件的描述可能重复出现,模型在生成回答时可能反复引用相同信息。
- 分块粒度选择不当影响检索精度
1. 如果分块粒度过细,模型可能因缺乏足够的上下文而生成不准确的回答;若分块过大,检索时将难以定位具体信息,导致回答内容冗长且含有无关信息。选择适当的分块粒度对生成答案的准确性和相关性至关重要,特别是在问答任务中需要精确定位答案的情况下,粗放的分块策略会明显影响用户的阅读体验和回答的可读性。
- 难以实现基于主题或内容逻辑的分块
1. 某些复杂文本难以直接按主题或逻辑结构进行分块,尤其是内容密集或领域专业性较强的数据。基于关键字或简单的规则切割往往难以识别不同主题和信息层次,导致模型在回答生成时信息杂乱。对内容逻辑或主题的错误判断,尤其是在医学、金融等场景下,会大大影响生成答案的准确度和专业性。
- 改进:
- 为提高数据分块和内容管理的有效性,可以从以下几方面进行优化:
- 引入NLP技术进行自动化分块和上下文分析
1. 具体实施借助自然语言处理NLP技术通过句法分析、语义分割等方式对文本进行逻辑切割以确保分块的合理性。可以基于BERT等预训练模型实现主题识别和上下文分析确保每个片段均具备完整的信息链避免信息断裂。
2. 目的与效果:确保文本切割基于逻辑或语义关系,避免信息链条被打断,生成答案时能够更具连贯性,尤其适用于长文本和复杂结构的内容,使模型在回答时上下文更加完整、连贯。
- 去重与信息整合,优化内容简洁性
1. 具体实施利用相似度算法如TF-IDF、余弦相似度识别冗余内容并结合聚类算法自动合并重复信息。针对内容频繁重复的情况可设置内容标记或索引避免生成时多次引用相同片段。
2. 目的与效果:通过去重和信息整合,使数据更具简洁性,避免生成答案中出现重复信息。减少冗余信息的干扰,使用户获得简明扼要的回答,增强阅读体验,同时提升生成过程的计算效率。
- 根据任务需求动态调整分块粒度
1. 具体实施:根据模型任务的不同,设置动态分块策略。例如,在问答任务中对关键信息较短的内容可采用小粒度分块,而在长文本或背景性内容中采用较大粒度。分块策略可基于查询需求或内容复杂度自动调整。
2. 目的与效果:分块粒度的动态调整确保模型在检索和生成时既能准确定位关键内容,又能为回答提供足够的上下文支持,提升生成内容的精准性和相关性,确保用户获取的信息既准确又不冗长。
- 引入基于主题的分块方法以提升上下文完整性
1. 具体实施使用主题模型如LDA或嵌入式文本聚类技术对文本内容按主题进行自动分类与分块。基于相同主题内容的聚合分块有助于模型识别不同内容层次尤其适用于复杂的学术文章或多章节的长篇报告。
2. 目的与效果:基于主题的分块确保同一主题的内容保持在一个片段内,提升模型在回答生成时的上下文连贯性。适用于主题复杂、层次清晰的内容场景,提高回答的专业性和条理性,使用户更容易理解生成内容的逻辑关系。
- 实时评估分块策略与内容呈现效果的反馈机制
1. 具体实施:通过用户反馈机制和生成质量评估系统实时监测生成内容的连贯性和准确性。对用户反馈中涉及分块效果差的部分进行重新分块,通过用户使用数据优化分块策略。
2. 目的与效果:用户反馈帮助识别不合理的分块和内容呈现问题,实现分块策略的动态优化,持续提升生成内容的质量和用户满意度。
## 5.3 检索优化
在RAG模型中检索模块决定了生成答案的相关性和准确性。有效的检索策略可确保模型获取到最适合的上下文片段使生成的回答更加精准且贴合查询需求。常用的混合检索策略如BM25和DPR结合能够在关键词匹配和语义检索方面实现优势互补BM25适合高效地处理关键字匹配任务而DPR在理解深层语义上表现更为优异。因此合理选用检索策略有助于在不同任务场景下达到计算资源和检索精度的平衡以高效提供相关上下文供生成器使用。
- 挑战:
- 检索优化过程中,仍面临以下不足之处:
- 检索策略单一导致的回答偏差
1. 当仅依赖BM25或DPR等单一技术时模型可能难以平衡关键词匹配与语义理解。BM25在处理具象关键字时表现良好但在面对复杂、含义丰富的语义查询时效果欠佳相反DPR虽然具备深度语义匹配能力但对高频关键词匹配的敏感度较弱。检索策略单一将导致模型难以适应复杂的用户查询回答中出现片面性或不够精准的情况。
- 检索效率与资源消耗的矛盾
1. 检索模块需要在短时间内处理大量查询而语义检索如DPR需要进行大量的计算和存储操作计算资源消耗高影响系统响应速度。特别是对于需要实时响应的应用场景DPR的计算复杂度往往难以满足实际需求因此在实时性和资源利用率上亟需优化。
- 检索结果的冗余性导致内容重复
1. 当检索策略未对结果进行去重或排序优化时RAG模型可能从知识库中检索出相似度高但内容冗余的文档片段。这会导致生成的回答中包含重复信息影响阅读体验同时增加无效信息的比例使用户难以迅速获取核心答案。
- 不同任务需求下检索策略的适配性差
1. RAG模型应用场景丰富但不同任务对检索精度、速度和上下文长度的需求不尽相同。固定检索策略难以灵活应对多样化的任务需求导致在应对不同任务时模型检索效果受限。例如面向精确性较高的医疗问答场景时检索策略应偏向语义准确性而在热点新闻场景中则应偏重检索速度。
- 改进:
- 针对上述不足,可以从以下几个方面优化检索模块:
- 结合BM25与DPR的混合检索策略
1. 具体实施采用BM25进行关键词初筛快速排除无关信息然后使用DPR进行深度语义匹配筛选。这样可以有效提升检索精度平衡关键词匹配和语义理解。
2. 目的与效果:通过多层筛选过程,确保检索结果在语义理解和关键词匹配方面互补,提升生成内容的准确性,特别适用于多意图查询或复杂的长文本检索。
- 优化检索效率,控制计算资源消耗
1. 具体实施利用缓存机制存储近期高频查询结果避免对相似查询的重复计算。同时可基于分布式计算结构将DPR的语义计算任务分散至多节点并行处理。
2. 目的与效果:缓存与分布式计算结合可显著减少检索计算压力,使系统能够在有限资源下提高响应速度,适用于高并发、实时性要求高的应用场景。
- 引入去重和排序优化算法
1. 具体实施:在检索结果中应用余弦相似度去重算法,筛除冗余内容,并基于用户偏好或时间戳对检索结果排序,以确保输出内容的丰富性和新鲜度。
2. 目的与效果:通过去重和优化排序,确保生成内容更加简洁、直接,减少重复信息的干扰,提高用户获取信息的效率和体验。
- 动态调整检索策略适应多任务需求
1. 具体实施:设置不同检索策略模板,根据任务类型自动调整检索权重、片段长度和策略组合。例如在医疗场景中偏向语义检索,而在金融新闻场景中更重视快速关键词匹配。
2. 目的与效果动态调整检索策略使RAG模型更加灵活能够适应不同任务需求确保检索的精准性和生成答案的上下文适配性显著提升多场景下的用户体验。
- 借助Haystack等检索优化框架
1. 具体实施在RAG模型中集成Haystack框架以实现更高效的检索效果并利用框架中的插件生态系统来增强检索模块的可扩展性和可调节性。
2. 目的与效果Haystack提供了检索和生成的整合接口有助于快速优化检索模块并适应复杂多样的用户需求在多任务环境中提供更稳定的性能表现。
## 5.4 回答生成与优化
在RAG模型中生成器负责基于检索模块提供的上下文为用户查询生成自然语言答案。生成内容的准确性和逻辑性直接决定了用户的体验因此优化生成器的表现至关重要。通过引入知识图谱等结构化信息生成器能够更准确地理解和关联上下文从而生成逻辑连贯、准确的回答。此外生成器的生成逻辑可结合用户反馈持续优化使回答风格和内容更加符合用户需求。
- 挑战:
- 在回答生成过程中RAG模型仍面临以下不足
- 上下文不充分导致的逻辑不连贯
1. 当生成器在上下文缺失或信息不完整的情况下生成回答时,生成内容往往不够连贯,特别是在处理复杂、跨领域任务时。这种缺乏上下文支持的问题,容易导致生成器误解或忽略关键信息,最终生成内容的逻辑性和完整性欠佳。如在医学场景中,若生成器缺少对病例或症状的全面理解,可能导致回答不准确或不符合逻辑,影响专业性和用户信任度。
- 专业领域回答的准确性欠佳
1. 在医学、法律等高专业领域中,生成器的回答需要高度的准确性。然而,生成器可能因缺乏特定知识而生成不符合领域要求的回答,出现内容偏差或理解错误,尤其在涉及专业术语和复杂概念时更为明显。如在法律咨询中,生成器可能未能正确引用相关法条或判例,导致生成的答案不够精确,甚至可能产生误导。
- 难以有效整合多轮用户反馈
1. 生成器缺乏有效机制来利用多轮用户反馈进行自我优化。用户反馈可能涉及回答内容的准确性、逻辑性以及风格适配等方面,但生成器在连续对话中缺乏充分的调节机制,难以持续调整生成策略和回答风格。如在客服场景中,生成器可能连续生成不符合用户需求的回答,降低了用户满意度。
- 生成内容的可控性和一致性不足
1. 在特定领域回答生成中,生成器的输出往往不具备足够的可控性和一致性。由于缺乏领域特定的生成规则和约束,生成内容的专业性和风格一致性欠佳,难以满足高要求的应用场景。如在金融报告生成中,生成内容需要确保一致的风格和术语使用,否则会影响输出的专业性和可信度。
- 改进:
- 针对以上不足,可以从以下方面优化回答生成模块:
- 引入知识图谱与结构化数据,增强上下文理解
1. 具体实施:结合知识图谱或知识库,将医学、法律等专业领域的信息整合到生成过程中。生成器在生成回答时,可以从知识图谱中提取关键信息和关联知识点,确保回答具备连贯的逻辑链条。
2. 目的与效果:知识图谱的引入提升了生成内容的连贯性和准确性,尤其在高专业性领域中,通过丰富的上下文理解,使生成器能够产生符合逻辑的回答。
- 设计专业领域特定的生成规则和约束
1. 具体实施:在生成模型中加入领域特定的生成规则和用语约束,特别针对医学、法律等领域的常见问答场景,设定回答模板、术语库等,以提高生成内容的准确性和一致性。
2. 目的与效果:生成内容更具领域特征,输出风格和内容的专业性增强,有效降低了生成器在专业领域中的回答偏差,满足用户对专业性和可信度的要求。
- 优化用户反馈机制,实现动态生成逻辑调整
1. 具体实施:利用机器学习算法对用户反馈进行分析,从反馈中提取生成错误或用户需求的调整信息,动态调节生成器的生成逻辑和策略。同时,在多轮对话中逐步适应用户的需求和风格偏好。
2. 目的与效果:用户反馈的高效利用能够帮助生成器优化生成内容,提高连续对话中的响应质量,提升用户体验,并使回答更贴合用户需求。
- 引入生成器与检索器的协同优化机制
1. 具体实施:通过协同优化机制,在生成器生成答案之前,允许生成器请求检索器补充缺失的上下文信息。生成器可基于回答需求自动向检索器发起上下文补充请求,从而获取完整的上下文。
2. 目的与效果:协同优化机制保障了生成器在回答时拥有足够的上下文支持,避免信息断层或缺失,提升回答的完整性和准确性。
- 实施生成内容的一致性检测和语义校正
1. 具体实施:通过一致性检测算法对生成内容进行术语、风格的统一管理,并结合语义校正模型检测生成内容是否符合用户需求的逻辑结构。在复杂回答生成中,使用语义校正对不符合逻辑的生成内容进行自动优化。
2. 目的与效果:生成内容具备高度一致性和逻辑性,特别是在多轮对话和专业领域生成中,保障了内容的稳定性和专业水准,提高了生成答案的可信度和用户满意度。
## 5.5 RAG流程
![](/imgs/RAG1.png)
1. 数据加载与查询输入:
1. 用户通过界面或API提交自然语言查询系统接收查询作为输入。
2. 输入被传递至向量化器利用向量化技术如BERT或Sentence Transformer将自然语言查询转换为向量表示。
2. 文档检索:
1. 向量化后的查询会传递给检索器,检索器通过在知识库中查找最相关的文档片段。
2. 检索可以基于稀疏检索技术如BM25或密集检索技术如DPR来提高匹配效率和精度。
3. 生成器处理与自然语言生成:
1. 检索到的文档片段作为生成器的输入生成器如GPT、BART或T5基于查询和文档内容生成自然语言回答。
2. 生成器结合了外部检索结果和预训练模型的语言知识,使回答更加精准、自然。
4. 结果输出:
1. 系统生成的答案通过API或界面返回给用户确保答案连贯且知识准确。
5. 反馈与优化:
1. 用户可以对生成的答案进行反馈,系统根据反馈优化检索与生成过程。
2. 通过微调模型参数或调整检索权重,系统逐步改进其性能,确保未来查询时更高的准确性与效率。
# 6. RAG相关案例整合
[各种分类领域下的RAG](https://github.com/hymie122/RAG-Survey)
# 7. RAG模型的应用
RAG模型已在多个领域得到广泛应用主要包括
## 7.1 智能问答系统中的应用
- RAG通过实时检索外部知识库生成包含准确且详细的答案避免传统生成模型可能产生的错误信息。例如在医疗问答系统中RAG能够结合最新的医学文献生成包含最新治疗方案的准确答案避免生成模型提供过时或错误的建议。这种方法帮助医疗专家快速获得最新的研究成果和诊疗建议提升医疗决策的质量。
- [医疗问答系统案例](https://www.apexon.com/blog/empowering-discovery-the-role-of-rag-architecture-generative-ai-in-healthcare-life-sciences/)
- ![](/imgs/RAG2.png)
- 用户通过Web应用程序发起查询
1. 用户在一个Web应用上输入查询请求这个请求进入后端系统启动了整个数据处理流程。
- 使用Azure AD进行身份验证
1. 系统通过Azure Active Directory (Azure AD) 对用户进行身份验证,确保只有经过授权的用户才能访问系统和数据。
- 用户权限检查:
1. 系统根据用户的组权限由Azure AD管理过滤用户能够访问的内容。这个步骤保证了用户只能看到他们有权限查看的信息。
- Azure AI搜索服务
1. 过滤后的用户查询被传递给Azure AI搜索服务该服务会在已索引的数据库或文档中查找与查询相关的内容。这个搜索引擎通过语义搜索技术检索最相关的信息。
- 文档智能处理:
1. 系统使用OCR光学字符识别和文档提取等技术处理输入的文档将非结构化数据转换为结构化、可搜索的数据便于Azure AI进行检索。
- 文档来源:
1. 这些文档来自预先存储的输入文档集合,这些文档在被用户查询之前已经通过文档智能处理进行了准备和索引。
- Azure Open AI生成响应
1. 在检索到相关信息后数据会被传递到Azure Open AI该模块利用自然语言生成NLG技术根据用户的查询和检索结果生成连贯的回答。
- 响应返回用户:
1. 最终生成的回答通过Web应用程序返回给用户完成整个查询到响应的流程。
- 整个流程展示了Azure AI技术的集成通过文档检索、智能处理以及自然语言生成来处理复杂的查询并确保了数据的安全和合规性。
## 7.2 信息检索与文本生成
- 文本生成RAG不仅可以检索相关文档还能根据这些文档生成总结、报告或文档摘要从而增强生成内容的连贯性和准确性。例如法律领域中RAG可以整合相关法条和判例生成详细的法律意见书确保内容的全面性和严谨性。这在法律咨询和文件生成过程中尤为重要可以帮助律师和法律从业者提高工作效率。
- [法律领域检索增强生成案例](https://www.apexon.com/blog/empowering-discovery-the-role-of-rag-architecture-generative-ai-in-healthcare-life-sciences/)
- 内容总结:
- 背景: 传统的大语言模型 (LLMs) 在生成任务中表现优异但在处理法律领域中的复杂任务时存在局限。法律文档具有独特的结构和术语标准的检索评估基准往往无法充分捕捉这些领域特有的复杂性。为了弥补这一不足LegalBench-RAG 旨在提供一个评估法律文档检索效果的专用基准。
- LegalBench-RAG 的结构:
1. ![](/imgs/RAG3.png)
2. 工作流程:
3. 用户输入问题Q: ?A: ?):用户通过界面输入查询问题,提出需要答案的具体问题。
4. 嵌入与检索模块Embed + Retrieve该模块接收到用户的查询后会对问题进行嵌入将其转化为向量并在外部知识库或文档中执行相似度检索。通过检索算法系统找到与查询相关的文档片段或信息。
5. 生成答案A基于检索到的最相关信息生成模型如GPT或类似的语言模型根据检索的结果生成连贯的自然语言答案。
6. 对比和返回结果:生成的答案会与之前的相关问题答案进行对比,并最终将生成的答案返回给用户。
7. 该基准基于 LegalBench 的数据集,构建了 6858 个查询-答案对,并追溯到其原始法律文档的确切位置。
8. LegalBench-RAG 侧重于精确地检索法律文本中的小段落,而非宽泛的、上下文不相关的片段。
9. 数据集涵盖了合同、隐私政策等不同类型的法律文档,确保涵盖多个法律应用场景。
- 意义: LegalBench-RAG 是第一个专门针对法律检索系统的公开可用的基准。它为研究人员和公司提供了一个标准化的框架,用于比较不同的检索算法的效果,特别是在需要高精度的法律任务中,例如判决引用、条款解释等。
- 关键挑战:
1. RAG 系统的生成部分依赖检索到的信息,错误的检索结果可能导致错误的生成输出。
2. 法律文档的长度和术语复杂性增加了模型检索和生成的难度。
- 质量控制: 数据集的构建过程确保了高质量的人工注释和文本精确性特别是在映射注释类别和文档ID到具体文本片段时进行了多次人工校验。
## 7.3 其它应用场景
RAG还可以应用于多模态生成场景如图像、音频和3D内容生成。例如跨模态应用如ReMoDiffuse和Make-An-Audio利用RAG技术实现不同数据形式的生成。此外在企业决策支持中RAG能够快速检索外部资源如行业报告、市场数据生成高质量的前瞻性报告从而提升企业战略决策的能力。
# 8. 总结
本文档系统阐述了检索增强生成RAG模型的核心机制、优势与应用场景。通过结合生成模型与检索模型RAG解决了传统生成模型在面对事实性任务时的“编造”问题和检索模型难以生成连贯自然语言输出的不足。RAG模型能够实时从外部知识库获取信息使生成内容既包含准确的知识又具备流畅的语言表达适用于医疗、法律、智能问答系统等多个知识密集型领域。
在应用实践中RAG模型虽然有着信息完整性、推理能力和跨领域适应性等显著优势但也面临着数据质量、计算资源消耗和知识库更新等挑战。为进一步提升RAG的性能提出了针对数据采集、内容分块、检索策略优化以及回答生成的全面改进措施如引入知识图谱、优化用户反馈机制、实施高效去重算法等以增强模型的适用性和效率。
RAG在智能问答、信息检索与文本生成等领域展现了出色的应用潜力并在不断发展的技术支持下进一步拓展至多模态生成和企业决策支持等场景。通过引入混合检索技术、知识图谱以及动态反馈机制RAG能够更加灵活地应对复杂的用户需求生成具有事实支撑和逻辑连贯性的回答。未来RAG将通过增强模型透明性与可控性进一步提升在专业领域中的可信度和实用性为智能信息检索与内容生成提供更广泛的应用空间。

View File

@@ -45,7 +45,7 @@ curl --location --request POST 'http://localhost:3000/api/v1/chat/completions' \
"messages": [
{
"role": "user",
"content": "导演是谁",
"content": "导演是谁"
}
]
}'
@@ -526,7 +526,8 @@ curl --location --request POST 'http://localhost:3000/api/core/chat/getHistories
--data-raw '{
"appId": "appId",
"offset": 0,
"pageSize": 20
"pageSize": 20,
"source": "api"
}'
```
@@ -540,6 +541,7 @@ curl --location --request POST 'http://localhost:3000/api/core/chat/getHistories
- appId - 应用 Id
- offset - 偏移量,即从第几条数据开始取
- pageSize - 记录数量
- source - 对话源
{{% /alert %}}
{{< /markdownify >}}

View File

@@ -1,5 +1,5 @@
---
title: 'V4.8.12(进行中)'
title: 'V4.8.12(需要初始化)'
description: 'FastGPT V4.8.12 更新说明'
icon: 'upgrade'
draft: false
@@ -9,6 +9,38 @@ weight: 812
## 更新指南
### 1. 做好数据备份
### 2. 修改镜像
- 更新 FastGPT 镜像 tag: v4.8.12-fix
- 更新 FastGPT 管理端镜像 tag: v4.8.12（fastgpt-pro镜像）
- Sandbox 镜像,可以不更新
### 3. 商业版执行初始化
从任意终端，发起 1 个 HTTP 请求。其中 {{rootkey}} 替换成环境变量里的 `rootkey`；{{host}} 替换成**FastGPT 管理端域名**。
```bash
curl --location --request POST 'https://{{host}}/api/admin/init/4812' \
--header 'rootkey: {{rootkey}}' \
--header 'Content-Type: application/json'
```
会初始化应用和知识库的成员组数据。
### 4. 重构 Milvus 数据
由于 js int64 精度丢失问题，之前私有化使用 milvus 或者 zilliz 的用户，如果存在数据精度丢失的问题，需要重构 Milvus 数据。(可以查看 dataset_datas 表中，indexes 中的 dataId 是否末尾精度丢失)。使用 PG 的用户不需要操作。
从任意终端，发起 1 个 HTTP 请求。其中 {{rootkey}} 替换成环境变量里的 `rootkey`；{{host}} 替换成**FastGPT 主域名**。
```bash
curl --location --request POST 'https://{{host}}/api/admin/resetMilvus' \
--header 'rootkey: {{rootkey}}' \
--header 'Content-Type: application/json'
```
## 更新说明
@@ -23,7 +55,14 @@ weight: 812
9. 新增 - 数据库连接和操作插件
10. 新增 - Cookie 隐私协议提示
11. 新增 - HTTP 节点支持 JSONPath 表达式
12. 修复 - 文件后缀判断,去除 query 影响。
13. 修复 - AI 响应为空时,会造成 LLM 历史记录合并。
14. 修复 - 用户交互节点未阻塞流程
15. 修复 - 新建 APP有时候会导致空指针报错。
12. 新增 - 应用和知识库支持成员组配置权限
13. 优化 - 循环节点支持选择外部节点的变量
14. 优化 - Docx 文件读取中, HTML to Markdown 优化,提高速度和大幅度降低内存消耗
15. 修复 - 文件后缀判断,去除 query 影响。
16. 修复 - AI 响应为空时,会造成 LLM 历史记录合并。
17. 修复 - 用户交互节点未阻塞流程。
18. 修复 - 新建 APP，有时候会导致空指针报错。
19. 修复 - 拥有多个循环节点时,错误运行。
20. 修复 - 循环节点中修改变量,无法传递。
21. 修复 - 非 stream 模式,嵌套子应用/插件执行时无法获取子应用响应。
22. 修复 - 数据分块策略,同时将每个 Markdown 独立分块。

View File

@@ -0,0 +1,25 @@
---
title: 'V4.8.13(进行中)'
description: 'FastGPT V4.8.13 更新说明'
icon: 'upgrade'
draft: false
toc: true
weight: 811
---
## 更新说明
1. 新增 - 数组变量选择支持多选,可以选多个数组或对应的单一数据类型,会自动按选择顺序进行合并。
2. 新增 - 文件上传方案调整,节点直接支持接收文件链接,插件自定义变量支持文件上传。
3. 新增 - 对话记录增加时间显示。
4. 新增 - 工作流校验错误时,跳转至错误节点。
5. 新增 - 循环节点增加下标值。
6. 新增 - 部分对话错误提醒增加翻译。
7. 优化 - 合并多个 system 提示词成 1 个,避免部分模型不支持多个 system 提示词。
8. 优化 - 知识库上传文件,优化报错提示。
9. 优化 - 全文检索语句,减少一轮查询。
10. 优化 - 修改 findLast 为 [...array].reverse().find，适配旧版浏览器。
11. 优化 - Markdown 组件自动空格,避免分割 url 中的中文。
12. 优化 - 工作流上下文拆分,性能优化。
13. 修复 - Dockerfile pnpm install 支持代理。
14. 修复 - BI 图表生成无法写入文件。

View File

@@ -0,0 +1,297 @@
---
title: "循环运行"
description: "FastGPT 循环运行节点介绍和使用"
icon: "input"
draft: false
toc: true
weight: 366
---
## 节点概述
【**循环运行**】节点是 FastGPT V4.8.11 版本新增的一个重要功能模块。它允许工作流对数组类型的输入数据进行迭代处理,每次处理数组中的一个元素,并自动执行后续节点,直到完成整个数组的处理。
这个节点的设计灵感来自编程语言中的循环结构,但以可视化的方式呈现。
![循环运行节点](/imgs/fastgpt-loop-node.png)
> 在程序中,节点可以理解为一个个 Function 或者接口。可以理解为它就是一个**步骤**。将多个节点一个个拼接起来,即可一步步的去实现最终的 AI 输出。
【**循环运行**】节点本质上也是一个 Function，它的主要职责是自动化地重复执行特定的工作流程。
## 核心特性
1. **数组批量处理**
- 支持输入数组类型数据
- 自动遍历数组元素
- 保持处理顺序
- 支持并行处理 (性能优化)
2. **自动迭代执行**
- 自动触发后续节点
- 支持条件终止
- 支持循环计数
- 维护执行上下文
3. **与其他节点协同**
- 支持与 AI 对话节点配合
- 支持与 HTTP 节点配合
- 支持与内容提取节点配合
- 支持与判断器节点配合
## 应用场景
【**循环运行**】节点的主要作用是通过自动化的方式扩展工作流的处理能力,使 FastGPT 能够更好地处理批量任务和复杂的数据处理流程。特别是在处理大规模数据或需要多轮迭代的场景下,循环运行节点能显著提升工作流的效率和自动化程度。
【**循环运行**】节点特别适合以下场景:
1. **批量数据处理**
- 批量翻译文本
- 批量总结文档
- 批量生成内容
2. **数据流水线处理**
- 对搜索结果逐条分析
- 对知识库检索结果逐条处理
- 对 HTTP 请求返回的数组数据逐项处理
3. **递归或迭代任务**
- 长文本分段处理
- 多轮优化内容
- 链式数据处理
## 使用方法
### 输入参数设置
【**循环运行**】节点需要配置两个核心输入参数:
1. **数组 (必填)**:接收一个数组类型的输入,可以是:
- 字符串数组 (`Array<string>`)
- 数字数组 (`Array<number>`)
- 布尔数组 (`Array<boolean>`)
- 对象数组 (`Array<object>`)
2. **循环体 (必填)**:定义每次循环需要执行的节点流程,包含:
- 循环体开始:标记循环开始的位置。
- 循环体结束:标记循环结束的位置,并可选择输出结果变量。
### 循环体配置
![循环体配置](/imgs/fastgpt-loop-node-config.png)
1. 在循环体内部,可以添加任意类型的节点,如:
- AI 对话节点
- HTTP 请求节点
- 内容提取节点
- 文本加工节点等
2. 循环体结束节点配置:
- 通过下拉菜单选择要输出的变量
- 该变量将作为当前循环的结果被收集
- 所有循环的结果将组成一个新的数组作为最终输出
## 场景示例
### 批量处理数组
假设我们有一个包含多个文本的数组,需要对每个文本进行 AI 处理。这是循环运行节点最基础也最常见的应用场景。
#### 实现步骤
1. 准备输入数组
![准备输入数组](/imgs/fastgpt-loop-node-example-1.png)
使用【代码运行】节点创建测试数组:
```javascript
const texts = [
"这是第一段文本",
"这是第二段文本",
"这是第三段文本"
];
return { textArray: texts };
```
2. 配置循环运行节点
![配置循环运行节点](/imgs/fastgpt-loop-node-example-2.png)
- 数组输入:选择上一步代码运行节点的输出变量 `textArray`。
- 循环体内添加一个【AI 对话】节点,用于处理每个文本。这里我们输入的 prompt 为:`请将这段文本翻译成英文`。
- 再添加一个【指定回复】节点,用于输出翻译后的文本。
- 循环体结束节点选择输出变量为 AI 回复内容。
#### 运行流程
![运行流程](/imgs/fastgpt-loop-node-example-3.png)
1. 【代码运行】节点执行,生成测试数组
2. 【循环运行】节点接收数组,开始遍历
3. 对每个数组元素:
- 【AI 对话】节点处理当前元素
- 【指定回复】节点输出翻译后的文本
- 【循环体结束】节点收集处理结果
4. 完成所有元素处理后,输出结果数组
### 长文本翻译
在处理长文本翻译时,我们经常会遇到以下挑战:
- 文本长度超出 LLM 的 token 限制
- 需要保持翻译风格的一致性
- 需要维护上下文的连贯性
- 翻译质量需要多轮优化
【**循环运行**】节点可以很好地解决这些问题。
#### 实现步骤
1. 文本预处理与分段
![文本预处理与分段](/imgs/fastgpt-loop-node-example-4.png)
使用【代码运行】节点进行文本分段,代码如下:
```javascript
// Length limits for every structure the chunking regex recognises.
const MAX_HEADING_LENGTH = 7; // max heading length
const MAX_HEADING_CONTENT_LENGTH = 200; // max heading content length
const MAX_HEADING_UNDERLINE_LENGTH = 200; // max heading underline length
const MAX_HTML_HEADING_ATTRIBUTES_LENGTH = 100; // max HTML heading attributes length
const MAX_LIST_ITEM_LENGTH = 200; // max list item length
const MAX_NESTED_LIST_ITEMS = 6; // max number of nested list items
const MAX_LIST_INDENT_SPACES = 7; // max list indentation (spaces)
const MAX_BLOCKQUOTE_LINE_LENGTH = 200; // max blockquote line length
const MAX_BLOCKQUOTE_LINES = 15; // max number of blockquote lines
const MAX_CODE_BLOCK_LENGTH = 1500; // max code block length
const MAX_CODE_LANGUAGE_LENGTH = 20; // max code language name length
const MAX_INDENTED_CODE_LINES = 20; // max number of indented code lines
const MAX_TABLE_CELL_LENGTH = 200; // max table cell length
const MAX_TABLE_ROWS = 20; // max number of table rows
const MAX_HTML_TABLE_LENGTH = 2000; // max HTML table length
const MIN_HORIZONTAL_RULE_LENGTH = 3; // min horizontal rule length
const MAX_SENTENCE_LENGTH = 400; // max sentence length
const MAX_QUOTED_TEXT_LENGTH = 300; // max quoted text length
const MAX_PARENTHETICAL_CONTENT_LENGTH = 200; // max parenthetical content length
const MAX_NESTED_PARENTHESES = 5; // max number of nested parentheses
const MAX_MATH_INLINE_LENGTH = 100; // max inline math length
const MAX_MATH_BLOCK_LENGTH = 500; // max math block length
const MAX_PARAGRAPH_LENGTH = 1000; // max paragraph length
const MAX_STANDALONE_LINE_LENGTH = 800; // max standalone line length
const MAX_HTML_TAG_ATTRIBUTES_LENGTH = 100; // max HTML tag attributes length
const MAX_HTML_TAG_CONTENT_LENGTH = 1000; // max HTML tag content length
const LOOKAHEAD_RANGE = 100; // number of characters to look ahead for a sentence boundary

// Reusable pattern fragments. They are plain strings that get interpolated into the final
// RegExp source, so every "${...}" must be written without a space after the "$".
const AVOID_AT_START = `[\\s\\]})>,']`; // characters a match should not start with
const PUNCTUATION = `[.!?…]|\\.{3}|[\\u2026\\u2047-\\u2049]|[\\p{Emoji_Presentation}\\p{Extended_Pictographic}]`; // punctuation
const QUOTE_END = `(?:'(?=\`)|''(?=\`\`))`; // closing quote
const SENTENCE_END = `(?:${PUNCTUATION}(?<!${AVOID_AT_START}(?=${PUNCTUATION}))|${QUOTE_END})(?=\\S|$)`; // sentence end
const SENTENCE_BOUNDARY = `(?:${SENTENCE_END}|(?=[\\r\\n]|$))`; // sentence boundary
const LOOKAHEAD_PATTERN = `(?:(?!${SENTENCE_END}).){1,${LOOKAHEAD_RANGE}}${SENTENCE_END}`; // look ahead for a sentence end
const NOT_PUNCTUATION_SPACE = `(?!${PUNCTUATION}\\s)`; // not "punctuation followed by whitespace"
// "{MAX_LENGTH}" is a placeholder that each use site replaces with its own limit.
const SENTENCE_PATTERN = `${NOT_PUNCTUATION_SPACE}(?:[^\\r\\n]{1,{MAX_LENGTH}}${SENTENCE_BOUNDARY}|[^\\r\\n]{1,{MAX_LENGTH}}(?=${PUNCTUATION}|${QUOTE_END})(?:${LOOKAHEAD_PATTERN})?)${AVOID_AT_START}*`; // sentence pattern

// One big alternation; the alternatives are tried in the order listed below.
const regex = new RegExp(
  "(" +
    // 1. Headings (Setext-style, Markdown, and HTML-style, with length constraints)
    `(?:^(?:[#*=-]{1,${MAX_HEADING_LENGTH}}|\\w[^\\r\\n]{0,${MAX_HEADING_CONTENT_LENGTH}}\\r?\\n[-=]{2,${MAX_HEADING_UNDERLINE_LENGTH}}|<h[1-6][^>]{0,${MAX_HTML_HEADING_ATTRIBUTES_LENGTH}}>)[^\\r\\n]{1,${MAX_HEADING_CONTENT_LENGTH}}(?:</h[1-6]>)?(?:\\r?\\n|$))` +
    "|" +
    // New pattern for citations
    `(?:\\[[0-9]+\\][^\\r\\n]{1,${MAX_STANDALONE_LINE_LENGTH}})` +
    "|" +
    // 2. List items (bulleted, numbered, lettered, or task lists, including nested, up to three levels, with length constraints)
    `(?:(?:^|\\r?\\n)[ \\t]{0,3}(?:[-*+•]|\\d{1,3}\\.\\w\\.|\\[[ xX]\\])[ \\t]+${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_LIST_ITEM_LENGTH))}` +
    `(?:(?:\\r?\\n[ \\t]{2,5}(?:[-*+•]|\\d{1,3}\\.\\w\\.|\\[[ xX]\\])[ \\t]+${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_LIST_ITEM_LENGTH))}){0,${MAX_NESTED_LIST_ITEMS}}` +
    `(?:\\r?\\n[ \\t]{4,${MAX_LIST_INDENT_SPACES}}(?:[-*+•]|\\d{1,3}\\.\\w\\.|\\[[ xX]\\])[ \\t]+${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_LIST_ITEM_LENGTH))}){0,${MAX_NESTED_LIST_ITEMS}})?)` +
    "|" +
    // 3. Block quotes (including nested quotes and citations, up to three levels, with length constraints)
    `(?:(?:^>(?:>|\\s{2,}){0,2}${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_BLOCKQUOTE_LINE_LENGTH))}\\r?\\n?){1,${MAX_BLOCKQUOTE_LINES}})` +
    "|" +
    // 4. Code blocks (fenced, indented, or HTML pre/code tags, with length constraints)
    `(?:(?:^|\\r?\\n)(?:\`\`\`|~~~)(?:\\w{0,${MAX_CODE_LANGUAGE_LENGTH}})?\\r?\\n[\\s\\S]{0,${MAX_CODE_BLOCK_LENGTH}}?(?:\`\`\`|~~~)\\r?\\n?` +
    `|(?:(?:^|\\r?\\n)(?: {4}|\\t)[^\\r\\n]{0,${MAX_LIST_ITEM_LENGTH}}(?:\\r?\\n(?: {4}|\\t)[^\\r\\n]{0,${MAX_LIST_ITEM_LENGTH}}){0,${MAX_INDENTED_CODE_LINES}}\\r?\\n?)` +
    `|(?:<pre>(?:<code>)?[\\s\\S]{0,${MAX_CODE_BLOCK_LENGTH}}?(?:</code>)?</pre>))` +
    "|" +
    // 5. Tables (Markdown, grid tables, and HTML tables, with length constraints)
    `(?:(?:^|\\r?\\n)(?:\\|[^\\r\\n]{0,${MAX_TABLE_CELL_LENGTH}}\\|(?:\\r?\\n\\|[-:]{1,${MAX_TABLE_CELL_LENGTH}}\\|){0,1}(?:\\r?\\n\\|[^\\r\\n]{0,${MAX_TABLE_CELL_LENGTH}}\\|){0,${MAX_TABLE_ROWS}}` +
    `|<table>[\\s\\S]{0,${MAX_HTML_TABLE_LENGTH}}?</table>))` +
    "|" +
    // 6. Horizontal rules (Markdown and HTML hr tag)
    `(?:^(?:[-*_]){${MIN_HORIZONTAL_RULE_LENGTH},}\\s*$|<hr\\s*/?>)` +
    "|" +
    // 10. Standalone lines or phrases (including single-line blocks and HTML elements, with length constraints)
    `(?!${AVOID_AT_START})(?:^(?:<[a-zA-Z][^>]{0,${MAX_HTML_TAG_ATTRIBUTES_LENGTH}}>)?${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_STANDALONE_LINE_LENGTH))}(?:</[a-zA-Z]+>)?(?:\\r?\\n|$))` +
    "|" +
    // 7. Sentences or phrases ending with punctuation (including ellipsis and Unicode punctuation)
    `(?!${AVOID_AT_START})${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_SENTENCE_LENGTH))}` +
    "|" +
    // 8. Quoted text, parenthetical phrases, or bracketed content (with length constraints)
    "(?:" +
    `(?<!\\w)\"\"\"[^\"]{0,${MAX_QUOTED_TEXT_LENGTH}}\"\"\"(?!\\w)` +
    `|(?<!\\w)(?:['\"\`'"])[^\\r\\n]{0,${MAX_QUOTED_TEXT_LENGTH}}\\1(?!\\w)` +
    `|(?<!\\w)\`[^\\r\\n]{0,${MAX_QUOTED_TEXT_LENGTH}}'(?!\\w)` +
    `|(?<!\\w)\`\`[^\\r\\n]{0,${MAX_QUOTED_TEXT_LENGTH}}''(?!\\w)` +
    `|\\([^\\r\\n()]{0,${MAX_PARENTHETICAL_CONTENT_LENGTH}}(?:\\([^\\r\\n()]{0,${MAX_PARENTHETICAL_CONTENT_LENGTH}}\\)[^\\r\\n()]{0,${MAX_PARENTHETICAL_CONTENT_LENGTH}}){0,${MAX_NESTED_PARENTHESES}}\\)` +
    `|\\[[^\\r\\n\\[\\]]{0,${MAX_PARENTHETICAL_CONTENT_LENGTH}}(?:\\[[^\\r\\n\\[\\]]{0,${MAX_PARENTHETICAL_CONTENT_LENGTH}}\\][^\\r\\n\\[\\]]{0,${MAX_PARENTHETICAL_CONTENT_LENGTH}}){0,${MAX_NESTED_PARENTHESES}}\\]` +
    `|\\$[^\\r\\n$]{0,${MAX_MATH_INLINE_LENGTH}}\\$` +
    `|\`[^\`\\r\\n]{0,${MAX_MATH_INLINE_LENGTH}}\`` +
    ")" +
    "|" +
    // 9. Paragraphs (with length constraints)
    `(?!${AVOID_AT_START})(?:(?:^|\\r?\\n\\r?\\n)(?:<p>)?${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_PARAGRAPH_LENGTH))}(?:</p>)?(?=\\r?\\n\\r?\\n|$))` +
    "|" +
    // 11. HTML-like tags and their content (including self-closing tags and attributes, with length constraints)
    `(?:<[a-zA-Z][^>]{0,${MAX_HTML_TAG_ATTRIBUTES_LENGTH}}(?:>[\\s\\S]{0,${MAX_HTML_TAG_CONTENT_LENGTH}}?</[a-zA-Z]+>|\\s*/>))` +
    "|" +
    // 12. LaTeX-style math expressions (inline and block, with length constraints)
    `(?:(?:\\$\\$[\\s\\S]{0,${MAX_MATH_BLOCK_LENGTH}}?\\$\\$)|(?:\\$[^\\$\\r\\n]{0,${MAX_MATH_INLINE_LENGTH}}\\$))` +
    "|" +
    // 14. Fallback for any remaining content (with length constraints)
    `(?!${AVOID_AT_START})${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_STANDALONE_LINE_LENGTH))}` +
    ")",
  "gmu"
);
// Entry point of the code-run node: splits `text` with the regex above and packs the
// matched pieces, in order, into chunks of at most 1000 characters.
// Returns the chunk list together with the token count of the whole input.
function main({text}){
  const chunks = [];
  const tokens = countToken(text)
  // match() yields null when nothing matches; treat that as "no pieces".
  const pieces = text.match(regex) || [];

  let buffer = '';
  for (const piece of pieces) {
    const fitsInBuffer = buffer.length + piece.length <= 1000;
    if (fitsInBuffer) {
      buffer += piece;
      continue;
    }
    // The piece would overflow the current chunk: flush it and start a new one.
    if (buffer) {
      chunks.push(buffer);
    }
    buffer = piece;
  }

  // Flush whatever is left after the last piece.
  if (buffer) {
    chunks.push(buffer);
  }

  return {chunks, tokens};
}
```
这里我们用到了 [Jina AI 开源的一个强大的正则表达式](https://x.com/JinaAI_/status/1823756993108304135),它能利用所有可能的边界线索和启发式方法来精确切分文本。
2. 配置循环运行节点
![配置循环运行节点](/imgs/fastgpt-loop-node-example-5.png)
- 数组输入:选择上一步代码运行节点的输出变量 `chunks`。
- 循环体内添加一个【代码运行】节点,对源文本进行格式化。
- 添加一个【搜索词库】节点,将专有名词的词库作为知识库,在翻译前进行搜索。
- 添加一个【AI 对话】节点,使用 CoT 思维链,让 LLM 显式地、系统地生成推理链条,展示翻译的完整思考过程。
- 添加一个【代码运行】节点将【AI 对话】节点最后一轮的翻译结果提取出来。
- 添加一个【指定回复】节点,输出翻译后的文本。
- 循环体结束节点选择输出变量为【取出翻译文本】的输出变量 `result`。

View File

@@ -1,6 +1,13 @@
<!DOCTYPE html>
{{ $.Scratch.Delete "social_list" }}
{{ $.Scratch.Set "pathName" (printf "%s" (.Site.Params.docs.pathName | default "docs")) }}
<!-- Google Tag Manager -->
<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
})(window,document,'script','dataLayer','GTM-W9HPZZ22');</script>
<!-- End Google Tag Manager -->
<!-- social_list -->
<!-- change -->
{{ $social_params := slice "github" "twitter" "instagram" "rss" "wechat" "lark" }}
@@ -12,6 +19,10 @@
<html lang="{{ site.LanguageCode }}">
{{- partial (printf "%s/%s" ($.Scratch.Get "pathName") "head.html") . -}}
<body>
<!-- Google Tag Manager (noscript) -->
<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-W9HPZZ22"
height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
<!-- End Google Tag Manager (noscript) -->
<div class="content">
<div class="page-wrapper toggled">
{{- partial (printf "%s/%s" ($.Scratch.Get "pathName") "sidebar.html") . -}}

View File

@@ -139,6 +139,8 @@ services:
- OPENAI_BASE_URL=http://oneapi:3000/v1
# AI模型的API Key。这里默认填写了OneAPI的快速默认key，测试通后务必及时修改
- CHAT_API_KEY=sk-fastgpt
# 是否将图片转成 base64 传递给模型,本地开发和内网环境使用共有模型时候需要设置为 true
- MULTIPLE_DATA_TO_BASE64=false
# 数据库最大连接数
- DB_MAX_LINK=30
# 登录凭证密钥

View File

@@ -97,6 +97,8 @@ services:
- OPENAI_BASE_URL=http://oneapi:3000/v1
# AI模型的API Key。这里默认填写了OneAPI的快速默认key，测试通后务必及时修改
- CHAT_API_KEY=sk-fastgpt
# 是否将图片转成 base64 传递给模型,本地开发和内网环境使用共有模型时候需要设置为 true
- MULTIPLE_DATA_TO_BASE64=false
# 数据库最大连接数
- DB_MAX_LINK=30
# 登录凭证密钥

View File

@@ -77,6 +77,8 @@ services:
- OPENAI_BASE_URL=http://oneapi:3000/v1
# AI模型的API Key。这里默认填写了OneAPI的快速默认key，测试通后务必及时修改
- CHAT_API_KEY=sk-fastgpt
# 是否将图片转成 base64 传递给模型,本地开发和内网环境使用共有模型时候需要设置为 true
- MULTIPLE_DATA_TO_BASE64=false
# 数据库最大连接数
- DB_MAX_LINK=30
# 登录凭证密钥

View File

@@ -19,6 +19,7 @@ export const ERROR_CODE: { [key: number]: string } = {
406: i18nT('common:code_error.error_code.406'),
410: i18nT('common:code_error.error_code.410'),
422: i18nT('common:code_error.error_code.422'),
429: i18nT('common:code_error.error_code.429'),
500: i18nT('common:code_error.error_code.500'),
502: i18nT('common:code_error.error_code.502'),
503: i18nT('common:code_error.error_code.503'),
@@ -39,7 +40,8 @@ export enum ERROR_ENUM {
insufficientQuota = 'insufficientQuota',
unAuthModel = 'unAuthModel',
unAuthApiKey = 'unAuthApiKey',
unAuthFile = 'unAuthFile'
unAuthFile = 'unAuthFile',
tooManyRequest = 'tooManyRequest'
}
export type ErrType<T> = Record<
@@ -67,6 +69,12 @@ export const ERROR_RESPONSE: Record<
message: i18nT('common:code_error.error_message.403'),
data: null
},
[ERROR_ENUM.tooManyRequest]: {
code: 429,
statusText: ERROR_ENUM.tooManyRequest,
message: 'Too many request',
data: null
},
[ERROR_ENUM.insufficientQuota]: {
code: 510,
statusText: ERROR_ENUM.insufficientQuota,

View File

@@ -16,6 +16,8 @@ export const bucketNameMap = {
}
};
export const ReadFileBaseUrl = `${process.env.FE_DOMAIN || ''}/api/common/file/read`;
// Base URL of the file-read API: optional front-end domain + optional sub-route prefix + path.
// Both env vars fall back to '' so an unset NEXT_PUBLIC_BASE_URL does not render as "undefined".
export const ReadFileBaseUrl = `${process.env.FE_DOMAIN || ''}${process.env.NEXT_PUBLIC_BASE_URL || ''}/api/common/file/read`;
export const documentFileType = '.txt, .docx, .csv, .xlsx, .pdf, .md, .html, .pptx';
export const imageFileType =
'.jpg, .jpeg, .png, .gif, .bmp, .webp, .svg, .tiff, .tif, .ico, .heic, .heif, .avif';

View File

@@ -1,4 +1,7 @@
import { detect } from 'jschardet';
import { documentFileType, imageFileType } from './constants';
import { ChatFileTypeEnum } from '../../core/chat/constants';
import { UserChatItemValueItemType } from '../../core/chat/type';
export const formatFileSize = (bytes: number): string => {
if (bytes === 0) return '0 B';
@@ -13,3 +16,40 @@ export const formatFileSize = (bytes: number): string => {
export const detectFileEncoding = (buffer: Buffer) => {
return detect(buffer.slice(0, 200))?.encoding?.toLocaleLowerCase();
};
/**
 * Url => user upload file type.
 *
 * Derives a chat file item from a URL by looking at the extension of its file name.
 * The file name is taken from the `filename` query parameter when present (old-style
 * read links), otherwise from the last path segment.
 *
 * @param url Absolute or relative file URL.
 * @returns `{ type, name, url }` when the extension is an exact entry of `documentFileType`
 *          or `imageFileType`; `undefined` for non-string input, unparseable URLs, names
 *          without an extension, or unsupported extensions.
 */
export const parseUrlToFileType = (url: string): UserChatItemValueItemType['file'] | undefined => {
  if (typeof url !== 'string') return;

  // The base only makes relative links parseable; it never appears in the result.
  let parsedUrl: URL;
  try {
    parsedUrl = new URL(url, 'https://localhost:3000');
  } catch {
    // Not a usable URL at all: treat it like any other unrecognised input.
    return;
  }

  // Old version file url: https://xxx.com/file/read?filename=xxx.pdf
  // Common file: https://xxx.com/xxx.pdf?xxxx=xxx
  const filename = parsedUrl.searchParams.get('filename') || parsedUrl.pathname.split('/').pop();
  if (!filename) return;

  // A name without a dot has no extension ("pdf" alone must not count as a PDF).
  const dotIndex = filename.lastIndexOf('.');
  if (dotIndex < 0) return;
  const extension = filename.slice(dotIndex + 1).toLowerCase();
  if (!extension) return;

  // The type lists are ".a, .b, .c" strings. Compare whole entries: a substring test
  // would accept "doc" (inside ".docx") or "htm" (inside ".html").
  const isListed = (extensionList: string) =>
    extensionList.split(',').some((entry) => entry.trim().toLowerCase() === `.${extension}`);

  if (isListed(documentFileType)) {
    return {
      type: ChatFileTypeEnum.file,
      name: filename,
      url
    };
  }
  if (isListed(imageFileType)) {
    return {
      type: ChatFileTypeEnum.image,
      name: filename,
      url
    };
  }
};

View File

@@ -92,9 +92,9 @@ ${mdSplitString}
};
/*
1. 自定义分隔符:不需要重叠
2. Markdown 标题:不需要重叠;标题嵌套共享
3. 特殊 markdown 语法:不需要重叠
1. 自定义分隔符:不需要重叠,不需要小块合并
2. Markdown 标题:不需要重叠;标题嵌套共享,不需要小块合并
3. 特殊 markdown 语法:不需要重叠,需要小块合并
4. 段落:尽可能保证它是一个完整的段落。
5. 标点分割:重叠
*/
@@ -118,10 +118,10 @@ const commonSplit = (props: SplitProps): SplitResponse => {
reg: new RegExp(`(${replaceRegChars(text)})`, 'g'),
maxLen: chunkLen * 1.4
})),
{ reg: /^(#\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
{ reg: /^(##\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
{ reg: /^(###\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
{ reg: /^(####\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
{ reg: /^(#\s[^\n]+\n)/gm, maxLen: chunkLen * 1.2 },
{ reg: /^(##\s[^\n]+\n)/gm, maxLen: chunkLen * 1.4 },
{ reg: /^(###\s[^\n]+\n)/gm, maxLen: chunkLen * 1.6 },
{ reg: /^(####\s[^\n]+\n)/gm, maxLen: chunkLen * 1.8 },
{ reg: /([\n]([`~]))/g, maxLen: chunkLen * 4 }, // code block
{ reg: /([\n](?!\s*[\*\-|>0-9]))/g, maxLen: chunkLen * 2 }, // 增大块,尽可能保证它是一个完整的段落。 (?![\*\-|>`0-9]): markdown special char
@@ -137,7 +137,6 @@ const commonSplit = (props: SplitProps): SplitResponse => {
const customRegLen = customReg.length;
const checkIsCustomStep = (step: number) => step < customRegLen;
const checkIsMarkdownSplit = (step: number) => step >= customRegLen && step <= 3 + customRegLen;
const checkIndependentChunk = (step: number) => step >= customRegLen && step <= 4 + customRegLen;
const checkForbidOverlap = (step: number) => step <= 6 + customRegLen;
// if use markdown title split, Separate record title
@@ -153,7 +152,6 @@ const commonSplit = (props: SplitProps): SplitResponse => {
const isCustomStep = checkIsCustomStep(step);
const isMarkdownSplit = checkIsMarkdownSplit(step);
const independentChunk = checkIndependentChunk(step);
const { reg } = stepReges[step];
@@ -162,7 +160,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
reg,
(() => {
if (isCustomStep) return splitMarker;
if (independentChunk) return `${splitMarker}$1`;
if (isMarkdownSplit) return `${splitMarker}$1`;
return `$1${splitMarker}`;
})()
)
@@ -178,7 +176,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
title: matchTitle
};
})
.filter((item) => item.text.trim());
.filter((item) => item.text?.trim());
};
/* Gets the overlap at the end of a text as the beginning of the next block */
@@ -214,15 +212,16 @@ const commonSplit = (props: SplitProps): SplitResponse => {
text = '',
step,
lastText,
mdTitle = ''
parentTitle = ''
}: {
text: string;
step: number;
lastText: string;
mdTitle: string;
lastText: string; // 上一个分块末尾数据会通过这个参数传入。
parentTitle: string;
}): string[] => {
const independentChunk = checkIndependentChunk(step);
const isMarkdownStep = checkIsMarkdownSplit(step);
const isCustomStep = checkIsCustomStep(step);
const forbidConcat = isMarkdownStep || isCustomStep; // forbid=true时候lastText肯定为空
// oversize
if (step >= stepReges.length) {
@@ -232,7 +231,7 @@ const commonSplit = (props: SplitProps): SplitResponse => {
// use slice-chunkLen to split text
const chunks: string[] = [];
for (let i = 0; i < text.length; i += chunkLen - overlapLen) {
chunks.push(`${mdTitle}${text.slice(i, i + chunkLen)}`);
chunks.push(`${parentTitle}${text.slice(i, i + chunkLen)}`);
}
return chunks;
}
@@ -242,67 +241,78 @@ const commonSplit = (props: SplitProps): SplitResponse => {
const maxLen = splitTexts.length > 1 ? stepReges[step].maxLen : chunkLen;
const minChunkLen = chunkLen * 0.7;
const miniChunkLen = 30;
// console.log(splitTexts, stepReges[step].reg);
const chunks: string[] = [];
for (let i = 0; i < splitTexts.length; i++) {
const item = splitTexts[i];
const currentTitle = `${mdTitle}${item.title}`;
const lastTextLen = lastText.length;
const currentText = item.text;
const currentTextLen = currentText.length;
const lastTextLen = lastText.length;
const newText = lastText + currentText;
const newTextLen = lastTextLen + currentTextLen;
// newText is too large(now, The lastText must be smaller than chunkLen)
if (newTextLen > maxLen) {
if (newTextLen > maxLen || isMarkdownStep) {
// lastText greater minChunkLen, direct push it to chunks, not add to next chunk. (large lastText)
if (lastTextLen > minChunkLen) {
chunks.push(`${currentTitle}${lastText}`);
lastText = getOneTextOverlapText({ text: lastText, step }); // next chunk will start with overlayText
i--;
chunks.push(lastText);
lastText = getOneTextOverlapText({ text: lastText, step }); // next chunk will start with overlayText
i--;
continue;
}
// 说明是新的文本块比较大,需要进一步拆分
// split new Text, split chunks must will greater 1 (small lastText)
const innerChunks = splitTextRecursively({
text: newText,
step: step + 1,
lastText: '',
mdTitle: currentTitle
parentTitle: parentTitle + item.title
});
const lastChunk = innerChunks[innerChunks.length - 1];
if (!lastChunk) continue;
if (forbidConcat) {
chunks.push(
...innerChunks.map(
(chunk) => (step === 3 + customRegLen ? `${parentTitle}${chunk}` : chunk) // 合并进 Markdown 分块时,需要补标题
)
);
continue;
}
// last chunk is too small, concat it to lastText(next chunk start)
if (!independentChunk && lastChunk.length < minChunkLen) {
if (lastChunk.length < minChunkLen) {
chunks.push(...innerChunks.slice(0, -1));
lastText = lastChunk;
} else {
chunks.push(...innerChunks);
// compute new overlapText
lastText = getOneTextOverlapText({
text: lastChunk,
step
});
continue;
}
// Last chunk is large enough
chunks.push(...innerChunks);
// compute new overlapText
lastText = getOneTextOverlapText({
text: lastChunk,
step
});
continue;
}
// size less than chunkLen, push text to last chunk. now, text definitely less than maxLen
lastText = newText;
// new text is small
// markdown paragraph block: Direct addition; If the chunk size reaches, add a chunk
if (
isCustomStep ||
(independentChunk && newTextLen > miniChunkLen) ||
newTextLen >= chunkLen
) {
chunks.push(`${currentTitle}${lastText}`);
lastText = getOneTextOverlapText({ text: lastText, step });
// Not overlap
if (forbidConcat) {
chunks.push(`${parentTitle}${item.title}${item.text}`);
continue;
}
lastText += item.text;
}
/* If the last chunk is independent, it needs to be push chunks. */
@@ -310,9 +320,10 @@ const commonSplit = (props: SplitProps): SplitResponse => {
if (lastText.length < chunkLen * 0.4) {
chunks[chunks.length - 1] = chunks[chunks.length - 1] + lastText;
} else {
chunks.push(`${mdTitle}${lastText}`);
chunks.push(lastText);
}
} else if (lastText && chunks.length === 0) {
// 只分出一个很小的块,则直接追加到末尾(如果大于 1 个块,说明这个小块内容已经被上一个块拿到了)
chunks.push(lastText);
}
@@ -324,8 +335,8 @@ const commonSplit = (props: SplitProps): SplitResponse => {
text,
step: 0,
lastText: '',
mdTitle: ''
}).map((chunk) => chunk?.replaceAll(codeBlockMarker, '\n') || ''); // restore code block
parentTitle: ''
}).map((chunk) => chunk?.replaceAll(codeBlockMarker, '\n')?.trim() || ''); // restore code block
const chars = chunks.reduce((sum, chunk) => sum + chunk.length, 0);

View File

@@ -2,6 +2,7 @@ import dayjs from 'dayjs';
import cronParser from 'cron-parser';
import utc from 'dayjs/plugin/utc';
import timezone from 'dayjs/plugin/timezone';
import { i18nT } from '../../../web/i18n/utils';
dayjs.extend(utc);
dayjs.extend(timezone);
@@ -23,31 +24,51 @@ export const formatTimeToChatTime = (time: Date) => {
// 如果传入时间小于60秒返回刚刚
if (now.diff(target, 'second') < 60) {
return '刚刚';
return i18nT('common:just_now');
}
// 如果时间是今天,展示几时:几分
//用#占位i18n生效后replace成:
if (now.isSame(target, 'day')) {
return target.format('HH : mm');
return target.format('HH#mm');
}
// 如果是昨天,展示昨天
if (now.subtract(1, 'day').isSame(target, 'day')) {
return '昨天';
}
// 如果是前天,展示前天
if (now.subtract(2, 'day').isSame(target, 'day')) {
return '前天';
return i18nT('common:yesterday');
}
// 如果是今年,展示某月某日
if (now.isSame(target, 'year')) {
return target.format('MM/DD');
return target.format('MM-DD');
}
// 如果是更久之前,展示某年某月某日
return target.format('YYYY/M/D');
return target.format('YYYY-M-D');
};
/**
 * Format the timestamp shown on a single chat item, relative to the current time.
 *
 * - same day: time of day only ('HH#mm')
 * - previous day: the i18n key `common:yesterday_detail_time` (no time is appended here)
 * - same year: 'MM-DD' + ' ' + time of day
 * - older: 'YYYY-M-D' + ' ' + time of day
 *
 * NOTE(review): the '#' in 'HH#mm' looks like a placeholder that is swapped for ':' once
 * i18n is applied (per the comment in formatTimeToChatTime) — confirm against the caller.
 */
export const formatTimeToChatItemTime = (time: Date) => {
  const current = dayjs();
  const messageTime = dayjs(time);
  const clock = messageTime.format('HH#mm');

  const sentToday = current.isSame(messageTime, 'day');
  if (sentToday) return clock;

  const sentYesterday = current.subtract(1, 'day').isSame(messageTime, 'day');
  if (sentYesterday) return i18nT('common:yesterday_detail_time');

  // Anything older carries a date prefix; the year is only shown when it differs from now.
  const datePattern = current.isSame(messageTime, 'year') ? 'MM-DD' : 'YYYY-M-D';
  return messageTime.format(datePattern) + ' ' + clock;
};
/* cron time parse */

View File

@@ -207,8 +207,8 @@ export const Prompt_systemQuotePromptList: PromptTemplateItem[] = [
];
// Document quote prompt
export const Prompt_DocumentQuote = `将 <Reference></Reference> 中的内容作为本次对话的参考:
<Reference>
export const Prompt_DocumentQuote = `将 <FilesContent></FilesContent> 中的内容作为本次对话的参考:
<FilesContent>
{{quote}}
</Reference>
</FilesContent>
`;

View File

@@ -1,4 +1,8 @@
import { UpdateClbPermissionProps } from '../../support/permission/collaborator';
import { RequireOnlyOne } from '../../common/type/utils';
import {
UpdateClbPermissionProps,
UpdatePermissionBody
} from '../../support/permission/collaborator';
import { PermissionValueType } from '../../support/permission/type';
export type UpdateAppCollaboratorBody = UpdateClbPermissionProps & {
@@ -7,5 +11,7 @@ export type UpdateAppCollaboratorBody = UpdateClbPermissionProps & {
export type AppCollaboratorDeleteParams = {
appId: string;
} & RequireOnlyOne<{
tmbId: string;
};
groupId: string;
}>;

View File

@@ -10,7 +10,6 @@ import { SelectedDatasetType } from '../workflow/api';
import { DatasetSearchModeEnum } from '../dataset/constants';
import { TeamTagSchema as TeamTagsSchemaType } from '@fastgpt/global/support/user/team/type.d';
import { StoreEdgeItemType } from '../workflow/type/edge';
import { PermissionSchemaType, PermissionValueType } from '../../support/permission/type';
import { AppPermission } from '../../support/permission/app/controller';
import { ParentIdType } from '../../common/parentFolder/type';
import { FlowNodeInputTypeEnum } from 'core/workflow/node/constant';
@@ -45,7 +44,11 @@ export type AppSchema = {
inited?: boolean;
teamTags: string[];
} & PermissionSchemaType;
inheritPermission?: boolean;
// abandon
defaultPermission?: number;
};
export type AppListItemType = {
_id: string;
@@ -57,7 +60,9 @@ export type AppListItemType = {
updateTime: Date;
pluginData?: AppSchema['pluginData'];
permission: AppPermission;
} & PermissionSchemaType;
inheritPermission?: boolean;
private?: boolean;
};
export type AppDetailType = AppSchema & {
permission: AppPermission;

View File

@@ -14,7 +14,6 @@ import type {
ChatCompletionToolMessageParam
} from '../../core/ai/type.d';
import { ChatCompletionRequestMessageRoleEnum } from '../../core/ai/constants';
const GPT2Chat = {
[ChatCompletionRequestMessageRoleEnum.System]: ChatRoleEnum.System,
[ChatCompletionRequestMessageRoleEnum.User]: ChatRoleEnum.Human,
@@ -61,14 +60,14 @@ export const chats2GPTMessages = ({
return {
type: 'image_url',
image_url: {
url: item.file?.url || ''
url: item.file.url
}
};
} else if (item.file?.type === ChatFileTypeEnum.file) {
return {
type: 'file_url',
name: item.file?.name || '',
url: item.file?.url || ''
url: item.file.url
};
}
}

View File

@@ -126,6 +126,7 @@ export type ChatSiteItemType = (UserChatItemType | SystemChatItemType | AIChatIt
moduleName?: string;
ttsBuffer?: Uint8Array;
responseData?: ChatHistoryItemResType[];
time?: Date;
} & ChatBoxInputType &
ResponseTagItemType;

View File

@@ -30,7 +30,8 @@ export const getChatTitleFromChatMessage = (message?: ChatItemType, defaultValue
// Keep the first n and last n characters
export const getHistoryPreview = (
completeMessages: ChatItemType[],
size = 100
size = 100,
useVision = false
): {
obj: `${ChatRoleEnum}`;
value: string;
@@ -48,7 +49,8 @@ export const getHistoryPreview = (
item.value
?.map((item) => {
if (item?.text?.content) return item?.text?.content;
if (item.file?.type === 'image') return 'Input an image';
if (item.file?.type === 'image' && useVision)
return `![Input an image](${item.file.url.slice(0, 100)}...)`;
return '';
})
.filter(Boolean)

View File

@@ -1,5 +1,6 @@
import { UpdateClbPermissionProps } from '../../support/permission/collaborator';
import { PermissionValueType } from '../../support/permission/type';
import { RequireOnlyOne } from '../../common/type/utils';
export type UpdateDatasetCollaboratorBody = UpdateClbPermissionProps & {
datasetId: string;
@@ -7,5 +8,7 @@ export type UpdateDatasetCollaboratorBody = UpdateClbPermissionProps & {
export type DatasetCollaboratorDeleteParams = {
datasetId: string;
} & RequireOnlyOne<{
tmbId: string;
};
groupId: string;
}>;

View File

@@ -1,4 +1,3 @@
import { PermissionSchemaType } from '../../support/permission/type';
import type { LLMModelItemType, VectorModelItemType } from '../../core/ai/model.d';
import { PermissionTypeEnum } from '../../support/permission/constant';
import { PushDatasetDataChunkProps } from './api';
@@ -32,8 +31,11 @@ export type DatasetSchemaType = {
selector: string;
};
externalReadUrl?: string;
} & PermissionSchemaType;
// } & PermissionSchemaType;
inheritPermission: boolean;
// abandon
defaultPermission?: number;
};
export type DatasetCollectionSchemaType = {
_id: string;
@@ -146,7 +148,9 @@ export type DatasetListItemType = {
type: `${DatasetTypeEnum}`;
permission: DatasetPermission;
vectorModel: VectorModelItemType;
} & PermissionSchemaType;
inheritPermission: boolean;
private?: boolean;
};
export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel' | 'agentModel'> & {
vectorModel: VectorModelItemType;
@@ -187,7 +191,7 @@ export type DatasetDataItemType = {
chunkIndex: number;
indexes: DatasetDataIndexItemType[];
isOwner: boolean;
canWrite: boolean;
// permission: DatasetPermission;
};
/* --------------- file ---------------------- */
@@ -208,7 +212,7 @@ export type DatasetFileSchema = {
/* ============= search =============== */
export type SearchDataResponseItemType = Omit<
DatasetDataItemType,
'teamId' | 'indexes' | 'isOwner' | 'canWrite'
'teamId' | 'indexes' | 'isOwner'
> & {
score: { type: `${SearchScoreTypeEnum}`; value: number; index: number }[];
// score: number;

View File

@@ -201,6 +201,7 @@ export enum NodeInputKeyEnum {
nodeHeight = 'nodeHeight',
// loop start
loopStartInput = 'loopStartInput',
loopStartIndex = 'loopStartIndex',
// loop end
loopEndInput = 'loopEndInput',
@@ -256,9 +257,9 @@ export enum NodeOutputKeyEnum {
// loop
loopArray = 'loopArray',
// loop start
loopStartInput = 'loopStartInput',
loopStartIndex = 'loopStartIndex',
// form input
formInputResult = 'formInputResult'
@@ -334,3 +335,21 @@ export enum ContentTypes {
xml = 'xml',
raw = 'raw-text'
}
/**
 * Maps every workflow IO value type to the array type used for it.
 *
 * - string / number / boolean / object map to their `array*` counterpart.
 * - Array types map to themselves.
 * - chatHistory, datasetQuote, dynamic, selectDataset and selectApp map to arrayObject.
 * - any and arrayAny map to arrayAny.
 */
export const ArrayTypeMap: Record<WorkflowIOValueTypeEnum, WorkflowIOValueTypeEnum> = {
[WorkflowIOValueTypeEnum.string]: WorkflowIOValueTypeEnum.arrayString,
[WorkflowIOValueTypeEnum.number]: WorkflowIOValueTypeEnum.arrayNumber,
[WorkflowIOValueTypeEnum.boolean]: WorkflowIOValueTypeEnum.arrayBoolean,
[WorkflowIOValueTypeEnum.object]: WorkflowIOValueTypeEnum.arrayObject,
[WorkflowIOValueTypeEnum.arrayString]: WorkflowIOValueTypeEnum.arrayString,
[WorkflowIOValueTypeEnum.arrayNumber]: WorkflowIOValueTypeEnum.arrayNumber,
[WorkflowIOValueTypeEnum.arrayBoolean]: WorkflowIOValueTypeEnum.arrayBoolean,
[WorkflowIOValueTypeEnum.arrayObject]: WorkflowIOValueTypeEnum.arrayObject,
[WorkflowIOValueTypeEnum.chatHistory]: WorkflowIOValueTypeEnum.arrayObject,
[WorkflowIOValueTypeEnum.datasetQuote]: WorkflowIOValueTypeEnum.arrayObject,
[WorkflowIOValueTypeEnum.dynamic]: WorkflowIOValueTypeEnum.arrayObject,
[WorkflowIOValueTypeEnum.selectDataset]: WorkflowIOValueTypeEnum.arrayObject,
[WorkflowIOValueTypeEnum.selectApp]: WorkflowIOValueTypeEnum.arrayObject,
[WorkflowIOValueTypeEnum.arrayAny]: WorkflowIOValueTypeEnum.arrayAny,
[WorkflowIOValueTypeEnum.any]: WorkflowIOValueTypeEnum.arrayAny
};

View File

@@ -27,7 +27,9 @@ export enum FlowNodeInputTypeEnum { // render ui
settingDatasetQuotePrompt = 'settingDatasetQuotePrompt',
hidden = 'hidden',
custom = 'custom'
custom = 'custom',
fileSelect = 'fileSelect'
}
export const FlowNodeInputMap: Record<
FlowNodeInputTypeEnum,
@@ -85,6 +87,9 @@ export const FlowNodeInputMap: Record<
},
[FlowNodeInputTypeEnum.textarea]: {
icon: 'core/workflow/inputType/textarea'
},
[FlowNodeInputTypeEnum.fileSelect]: {
icon: 'core/workflow/inputType/file'
}
};
@@ -137,43 +142,43 @@ export enum FlowNodeTypeEnum {
// node IO value type
export const FlowValueTypeMap = {
[WorkflowIOValueTypeEnum.string]: {
label: 'string',
label: 'String',
value: WorkflowIOValueTypeEnum.string
},
[WorkflowIOValueTypeEnum.number]: {
label: 'number',
label: 'Number',
value: WorkflowIOValueTypeEnum.number
},
[WorkflowIOValueTypeEnum.boolean]: {
label: 'boolean',
label: 'Boolean',
value: WorkflowIOValueTypeEnum.boolean
},
[WorkflowIOValueTypeEnum.object]: {
label: 'object',
label: 'Object',
value: WorkflowIOValueTypeEnum.object
},
[WorkflowIOValueTypeEnum.arrayString]: {
label: 'array<string>',
label: 'Array<string>',
value: WorkflowIOValueTypeEnum.arrayString
},
[WorkflowIOValueTypeEnum.arrayNumber]: {
label: 'array<number>',
label: 'Array<number>',
value: WorkflowIOValueTypeEnum.arrayNumber
},
[WorkflowIOValueTypeEnum.arrayBoolean]: {
label: 'array<boolean>',
label: 'Array<boolean>',
value: WorkflowIOValueTypeEnum.arrayBoolean
},
[WorkflowIOValueTypeEnum.arrayObject]: {
label: 'array<object>',
label: 'Array<object>',
value: WorkflowIOValueTypeEnum.arrayObject
},
[WorkflowIOValueTypeEnum.arrayAny]: {
label: 'array',
label: 'Array',
value: WorkflowIOValueTypeEnum.arrayAny
},
[WorkflowIOValueTypeEnum.any]: {
label: 'any',
label: 'Any',
value: WorkflowIOValueTypeEnum.any
},
[WorkflowIOValueTypeEnum.chatHistory]: {

View File

@@ -135,6 +135,9 @@ export type DispatchNodeResponseType = {
extensionResult?: string;
extensionTokens?: number;
// dataset concat
concatLength?: number;
// cq
cqList?: ClassifyQuestionAgentItemType[];
cqResult?: string;
@@ -216,5 +219,7 @@ export type AIChatNodeProps = {
[NodeInputKeyEnum.aiChatQuoteTemplate]?: string;
[NodeInputKeyEnum.aiChatQuotePrompt]?: string;
[NodeInputKeyEnum.aiChatVision]?: boolean;
[NodeInputKeyEnum.stringQuoteText]?: string;
[NodeInputKeyEnum.fileUrlList]?: string[];
};

View File

@@ -5,8 +5,8 @@ import { StoreNodeItemType } from '../type/node';
import { StoreEdgeItemType } from '../type/edge';
import { RuntimeEdgeItemType, RuntimeNodeItemType } from './type';
import { VARIABLE_NODE_ID } from '../constants';
import { isReferenceValue } from '../utils';
import { FlowNodeOutputItemType, ReferenceValueProps } from '../type/io';
import { isValidReferenceValueFormat } from '../utils';
import { FlowNodeOutputItemType, ReferenceValueType } from '../type/io';
import { ChatItemType, NodeOutputItemType } from '../../../core/chat/type';
import { ChatItemValueTypeEnum, ChatRoleEnum } from '../../../core/chat/constants';
@@ -34,7 +34,7 @@ export const getMaxHistoryLimitFromNodes = (nodes: StoreNodeItemType[]): number
2. Check that the workflow starts at the interaction node
*/
export const getLastInteractiveValue = (histories: ChatItemType[]) => {
const lastAIMessage = histories.findLast((item) => item.obj === ChatRoleEnum.AI);
const lastAIMessage = [...histories].reverse().find((item) => item.obj === ChatRoleEnum.AI);
if (lastAIMessage) {
const lastValue = lastAIMessage.value[lastAIMessage.value.length - 1];
@@ -225,37 +225,129 @@ export const checkNodeRunStatus = ({
return 'wait';
};
/*
Get the value of the reference variable/node output
1. [string,string]
2. [string,string][]
*/
export const getReferenceVariableValue = ({
value,
nodes,
variables
}: {
value: ReferenceValueProps;
value?: ReferenceValueType;
nodes: RuntimeNodeItemType[];
variables: Record<string, any>;
}) => {
const nodeIds = nodes.map((node) => node.nodeId);
if (!isReferenceValue(value, nodeIds)) {
return value;
}
const sourceNodeId = value[0];
const outputId = value[1];
if (!value) return value;
if (sourceNodeId === VARIABLE_NODE_ID && outputId) {
return variables[outputId];
// handle single reference value
if (isValidReferenceValueFormat(value)) {
const sourceNodeId = value[0];
const outputId = value[1];
if (sourceNodeId === VARIABLE_NODE_ID) {
if (!outputId) return undefined;
return variables[outputId];
}
const node = nodes.find((node) => node.nodeId === sourceNodeId);
if (!node) {
return value;
}
return node.outputs.find((output) => output.id === outputId)?.value;
}
const node = nodes.find((node) => node.nodeId === sourceNodeId);
// handle reference array
if (
Array.isArray(value) &&
value.length > 0 &&
value.every((item) => isValidReferenceValueFormat(item))
) {
const result = value.map<any>((val) => {
return getReferenceVariableValue({
value: val,
nodes,
variables
});
});
if (!node) {
return undefined;
return result.flat().filter((item) => item !== undefined);
}
const outputValue = node.outputs.find((output) => output.id === outputId)?.value;
return outputValue;
return value;
};
/**
 * Replace every `{{$nodeId.id$}}` placeholder in `text` with the value it points to.
 *
 * Candidate values are collected, in this order, from:
 * 1. global variables (addressed as `{{$<VARIABLE_NODE_ID>.<key>$}}`),
 * 2. every output of every node in `nodes`,
 * 3. every input of `runningNode`, after the input value has been passed through
 *    `getReferenceVariableValue`.
 *
 * Value formatting: `undefined` becomes an empty string, `null` becomes `'null'`,
 * objects/arrays are serialized with `JSON.stringify`, everything else goes through `String()`.
 *
 * Substitution is literal: the placeholder is matched as plain text (ids are not interpreted
 * as regular-expression syntax) and the value is inserted verbatim (sequences such as `$1`,
 * `$&` or `$$` inside a value are not treated as replacement patterns).
 *
 * @param text - Template text; any non-string value is returned unchanged.
 * @param nodes - Runtime nodes whose outputs may be referenced.
 * @param variables - Global variables.
 * @param runningNode - The node currently being executed; its inputs may be referenced.
 * @returns The text with all known placeholders substituted.
 */
export function replaceEditorVariable({
  text,
  nodes,
  variables,
  runningNode
}: {
  text: any;
  nodes: RuntimeNodeItemType[];
  variables: Record<string, any>; // global variables
  runningNode: RuntimeNodeItemType;
}) {
  if (typeof text !== 'string') return text;

  const globalVariables = Object.keys(variables).map((key) => ({
    nodeId: VARIABLE_NODE_ID,
    id: key,
    value: variables[key]
  }));

  // Upstream node outputs
  const nodeVariables = nodes.flatMap((node) =>
    node.outputs.map((output) => ({
      nodeId: node.nodeId,
      id: output.id,
      value: output.value
    }))
  );

  // Inputs of the running node (reference values are resolved to the referenced value)
  const customInputs = runningNode.inputs.map((item) => ({
    nodeId: runningNode.nodeId,
    id: item.key,
    value: getReferenceVariableValue({
      value: item.value,
      nodes,
      variables
    })
  }));

  const formatValue = (val: any): string => {
    if (val === undefined) return '';
    if (val === null) return 'null';
    return typeof val === 'object' ? JSON.stringify(val) : String(val);
  };

  let result: string = text;

  for (const variable of [...globalVariables, ...nodeVariables, ...customInputs]) {
    const placeholder = '{{$' + variable.nodeId + '.' + variable.id + '$}}';

    // Only format values that are actually referenced by the text
    if (!result.includes(placeholder)) continue;

    // split/join performs a literal "replace all": no regex escaping is needed for the ids
    // and `$` sequences inside the value are inserted as-is.
    result = result.split(placeholder).join(formatValue(variable.value));
  }

  return result;
}
export const textAdaptGptResponse = ({
text,
model = '',

View File

@@ -75,10 +75,17 @@ export const Input_Template_Text_Quote: FlowNodeInputItemType = {
description: i18nT('app:document_quote_tip'),
valueType: WorkflowIOValueTypeEnum.string
};
// File link input that can be filled either through a reference or through direct input.
// It uses the same key (NodeInputKeyEnum.fileUrlList) as Input_Template_File_Link, which is
// reference-only and required; this variant is not marked as required.
export const Input_Template_File_Link_Prompt: FlowNodeInputItemType = {
key: NodeInputKeyEnum.fileUrlList,
renderTypeList: [FlowNodeInputTypeEnum.reference, FlowNodeInputTypeEnum.input],
label: i18nT('app:file_quote_link'),
debugLabel: i18nT('app:file_quote_link'),
valueType: WorkflowIOValueTypeEnum.arrayString
};
export const Input_Template_File_Link: FlowNodeInputItemType = {
key: NodeInputKeyEnum.fileUrlList,
renderTypeList: [FlowNodeInputTypeEnum.reference],
required: true,
label: i18nT('app:workflow.user_file_input'),
debugLabel: i18nT('app:workflow.user_file_input'),
description: i18nT('app:workflow.user_file_input_desc'),
@@ -104,7 +111,7 @@ export const Input_Template_Node_Height: FlowNodeInputItemType = {
renderTypeList: [FlowNodeInputTypeEnum.hidden],
valueType: WorkflowIOValueTypeEnum.number,
label: '',
value: 900
value: 600
};
export const Input_Template_Stream_MODE: FlowNodeInputItemType = {

View File

@@ -17,7 +17,8 @@ import {
Input_Template_History,
Input_Template_System_Prompt,
Input_Template_UserChatInput,
Input_Template_Text_Quote
Input_Template_Text_Quote,
Input_Template_File_Link_Prompt
} from '../../input';
import { chatNodeSystemPromptTip, systemPromptTip } from '../../tip';
import { getHandleConfig } from '../../utils';
@@ -55,7 +56,7 @@ export const AiChatModule: FlowNodeTemplateType = {
showStatus: true,
isTool: true,
courseUrl: '/docs/workflow/modules/ai_chat/',
version: '481',
version: '4813',
inputs: [
Input_Template_SettingAiModel,
// --- settings modal
@@ -89,7 +90,7 @@ export const AiChatModule: FlowNodeTemplateType = {
renderTypeList: [FlowNodeInputTypeEnum.hidden],
label: '',
valueType: WorkflowIOValueTypeEnum.boolean,
value: false
value: true
},
// settings modal ---
{
@@ -100,7 +101,7 @@ export const AiChatModule: FlowNodeTemplateType = {
},
Input_Template_History,
Input_Template_Dataset_Quote,
Input_Template_Text_Quote,
Input_Template_File_Link_Prompt,
{ ...Input_Template_UserChatInput, toolDescription: i18nT('workflow:user_question') }
],

View File

@@ -25,7 +25,7 @@ export const getOneQuoteInputTemplate = ({
}): FlowNodeInputItemType => ({
key,
renderTypeList: [FlowNodeInputTypeEnum.reference],
label: `${i18nT('workflow:quote_num')},{ num: ${index} }`,
label: `${i18nT('workflow:quote_num')}-${index}`,
debugLabel: i18nT('workflow:knowledge_base_reference'),
canEdit: true,
valueType: WorkflowIOValueTypeEnum.datasetQuote

View File

@@ -1,9 +1,9 @@
import { ReferenceValueProps } from 'core/workflow/type/io';
import { ReferenceItemValueType } from '../../../type/io';
import { VariableConditionEnum } from './constant';
export type IfElseConditionType = 'AND' | 'OR';
export type ConditionListItemType = {
variable?: ReferenceValueProps;
variable?: ReferenceItemValueType;
condition?: VariableConditionEnum;
value?: string;
};

View File

@@ -1,8 +1,13 @@
import { FlowNodeInputTypeEnum, FlowNodeTypeEnum } from '../../../node/constant';
import {
FlowNodeInputTypeEnum,
FlowNodeOutputTypeEnum,
FlowNodeTypeEnum
} from '../../../node/constant';
import { FlowNodeTemplateType } from '../../../type/node.d';
import {
FlowNodeTemplateTypeEnum,
NodeInputKeyEnum,
NodeOutputKeyEnum,
WorkflowIOValueTypeEnum
} from '../../../constants';
import { getHandleConfig } from '../../utils';
@@ -28,7 +33,21 @@ export const LoopStartNode: FlowNodeTemplateType = {
label: '',
required: true,
value: ''
},
{
key: NodeInputKeyEnum.loopStartIndex,
renderTypeList: [FlowNodeInputTypeEnum.hidden],
valueType: WorkflowIOValueTypeEnum.number,
label: i18nT('workflow:Array_element_index')
}
],
outputs: []
outputs: [
{
id: NodeOutputKeyEnum.loopStartIndex,
key: NodeOutputKeyEnum.loopStartIndex,
label: i18nT('workflow:Array_element_index'),
type: FlowNodeOutputTypeEnum.static,
valueType: WorkflowIOValueTypeEnum.number
}
]
};

View File

@@ -23,7 +23,7 @@ export const ReadFilesNode: FlowNodeTemplateType = {
name: i18nT('app:workflow.read_files'),
intro: i18nT('app:workflow.read_files_tip'),
showStatus: true,
version: '489',
version: '4812',
isTool: true,
inputs: [
{

View File

@@ -24,17 +24,8 @@ export const TextEditorNode: FlowNodeTemplateType = {
name: i18nT('workflow:text_concatenation'),
intro: i18nT('workflow:intro_text_concatenation'),
courseUrl: '/docs/workflow/modules/text_editor/',
version: '486',
version: '4813',
inputs: [
{
...Input_Template_DynamicInput,
description: i18nT('workflow:dynamic_input_description_concat'),
customInputConfig: {
selectValueTypeList: Object.values(WorkflowIOValueTypeEnum),
showDescription: false,
showDefaultValue: false
}
},
{
key: NodeInputKeyEnum.textareaInput,
renderTypeList: [FlowNodeInputTypeEnum.textarea],

View File

@@ -20,6 +20,7 @@ import { chatNodeSystemPromptTip, systemPromptTip } from '../tip';
import { LLMModelTypeEnum } from '../../../ai/constants';
import { getHandleConfig } from '../utils';
import { i18nT } from '../../../../../web/i18n/utils';
import { Input_Template_File_Link_Prompt } from '../input';
export const ToolModule: FlowNodeTemplateType = {
id: FlowNodeTypeEnum.tools,
@@ -32,7 +33,7 @@ export const ToolModule: FlowNodeTemplateType = {
intro: i18nT('workflow:template.tool_call_intro'),
showStatus: true,
courseUrl: '/docs/workflow/modules/tool/',
version: '481',
version: '4813',
inputs: [
{
...Input_Template_SettingAiModel,
@@ -67,6 +68,7 @@ export const ToolModule: FlowNodeTemplateType = {
placeholder: chatNodeSystemPromptTip
},
Input_Template_History,
Input_Template_File_Link_Prompt,
Input_Template_UserChatInput
],
outputs: [

View File

@@ -18,7 +18,7 @@ export const VariableUpdateNode: FlowNodeTemplateType = {
name: i18nT('workflow:variable_update'),
intro: i18nT('workflow:update_specified_node_output_or_global_variable'),
showStatus: false,
isTool: false,
isTool: true,
version: '481',
inputs: [
{

View File

@@ -1,10 +1,10 @@
import { FlowNodeInputTypeEnum } from '../../../node/constant';
import { ReferenceValueProps } from '../../..//type/io';
import { ReferenceItemValueType, ReferenceValueType } from '../../..//type/io';
import { WorkflowIOValueTypeEnum } from '../../../constants';
export type TUpdateListItem = {
variable?: ReferenceValueProps;
value: ReferenceValueProps;
variable?: ReferenceItemValueType;
value?: ReferenceValueType; // input: ['',value], reference: [nodeId,outputId]
valueType?: WorkflowIOValueTypeEnum;
renderType: FlowNodeInputTypeEnum.input | FlowNodeInputTypeEnum.reference;
};

View File

@@ -43,6 +43,3 @@ export const WorkflowStart: FlowNodeTemplateType = {
}
]
};
export const isWorkflowStartOutput = (key?: string) =>
!!WorkflowStart.outputs.find((output) => output.key === key);

View File

@@ -56,6 +56,11 @@ export type FlowNodeInputItemType = InputComponentPropsType & {
canEdit?: boolean; // dynamic inputs
isPro?: boolean; // Pro version field
isToolOutput?: boolean;
// file
canSelectFile?: boolean;
canSelectImg?: boolean;
maxFiles?: number;
};
export type FlowNodeOutputItemType = {
@@ -75,4 +80,6 @@ export type FlowNodeOutputItemType = {
customFieldConfig?: CustomFieldConfigType;
};
export type ReferenceValueProps = [string, string | undefined];
// A single reference: [source node id (or the global-variable node id), output id / variable key].
export type ReferenceItemValueType = [string, string | undefined];
// A list of single references.
export type ReferenceArrayValueType = ReferenceItemValueType[];
// A reference value is either one reference or a list of references.
export type ReferenceValueType = ReferenceItemValueType | ReferenceArrayValueType;

View File

@@ -12,7 +12,12 @@ import {
VARIABLE_NODE_ID,
NodeOutputKeyEnum
} from './constants';
import { FlowNodeInputItemType, FlowNodeOutputItemType, ReferenceValueProps } from './type/io.d';
import {
FlowNodeInputItemType,
FlowNodeOutputItemType,
ReferenceArrayValueType,
ReferenceItemValueType
} from './type/io.d';
import { StoreNodeItemType } from './type/node';
import type {
VariableItemType,
@@ -30,8 +35,8 @@ import {
} from '../app/constants';
import { IfElseResultEnum } from './template/system/ifElse/constant';
import { RuntimeNodeItemType } from './runtime/type';
import { getReferenceVariableValue } from './runtime/utils';
import {
Input_Template_File_Link,
Input_Template_History,
Input_Template_Stream_MODE,
Input_Template_UserChatInput
@@ -261,8 +266,10 @@ export const appData2FlowNodeIO = ({
inputs: [
Input_Template_Stream_MODE,
Input_Template_History,
...(chatConfig?.fileSelectConfig?.canSelectFile || chatConfig?.fileSelectConfig?.canSelectImg
? [Input_Template_File_Link]
: []),
Input_Template_UserChatInput,
// ...(showFileLink ? [Input_Template_File_Link] : []),
...variableInput
],
outputs: [
@@ -298,9 +305,37 @@ export const formatEditorVariablePickerIcon = (
}));
};
export const isReferenceValue = (value: any, nodeIds: string[]): boolean => {
const validIdList = [VARIABLE_NODE_ID, ...nodeIds];
return Array.isArray(value) && value.length === 2 && validIdList.includes(value[0]);
/**
 * Shape check only: is `value` a two-element array whose first element is a string?
 * The second element is not inspected, and the first element is not checked against
 * any list of known node ids.
 */
export const isValidReferenceValueFormat = (value: any): value is ReferenceItemValueType => {
  if (!Array.isArray(value)) return false;
  if (value.length !== 2) return false;

  const [sourceId] = value;
  return typeof sourceId === 'string';
};
/*
  Check whether the value is a reference ([sourceId, outputId]) that points to a known source:
  1. It must have the reference format (array of length 2 whose first item is a string)
  2. Its first item must be VARIABLE_NODE_ID or one of nodeIds
*/
export const isValidReferenceValue = (
  value: any,
  nodeIds: string[]
): value is ReferenceItemValueType => {
  if (isValidReferenceValueFormat(value)) {
    const [sourceId] = value;
    return sourceId === VARIABLE_NODE_ID || nodeIds.includes(sourceId);
  }

  return false;
};
/*
  Check whether the value is an array of references ([sourceId, outputId][]):
  1. The value must be an array
  2. Every element in the array must be a valid reference value (see isValidReferenceValue)
  Note: an empty array has no failing element and therefore passes this check.
*/
export const isValidArrayReferenceValue = (
  value: any,
  nodeIds: string[]
): value is ReferenceArrayValueType =>
  Array.isArray(value) && value.every((item) => isValidReferenceValue(item, nodeIds));
export const getElseIFLabel = (i: number) => {
@@ -342,79 +377,6 @@ export const updatePluginInputByVariables = (
);
};
// replace {{$xx.xx$}} variables for text
export function replaceEditorVariable({
text,
nodes,
variables,
runningNode
}: {
text: any;
nodes: RuntimeNodeItemType[];
variables: Record<string, any>; // global variables
runningNode: RuntimeNodeItemType;
}) {
if (typeof text !== 'string') return text;
const globalVariables = Object.keys(variables).map((key) => {
return {
nodeId: VARIABLE_NODE_ID,
id: key,
value: variables[key]
};
});
// Upstream node outputs
const nodeVariables = nodes
.map((node) => {
return node.outputs.map((output) => {
return {
nodeId: node.nodeId,
id: output.id,
value: output.value
};
});
})
.flat();
// Get runningNode inputs(Will be replaced with reference)
const customInputs = runningNode.inputs.flatMap((item) => {
if (Array.isArray(item.value)) {
return [
{
id: item.key,
value: getReferenceVariableValue({
value: item.value as ReferenceValueProps,
nodes,
variables
}),
nodeId: runningNode.nodeId
}
];
}
return [
{
id: item.key,
value: item.value,
nodeId: runningNode.nodeId
}
];
});
const allVariables = [...globalVariables, ...nodeVariables, ...customInputs];
// Replace {{$xxx.xxx$}} to value
for (const key in allVariables) {
const variable = allVariables[key];
const val = variable.value;
const formatVal = typeof val === 'object' ? JSON.stringify(val) : String(val);
const regex = new RegExp(`\\{\\{\\$(${variable.nodeId}\\.${variable.id})\\$\\}\\}`, 'g');
text = text.replace(regex, formatVal);
}
return text || '';
}
/* Get plugin runtime input user query */
export const getPluginRunUserQuery = ({
pluginInputs,

View File

@@ -4,11 +4,13 @@ import { PermissionValueType } from './type';
export type CollaboratorItemType = {
teamId: string;
tmbId: string;
permission: Permission;
name: string;
avatar: string;
};
} & RequireOnlyOne<{
tmbId: string;
groupId: string;
}>;
export type UpdateClbPermissionProps = {
members?: string[];

View File

@@ -1,4 +1,3 @@
import { Permission } from './controller';
import { PermissionListType } from './type';
import { i18nT } from '../../../web/i18n/utils';
export enum AuthUserTypeEnum {

View File

@@ -1,6 +1,7 @@
import { RequireOnlyOne } from '../../common/type/utils';
import { TeamMemberWithUserSchema } from '../user/team/type';
import { AuthUserTypeEnum, PermissionKeyEnum, PerResourceTypeEnum } from './constant';
import { MemberGroupSchemaType } from './memberGroup/type';
// PermissionValueType, the type of permission's value is a number, which is a bit field actually.
// It is spired by the permission system in Linux.
@@ -33,6 +34,10 @@ export type ResourcePerWithTmbWithUser = Omit<ResourcePermissionType, 'tmbId'> &
tmbId: TeamMemberWithUserSchema;
};
// ResourcePermissionType whose `groupId` field is typed as the member group record
// (MemberGroupSchemaType) instead of the field's original type.
export type ResourcePerWithGroup = Omit<ResourcePermissionType, 'groupId'> & {
groupId: MemberGroupSchemaType;
};
export type PermissionSchemaType = {
defaultPermission: PermissionValueType;
inheritPermission: boolean;

View File

@@ -14,7 +14,8 @@ const staticPluginList = [
`Doc2X/FilePDF2text`,
`Doc2X/FileImg2text`,
'feishu',
'google'
'google',
'bing'
];
// Run in worker thread (Have npm packages)
const packagePluginList = [

View File

@@ -0,0 +1,510 @@
{
"author": "",
"version": "4811",
"name": "Bing搜索",
"avatar": "core/workflow/template/bing",
"intro": "在Bing中搜索。",
"showStatus": true,
"weight": 10,
"courseUrl": "https://fael3z0zfze.feishu.cn/wiki/LsKAwOmtniA4vkkC259cmfxXnAc?fromScene=spaceOverview",
"isTool": true,
"templateType": "search",
"workflow": {
"nodes": [
{
"nodeId": "pluginInput",
"name": "workflow:template.plugin_start",
"intro": "workflow:intro_plugin_input",
"avatar": "core/workflow/template/workflowStart",
"flowNodeType": "pluginInput",
"showStatus": false,
"position": {
"x": 636.3048409085379,
"y": -238.61714728578016
},
"version": "481",
"inputs": [
{
"renderTypeList": ["input"],
"selectedTypeIndex": 0,
"valueType": "string",
"canEdit": true,
"key": "key",
"label": "key",
"description": "bing搜索key",
"defaultValue": "",
"required": true
},
{
"renderTypeList": ["input", "reference"],
"selectedTypeIndex": 0,
"valueType": "string",
"canEdit": true,
"key": "query",
"label": "query",
"description": "查询字段值",
"defaultValue": "",
"list": [
{
"label": "",
"value": ""
}
],
"required": true,
"toolDescription": "查询字段值"
}
],
"outputs": [
{
"id": "key",
"valueType": "string",
"key": "key",
"label": "key",
"type": "hidden"
},
{
"id": "query",
"valueType": "string",
"key": "query",
"label": "query",
"type": "hidden"
}
]
},
{
"nodeId": "pluginOutput",
"name": "common:core.module.template.self_output",
"intro": "workflow:intro_custom_plugin_output",
"avatar": "core/workflow/template/pluginOutput",
"flowNodeType": "pluginOutput",
"showStatus": false,
"position": {
"x": 2764.1105686698083,
"y": -30.617147285780163
},
"version": "481",
"inputs": [
{
"renderTypeList": ["reference"],
"valueType": "object",
"canEdit": true,
"key": "result",
"label": "result",
"isToolOutput": true,
"description": "",
"value": ["pZTkvleFSZXo", "system_rawResponse"]
}
],
"outputs": []
},
{
"nodeId": "pluginConfig",
"name": "common:core.module.template.system_config",
"intro": "",
"avatar": "core/workflow/template/systemConfig",
"flowNodeType": "pluginConfig",
"position": {
"x": 184.66337662472682,
"y": -216.05298493910115
},
"version": "4811",
"inputs": [],
"outputs": []
},
{
"nodeId": "nyA6oA8mF1iW",
"name": "HTTP 请求",
"intro": "调用Bing搜索,查询相关内容",
"avatar": "core/workflow/template/httpRequest",
"flowNodeType": "httpRequest468",
"showStatus": true,
"position": {
"x": 1335.0647252518884,
"y": -455.9043948565971
},
"version": "481",
"inputs": [
{
"key": "system_addInputParam",
"renderTypeList": ["addInputParam"],
"valueType": "dynamic",
"label": "",
"required": false,
"description": "common:core.module.input.description.HTTP Dynamic Input",
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpMethod",
"renderTypeList": ["custom"],
"valueType": "string",
"label": "",
"value": "GET",
"required": true,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpTimeout",
"renderTypeList": ["custom"],
"valueType": "number",
"label": "",
"value": 30,
"min": 5,
"max": 600,
"required": true,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpReqUrl",
"renderTypeList": ["hidden"],
"valueType": "string",
"label": "",
"description": "common:core.module.input.description.Http Request Url",
"placeholder": "https://api.ai.com/getInventory",
"required": false,
"value": "https://api.bing.microsoft.com/v7.0/search",
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpHeader",
"renderTypeList": ["custom"],
"valueType": "any",
"value": [
{
"key": "Ocp-Apim-Subscription-Key",
"type": "string",
"value": "{{$pluginInput.key$}}"
}
],
"label": "",
"description": "common:core.module.input.description.Http Request Header",
"placeholder": "common:core.module.input.description.Http Request Header",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpParams",
"renderTypeList": ["hidden"],
"valueType": "any",
"value": [
{
"key": "q",
"type": "string",
"value": "{{query}}"
}
],
"label": "",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpJsonBody",
"renderTypeList": ["hidden"],
"valueType": "any",
"value": "",
"label": "",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpFormBody",
"renderTypeList": ["hidden"],
"valueType": "any",
"value": [],
"label": "",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpContentType",
"renderTypeList": ["hidden"],
"valueType": "string",
"value": "json",
"label": "",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"valueType": "string",
"renderTypeList": ["reference"],
"key": "query",
"label": "query",
"toolDescription": "Bing搜索检索词",
"required": true,
"canEdit": true,
"editField": {
"key": true,
"description": true
},
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"value": ["pluginInput", "query"]
}
],
"outputs": [
{
"id": "error",
"key": "error",
"label": "workflow:request_error",
"description": "HTTP请求错误信息,成功时返回空",
"valueType": "object",
"type": "static"
},
{
"id": "httpRawResponse",
"key": "httpRawResponse",
"required": true,
"label": "workflow:raw_response",
"description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。",
"valueType": "any",
"type": "static"
},
{
"id": "system_addOutputParam",
"key": "system_addOutputParam",
"type": "dynamic",
"valueType": "dynamic",
"label": "",
"editField": {
"key": true,
"valueType": true
}
},
{
"id": "M5YmxaYe8em1",
"type": "dynamic",
"key": "prompt",
"valueType": "string",
"label": "prompt"
}
]
},
{
"nodeId": "pZTkvleFSZXo",
"name": "代码运行",
"intro": "执行一段简单的脚本代码,通常用于进行复杂的数据处理。",
"avatar": "core/workflow/template/codeRun",
"flowNodeType": "code",
"showStatus": true,
"position": {
"x": 2153.5325687235554,
"y": -188.04429852303304
},
"version": "482",
"inputs": [
{
"key": "system_addInputParam",
"renderTypeList": ["addInputParam"],
"valueType": "dynamic",
"label": "",
"required": false,
"description": "workflow:these_variables_will_be_input_parameters_for_code_execution",
"editField": {
"key": true,
"valueType": true
},
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"debugLabel": "",
"toolDescription": ""
},
{
"key": "codeType",
"renderTypeList": ["hidden"],
"label": "",
"value": "js",
"debugLabel": "",
"toolDescription": ""
},
{
"key": "code",
"renderTypeList": ["custom"],
"label": "",
"value": "function main({data}){\n const result = data.webPages.value.map((item) => ({\n title: item.name,\n link: item.url,\n snippet: item.snippet\n }))\n return JSON.stringify(result) \n}",
"debugLabel": "",
"toolDescription": ""
},
{
"key": "data",
"valueType": "object",
"label": "data",
"renderTypeList": ["reference"],
"description": "",
"canEdit": true,
"editField": {
"key": true,
"valueType": true
},
"value": ["nyA6oA8mF1iW", "httpRawResponse"],
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
}
}
],
"outputs": [
{
"id": "system_rawResponse",
"key": "system_rawResponse",
"label": "workflow:full_response_data",
"valueType": "object",
"type": "static",
"description": ""
},
{
"id": "error",
"key": "error",
"label": "workflow:execution_error",
"description": "代码运行错误信息,成功时返回空",
"valueType": "object",
"type": "static"
},
{
"id": "system_addOutputParam",
"key": "system_addOutputParam",
"type": "dynamic",
"valueType": "dynamic",
"label": "",
"editField": {
"key": true,
"valueType": true
},
"description": "将代码中 return 的对象作为输出,传递给后续的节点"
},
{
"id": "qLUQfhG0ILRX",
"type": "dynamic",
"key": "prompt",
"valueType": "string",
"label": "prompt"
}
]
}
],
"edges": [
{
"source": "pluginInput",
"target": "nyA6oA8mF1iW",
"sourceHandle": "pluginInput-source-right",
"targetHandle": "nyA6oA8mF1iW-target-left"
},
{
"source": "nyA6oA8mF1iW",
"target": "pZTkvleFSZXo",
"sourceHandle": "nyA6oA8mF1iW-source-right",
"targetHandle": "pZTkvleFSZXo-target-left"
},
{
"source": "pZTkvleFSZXo",
"target": "pluginOutput",
"sourceHandle": "pZTkvleFSZXo-source-right",
"targetHandle": "pluginOutput-target-left"
}
],
"chatConfig": {
"welcomeText": "",
"variables": [],
"questionGuide": false,
"ttsConfig": {
"type": "web"
},
"whisperConfig": {
"open": false,
"autoSend": false,
"autoTTSResponse": false
},
"chatInputGuide": {
"open": false,
"textList": [],
"customUrl": ""
},
"instruction": "",
"_id": "6709e90cd9873479ee78fe71"
}
}
}

View File

@@ -1,7 +1,5 @@
import * as echarts from 'echarts';
import json5 from 'json5';
import { getFileSavePath } from '../../../utils';
import * as fs from 'fs';
import { SystemPluginSpecialResponse } from '../../../type.d';
type Props = {
@@ -82,25 +80,23 @@ const generateChart = async (title: string, xAxis: string, yAxis: string, chartT
chart.setOption(option);
// 生成 Base64 图像
const base64Image = chart.getDataURL();
const svgData = decodeURIComponent(base64Image.split(',')[1]);
const base64Image = chart.getDataURL({
type: 'png',
pixelRatio: 2 // 可以设置更高的像素比以获得更清晰的图像
});
const svgContent = decodeURIComponent(base64Image.split(',')[1]);
const base64 = `data:image/svg+xml;base64,${Buffer.from(svgContent).toString('base64')}`;
const fileName = `chart_${Date.now()}.svg`;
const filePath = getFileSavePath(fileName);
fs.writeFileSync(filePath, svgData);
// 释放图表实例
chart.dispose();
return filePath;
return base64;
};
const main = async ({ title, xAxis, yAxis, chartType }: Props): Response => {
const filePath = await generateChart(title, xAxis, yAxis, chartType);
const base64 = await generateChart(title, xAxis, yAxis, chartType);
return {
result: {
type: 'SYSTEM_PLUGIN_FILE',
path: filePath,
contentType: 'image/svg+xml'
type: 'SYSTEM_PLUGIN_BASE64',
value: base64,
extension: 'svg'
}
};
};

View File

@@ -6,7 +6,7 @@
"intro": "在google中搜索。",
"showStatus": true,
"weight": 10,
"courseUrl": "https://fael3z0zfze.feishu.cn/wiki/Vqk1w4ltNiuLifkHTuoc0hSrnVg?fromScene=spaceOverview",
"isTool": true,
"templateType": "search",

View File

@@ -4,9 +4,9 @@ import { SystemPluginTemplateItemType } from '@fastgpt/global/core/workflow/type
export type SystemPluginResponseType = Promise<Record<string, any>>;
export type SystemPluginSpecialResponse = {
type: 'SYSTEM_PLUGIN_FILE';
path: string;
contentType: string;
type: 'SYSTEM_PLUGIN_BASE64';
value: string;
extension: string;
};
declare global {

View File

@@ -1,15 +0,0 @@
import path from 'path';
import * as fs from 'fs';
const isProduction = process.env.NODE_ENV === 'production';
export const getFileSavePath = (name: string) => {
if (isProduction) {
return `/app/plugin_file/${name}`;
}
const filePath = path.join(process.cwd(), 'local', 'plugin_file', name);
fs.mkdirSync(path.dirname(filePath), { recursive: true });
return filePath;
};

View File

@@ -12,6 +12,7 @@ import { gridFsStream2Buffer, stream2Encoding } from './utils';
import { addLog } from '../../system/log';
import { readFromSecondary } from '../../mongo/utils';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import { Readable } from 'stream';
export function getGFSCollection(bucket: `${BucketNameEnum}`) {
MongoDatasetFileSchema;
@@ -76,6 +77,59 @@ export async function uploadFile({
return String(stream.id);
}
/**
 * Store a base64 data URL (`data:<mime>[;base64],<payload>`) as a file in a GridFS bucket.
 *
 * The payload after the first comma is decoded from base64 and streamed into the bucket.
 * The media type found in the data URL header (without the `;base64` marker or other
 * parameters) is stored as the file's content type. `teamId`, `tmbId` and the encoding
 * reported by `stream2Encoding` are added to a copy of `metadata`; the caller's object is
 * not modified.
 *
 * @param bucketName - Target GridFS bucket.
 * @param teamId - Stored in the file metadata.
 * @param tmbId - Stored in the file metadata.
 * @param base64 - Data URL holding the file content.
 * @param filename - Name given to the stored file.
 * @param metadata - Extra metadata saved alongside the defaults.
 * @returns The id of the stored file, as a string.
 *          Rejects (with a string message) when `base64` or `filename` is empty, or when
 *          `base64` has no `,` separating the header from the payload.
 */
export async function uploadFileFromBase64Img({
  bucketName,
  teamId,
  tmbId,
  base64,
  filename,
  metadata = {}
}: {
  bucketName: `${BucketNameEnum}`;
  teamId: string;
  tmbId: string;
  base64: string;
  filename: string;
  metadata?: Record<string, any>;
}) {
  if (!base64) return Promise.reject(`base64 is empty`);
  if (!filename) return Promise.reject(`filename is empty`);

  // Split "data:<mime>;base64" from the payload at the first comma
  const separatorIndex = base64.indexOf(',');
  if (separatorIndex === -1) return Promise.reject(`base64 is not a valid data url`);

  const header = base64.slice(0, separatorIndex);
  const base64Data = base64.slice(separatorIndex + 1);

  // "data:image/png;base64" -> "image/png" (drop the ";base64" marker and any parameters)
  const contentType = header.split(':')[1]?.split(';')[0] || undefined;

  const buffer = Buffer.from(base64Data, 'base64');
  const readableStream = new Readable({
    read() {
      this.push(buffer);
      this.push(null);
    }
  });

  const { stream: readStream, encoding } = await stream2Encoding(readableStream);

  // Add default metadata on a copy, so the caller's object is left untouched
  const fileMetadata: Record<string, any> = {
    ...metadata,
    teamId,
    tmbId,
    encoding
  };

  // create a gridfs bucket
  const bucket = getGridBucket(bucketName);

  const stream = bucket.openUploadStream(filename, {
    metadata: fileMetadata,
    contentType
  });

  // save to gridfs
  await new Promise((resolve, reject) => {
    readStream
      .pipe(stream as any)
      .on('finish', resolve)
      .on('error', reject);
  });

  return String(stream.id);
}
export async function getFileById({
bucketName,
@@ -159,7 +213,6 @@ export const readFileContentFromMongo = async ({
getFileById({ bucketName, fileId }),
getDownloadStream({ bucketName, fileId })
]);
// console.log('get file stream', Date.now() - start);
if (!file) {
return Promise.reject(CommonErrEnum.fileNotFound);
}

View File

@@ -1,7 +1,5 @@
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
import { uploadMongoImg } from '../image/controller';
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
import { addHours } from 'date-fns';
import FormData from 'form-data';
import { WorkerNameEnum, runWorker } from '../../../worker/utils';
@@ -10,6 +8,8 @@ import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import type { ReadFileResponse } from '../../../worker/readFile/type';
import axios from 'axios';
import { addLog } from '../../system/log';
import { batchRun } from '@fastgpt/global/common/fn/utils';
import { addHours } from 'date-fns';
export type readRawTextByLocalFileParams = {
teamId: string;
@@ -53,21 +53,6 @@ export const readRawContentByFileBuffer = async ({
encoding: string;
metadata?: Record<string, any>;
}) => {
// Upload image in markdown
const matchMdImgTextAndUpload = ({ teamId, md }: { md: string; teamId: string }) =>
markdownProcess({
rawText: md,
uploadImgController: (base64Img) =>
uploadMongoImg({
type: MongoImageTypeEnum.collectionImage,
base64Img,
teamId,
metadata,
expiredTime: addHours(new Date(), 1)
})
});
/* If */
const customReadfileUrl = process.env.CUSTOM_READ_FILE_URL;
const customReadFileExtension = process.env.CUSTOM_READ_FILE_EXTENSION || '';
const ocrParse = process.env.CUSTOM_READ_FILE_OCR || 'false';
@@ -111,19 +96,29 @@ export const readRawContentByFileBuffer = async ({
};
};
let { rawText, formatText } =
let { rawText, formatText, imageList } =
(await readFileFromCustomService()) ||
(await runWorker<ReadFileResponse>(WorkerNameEnum.readFile, {
extension,
encoding,
buffer
buffer,
teamId
}));
// markdown data format
if (['md', 'html', 'docx', ...customReadFileExtension.split(',')].includes(extension)) {
rawText = await matchMdImgTextAndUpload({
teamId: teamId,
md: rawText
if (imageList) {
await batchRun(imageList, async (item) => {
const src = await uploadMongoImg({
type: MongoImageTypeEnum.collectionImage,
base64Img: `data:${item.mime};base64,${item.base64}`,
teamId,
expiredTime: addHours(new Date(), 1),
metadata: {
...metadata,
mime: item.mime
}
});
rawText = rawText.replace(item.uuid, src);
});
}

View File

@@ -19,8 +19,11 @@ export const NextEntry = ({ beforeCallback = [] }: { beforeCallback?: Promise<an
await Promise.all([withNextCors(req, res), ...beforeCallback]);
let response = null;
for (const handler of args) {
for await (const handler of args) {
response = await handler(req, res);
if (res.writableFinished) {
break;
}
}
// Get request duration

View File

@@ -0,0 +1,32 @@
import { ApiRequestProps } from '../../type/next';
import requestIp from 'request-ip';
import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
import { authFrequencyLimit } from '../system/frequencyLimit/utils';
import { addSeconds } from 'date-fns';
import { NextApiResponse } from 'next';
import { jsonRes } from '../response';
// Per-IP request frequency limit, built as a handler to run before the real API handler.
// how to use?
// export default NextAPI(useReqFrequencyLimit(1, 10), handler); // one ip: at most 10 requests counted within a 1 second window
// Only active when the env USE_IP_LIMIT is 'true' and the client ip can be resolved.
export function useReqFrequencyLimit(seconds: number, limit: number) {
  const limiter = async (req: ApiRequestProps, res: NextApiResponse) => {
    const clientIp = requestIp.getClientIp(req);
    const limitEnabled = process.env.USE_IP_LIMIT === 'true';
    if (!clientIp || !limitEnabled) {
      return;
    }

    // Any rejection from the counter is treated as "limit exceeded"
    let exceeded = false;
    try {
      const windowEnd = addSeconds(new Date(), seconds);
      await authFrequencyLimit({
        eventId: `ip-qps-limit${clientIp}`,
        maxAmount: limit,
        expiredTime: windowEnd
      });
    } catch {
      exceeded = true;
    }

    if (!exceeded) return;

    // Answer the request here with 429
    res.status(429);
    jsonRes(res, {
      code: 429,
      message: ERROR_ENUM.tooManyRequest
    });
  };

  return limiter;
}

View File

@@ -1,8 +1,12 @@
import { simpleMarkdownText } from '@fastgpt/global/common/string/markdown';
import { WorkerNameEnum, runWorker } from '../../worker/utils';
import { ImageType } from '../../worker/readFile/type';
export const htmlToMarkdown = async (html?: string | null) => {
const md = await runWorker<string>(WorkerNameEnum.htmlStr2Md, { html: html || '' });
const md = await runWorker<{
rawText: string;
imageList: ImageType[];
}>(WorkerNameEnum.htmlStr2Md, { html: html || '' });
return simpleMarkdownText(md);
return simpleMarkdownText(md.rawText);
};

View File

@@ -0,0 +1,27 @@
import { getMongoModel, Schema } from '../../mongo';
import type { FrequencyLimitSchemaType } from './type';
// Counter record used for frequency limiting: one document per event and time window.
const FrequencyLimitSchema = new Schema({
  // Identifier of the limited event
  eventId: {
    type: String,
    required: true
  },
  // Number of times the event has been counted in the current window
  amount: {
    type: Number,
    default: 0
  },
  // End of the current window
  expiredTime: {
    type: Date,
    required: true
  }
});

try {
  // Supports the lookup by eventId + unexpired expiredTime
  FrequencyLimitSchema.index({ eventId: 1, expiredTime: 1 });
  // TTL index: with expireAfterSeconds 0 the document expires at the time stored in expiredTime
  FrequencyLimitSchema.index({ expiredTime: 1 }, { expireAfterSeconds: 0 });
} catch (error) {
  // Index registration is best-effort, but a failure should be visible instead of silently dropped
  console.log(error);
}

export const MongoFrequencyLimit = getMongoModel<FrequencyLimitSchemaType>(
  'frequency_limit',
  FrequencyLimitSchema
);

View File

@@ -0,0 +1,6 @@
// Shape of a frequency limit record.
export type FrequencyLimitSchemaType = {
_id: string;
eventId: string; // Event id
amount: number; // Current count
expiredTime: Date; // When the record expires; after expiry the count starts over
};

View File

@@ -0,0 +1,33 @@
import { AuthFrequencyLimitProps } from '@fastgpt/global/common/frequenctLimit/type';
import { MongoFrequencyLimit } from './schema';
/**
 * Count one occurrence of `eventId` and reject when the count exceeds `maxAmount`.
 *
 * The counter lives in the unexpired record of the event; when no such record exists one is
 * created with the given `expiredTime`.
 *
 * @returns Resolves with `undefined` while the count is within the limit, and also when the
 * database call fails (the error is logged and the request is let through).
 * Rejects with the counter record when `amount` is greater than `maxAmount`.
 */
export const authFrequencyLimit = async ({
  eventId,
  maxAmount,
  expiredTime
}: AuthFrequencyLimitProps) => {
  // Increase the amount of the eventId by 1; create the record if it does not exist.
  // Database failures are logged and reported as "no record" so they never block the caller.
  const increaseAmount = async () => {
    try {
      return await MongoFrequencyLimit.findOneAndUpdate(
        {
          eventId,
          expiredTime: { $gte: new Date() }
        },
        {
          $inc: { amount: 1 },
          // If not exist, set the expiredTime
          $setOnInsert: { expiredTime }
        },
        {
          upsert: true,
          new: true
        }
      ).lean();
    } catch (error) {
      console.log(error);
      return null;
    }
  };

  const result = await increaseAmount();

  // The returned amount already includes this call (+1), so the comparison is ">" and not ">=".
  // The check sits outside the try/catch so the rejection can never be swallowed by it.
  if (result && result.amount > maxAmount) {
    return Promise.reject(result);
  }
};

View File

@@ -13,6 +13,7 @@ import type {
} from '../controller.d';
import { delay } from '@fastgpt/global/common/system/utils';
import { addLog } from '../../../common/system/log';
import { customNanoid } from '@fastgpt/global/common/string/tools';
export class MilvusCtrl {
constructor() {}
@@ -63,7 +64,7 @@ export class MilvusCtrl {
name: 'id',
data_type: DataType.Int64,
is_primary_key: true,
autoID: true
autoID: false // disable auto id, and we need to set id in insert
},
{
name: 'vector',
@@ -127,11 +128,21 @@ export class MilvusCtrl {
const client = await this.getClient();
const { teamId, datasetId, collectionId, vector, retry = 3 } = props;
const generateId = () => {
// in js, the max safe integer is 2^53 - 1: 9007199254740991
// so we can generate a random number between 1-8 as the first digit
// and the rest 15 digits can be random
const firstDigit = customNanoid('12345678', 1);
const restDigits = customNanoid('1234567890', 15);
return Number(`${firstDigit}${restDigits}`);
};
const id = generateId();
try {
const result = await client.insert({
collection_name: DatasetVectorTableName,
data: [
{
id,
vector,
teamId: String(teamId),
datasetId: String(datasetId),

View File

@@ -7,14 +7,20 @@ import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
1. Commercial plugin: n points per times
2. Other plugin: sum of children points
*/
export const computedPluginUsage = async (
plugin: PluginRuntimeType,
childrenUsage: ChatNodeUsageType[]
) => {
export const computedPluginUsage = async ({
plugin,
childrenUsage,
error
}: {
plugin: PluginRuntimeType;
childrenUsage: ChatNodeUsageType[];
error?: boolean;
}) => {
const { source } = await splitCombinePluginId(plugin.id);
// Commercial plugin: n points per times
if (source === PluginSourceEnum.commercial) {
if (error) return 0;
return plugin.currentCost ?? 0;
}

View File

@@ -5,8 +5,6 @@ import {
TeamCollectionName,
TeamMemberCollectionName
} from '@fastgpt/global/support/user/team/constant';
import { AppDefaultPermissionVal } from '@fastgpt/global/support/permission/app/constant';
import { getPermissionSchema } from '@fastgpt/global/support/permission/utils';
export const AppCollectionName = 'apps';
@@ -111,8 +109,13 @@ const AppSchema = new Schema({
inited: {
type: Boolean
},
inheritPermission: {
type: Boolean,
default: true
},
...getPermissionSchema(AppDefaultPermissionVal)
// abandoned
defaultPermission: Number
});
AppSchema.index({ teamId: 1, updateTime: -1 });

View File

@@ -0,0 +1,155 @@
import { addLog } from '../../common/system/log';
import { MongoChatItem } from './chatItemSchema';
import { MongoChat } from './chatSchema';
import axios from 'axios';
import { AIChatItemType, ChatItemType } from '@fastgpt/global/core/chat/type';
// Custom key/value information attached to a pushed chat log.
// Each entry carries a label and a value; the whole object is JSON-stringified before it is sent.
export type Metadata = {
[key: string]: {
label: string;
value: string;
};
};
/**
 * Schedule a delayed push of one question/answer pair to the chat log service.
 *
 * Does nothing unless the env CHAT_LOG_INTERVAL is a positive number and CHAT_LOG_URL is set.
 * The push is fire-and-forget: it is started CHAT_LOG_INTERVAL milliseconds later through
 * `setTimeout`, and this function returns right away.
 */
export const pushChatLog = ({
  chatId,
  chatItemIdHuman,
  chatItemIdAi,
  appId,
  metadata
}: {
  chatId: string;
  chatItemIdHuman: string;
  chatItemIdAi: string;
  appId: string;
  metadata?: Metadata;
}) => {
  const url = process.env.CHAT_LOG_URL;
  // An unset or non-numeric interval becomes NaN, which fails the "> 0" check
  const interval = Number(process.env.CHAT_LOG_INTERVAL);
  if (!url || !(interval > 0)) return;

  addLog.info(`[ChatLogPush] push chat log after ${interval}ms`, {
    appId,
    chatItemIdHuman,
    chatItemIdAi
  });

  const push = () => {
    pushChatLogInternal({ chatId, chatItemIdHuman, chatItemIdAi, appId, url, metadata });
  };
  setTimeout(push, interval);
};
// Chat item extended with the feedback, chat id, response data and time fields.
// NOTE(review): no reference to this type is visible in the surrounding code; confirm it is still needed.
type ChatItem = ChatItemType & {
userGoodFeedback?: string;
userBadFeedback?: string;
chatId: string;
responseData: {
moduleType: string;
runningTime: number; // seconds
historyPreview: { obj: string; value: string }[];
}[];
time: Date;
};
// Payload posted to the chat log service for one question/answer pair.
type ChatLog = {
title: string; // Title of the chat
feedback: 'like' | 'dislike' | null; // Derived from the AI item's good/bad feedback; null when there is none
chatItemId: string; // "<human item id>,<ai item id>"
uid: string; // Out-link uid of the chat, falling back to its tmbId
question: string;
answer: string;
chatId: string;
responseTime: number; // Sum of the response data running times, multiplied by 1000
metadata: string; // JSON-stringified Metadata
sourceName: string; // Chat source, '-' when missing
createdAt: number; // Time of the AI item as epoch milliseconds
sourceId: string; // SOURCE_ID_PREFIX env value followed by the app id
};
/**
 * Load one stored human/AI chat item pair, build the chat log payload and post it to
 * `${url}/api/chat/push`.
 *
 * The function never throws: missing records end it silently, an empty question or answer
 * is logged and skipped, and every failure (request or otherwise) is only logged.
 */
const pushChatLogInternal = async ({
  chatId,
  chatItemIdHuman,
  chatItemIdAi,
  appId,
  url,
  metadata
}: {
  chatId: string;
  chatItemIdHuman: string;
  chatItemIdAi: string;
  appId: string;
  url: string;
  metadata?: Metadata;
}) => {
  try {
    const [humanItem, aiItem] = await Promise.all([
      MongoChatItem.findById(chatItemIdHuman).lean(),
      MongoChatItem.findById(chatItemIdAi).lean() as Promise<AIChatItemType>
    ]);
    if (!(humanItem && aiItem)) return;

    const chat = await MongoChat.findOne({ chatId }).lean();
    if (!chat) return;

    const metadataString = JSON.stringify(metadata ?? {});
    const uid = chat.outLinkUid || chat.tmbId;

    // Question and answer are the text content of the last value entry of each item
    const humanValues = humanItem.value;
    const aiValues = aiItem.value;
    const question = humanValues[humanValues.length - 1]?.text?.content;
    const answer = aiValues[aiValues.length - 1]?.text?.content;
    if (!(question && answer)) {
      addLog.error('[ChatLogPush] question or answer is empty', {
        question: humanValues,
        answer: aiValues
      });
      return;
    }

    // Total running time over all response data entries
    const totalRunningTime =
      aiItem.responseData?.reduce((acc, item) => acc + (item?.runningTime ?? 0), 0) || 0;

    const sourceIdPrefix = process.env.SOURCE_ID_PREFIX ?? '';

    // Good feedback wins over bad feedback; no feedback at all is reported as null
    const feedback: ChatLog['feedback'] = aiItem.userGoodFeedback
      ? 'like'
      : aiItem.userBadFeedback
        ? 'dislike'
        : null;

    const chatLog: ChatLog = {
      title: chat.title,
      feedback,
      chatItemId: `${chatItemIdHuman},${chatItemIdAi}`,
      uid,
      question,
      answer,
      chatId,
      // presumably seconds -> milliseconds; confirm against the unit of runningTime
      responseTime: totalRunningTime * 1000,
      metadata: metadataString,
      sourceName: chat.source ?? '-',
      // @ts-ignore
      createdAt: new Date(aiItem.time).getTime(),
      sourceId: `${sourceIdPrefix}${appId}`
    };

    await axios
      .post(`${url}/api/chat/push`, chatLog)
      .then((res) => {
        addLog.info('[ChatLogPush] push success', res.data);
      })
      .catch((e) => {
        addLog.error('[ChatLogPush] push failed', { e, resData: e.response?.data });
      });
  } catch (e) {
    addLog.error('[ChatLogPush] error', e);
  }
};

View File

@@ -1,4 +1,9 @@
import type { AIChatItemType, UserChatItemType } from '@fastgpt/global/core/chat/type.d';
import type {
AIChatItemType,
ChatItemType,
UserChatItemType
} from '@fastgpt/global/core/chat/type.d';
import axios from 'axios';
import { MongoApp } from '../app/schema';
import {
ChatItemValueTypeEnum,
@@ -13,6 +18,7 @@ import { StoreNodeItemType } from '@fastgpt/global/core/workflow/type/node';
import { getAppChatConfig, getGuideModule } from '@fastgpt/global/core/workflow/utils';
import { AppChatConfigType } from '@fastgpt/global/core/app/type';
import { mergeChatResponseData } from '@fastgpt/global/core/chat/utils';
import { pushChatLog } from './pushChatLog';
type Props = {
chatId: string;
@@ -67,7 +73,7 @@ export async function saveChat({
});
await mongoSessionRun(async (session) => {
await MongoChatItem.insertMany(
const [{ _id: chatItemIdHuman }, { _id: chatItemIdAi }] = await MongoChatItem.insertMany(
content.map((item) => ({
chatId,
teamId,
@@ -105,6 +111,13 @@ export async function saveChat({
upsert: true
}
);
pushChatLog({
chatId,
chatItemIdHuman: String(chatItemIdHuman),
chatItemIdAi: String(chatItemIdAi),
appId
});
});
if (isUpdateUseTime) {

View File

@@ -104,9 +104,12 @@ export const loadRequestMessages = async ({
}) => {
// Load image to base64
const loadImageToBase64 = async (messages: ChatCompletionContentPart[]) => {
if (process.env.MULTIPLE_DATA_TO_BASE64 === 'false') {
return messages;
}
return Promise.all(
messages.map(async (item) => {
if (item.type === 'image_url') {
if (item.type === 'image_url' && process.env.MULTIPLE_DATA_TO_BASE64 === 'true') {
// Remove url origin
const imgUrl = (() => {
if (origin && item.image_url.url.startsWith(origin)) {
@@ -115,38 +118,51 @@ export const loadRequestMessages = async ({
return item.image_url.url;
})();
// If imgUrl is a local path, load image from local, and set url to base64
if (imgUrl.startsWith('/')) {
addLog.debug('Load image from local server', {
baseUrl: serverRequestBaseUrl,
requestUrl: imgUrl
});
const response = await axios.get(imgUrl, {
baseURL: serverRequestBaseUrl,
responseType: 'arraybuffer',
proxy: false
});
const base64 = Buffer.from(response.data, 'binary').toString('base64');
const imageType =
getFileContentTypeFromHeader(response.headers['content-type']) ||
guessBase64ImageType(base64);
try {
// If imgUrl is a local path, load image from local, and set url to base64
if (imgUrl.startsWith('/')) {
addLog.debug('Load image from local server', {
baseUrl: serverRequestBaseUrl,
requestUrl: imgUrl
});
const response = await axios.get(imgUrl, {
baseURL: serverRequestBaseUrl,
responseType: 'arraybuffer',
proxy: false
});
const base64 = Buffer.from(response.data, 'binary').toString('base64');
const imageType =
getFileContentTypeFromHeader(response.headers['content-type']) ||
guessBase64ImageType(base64);
return {
...item,
image_url: {
...item.image_url,
url: `data:${imageType};base64,${base64}`
}
};
return {
...item,
image_url: {
...item.image_url,
url: `data:${imageType};base64,${base64}`
}
};
}
// 检查下这个图片是否可以被访问,如果不行的话,则过滤掉
const response = await axios.head(imgUrl, {
timeout: 10000
});
if (response.status < 200 || response.status >= 400) {
addLog.info(`Filter invalid image: ${imgUrl}`);
return;
}
} catch (error) {
return;
}
}
return item;
})
);
).then((res) => res.filter(Boolean) as ChatCompletionContentPart[]);
};
// Split question text and image
const parseStringWithImages = (input: string): ChatCompletionContentPart[] => {
if (!useVision) {
if (!useVision || input.length > 500) {
return [{ type: 'text', text: input || '' }];
}
@@ -167,8 +183,8 @@ export const loadRequestMessages = async ({
});
});
// Too many images or too long text, return text
if (httpsImages.length > 4 || input.length > 1000) {
// Too many images return text
if (httpsImages.length > 4) {
return [{ type: 'text', text: input || '' }];
}
@@ -176,7 +192,7 @@ export const loadRequestMessages = async ({
result.push({ type: 'text', text: input });
return result;
};
// Parse user content(text and img)
// Parse user content(text and img) Store history => api messages
const parseUserContent = async (content: string | ChatCompletionContentPart[]) => {
if (typeof content === 'string') {
return loadImageToBase64(parseStringWithImages(content));

View File

@@ -9,8 +9,6 @@ import {
TeamCollectionName,
TeamMemberCollectionName
} from '@fastgpt/global/support/user/team/constant';
import { DatasetDefaultPermissionVal } from '@fastgpt/global/support/permission/dataset/constant';
import { getPermissionSchema } from '@fastgpt/global/support/permission/utils';
import type { DatasetSchemaType } from '@fastgpt/global/core/dataset/type.d';
export const DatasetCollectionName = 'datasets';
@@ -88,7 +86,13 @@ const DatasetSchema = new Schema({
externalReadUrl: {
type: String
},
...getPermissionSchema(DatasetDefaultPermissionVal)
inheritPermission: {
type: Boolean,
default: true
},
// abandoned
defaultPermission: Number
});
try {

View File

@@ -12,7 +12,7 @@ import {
DatasetDataWithCollectionType,
SearchDataResponseItemType
} from '@fastgpt/global/core/dataset/type';
import { DatasetColCollectionName, MongoDatasetCollection } from '../collection/schema';
import { MongoDatasetCollection } from '../collection/schema';
import { reRankRecall } from '../../../core/ai/rerank';
import { countPromptTokens } from '../../../common/string/tiktoken/index';
import { datasetSearchResultConcat } from '@fastgpt/global/core/dataset/search/utils';
@@ -320,11 +320,13 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
const fullTextRecall = async ({
query,
limit,
filterCollectionIdList
filterCollectionIdList,
forbidCollectionIdList
}: {
query: string;
limit: number;
filterCollectionIdList?: string[];
forbidCollectionIdList: string[];
}): Promise<{
fullTextRecallResults: SearchDataResponseItemType[];
tokenLen: number;
@@ -351,6 +353,13 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
$in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
}
}
: {}),
...(forbidCollectionIdList && forbidCollectionIdList.length > 0
? {
collectionId: {
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
}
}
: {})
}
},
@@ -367,31 +376,6 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
{
$limit: limit
},
{
$lookup: {
from: DatasetColCollectionName,
let: { collectionId: '$collectionId' },
pipeline: [
{
$match: {
$expr: { $eq: ['$_id', '$$collectionId'] },
forbid: { $eq: true } // 匹配被禁用的数据
}
},
{
$project: {
_id: 1 // 只需要_id字段来确认匹配
}
}
],
as: 'collection'
}
},
{
$match: {
collection: { $eq: [] } // 没有 forbid=true 的数据
}
},
{
$project: {
_id: 1,
@@ -509,7 +493,8 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
fullTextRecall({
query,
limit: fullTextLimit,
filterCollectionIdList
filterCollectionIdList,
forbidCollectionIdList
})
]);
totalTokens += tokens;

View File

@@ -63,7 +63,7 @@ const TrainingDataSchema = new Schema({
},
q: {
type: String,
required: true
default: ''
},
a: {
type: String,

View File

@@ -28,6 +28,7 @@ import { computedMaxToken, llmCompletionsBodyFormat } from '../../../../ai/utils
import { toolValueTypeList } from '@fastgpt/global/core/workflow/constants';
import { WorkflowInteractiveResponseType } from '@fastgpt/global/core/workflow/template/system/interactive/type';
import { ChatItemValueTypeEnum, ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { i18nT } from '../../../../../../web/i18n/utils';
type FunctionRunResponseType = {
toolRunResponse: DispatchFlowResponse;
@@ -549,7 +550,7 @@ async function streamResponse({
}
if (!textAnswer && functionCalls.length === 0) {
return Promise.reject('LLM api response empty');
return Promise.reject(i18nT('chat:LLM_model_response_empty'));
}
return { answer: textAnswer, functionCalls };

View File

@@ -25,45 +25,16 @@ import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { getMultiplePrompt, Prompt_Tool_Call } from './constants';
import { filterToolResponseToPreview } from './utils';
import { InteractiveNodeResponseType } from '@fastgpt/global/core/workflow/template/system/interactive/type';
import { getFileContentFromLinks, getHistoryFileLinks } from '../../tools/readFiles';
import { parseUrlToFileType } from '@fastgpt/global/common/file/tools';
import { Prompt_DocumentQuote } from '@fastgpt/global/core/ai/prompt/AIChat';
import { FlowNodeTypeEnum } from '@fastgpt/global/core/workflow/node/constant';
type Response = DispatchNodeResultType<{
[NodeOutputKeyEnum.answerText]: string;
[DispatchNodeResponseKeyEnum.interactive]?: InteractiveNodeResponseType;
}>;
/*
Tool call auth add file prompt to question。
Guide the LLM to call tool.
*/
export const toolCallMessagesAdapt = ({
userInput
}: {
userInput: UserChatItemValueItemType[];
}) => {
const files = userInput.filter((item) => item.type === 'file');
if (files.length > 0) {
return userInput.map((item) => {
if (item.type === 'text') {
const filesCount = files.filter((file) => file.file?.type === 'file').length;
const imgCount = files.filter((file) => file.file?.type === 'image').length;
const text = item.text?.content || '';
return {
...item,
text: {
content: getMultiplePrompt({ fileCount: filesCount, imgCount, question: text })
}
};
}
return item;
});
}
return userInput;
};
export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<Response> => {
const {
node: { nodeId, name, isEntry },
@@ -71,11 +42,21 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
runtimeEdges,
histories,
query,
params: { model, systemPrompt, userChatInput, history = 6 }
requestOrigin,
chatConfig,
runningAppInfo: { teamId },
params: {
model,
systemPrompt,
userChatInput,
history = 6,
fileUrlList: fileLinks,
aiChatVision
}
} = props;
const toolModel = getLLMModel(model);
const useVision = aiChatVision && toolModel.vision;
const chatHistories = getHistories(history, histories);
const toolNodeIds = filterToolNodeIdByEdges({ nodeId, edges: runtimeEdges });
@@ -109,18 +90,43 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
}
})();
props.node.isEntry = false;
const hasReadFilesTool = toolNodes.some(
(item) => item.flowNodeType === FlowNodeTypeEnum.readFiles
);
const globalFiles = chatValue2RuntimePrompt(query).files;
const { documentQuoteText, userFiles } = await getMultiInput({
histories: chatHistories,
requestOrigin,
maxFiles: chatConfig?.fileSelectConfig?.maxFiles || 20,
teamId,
fileLinks,
inputFiles: globalFiles
});
const concatenateSystemPrompt = [
toolModel.defaultSystemChatPrompt,
systemPrompt,
documentQuoteText
? replaceVariable(Prompt_DocumentQuote, {
quote: documentQuoteText
})
: ''
]
.filter(Boolean)
.join('\n\n===---===---===\n\n');
const messages: ChatItemType[] = (() => {
const value: ChatItemType[] = [
...getSystemPrompt_ChatItemType(toolModel.defaultSystemChatPrompt),
...getSystemPrompt_ChatItemType(systemPrompt),
...getSystemPrompt_ChatItemType(concatenateSystemPrompt),
// Add file input prompt to histories
...chatHistories.map((item) => {
if (item.obj === ChatRoleEnum.Human) {
return {
...item,
value: toolCallMessagesAdapt({
userInput: item.value
userInput: item.value,
skip: !hasReadFilesTool
})
};
}
@@ -129,9 +135,10 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
{
obj: ChatRoleEnum.Human,
value: toolCallMessagesAdapt({
skip: !hasReadFilesTool,
userInput: runtimePrompt2ChatsValue({
text: userChatInput,
files: chatValue2RuntimePrompt(query).files
files: userFiles
})
})
}
@@ -185,7 +192,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
// array, replace last element
const lastText = lastMessage.content[lastMessage.content.length - 1];
if (lastText.type === 'text') {
lastMessage.content = replaceVariable(Prompt_Tool_Call, {
lastText.text = replaceVariable(Prompt_Tool_Call, {
question: lastText.text
});
} else {
@@ -211,18 +218,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
});
// flat child tool response
let newVariables: Record<string, any> = props.variables;
const childToolResponse = dispatchFlowResponse
.map((item) => {
// Computed new variables
newVariables = {
...newVariables,
...item.newVariables
};
return item.flowResponses;
})
.flat();
const childToolResponse = dispatchFlowResponse.map((item) => item.flowResponses).flat();
// concat tool usage
const totalPointsUsage =
@@ -248,7 +244,11 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
childTotalPoints: flatUsages.reduce((sum, item) => sum + item.totalPoints, 0),
model: modelName,
query: userChatInput,
historyPreview: getHistoryPreview(GPTMessages2Chats(completeMessages, false), 10000),
historyPreview: getHistoryPreview(
GPTMessages2Chats(completeMessages, false),
10000,
useVision
),
toolDetail: childToolResponse,
mergeSignId: nodeId
},
@@ -261,7 +261,91 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
},
...flatUsages
],
[DispatchNodeResponseKeyEnum.newVariables]: newVariables,
[DispatchNodeResponseKeyEnum.interactive]: toolWorkflowInteractiveResponse
};
};
/**
 * Resolve the document quote text and the user files for the tool call node.
 *
 * - `fileLinks` not provided: no document quote, the chat box input files are passed through.
 * - `fileLinks` provided but neither it nor the histories contain a link: no quote, no files.
 * - otherwise: the content of all links (node links first, then links found in the histories)
 *   is read into the quote text, and the node links are returned as user files.
 */
const getMultiInput = async ({
  histories,
  fileLinks,
  requestOrigin,
  maxFiles,
  teamId,
  inputFiles
}: {
  histories: ChatItemType[];
  fileLinks?: string[];
  requestOrigin?: string;
  maxFiles: number;
  teamId: string;
  inputFiles: UserChatItemValueItemType['file'][];
}) => {
  // No file quote configured on the node
  if (!fileLinks) {
    return {
      documentQuoteText: '',
      userFiles: inputFiles
    };
  }

  const historyLinks = getHistoryFileLinks(histories);
  const urls = [...fileLinks, ...historyLinks];

  if (urls.length === 0) {
    return {
      documentQuoteText: '',
      userFiles: []
    };
  }

  // Read the content behind the node links and the history links
  const fileContent = await getFileContentFromLinks({
    urls,
    requestOrigin,
    maxFiles,
    teamId
  });

  const userFiles = fileLinks.map((url) => parseUrlToFileType(url));

  return {
    documentQuoteText: fileContent.text,
    userFiles
  };
};
/*
  Tool call: add a file prompt to the question to guide the LLM to call the tool.
  When the user input contains file items, the content of every text item is replaced by the
  result of getMultiplePrompt (file count, image count and the original question).
  With skip = true, or when there is no file item, the input is returned unchanged.
*/
const toolCallMessagesAdapt = ({
  userInput,
  skip
}: {
  userInput: UserChatItemValueItemType[];
  skip?: boolean;
}) => {
  if (skip) return userInput;

  const fileItems = userInput.filter((entry) => entry.type === 'file');
  if (fileItems.length === 0) return userInput;

  // The counts are the same for every text item
  const fileCount = fileItems.filter((entry) => entry.file?.type === 'file').length;
  const imgCount = fileItems.filter((entry) => entry.file?.type === 'image').length;

  return userInput.map((entry) =>
    entry.type === 'text'
      ? {
          ...entry,
          text: {
            content: getMultiplePrompt({
              fileCount,
              imgCount,
              question: entry.text?.content || ''
            })
          }
        }
      : entry
  );
};

View File

@@ -29,6 +29,7 @@ import { WorkflowResponseType } from '../../type';
import { toolValueTypeList } from '@fastgpt/global/core/workflow/constants';
import { WorkflowInteractiveResponseType } from '@fastgpt/global/core/workflow/template/system/interactive/type';
import { ChatItemValueTypeEnum } from '@fastgpt/global/core/chat/constants';
import { i18nT } from '../../../../../../web/i18n/utils';
type FunctionCallCompletion = {
id: string;
@@ -176,17 +177,29 @@ export const runToolWithPromptCall = async (
);
const lastMessage = messages[messages.length - 1];
if (typeof lastMessage.content !== 'string') {
if (typeof lastMessage.content === 'string') {
lastMessage.content = replaceVariable(lastMessage.content, {
toolsPrompt
});
} else if (Array.isArray(lastMessage.content)) {
// array, replace last element
const lastText = lastMessage.content[lastMessage.content.length - 1];
if (lastText.type === 'text') {
lastText.text = replaceVariable(lastText.text, {
toolsPrompt
});
} else {
return Promise.reject('Prompt call invalid input');
}
} else {
return Promise.reject('Prompt call invalid input');
}
lastMessage.content = replaceVariable(lastMessage.content, {
toolsPrompt
});
const filterMessages = await filterGPTMessageByMaxTokens({
messages,
maxTokens: toolModel.maxContext - 500 // filter token. not response maxToken
});
const [requestMessages, max_tokens] = await Promise.all([
loadRequestMessages({
messages: filterMessages,
@@ -398,11 +411,27 @@ export const runToolWithPromptCall = async (
: undefined;
// get the next user prompt
lastMessage.content += `${replaceAnswer}
if (typeof lastMessage.content === 'string') {
lastMessage.content += `${replaceAnswer}
TOOL_RESPONSE: """
${workflowInteractiveResponseItem ? `{{${INTERACTIVE_STOP_SIGNAL}}}` : toolsRunResponse.toolResponsePrompt}
"""
ANSWER: `;
} else if (Array.isArray(lastMessage.content)) {
// array, replace last element
const lastText = lastMessage.content[lastMessage.content.length - 1];
if (lastText.type === 'text') {
lastText.text += `${replaceAnswer}
TOOL_RESPONSE: """
${workflowInteractiveResponseItem ? `{{${INTERACTIVE_STOP_SIGNAL}}}` : toolsRunResponse.toolResponsePrompt}
"""
ANSWER: `;
} else {
return Promise.reject('Prompt call invalid input');
}
} else {
return Promise.reject('Prompt call invalid input');
}
const runTimes = (response?.runTimes || 0) + toolsRunResponse.toolResponse.runTimes;
const toolNodeTokens = response?.toolNodeTokens ? response.toolNodeTokens + tokens : tokens;
@@ -509,7 +538,7 @@ async function streamResponse({
}
if (!textAnswer) {
return Promise.reject('LLM api response empty');
return Promise.reject(i18nT('chat:LLM_model_response_empty'));
}
return { answer: textAnswer.trim() };
}

View File

@@ -27,7 +27,8 @@ import { getNanoid, sliceStrStartEnd } from '@fastgpt/global/common/string/tools
import { addLog } from '../../../../../common/system/log';
import { toolValueTypeList } from '@fastgpt/global/core/workflow/constants';
import { WorkflowInteractiveResponseType } from '@fastgpt/global/core/workflow/template/system/interactive/type';
import { ChatItemValueTypeEnum, ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { ChatItemValueTypeEnum } from '@fastgpt/global/core/chat/constants';
import { i18nT } from '../../../../../../web/i18n/utils';
type ToolRunResponseType = {
toolRunResponse: DispatchFlowResponse;
@@ -268,7 +269,7 @@ export const runToolWithToolChoice = async (
},
toolModel
);
// console.log(JSON.stringify(requestMessages, null, 2), '==requestBody');
// console.log(JSON.stringify(requestBody, null, 2), '==requestBody');
/* Run llm */
const ai = getAIApi({
timeout: 480000
@@ -656,7 +657,7 @@ async function streamResponse({
}
if (!textAnswer && toolCalls.length === 0) {
return Promise.reject('LLM api response empty');
return Promise.reject(i18nT('chat:LLM_model_response_empty'));
}
return { answer: textAnswer, toolCalls };

View File

@@ -21,6 +21,7 @@ export type DispatchToolModuleProps = ModuleDispatchProps<{
[NodeInputKeyEnum.aiChatTemperature]: number;
[NodeInputKeyEnum.aiChatMaxToken]: number;
[NodeInputKeyEnum.aiChatVision]?: boolean;
[NodeInputKeyEnum.fileUrlList]?: string[];
}> & {
messages: ChatCompletionMessageParam[];
toolNodes: ToolNodeItemType[];

View File

@@ -5,11 +5,7 @@ import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
import { getAIApi } from '../../../ai/config';
import type {
ChatCompletion,
ChatCompletionMessageParam,
StreamChatType
} from '@fastgpt/global/core/ai/type.d';
import type { ChatCompletion, StreamChatType } from '@fastgpt/global/core/ai/type.d';
import { formatModelChars2Points } from '../../../../support/wallet/usage/utils';
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { postTextCensor } from '../../../../common/api/requestPlusApi';
@@ -45,6 +41,10 @@ import { computedMaxToken, llmCompletionsBodyFormat } from '../../../ai/utils';
import { WorkflowResponseType } from '../type';
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
import { AiChatQuoteRoleType } from '@fastgpt/global/core/workflow/template/system/aiChat/type';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { getFileContentFromLinks, getHistoryFileLinks } from '../tools/readFiles';
import { parseUrlToFileType } from '@fastgpt/global/common/file/tools';
import { i18nT } from '../../../../../web/i18n/utils';
export type ChatProps = ModuleDispatchProps<
AIChatNodeProps & {
@@ -68,7 +68,9 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
histories,
node: { name },
query,
runningAppInfo: { teamId },
workflowStreamResponse,
chatConfig,
params: {
model,
temperature = 0,
@@ -82,14 +84,12 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
quoteTemplate,
quotePrompt,
aiChatVision,
stringQuoteText
fileUrlList: fileLinks, // node quote file links
stringQuoteText //abandon
}
} = props;
const { files: inputFiles } = chatValue2RuntimePrompt(query);
const { files: inputFiles } = chatValue2RuntimePrompt(query); // Chat box input files
if (!userChatInput && inputFiles.length === 0) {
return Promise.reject('Question is empty');
}
stream = stream && isResponseAnswerText;
const chatHistories = getHistories(history, histories);
@@ -99,11 +99,26 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
return Promise.reject('The chat model is undefined, you need to select a chat model.');
}
const { datasetQuoteText } = await filterDatasetQuote({
quoteQA,
model: modelConstantsData,
quoteTemplate
});
const [{ datasetQuoteText }, { documentQuoteText, userFiles }] = await Promise.all([
filterDatasetQuote({
quoteQA,
model: modelConstantsData,
quoteTemplate
}),
getMultiInput({
histories: chatHistories,
inputFiles,
fileLinks,
stringQuoteText,
requestOrigin,
maxFiles: chatConfig?.fileSelectConfig?.maxFiles || 20,
teamId
})
]);
if (!userChatInput && !documentQuoteText && userFiles.length === 0) {
return Promise.reject(i18nT('chat:AI_input_is_empty'));
}
const [{ filterMessages }] = await Promise.all([
getChatMessages({
@@ -114,9 +129,9 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
aiChatQuoteRole,
datasetQuotePrompt: quotePrompt,
userChatInput,
inputFiles,
systemPrompt,
stringQuoteText
userFiles,
documentQuoteText
}),
(() => {
// censor model and system key
@@ -131,22 +146,9 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
})()
]);
// Get the request messages
const concatMessages = [
...(modelConstantsData.defaultSystemChatPrompt
? [
{
role: ChatCompletionRequestMessageRoleEnum.System,
content: modelConstantsData.defaultSystemChatPrompt
}
]
: []),
...filterMessages
] as ChatCompletionMessageParam[];
const [requestMessages, max_tokens] = await Promise.all([
loadRequestMessages({
messages: concatMessages,
messages: filterMessages,
useVision: modelConstantsData.vision && aiChatVision,
origin: requestOrigin
}),
@@ -194,7 +196,7 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
});
if (!answer) {
throw new Error('LLM model response empty');
return Promise.reject(i18nT('chat:LLM_model_response_empty'));
}
return {
@@ -241,7 +243,11 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
tokens,
query: `${userChatInput}`,
maxToken: max_tokens,
historyPreview: getHistoryPreview(chatCompleteMessages, 10000),
historyPreview: getHistoryPreview(
chatCompleteMessages,
10000,
modelConstantsData.vision && aiChatVision
),
contextTotalLen: completeMessages.length
},
[DispatchNodeResponseKeyEnum.nodeDispatchUsages]: [
@@ -262,7 +268,7 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
});
if (user.openaiAccount?.baseUrl) {
return Promise.reject(`您的 OpenAI key 出错了: ${JSON.stringify(requestBody)}`);
return Promise.reject(`您的 OpenAI key 出错了: ${getErrText(error)}`);
}
return Promise.reject(error);
@@ -301,7 +307,70 @@ async function filterDatasetQuote({
datasetQuoteText
};
}
/**
 * Resolve the document quote text and the user files that are sent to the chat model.
 *
 * Resolution order:
 * 1. `stringQuoteText` is set (legacy node): it is used as the document quote and the
 *    chat box input files are returned unchanged.
 * 2. `fileLinks` is undefined (legacy node without a file link input): no document quote,
 *    the chat box input files are returned unchanged (image recognition may still use them).
 * 3. Otherwise (new version): the global chat box files are NOT used. The links in
 *    `fileLinks` plus the file links found in `histories` are read and their text is
 *    returned as the document quote; `userFiles` is built from `fileLinks` only.
 *
 * @param histories - Chat histories scanned for file links.
 * @param inputFiles - Files from the chat box input; only returned by the legacy paths.
 * @param fileLinks - File links referenced by the node input.
 * @param stringQuoteText - Legacy file quote text (abandoned input).
 * @param requestOrigin - Request origin forwarded to the file reader.
 * @param maxFiles - Max file count forwarded to the file reader.
 * @param teamId - Team id forwarded to the file reader.
 * @returns `documentQuoteText` (may be empty) and `userFiles`.
 */
async function getMultiInput({
  histories,
  inputFiles,
  fileLinks,
  stringQuoteText,
  requestOrigin,
  maxFiles,
  teamId
}: {
  histories: ChatItemType[];
  inputFiles: UserChatItemValueItemType['file'][];
  fileLinks?: string[];
  stringQuoteText?: string; // file quote
  requestOrigin?: string;
  maxFiles: number;
  teamId: string;
}) {
  // Legacy version adaptation ====>
  if (stringQuoteText) {
    return {
      documentQuoteText: stringQuoteText,
      userFiles: inputFiles
    };
  }

  // No file reference input, but image recognition may still be in use
  if (!fileLinks) {
    return {
      documentQuoteText: '',
      userFiles: inputFiles
    };
  }
  // Legacy version adaptation <====

  // If fileLinks params is not empty, it means it is a new version, not get the global file.

  // Get files from histories
  const filesFromHistories = getHistoryFileLinks(histories);
  const urls = [...fileLinks, ...filesFromHistories];

  if (urls.length === 0) {
    return {
      documentQuoteText: '',
      userFiles: []
    };
  }

  // Concat fileUrlList and filesFromHistories; unsupported files are removed by the reader
  const { text } = await getFileContentFromLinks({
    urls,
    requestOrigin,
    maxFiles,
    teamId
  });

  return {
    documentQuoteText: text,
    // Drop links that could not be parsed into a file item, so that the caller's
    // `userFiles.length === 0` check and the message builder only see usable entries.
    userFiles: fileLinks.map((url) => parseUrlToFileType(url)).filter(Boolean)
  };
}
async function getChatMessages({
model,
aiChatQuoteRole,
datasetQuotePrompt = '',
datasetQuoteText,
@@ -309,10 +378,10 @@ async function getChatMessages({
histories = [],
systemPrompt,
userChatInput,
inputFiles,
model,
stringQuoteText
userFiles,
documentQuoteText
}: {
model: LLMModelItemType;
// dataset quote
aiChatQuoteRole: AiChatQuoteRoleType; // user: replace user prompt; system: replace system prompt
datasetQuotePrompt?: string;
@@ -322,10 +391,11 @@ async function getChatMessages({
histories: ChatItemType[];
systemPrompt: string;
userChatInput: string;
inputFiles: UserChatItemValueItemType['file'][];
model: LLMModelItemType;
stringQuoteText?: string; // file quote
userFiles: UserChatItemValueItemType['file'][];
documentQuoteText?: string; // document quote
}) {
// Dataset prompt ====>
// User role or prompt include question
const quoteRole =
aiChatQuoteRole === 'user' || datasetQuotePrompt.includes('{{question}}') ? 'user' : 'system';
@@ -336,6 +406,7 @@ async function getChatMessages({
? Prompt_userQuotePromptList[0].value
: Prompt_systemQuotePromptList[0].value;
// Reset user input, add dataset quote to user input
const replaceInputValue =
useDatasetQuote && quoteRole === 'user'
? replaceVariable(datasetQuotePromptTemplate, {
@@ -343,31 +414,33 @@ async function getChatMessages({
question: userChatInput
})
: userChatInput;
// Dataset prompt <====
const replaceSystemPrompt =
// Concat system prompt
const concatenateSystemPrompt = [
model.defaultSystemChatPrompt,
systemPrompt,
useDatasetQuote && quoteRole === 'system'
? `${systemPrompt ? systemPrompt + '\n\n------\n\n' : ''}${replaceVariable(
datasetQuotePromptTemplate,
{
quote: datasetQuoteText
}
)}`
: systemPrompt;
? replaceVariable(datasetQuotePromptTemplate, {
quote: datasetQuoteText
})
: '',
documentQuoteText
? replaceVariable(Prompt_DocumentQuote, {
quote: documentQuoteText
})
: ''
]
.filter(Boolean)
.join('\n\n===---===---===\n\n');
const messages: ChatItemType[] = [
...getSystemPrompt_ChatItemType(replaceSystemPrompt),
...(stringQuoteText // file quote
? getSystemPrompt_ChatItemType(
replaceVariable(Prompt_DocumentQuote, {
quote: stringQuoteText
})
)
: []),
...getSystemPrompt_ChatItemType(concatenateSystemPrompt),
...histories,
{
obj: ChatRoleEnum.Human,
value: runtimePrompt2ChatsValue({
files: inputFiles,
files: userFiles,
text: replaceInputValue
})
}

View File

@@ -1,17 +1,21 @@
import type { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type';
import type { ModuleDispatchProps } from '@fastgpt/global/core/workflow/runtime/type';
import type {
DispatchNodeResultType,
ModuleDispatchProps
} from '@fastgpt/global/core/workflow/runtime/type';
import { NodeInputKeyEnum, NodeOutputKeyEnum } from '@fastgpt/global/core/workflow/constants';
import { datasetSearchResultConcat } from '@fastgpt/global/core/dataset/search/utils';
import { filterSearchResultsByMaxChars } from '../../utils';
import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runtime/constants';
// Dispatch props of the dataset concat node: a numeric `datasetMaxTokens` input plus
// arbitrary string keys whose values are arrays of dataset search results.
type DatasetConcatProps = ModuleDispatchProps<
{
[NodeInputKeyEnum.datasetMaxTokens]: number;
} & { [key: string]: SearchDataResponseItemType[] }
>;
type DatasetConcatResponse = {
type DatasetConcatResponse = DispatchNodeResultType<{
[NodeOutputKeyEnum.datasetQuoteQA]: SearchDataResponseItemType[];
};
}>;
export async function dispatchDatasetConcat(
props: DatasetConcatProps
@@ -30,6 +34,12 @@ export async function dispatchDatasetConcat(
);
return {
[NodeOutputKeyEnum.datasetQuoteQA]: await filterSearchResultsByMaxChars(rrfConcatResults, limit)
[NodeOutputKeyEnum.datasetQuoteQA]: await filterSearchResultsByMaxChars(
rrfConcatResults,
limit
),
[DispatchNodeResponseKeyEnum.nodeResponse]: {
concatLength: rrfConcatResults.length
}
};
}

View File

@@ -16,6 +16,7 @@ import { datasetSearchQueryExtension } from '../../../dataset/search/utils';
import { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
import { checkTeamReRankPermission } from '../../../../support/permission/teamLimit';
import { MongoDataset } from '../../../dataset/schema';
import { i18nT } from '../../../../../web/i18n/utils';
type DatasetSearchProps = ModuleDispatchProps<{
[NodeInputKeyEnum.datasetSelectList]: SelectedDatasetType;
@@ -56,15 +57,15 @@ export async function dispatchDatasetSearch(
} = props as DatasetSearchProps;
if (!Array.isArray(datasets)) {
return Promise.reject('Quote type error');
return Promise.reject(i18nT('chat:dataset_quote_type error'));
}
if (datasets.length === 0) {
return Promise.reject('core.chat.error.Select dataset empty');
return Promise.reject(i18nT('common:core.chat.error.Select dataset empty'));
}
if (!userChatInput) {
return Promise.reject('core.chat.error.User input empty');
return Promise.reject(i18nT('common:core.chat.error.User input empty'));
}
// query extension

View File

@@ -23,7 +23,6 @@ import {
} from '@fastgpt/global/core/workflow/node/constant';
import { getNanoid, replaceVariable } from '@fastgpt/global/common/string/tools';
import { getSystemTime } from '@fastgpt/global/common/time/timezone';
import { replaceEditorVariable } from '@fastgpt/global/core/workflow/utils';
import { dispatchWorkflowStart } from './init/workflowStart';
import { dispatchChatCompletion } from './chat/oneapi';
@@ -38,10 +37,12 @@ import { dispatchQueryExtension } from './tools/queryExternsion';
import { dispatchRunPlugin } from './plugin/run';
import { dispatchPluginInput } from './plugin/runInput';
import { dispatchPluginOutput } from './plugin/runOutput';
import { removeSystemVariable, valueTypeFormat } from './utils';
import { formatHttpError, removeSystemVariable, valueTypeFormat } from './utils';
import {
filterWorkflowEdges,
checkNodeRunStatus
checkNodeRunStatus,
textAdaptGptResponse,
replaceEditorVariable
} from '@fastgpt/global/core/workflow/runtime/utils';
import { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
import { dispatchRunTools } from './agent/runTool/index';
@@ -71,6 +72,7 @@ import { dispatchLoopEnd } from './loop/runLoopEnd';
import { dispatchLoopStart } from './loop/runLoopStart';
import { dispatchFormInput } from './interactive/formInput';
import { dispatchToolParams } from './agent/runTool/toolParams';
import { responseWrite } from '../../../common/response';
const callbackMap: Record<FlowNodeTypeEnum, Function> = {
[FlowNodeTypeEnum.workflowStart]: dispatchWorkflowStart,
@@ -161,6 +163,20 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
res.setHeader('Access-Control-Allow-Origin', '*');
res.setHeader('X-Accel-Buffering', 'no');
res.setHeader('Cache-Control', 'no-cache, no-transform');
// Heartbeat: every 10s push an empty answer event so the browser does not consider the
// stream connection as disconnected.
// NOTE(review): every tick re-arms the timer unconditionally and nothing in this block
// cancels it — confirm that it is stopped elsewhere once the response has ended.
const sendStreamTimerSign = () => {
  const pushEmptyAnswer = () => {
    props?.workflowStreamResponse?.({
      event: SseResponseEventEnum.answer,
      data: textAdaptGptResponse({
        text: ''
      })
    });
    // Schedule the next heartbeat
    sendStreamTimerSign();
  };

  setTimeout(pushEmptyAnswer, 10000);
};
sendStreamTimerSign();
}
variables = {
@@ -371,6 +387,7 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
node,
runtimeEdges
});
const nodeRunResult = await (() => {
if (status === 'run') {
nodeRunBeforeHook(node);
@@ -466,8 +483,16 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
: {};
node.inputs.forEach((input) => {
// Special input, not format
if (input.key === dynamicInput?.key) return;
// Skip some special key
if (input.key === NodeInputKeyEnum.childrenNodeIdList) {
params[input.key] = input.value;
return;
}
// replace {{xx}} variables
let value = replaceVariable(input.value, variables);
@@ -490,7 +515,6 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
if (input.canEdit && dynamicInput && params[dynamicInput.key]) {
params[dynamicInput.key][input.key] = valueTypeFormat(value, input.valueType);
}
params[input.key] = valueTypeFormat(value, input.valueType);
});
@@ -533,7 +557,21 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
// run module
const dispatchRes: Record<string, any> = await (async () => {
if (callbackMap[node.flowNodeType]) {
return callbackMap[node.flowNodeType](dispatchData);
try {
return await callbackMap[node.flowNodeType](dispatchData);
} catch (error) {
// Get source handles of outgoing edges
const targetEdges = runtimeEdges.filter((item) => item.source === node.nodeId);
const skipHandleIds = targetEdges.map((item) => item.sourceHandle);
// Skip all edges and return error
return {
[DispatchNodeResponseKeyEnum.nodeResponse]: {
error: formatHttpError(error)
},
[DispatchNodeResponseKeyEnum.skipHandleId]: skipHandleIds
};
}
}
return {};
})();
@@ -592,56 +630,60 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
};
}
// start process width initInput
const entryNodes = runtimeNodes.filter((item) => item.isEntry);
// reset entry
runtimeNodes.forEach((item) => {
// Interactive node is not the entry node, return interactive result
if (
item.flowNodeType !== FlowNodeTypeEnum.userSelect &&
item.flowNodeType !== FlowNodeTypeEnum.formInput &&
item.flowNodeType !== FlowNodeTypeEnum.tools
) {
item.isEntry = false;
}
});
await Promise.all(entryNodes.map((node) => checkNodeCanRun(node)));
try {
// start process width initInput
const entryNodes = runtimeNodes.filter((item) => item.isEntry);
// reset entry
runtimeNodes.forEach((item) => {
// Interactive node is not the entry node, return interactive result
if (
item.flowNodeType !== FlowNodeTypeEnum.userSelect &&
item.flowNodeType !== FlowNodeTypeEnum.formInput &&
item.flowNodeType !== FlowNodeTypeEnum.tools
) {
item.isEntry = false;
}
});
await Promise.all(entryNodes.map((node) => checkNodeCanRun(node)));
// focus try to run pluginOutput
const pluginOutputModule = runtimeNodes.find(
(item) => item.flowNodeType === FlowNodeTypeEnum.pluginOutput
);
if (pluginOutputModule && props.mode !== 'debug') {
await nodeRunWithActive(pluginOutputModule);
// focus try to run pluginOutput
const pluginOutputModule = runtimeNodes.find(
(item) => item.flowNodeType === FlowNodeTypeEnum.pluginOutput
);
if (pluginOutputModule && props.mode !== 'debug') {
await nodeRunWithActive(pluginOutputModule);
}
// Interactive node
const interactiveResult = (() => {
if (nodeInteractiveResponse) {
const interactiveAssistant = handleInteractiveResult({
entryNodeIds: nodeInteractiveResponse.entryNodeIds,
interactiveResponse: nodeInteractiveResponse.interactiveResponse
});
chatAssistantResponse.push(interactiveAssistant);
return interactiveAssistant.interactive;
}
})();
return {
flowResponses: chatResponses,
flowUsages: chatNodeUsages,
debugResponse: {
finishedNodes: runtimeNodes,
finishedEdges: runtimeEdges,
nextStepRunNodes: debugNextStepRunNodes
},
workflowInteractiveResponse: interactiveResult,
[DispatchNodeResponseKeyEnum.runTimes]: workflowRunTimes,
[DispatchNodeResponseKeyEnum.assistantResponses]:
mergeAssistantResponseAnswerText(chatAssistantResponse),
[DispatchNodeResponseKeyEnum.toolResponses]: toolRunResponse,
newVariables: removeSystemVariable(variables)
};
} catch (error) {
return Promise.reject(error);
}
// Interactive node
const interactiveResult = (() => {
if (nodeInteractiveResponse) {
const interactiveAssistant = handleInteractiveResult({
entryNodeIds: nodeInteractiveResponse.entryNodeIds,
interactiveResponse: nodeInteractiveResponse.interactiveResponse
});
chatAssistantResponse.push(interactiveAssistant);
return interactiveAssistant.interactive;
}
})();
return {
flowResponses: chatResponses,
flowUsages: chatNodeUsages,
debugResponse: {
finishedNodes: runtimeNodes,
finishedEdges: runtimeEdges,
nextStepRunNodes: debugNextStepRunNodes
},
workflowInteractiveResponse: interactiveResult,
[DispatchNodeResponseKeyEnum.runTimes]: workflowRunTimes,
[DispatchNodeResponseKeyEnum.assistantResponses]:
mergeAssistantResponseAnswerText(chatAssistantResponse),
[DispatchNodeResponseKeyEnum.toolResponses]: toolRunResponse,
newVariables: removeSystemVariable(variables)
};
}
/* get system variable */

View File

@@ -7,6 +7,7 @@ import {
import { dispatchWorkFlow } from '..';
import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runtime/constants';
import { AIChatItemValueItemType, ChatHistoryItemResType } from '@fastgpt/global/core/chat/type';
import { cloneDeep } from 'lodash';
type Props = ModuleDispatchProps<{
[NodeInputKeyEnum.loopInputArray]: Array<any>;
@@ -19,60 +20,63 @@ type Response = DispatchNodeResultType<{
export const dispatchLoop = async (props: Props): Promise<Response> => {
const {
params,
runtimeEdges,
runtimeNodes,
user,
node: { name }
} = props;
const { loopInputArray = [], childrenNodeIdList } = params;
const { loopInputArray = [], childrenNodeIdList = [] } = params;
if (!Array.isArray(loopInputArray)) {
return Promise.reject('Input value is not an array');
}
if (loopInputArray.length > 50) {
// Max number of loop items. WORKFLOW_MAX_LOOP_TIMES overrides the default of 50.
// A non-numeric env value parses to NaN, which would make the `>` check below always
// false and silently disable the limit, so fall back to the default in that case.
const rawMaxLength = process.env.WORKFLOW_MAX_LOOP_TIMES;
const parsedMaxLength = rawMaxLength ? Number(rawMaxLength) : NaN;
const maxLength = Number.isNaN(parsedMaxLength) ? 50 : parsedMaxLength;
if (loopInputArray.length > maxLength) {
  // Report the effective limit instead of a hard-coded 50
  return Promise.reject(`Input array length cannot be greater than ${maxLength}`);
}
const runNodes = runtimeNodes.filter((node) => childrenNodeIdList.includes(node.nodeId));
const outputValueArr = [];
const loopDetail: ChatHistoryItemResType[] = [];
let assistantResponses: AIChatItemValueItemType[] = [];
let totalPoints = 0;
let newVariables: Record<string, any> = props.variables;
for await (const item of loopInputArray) {
let index = 0;
for await (const item of loopInputArray.filter(Boolean)) {
runtimeNodes.forEach((node) => {
if (
childrenNodeIdList.includes(node.nodeId) &&
node.flowNodeType === FlowNodeTypeEnum.loopStart
) {
node.isEntry = true;
node.inputs.forEach((input) => {
if (input.key === NodeInputKeyEnum.loopStartInput) {
input.value = item;
} else if (input.key === NodeInputKeyEnum.loopStartIndex) {
input.value = index++;
}
});
}
});
const response = await dispatchWorkFlow({
...props,
runtimeNodes: runNodes.map((node) =>
node.flowNodeType === FlowNodeTypeEnum.loopStart
? {
...node,
isEntry: true,
inputs: node.inputs.map((input) =>
input.key === NodeInputKeyEnum.loopStartInput
? {
...input,
value: item
}
: input
)
}
: {
...node,
isEntry: false
}
)
runtimeEdges: cloneDeep(runtimeEdges)
});
const loopOutputValue = response.flowResponses.find(
(res) => res.moduleType === FlowNodeTypeEnum.loopEnd
)?.loopOutputValue;
// Concat runtime response
outputValueArr.push(loopOutputValue);
loopDetail.push(...response.flowResponses);
assistantResponses.push(...response.assistantResponses);
// Accumulate usage over all iterations; a plain assignment would keep only the
// points of the last iteration.
totalPoints += response.flowUsages.reduce((acc, usage) => acc + usage.totalPoints, 0);
// Concat new variables
newVariables = {
...newVariables,
...response.newVariables

View File

@@ -7,9 +7,11 @@ import {
// Inputs of the loop start node: the value handed to this run (`loopStartInput`)
// and its numeric index (`loopStartIndex`).
type Props = ModuleDispatchProps<{
[NodeInputKeyEnum.loopStartInput]: any;
[NodeInputKeyEnum.loopStartIndex]: number;
}>;
// Outputs of the loop start node: the loop input value and its numeric index.
type Response = DispatchNodeResultType<{
[NodeOutputKeyEnum.loopStartInput]: any;
[NodeOutputKeyEnum.loopStartIndex]: number;
}>;
export const dispatchLoopStart = async (props: Props): Promise<Response> => {
@@ -18,6 +20,7 @@ export const dispatchLoopStart = async (props: Props): Promise<Response> => {
[DispatchNodeResponseKeyEnum.nodeResponse]: {
loopInputValue: params.loopStartInput
},
[NodeOutputKeyEnum.loopStartInput]: params.loopStartInput
[NodeOutputKeyEnum.loopStartInput]: params.loopStartInput,
[NodeOutputKeyEnum.loopStartIndex]: params.loopStartIndex
};
};

View File

@@ -112,12 +112,15 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
output.moduleLogo = plugin.avatar;
}
const usagePoints = await computedPluginUsage(plugin, flowUsages);
const childStreamResponse = system_forbid_stream ? false : props.stream;
const usagePoints = await computedPluginUsage({
plugin,
childrenUsage: flowUsages,
error: !!output?.pluginOutput?.error
});
return {
// 嵌套运行时,如果 childApp stream=false实际上不会有任何内容输出给用户所以不需要存储
assistantResponses: childStreamResponse ? assistantResponses : [],
assistantResponses: system_forbid_stream ? [] : assistantResponses,
// responseData, // debug
[DispatchNodeResponseKeyEnum.runTimes]: runTimes,
[DispatchNodeResponseKeyEnum.nodeResponse]: {

View File

@@ -17,12 +17,14 @@ import { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/ty
import { authAppByTmbId } from '../../../../support/permission/app/auth';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { getAppVersionById } from '../../../app/version/controller';
import { parseUrlToFileType } from '@fastgpt/global/common/file/tools';
type Props = ModuleDispatchProps<{
[NodeInputKeyEnum.userChatInput]: string;
[NodeInputKeyEnum.history]?: ChatItemType[] | number;
[NodeInputKeyEnum.fileUrlList]?: string[];
[NodeInputKeyEnum.forbidStream]?: boolean;
[NodeInputKeyEnum.fileUrlList]?: string[];
}>;
type Response = DispatchNodeResultType<{
[NodeOutputKeyEnum.answerText]: string;
@@ -40,8 +42,24 @@ export const dispatchRunAppNode = async (props: Props): Promise<Response> => {
variables
} = props;
const { system_forbid_stream = false, userChatInput, history, ...childrenAppVariables } = params;
if (!userChatInput) {
const {
system_forbid_stream = false,
userChatInput,
history,
fileUrlList,
...childrenAppVariables
} = params;
const { files } = chatValue2RuntimePrompt(query);
// Files forwarded to the child app: the node's file link input when present,
// otherwise the files parsed from the current query.
const userInputFiles = (() => {
  if (fileUrlList) {
    return fileUrlList.map((url) => parseUrlToFileType(url));
  }
  // Adapt version 4.8.13 upgrade
  return files;
})();

// An array is always truthy, so `!userInputFiles` could never be true and the guard
// was dead code; emptiness has to be checked through the length.
if (!userChatInput && userInputFiles.length === 0) {
  return Promise.reject('Input is empty');
}
if (!appId) {
@@ -72,7 +90,6 @@ export const dispatchRunAppNode = async (props: Props): Promise<Response> => {
}
const chatHistories = getHistories(history, histories);
const { files } = chatValue2RuntimePrompt(query);
// Rewrite children app variables
const systemVariables = filterSystemVariables(variables);
@@ -102,7 +119,7 @@ export const dispatchRunAppNode = async (props: Props): Promise<Response> => {
histories: chatHistories,
variables: childrenRunVariables,
query: runtimePrompt2ChatsValue({
files,
files: userInputFiles,
text: userChatInput
}),
chatConfig
@@ -124,7 +141,7 @@ export const dispatchRunAppNode = async (props: Props): Promise<Response> => {
const usagePoints = flowUsages.reduce((sum, item) => sum + (item.totalPoints || 0), 0);
return {
assistantResponses: childStreamResponse ? assistantResponses : [],
assistantResponses: system_forbid_stream ? [] : assistantResponses,
[DispatchNodeResponseKeyEnum.runTimes]: runTimes,
[DispatchNodeResponseKeyEnum.nodeResponse]: {
moduleLogo: appData.avatar,

View File

@@ -1,4 +1,5 @@
import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt';
import { ChatFileTypeEnum } from '@fastgpt/global/core/chat/constants';
import { NodeOutputKeyEnum } from '@fastgpt/global/core/workflow/constants';
import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runtime/constants';
import type { ModuleDispatchProps } from '@fastgpt/global/core/workflow/runtime/type';
@@ -11,6 +12,26 @@ export const dispatchPluginInput = (props: PluginInputProps) => {
const { params, query } = props;
const { files } = chatValue2RuntimePrompt(query);
/*
  Normalize file-type values in params.
  * When the plugin runs standalone, a file param is a special array of file items.
  * When the plugin is called by another workflow, the param is a string[] and is left untouched.
  * Hard requirement: when the plugin is called standalone through the API, avoid
    colliding with this special type.

  TODO: filter by max files
*/
for (const key in params) {
  const val = params[key];
  if (
    Array.isArray(val) &&
    // Optional chaining: arrays may contain null/undefined items, which must not throw
    val.every(
      (item) => item?.type === ChatFileTypeEnum.file || item?.type === ChatFileTypeEnum.image
    )
  ) {
    // Replace the file items with their urls
    params[key] = val.map((item) => item.url);
  }
}
return {
...params,
[DispatchNodeResponseKeyEnum.nodeResponse]: {},

View File

@@ -14,15 +14,17 @@ import { SERVICE_LOCAL_HOST } from '../../../../common/system/tools';
import { addLog } from '../../../../common/system/log';
import { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { textAdaptGptResponse } from '@fastgpt/global/core/workflow/runtime/utils';
import {
textAdaptGptResponse,
replaceEditorVariable
} from '@fastgpt/global/core/workflow/runtime/utils';
import { getSystemPluginCb } from '../../../../../plugins/register';
import { ContentTypes } from '@fastgpt/global/core/workflow/constants';
import { replaceEditorVariable } from '@fastgpt/global/core/workflow/utils';
import { uploadFile } from '../../../../common/file/gridfs/controller';
import { uploadFileFromBase64Img } from '../../../../common/file/gridfs/controller';
import { ReadFileBaseUrl } from '@fastgpt/global/common/file/constants';
import { createFileToken } from '../../../../support/permission/controller';
import { removeFilesByPaths } from '../../../../common/file/utils';
import { JSONPath } from 'jsonpath-plus';
import type { SystemPluginSpecialResponse } from '../../../../../plugins/type';
type PropsArrType = {
key: string;
@@ -232,10 +234,34 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
// format output value type
const results: Record<string, any> = {};
node.outputs.forEach((item) => {
const key = item.key.startsWith('$') ? item.key : `$.${item.key}`;
results[item.key] = JSONPath({ path: key, json: formatResponse })[0];
});
// Extract every custom output from the response with its JSONPath key.
// The built-in outputs (error, raw response, add-output placeholder) are skipped.
node.outputs
  .filter(
    (item) =>
      item.id !== NodeOutputKeyEnum.error &&
      item.id !== NodeOutputKeyEnum.httpRawResponse &&
      item.id !== NodeOutputKeyEnum.addOutputParam
  )
  .forEach((item) => {
    // Keys that are not already a JSONPath are treated as a property of the root
    const key = item.key.startsWith('$') ? item.key : `$.${item.key}`;

    results[item.key] = (() => {
      const result = JSONPath({ path: key, json: formatResponse });

      // Nothing matched: return undefined
      if (!result || result.length === 0) {
        return undefined;
      }

      // Return the whole result array in the following cases:
      // 1. Wildcard *
      // 2. Array slice [start:end] (with or without a start index, e.g. [0:2], [:2], [-2:])
      // 3. Filter expression [?(...)]
      // 4. Recursive descent ..
      // 5. Multiple result operator ,
      // Otherwise the path addresses a single value and the first match is returned.
      const needArrayResult = /\*|\[\?|\[\s*-?\d*\s*:|\.\.|,/.test(key);
      return needArrayResult ? result : result[0];
    })();
  });
if (typeof formatResponse[NodeOutputKeyEnum.answerText] === 'string') {
workflowStreamResponse?.({
@@ -247,6 +273,7 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
}
return {
...results,
[DispatchNodeResponseKeyEnum.nodeResponse]: {
totalPoints: 0,
params: Object.keys(params).length > 0 ? params : undefined,
@@ -256,8 +283,7 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
},
[DispatchNodeResponseKeyEnum.toolResponses]:
Object.keys(results).length > 0 ? results : rawResponse,
[NodeOutputKeyEnum.httpRawResponse]: rawResponse,
...results
[NodeOutputKeyEnum.httpRawResponse]: rawResponse
};
} catch (error) {
addLog.error('Http request error', error);
@@ -354,27 +380,25 @@ async function replaceSystemPluginResponse({
tmbId: string;
}) {
for await (const key of Object.keys(response)) {
if (typeof response[key] === 'object' && response[key].type === 'SYSTEM_PLUGIN_FILE') {
const fileObj = response[key];
const filename = fileObj.path.split('/').pop() || `${tmbId}-${Date.now()}`;
if (typeof response[key] === 'object' && response[key].type === 'SYSTEM_PLUGIN_BASE64') {
const fileObj = response[key] as SystemPluginSpecialResponse;
const filename = `${tmbId}-${Date.now()}.${fileObj.extension}`;
try {
const fileId = await uploadFile({
const fileId = await uploadFileFromBase64Img({
teamId,
tmbId,
bucketName: 'chat',
path: fileObj.path,
base64: fileObj.value,
filename,
contentType: fileObj.contentType,
metadata: {}
});
response[key] = `${ReadFileBaseUrl}?filename=${filename}&token=${await createFileToken({
response[key] = `${ReadFileBaseUrl}/${filename}?token=${await createFileToken({
bucketName: 'chat',
teamId,
tmbId,
fileId
})}`;
} catch (error) {}
removeFilesByPaths([fileObj.path]);
}
}
return response;

View File

@@ -2,16 +2,15 @@ import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runti
import type { ModuleDispatchProps } from '@fastgpt/global/core/workflow/runtime/type';
import { NodeInputKeyEnum, NodeOutputKeyEnum } from '@fastgpt/global/core/workflow/constants';
import { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type';
import { documentFileType } from '@fastgpt/global/common/file/constants';
import axios from 'axios';
import { serverRequestBaseUrl } from '../../../../common/api/serverRequest';
import { MongoRawTextBuffer } from '../../../../common/buffer/rawText/schema';
import { readFromSecondary } from '../../../../common/mongo/utils';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { detectFileEncoding, parseUrlToFileType } from '@fastgpt/global/common/file/tools';
import { readRawContentByFileBuffer } from '../../../../common/file/read/utils';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { UserChatItemValueItemType } from '@fastgpt/global/core/chat/type';
import { ChatItemType, UserChatItemValueItemType } from '@fastgpt/global/core/chat/type';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
type Props = ModuleDispatchProps<{
@@ -48,12 +47,41 @@ export const dispatchReadFiles = async (props: Props): Promise<Response> => {
runningAppInfo: { teamId },
histories,
chatConfig,
node: { version },
params: { fileUrlList = [] }
} = props;
const maxFiles = chatConfig?.fileSelectConfig?.maxFiles || 20;
// Get files from histories
const filesFromHistories = histories
const filesFromHistories = version !== '489' ? [] : getHistoryFileLinks(histories);
const { text, readFilesResult } = await getFileContentFromLinks({
// Concat fileUrlList and filesFromHistories; remove not supported files
urls: [...fileUrlList, ...filesFromHistories],
requestOrigin,
maxFiles,
teamId
});
return {
[NodeOutputKeyEnum.text]: text,
[DispatchNodeResponseKeyEnum.nodeResponse]: {
readFiles: readFilesResult.map((item) => ({
name: item?.filename || '',
url: item?.url || ''
})),
readFilesResult: readFilesResult
.map((item) => item?.nodeResponsePreviewText ?? '')
.join('\n******\n')
},
[DispatchNodeResponseKeyEnum.toolResponses]: {
fileContent: text
}
};
};
export const getHistoryFileLinks = (histories: ChatItemType[]) => {
return histories
.filter((item) => {
if (item.obj === ChatRoleEnum.Human) {
return item.value.filter((value) => value.type === 'file');
@@ -70,28 +98,38 @@ export const dispatchReadFiles = async (props: Props): Promise<Response> => {
return files;
})
.flat();
};
// Concat fileUrlList and filesFromHistories; remove not supported files
const parseUrlList = [...fileUrlList, ...filesFromHistories]
export const getFileContentFromLinks = async ({
urls,
requestOrigin,
maxFiles,
teamId
}: {
urls: string[];
requestOrigin?: string;
maxFiles: number;
teamId: string;
}) => {
const parseUrlList = urls
// Remove invalid urls
.filter((url) => {
if (typeof url !== 'string') return false;
// 检查相对路径
const validPrefixList = ['/', 'http', 'ws'];
if (validPrefixList.some((prefix) => url.startsWith(prefix))) {
return true;
}
return false;
})
// Just get the document type file
.filter((url) => parseUrlToFileType(url)?.type === 'file')
.map((url) => {
try {
// Avoid "/api/xxx" file error.
const origin = requestOrigin ?? 'http://localhost:3000';
// Check is system upload file
if (url.startsWith('/') || (requestOrigin && url.startsWith(requestOrigin))) {
// Parse url, get filename query. Keep only documents that can be parsed
const parseUrl = new URL(url, origin);
const filenameQuery = parseUrl.searchParams.get('filename');
// Not document
if (filenameQuery) {
const extensionQuery = filenameQuery.split('.').pop()?.toLowerCase() || '';
if (!documentFileType.includes(extensionQuery)) {
return '';
}
}
// Remove the origin(Make intranet requests directly)
if (requestOrigin && url.startsWith(requestOrigin)) {
url = url.replace(requestOrigin, '');
@@ -123,7 +161,7 @@ export const dispatchReadFiles = async (props: Props): Promise<Response> => {
}
try {
// Get file buffer
// Get file buffer data
const response = await axios.get(url, {
baseURL: serverRequestBaseUrl,
responseType: 'arraybuffer'
@@ -197,18 +235,7 @@ export const dispatchReadFiles = async (props: Props): Promise<Response> => {
const text = readFilesResult.map((item) => item?.text ?? '').join('\n******\n');
return {
[NodeOutputKeyEnum.text]: text,
[DispatchNodeResponseKeyEnum.nodeResponse]: {
readFiles: readFilesResult.map((item) => ({
name: item?.filename || '',
url: item?.url || ''
})),
readFilesResult: readFilesResult
.map((item) => item?.nodeResponsePreviewText ?? '')
.join('\n******\n')
},
[DispatchNodeResponseKeyEnum.toolResponses]: {
fileContent: text
}
text,
readFilesResult
};
};

View File

@@ -4,11 +4,14 @@ import {
SseResponseEventEnum
} from '@fastgpt/global/core/workflow/runtime/constants';
import { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type';
import { getReferenceVariableValue } from '@fastgpt/global/core/workflow/runtime/utils';
import {
getReferenceVariableValue,
replaceEditorVariable
} from '@fastgpt/global/core/workflow/runtime/utils';
import { TUpdateListItem } from '@fastgpt/global/core/workflow/template/system/variableUpdate/type';
import { ModuleDispatchProps } from '@fastgpt/global/core/workflow/runtime/type';
import { removeSystemVariable, valueTypeFormat } from '../utils';
import { replaceEditorVariable } from '@fastgpt/global/core/workflow/utils';
import { isValidReferenceValue } from '@fastgpt/global/core/workflow/utils';
type Props = ModuleDispatchProps<{
[NodeInputKeyEnum.updateList]: TUpdateListItem[];
@@ -19,15 +22,24 @@ export const dispatchUpdateVariable = async (props: Props): Promise<Response> =>
const { params, variables, runtimeNodes, workflowStreamResponse, node } = props;
const { updateList } = params;
const result = updateList.map((item) => {
const varNodeId = item.variable?.[0];
const varKey = item.variable?.[1];
const nodeIds = runtimeNodes.map((node) => node.nodeId);
if (!varNodeId || !varKey) {
const result = updateList.map((item) => {
const variable = item.variable;
if (!isValidReferenceValue(variable, nodeIds)) {
return null;
}
const varNodeId = variable[0];
const varKey = variable[1];
if (!varKey) {
return null;
}
const value = (() => {
// If first item is empty, it means it is a input value
if (!item.value?.[0]) {
const formatValue = valueTypeFormat(item.value?.[1], item.valueType);
@@ -48,6 +60,7 @@ export const dispatchUpdateVariable = async (props: Props): Promise<Response> =>
}
})();
// Update node output
// Global variable
if (varNodeId === VARIABLE_NODE_ID) {
variables[varKey] = value;
@@ -72,6 +85,7 @@ export const dispatchUpdateVariable = async (props: Props): Promise<Response> =>
});
return {
[DispatchNodeResponseKeyEnum.newVariables]: variables,
[DispatchNodeResponseKeyEnum.nodeResponse]: {
updateVarResult: result
}

View File

@@ -1,14 +1,5 @@
import { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type';
import { countPromptTokens } from '../../common/string/tiktoken/index';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import {
getPluginInputsFromStoreNodes,
getPluginRunContent
} from '@fastgpt/global/core/app/plugin/utils';
import { StoreNodeItemType } from '@fastgpt/global/core/workflow/type/node';
import { RuntimeUserPromptType, UserChatItemType } from '@fastgpt/global/core/chat/type';
import { runtimePrompt2ChatsValue } from '@fastgpt/global/core/chat/adapt';
/* filter search result */
export const filterSearchResultsByMaxChars = async (

View File

@@ -33,7 +33,8 @@
"papaparse": "5.4.1",
"pdfjs-dist": "4.4.168",
"pg": "^8.10.0",
"tiktoken": "^1.0.15",
"request-ip": "^3.3.0",
"tiktoken": "1.0.17",
"tunnel": "^0.0.6",
"turndown": "^7.1.2"
},
@@ -46,6 +47,7 @@
"@types/node-cron": "^3.0.11",
"@types/papaparse": "5.3.7",
"@types/pg": "^8.6.6",
"@types/request-ip": "^0.0.37",
"@types/tunnel": "^0.0.4",
"@types/turndown": "^5.0.4"
}

View File

@@ -13,6 +13,7 @@ import { ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import { splitCombinePluginId } from '../../../core/app/plugin/controller';
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
import { AuthModeType, AuthResponseType } from '../type';
import { AppDefaultPermissionVal } from '@fastgpt/global/support/permission/app/constant';
export const authPluginByTmbId = async ({
tmbId,
@@ -60,7 +61,6 @@ export const authAppByTmbId = async ({
if (isRoot) {
return {
...app,
defaultPermission: app.defaultPermission,
permission: new AppPermission({ isOwner: true })
};
}
@@ -71,7 +71,13 @@ export const authAppByTmbId = async ({
const isOwner = tmbPer.isOwner || String(app.tmbId) === String(tmbId);
const { Per, defaultPermission } = await (async () => {
const { Per } = await (async () => {
if (isOwner) {
return {
Per: new AppPermission({ isOwner: true })
};
}
if (
AppFolderTypeList.includes(app.type) ||
app.inheritPermission === false ||
@@ -86,10 +92,9 @@ export const authAppByTmbId = async ({
resourceId: appId,
resourceType: PerResourceTypeEnum.app
});
const Per = new AppPermission({ per: rp ?? app.defaultPermission, isOwner });
const Per = new AppPermission({ per: rp ?? AppDefaultPermissionVal, isOwner });
return {
Per,
defaultPermission: app.defaultPermission
Per
};
} else {
// is not folder and inheritPermission is true and is not root folder.
@@ -104,8 +109,7 @@ export const authAppByTmbId = async ({
isOwner
});
return {
Per,
defaultPermission: parent.defaultPermission
Per
};
}
})();
@@ -116,7 +120,6 @@ export const authAppByTmbId = async ({
return {
...app,
defaultPermission,
permission: Per
};
})();

View File

@@ -10,12 +10,17 @@ import { MongoResourcePermission } from './schema';
import { ClientSession } from 'mongoose';
import {
PermissionValueType,
ResourcePermissionType
ResourcePermissionType,
ResourcePerWithGroup,
ResourcePerWithTmbWithUser
} from '@fastgpt/global/support/permission/type';
import { bucketNameMap } from '@fastgpt/global/common/file/constants';
import { addMinutes } from 'date-fns';
import { getGroupsByTmbId } from './memberGroup/controllers';
import { Permission } from '@fastgpt/global/support/permission/controller';
import { ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import { RequireOnlyOne } from '@fastgpt/global/common/type/utils';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
/** get resource permission for a team member
* If there is no permission for the team member, it will return undefined
@@ -123,20 +128,94 @@ export async function getResourceAllClbs({
).lean();
}
/**
 * Load every permission record stored for one resource inside a team.
 *
 * The filter only constrains `resourceId`, `resourceType` and `teamId`; it puts
 * no condition on `tmbId` or `groupId`, so all matching records are returned.
 *
 * @param resourceId   Id of the resource whose permission records are read.
 * @param resourceType Resource kind used to scope the lookup.
 * @param teamId       Team that owns the resource.
 * @param session      Mongo session the query is bound to.
 * @returns The matching permission records, fetched with `.lean()`.
 */
export async function getResourceClbsAndGroups({
  resourceId,
  resourceType,
  teamId,
  session
}: {
  resourceId: ParentIdType;
  resourceType: Omit<`${PerResourceTypeEnum}`, 'team'>;
  teamId: string;
  session: ClientSession;
}) {
  const permissionQuery = MongoResourcePermission.find(
    { resourceId, resourceType, teamId },
    undefined,
    { session }
  );

  return permissionQuery.lean();
}
/**
 * Load the permission records of a resource together with display info,
 * split into member records and group records.
 *
 * - Member records: documents where `tmbId` exists, with `tmbId` populated
 *   (`name userId`) and its nested `userId` populated (`avatar`).
 * - Group records: documents where `groupId` exists, with `groupId` populated
 *   (`name avatar`).
 *
 * Both queries are handed to `Promise.all` un-awaited so they execute
 * concurrently; awaiting each one inside the array literal would run them one
 * after the other.
 *
 * @param resourceId   Id of the resource whose permission records are read.
 * @param resourceType Resource kind used to scope the lookup.
 * @param teamId       Team that owns the resource.
 * @returns A tuple `[memberRecords, groupRecords]`.
 */
export const getClbsAndGroupsWithInfo = async ({
  resourceId,
  resourceType,
  teamId
}: {
  resourceId: ParentIdType;
  resourceType: Omit<`${PerResourceTypeEnum}`, 'team'>;
  teamId: string;
}): Promise<[ResourcePerWithTmbWithUser[], ResourcePerWithGroup[]]> => {
  const [memberRecords, groupRecords] = await Promise.all([
    // Records bound to a team member, with member name and user avatar.
    MongoResourcePermission.find({
      teamId,
      resourceId,
      resourceType,
      tmbId: {
        $exists: true
      }
    }).populate({
      path: 'tmbId',
      select: 'name userId',
      populate: {
        path: 'userId',
        select: 'avatar'
      }
    }),
    // Records bound to a member group, with group name and avatar.
    MongoResourcePermission.find({
      teamId,
      resourceId,
      resourceType,
      groupId: {
        $exists: true
      }
    }).populate({
      path: 'groupId',
      select: 'name avatar'
    })
  ]);

  return [
    memberRecords as ResourcePerWithTmbWithUser[],
    groupRecords as ResourcePerWithGroup[]
  ];
};
/**
 * Remove one resource-permission record by its document id.
 *
 * NOTE(review): `findByIdAndRemove` is presumably still available in the
 * Mongoose version this project pins — confirm before upgrading Mongoose.
 *
 * @param id Document id of the permission record to remove.
 * @returns Whatever `MongoResourcePermission.findByIdAndRemove` returns.
 */
export const delResourcePermissionById = (id: string) =>
  MongoResourcePermission.findByIdAndRemove(id);
/**
 * Delete the permission record that one member OR one group holds on a resource.
 *
 * Exactly one of `tmbId` / `groupId` must be supplied. When neither or both are
 * given the call rejects with `CommonErrEnum.missingParams` and nothing is
 * deleted.
 *
 * @param resourceType Resource kind the record belongs to.
 * @param teamId       Team that owns the resource.
 * @param resourceId   Id of the resource.
 * @param session      Optional Mongo session to run the delete in.
 * @param tmbId        Team-member id whose record should be removed.
 * @param groupId      Group id whose record should be removed.
 * @returns The result of `MongoResourcePermission.deleteOne`, or a rejected
 *          promise when the member/group selector is invalid.
 */
export const delResourcePermission = ({
  session,
  tmbId,
  groupId,
  ...props
}: {
  resourceType: PerResourceTypeEnum;
  teamId: string;
  resourceId: string;
  session?: ClientSession;
  tmbId?: string;
  groupId?: string;
}) => {
  // Exactly one of tmbId / groupId is allowed: reject when both or neither are set.
  if (!!tmbId === !!groupId) {
    return Promise.reject(CommonErrEnum.missingParams);
  }

  return MongoResourcePermission.deleteOne(
    {
      // Only the selector that was provided ends up in the filter.
      ...(tmbId ? { tmbId } : {}),
      ...(groupId ? { groupId } : {}),
      ...props
    },
    { session }
  );
};
/* The code below is pending migration */

View File

@@ -20,6 +20,7 @@ import { MongoDatasetData } from '../../../core/dataset/data/schema';
import { AuthModeType, AuthResponseType } from '../type';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { ParentIdType } from '@fastgpt/global/common/parentFolder/type';
import { DatasetDefaultPermissionVal } from '@fastgpt/global/support/permission/dataset/constant';
export const authDatasetByTmbId = async ({
tmbId,
@@ -62,7 +63,12 @@ export const authDatasetByTmbId = async ({
const isOwner = tmbPer.isOwner || String(dataset.tmbId) === String(tmbId);
// get dataset permission or inherit permission from parent folder.
const { Per, defaultPermission } = await (async () => {
const { Per } = await (async () => {
if (isOwner) {
return {
Per: new DatasetPermission({ isOwner: true })
};
}
if (
dataset.type === DatasetTypeEnum.folder ||
dataset.inheritPermission === false ||
@@ -78,12 +84,11 @@ export const authDatasetByTmbId = async ({
resourceType: PerResourceTypeEnum.dataset
});
const Per = new DatasetPermission({
per: rp ?? dataset.defaultPermission,
per: rp ?? DatasetDefaultPermissionVal,
isOwner
});
return {
Per,
defaultPermission: dataset.defaultPermission
Per
};
} else {
// is not folder and inheritPermission is true and is not root folder.
@@ -100,8 +105,7 @@ export const authDatasetByTmbId = async ({
});
return {
Per,
defaultPermission: parent.defaultPermission
Per
};
}
})();
@@ -112,7 +116,6 @@ export const authDatasetByTmbId = async ({
return {
...dataset,
defaultPermission,
permission: Per
};
})();
@@ -154,6 +157,7 @@ export const authDataset = async ({
dataset
};
};
// the temporary solution for authDatasetCollection is getting the
export async function authDatasetCollection({
collectionId,
@@ -179,65 +183,70 @@ export async function authDatasetCollection({
tmbId,
datasetId: collection.datasetId._id,
per,
isRoot: isRootFromHeader || isRoot
isRoot: isRootFromHeader
});
return {
teamId,
tmbId,
collection,
permission: dataset.permission
permission: dataset.permission,
isRoot: isRootFromHeader
};
}
export async function authDatasetFile({
fileId,
per,
...props
}: AuthModeType & {
fileId: string;
}): Promise<
AuthResponseType<DatasetPermission> & {
file: DatasetFileSchema;
}
> {
const { teamId, tmbId, isRoot } = await parseHeaderCert(props);
// export async function authDatasetFile({
// fileId,
// per,
// ...props
// }: AuthModeType & {
// fileId: string;
// }): Promise<
// AuthResponseType<DatasetPermission> & {
// file: DatasetFileSchema;
// }
// > {
// const { teamId, tmbId, isRoot } = await parseHeaderCert(props);
const [file, collection] = await Promise.all([
getFileById({ bucketName: BucketNameEnum.dataset, fileId }),
MongoDatasetCollection.findOne({
teamId,
fileId
})
]);
// const [file, collection] = await Promise.all([
// getFileById({ bucketName: BucketNameEnum.dataset, fileId }),
// MongoDatasetCollection.findOne({
// teamId,
// fileId
// })
// ]);
if (!file) {
return Promise.reject(CommonErrEnum.fileNotFound);
}
// if (!file) {
// return Promise.reject(CommonErrEnum.fileNotFound);
// }
if (!collection) {
return Promise.reject(DatasetErrEnum.unAuthDatasetFile);
}
// if (!collection) {
// return Promise.reject(DatasetErrEnum.unAuthDatasetFile);
// }
try {
const { permission } = await authDatasetCollection({
...props,
collectionId: collection._id,
per,
isRoot
});
// try {
// const { permission } = await authDatasetCollection({
// ...props,
// collectionId: collection._id,
// per,
// isRoot
// });
return {
teamId,
tmbId,
file,
permission
};
} catch (error) {
return Promise.reject(DatasetErrEnum.unAuthDatasetFile);
}
}
// return {
// teamId,
// tmbId,
// file,
// permission,
// isRoot
// };
// } catch (error) {
// return Promise.reject(DatasetErrEnum.unAuthDatasetFile);
// }
// }
/*
DatasetData permission is inherited from collection.
*/
export async function authDatasetData({
dataId,
...props
@@ -268,8 +277,8 @@ export async function authDatasetData({
collectionId: String(datasetData.collectionId),
sourceName: result.collection.name || '',
sourceId: result.collection?.fileId || result.collection?.rawLink,
isOwner: String(datasetData.tmbId) === String(result.tmbId),
canWrite: result.permission.hasWritePer
isOwner: String(datasetData.tmbId) === String(result.tmbId)
// permission: result.permission
};
return {

View File

@@ -1,9 +1,9 @@
import { mongoSessionRun } from '../../common/mongo/sessionRun';
import { MongoResourcePermission } from './schema';
import { ClientSession, Model } from 'mongoose';
import { NullPermission, PerResourceTypeEnum } from '@fastgpt/global/support/permission/constant';
import { PerResourceTypeEnum } from '@fastgpt/global/support/permission/constant';
import { PermissionValueType } from '@fastgpt/global/support/permission/type';
import { getResourceAllClbs } from './controller';
import { getResourceClbsAndGroups } from './controller';
import { RequireOnlyOne } from '@fastgpt/global/common/type/utils';
import { ParentIdType } from '@fastgpt/global/common/parentFolder/type';
@@ -28,7 +28,6 @@ export async function syncChildrenPermission({
resourceModel,
session,
defaultPermission,
collaborators
}: {
resource: SyncChildrenPermissionResourceType;
@@ -42,7 +41,6 @@ export async function syncChildrenPermission({
// should be provided when inheritPermission is true
session: ClientSession;
defaultPermission?: PermissionValueType;
collaborators?: UpdateCollaboratorItem[];
}) {
// only folder has permission
@@ -76,19 +74,6 @@ export async function syncChildrenPermission({
}
if (!children.length) return;
// Sync default permission
if (defaultPermission !== undefined) {
await resourceModel.updateMany(
{
_id: { $in: children }
},
{
defaultPermission
},
{ session }
);
}
// sync the resource permission
if (collaborators) {
// Update the collaborators of all children
@@ -124,28 +109,20 @@ export async function resumeInheritPermission({
const isFolder = folderTypeList.includes(resource.type);
const fn = async (session: ClientSession) => {
const parentResource = await resourceModel
.findById(resource.parentId, 'defaultPermission')
.lean<SyncChildrenPermissionResourceType & { defaultPermission: PermissionValueType }>()
.session(session);
const parentDefaultPermissionVal = parentResource?.defaultPermission ?? NullPermission;
// update the resource permission
await resourceModel.updateOne(
{
_id: resource._id
},
{
inheritPermission: true,
defaultPermission: parentDefaultPermissionVal
inheritPermission: true
},
{ session }
);
// Folder resource, need to sync children
if (isFolder) {
const parentClbs = await getResourceAllClbs({
const parentClbsAndGroups = await getResourceClbsAndGroups({
resourceId: resource.parentId,
teamId: resource.teamId,
resourceType,
@@ -155,7 +132,7 @@ export async function resumeInheritPermission({
// sync self
await syncCollaborators({
resourceType,
collaborators: parentClbs,
collaborators: parentClbsAndGroups,
teamId: resource.teamId,
resourceId: resource._id,
session
@@ -169,8 +146,7 @@ export async function resumeInheritPermission({
folderTypeList,
resourceType,
session,
defaultPermission: parentDefaultPermissionVal,
collaborators: parentClbs
collaborators: parentClbsAndGroups
});
} else {
// Not folder, delete all clb
@@ -215,6 +191,7 @@ export async function syncCollaborators({
resourceId,
resourceType: resourceType,
tmbId: item.tmbId,
groupId: item.groupId,
permission: item.permission
})),
{

View File

@@ -64,7 +64,7 @@ export const getGroupsByTmbId = async ({
groupId: {
$exists: true
},
role: role ? { $in: role } : undefined
...(role ? { role: { $in: role } } : {})
})
.populate('groupId')
.lean()

Some files were not shown because too many files have changed in this diff Show More