Compare commits
25 Commits
v4.6.3
...
v4.6.6-alp
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5b676ff4ad | ||
|
|
759a2330e6 | ||
|
|
86286efb54 | ||
|
|
99e8ba2256 | ||
|
|
f84fd93cbb | ||
|
|
cd682d4275 | ||
|
|
41115a96c0 | ||
|
|
b14a1db2f9 | ||
|
|
703583fff7 | ||
|
|
d33c99f564 | ||
|
|
05bf1b2265 | ||
|
|
dd7b4b98ae | ||
|
|
34656dfda0 | ||
|
|
7b5c35018b | ||
|
|
7630417679 | ||
|
|
63ce76413e | ||
|
|
1c1305fcb6 | ||
|
|
c3437b9367 | ||
|
|
e18c79ca71 | ||
|
|
d2d7eac9e0 | ||
|
|
84cf6b5658 | ||
|
|
b58249fc3a | ||
|
|
54d52d8d25 | ||
|
|
f298b90b69 | ||
|
|
e01c38efe0 |
@@ -8,4 +8,5 @@ README.md
|
||||
|
||||
.yalc/
|
||||
yalc.lock
|
||||
testApi/
|
||||
testApi/
|
||||
*.local.*
|
||||
3
.gitignore
vendored
@@ -35,4 +35,5 @@ dist/
|
||||
**/.hugo_build.lock
|
||||
docSite/public/
|
||||
docSite/resources/_gen/
|
||||
docSite/.vercel
|
||||
docSite/.vercel
|
||||
*.local.*
|
||||
69
Dockerfile
@@ -1,57 +1,86 @@
|
||||
# Install dependencies only when needed
|
||||
FROM node:18.15-alpine AS deps
|
||||
# Check https://github.com/nodejs/docker-node/tree/b4117f9333da4138b03a546ec926ef50a31506c3#nodealpine to understand why libc6-compat might be needed.
|
||||
RUN apk add --no-cache libc6-compat && npm install -g pnpm
|
||||
# --------- install dependencies -----------
|
||||
FROM node:18.17-alpine AS mainDeps
|
||||
WORKDIR /app
|
||||
|
||||
ARG name
|
||||
ARG proxy
|
||||
|
||||
RUN [ -z "$proxy" ] || sed -i 's/dl-cdn.alpinelinux.org/mirrors.ustc.edu.cn/g' /etc/apk/repositories
|
||||
RUN apk add --no-cache libc6-compat && npm install -g pnpm@8.6.0
|
||||
# if proxy exists, set proxy
|
||||
RUN [ -z "$proxy" ] || pnpm config set registry https://registry.npm.taobao.org
|
||||
|
||||
# copy packages and one project
|
||||
COPY package.json pnpm-lock.yaml pnpm-workspace.yaml ./
|
||||
COPY pnpm-lock.yaml pnpm-workspace.yaml ./
|
||||
COPY ./packages ./packages
|
||||
COPY ./projects/$name/package.json ./projects/$name/package.json
|
||||
|
||||
RUN [ -f pnpm-lock.yaml ] || (echo "Lockfile not found." && exit 1)
|
||||
|
||||
RUN pnpm install
|
||||
RUN pnpm i
|
||||
|
||||
# Rebuild the source code only when needed
|
||||
FROM node:18.15-alpine AS builder
|
||||
# --------- install dependencies -----------
|
||||
FROM node:18.17-alpine AS workerDeps
|
||||
WORKDIR /app
|
||||
|
||||
ARG proxy
|
||||
|
||||
RUN [ -z "$proxy" ] || sed -i 's/dl-cdn.alpinelinux.org/mirrors.ustc.edu.cn/g' /etc/apk/repositories
|
||||
RUN apk add --no-cache libc6-compat && npm install -g pnpm@8.6.0
|
||||
# if proxy exists, set proxy
|
||||
RUN [ -z "$proxy" ] || pnpm config set registry https://registry.npm.taobao.org
|
||||
|
||||
COPY ./worker /app/worker
|
||||
RUN cd /app/worker && pnpm i --production --ignore-workspace
|
||||
|
||||
# --------- builder -----------
|
||||
FROM node:18.17-alpine AS builder
|
||||
WORKDIR /app
|
||||
|
||||
ARG name
|
||||
ARG proxy
|
||||
|
||||
# copy common node_modules and one project node_modules
|
||||
COPY package.json pnpm-workspace.yaml ./
|
||||
COPY --from=deps /app/node_modules ./node_modules
|
||||
COPY --from=deps /app/packages ./packages
|
||||
COPY --from=mainDeps /app/node_modules ./node_modules
|
||||
COPY --from=mainDeps /app/packages ./packages
|
||||
COPY ./projects/$name ./projects/$name
|
||||
COPY --from=deps /app/projects/$name/node_modules ./projects/$name/node_modules
|
||||
COPY --from=mainDeps /app/projects/$name/node_modules ./projects/$name/node_modules
|
||||
|
||||
# Uncomment the following line in case you want to disable telemetry during the build.
|
||||
ENV NEXT_TELEMETRY_DISABLED 1
|
||||
RUN npm install -g pnpm
|
||||
RUN pnpm --filter=$name run build
|
||||
RUN [ -z "$proxy" ] || sed -i 's/dl-cdn.alpinelinux.org/mirrors.ustc.edu.cn/g' /etc/apk/repositories
|
||||
|
||||
FROM node:18.15-alpine AS runner
|
||||
RUN apk add --no-cache libc6-compat && npm install -g pnpm@8.6.0
|
||||
RUN pnpm --filter=$name build
|
||||
|
||||
# --------- runner -----------
|
||||
FROM node:18.17-alpine AS runner
|
||||
WORKDIR /app
|
||||
|
||||
ARG name
|
||||
ARG proxy
|
||||
|
||||
# create user and use it
|
||||
RUN addgroup --system --gid 1001 nodejs
|
||||
RUN adduser --system --uid 1001 nextjs
|
||||
|
||||
RUN [ -z "$proxy" ] || sed -i 's/dl-cdn.alpinelinux.org/mirrors.ustc.edu.cn/g' /etc/apk/repositories
|
||||
RUN apk add --no-cache curl ca-certificates \
|
||||
&& update-ca-certificates
|
||||
|
||||
# copy running files
|
||||
COPY --from=builder /app/projects/$name/public ./projects/$name/public
|
||||
COPY --from=builder /app/projects/$name/next.config.js ./projects/$name/next.config.js
|
||||
COPY --from=builder --chown=nextjs:nodejs /app/projects/$name/.next/standalone ./
|
||||
COPY --from=builder --chown=nextjs:nodejs /app/projects/$name/.next/static ./projects/$name/.next/static
|
||||
COPY --from=builder /app/projects/$name/public /app/projects/$name/public
|
||||
COPY --from=builder /app/projects/$name/next.config.js /app/projects/$name/next.config.js
|
||||
COPY --from=builder --chown=nextjs:nodejs /app/projects/$name/.next/standalone /app/
|
||||
COPY --from=builder --chown=nextjs:nodejs /app/projects/$name/.next/static /app/projects/$name/.next/static
|
||||
# copy package.json to version file
|
||||
COPY --from=builder /app/projects/$name/package.json ./package.json
|
||||
# copy worker
|
||||
COPY --from=workerDeps /app/worker /app/worker
|
||||
# copy config
|
||||
COPY ./projects/$name/data/config.json /app/data/config.json
|
||||
COPY ./projects/$name/data/pluginTemplates /app/data/pluginTemplates
|
||||
COPY ./projects/$name/data/simpleTemplates /app/data/simpleTemplates
|
||||
|
||||
|
||||
ENV NODE_ENV production
|
||||
ENV NEXT_TELEMETRY_DISABLED 1
|
||||
|
||||
66
README.md
@@ -50,19 +50,20 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
|
||||
|
||||
## 💡 功能
|
||||
|
||||
1. 强大的可视化编排,轻松构建 AI 应用
|
||||
`1` 强大的可视化编排,轻松构建 AI 应用
|
||||
- [x] 提供简易模式,无需操作编排
|
||||
- [x] 用户对话前引导,全局字符串变量
|
||||
- [x] 知识库搜索
|
||||
- [x] 多 LLM 模型对话
|
||||
- [x] 文本内容提取成结构化数据
|
||||
- [x] HTTP 扩展
|
||||
- [ ] 嵌入 Laf,实现在线编写 HTTP 模块
|
||||
- [ ] 嵌入 [Laf](https://github.com/labring/laf),实现在线编写 HTTP 模块
|
||||
- [x] 对话下一步指引
|
||||
- [ ] 对话多路线选择
|
||||
- [x] 源文件引用追踪
|
||||
- [x] 模块封装,实现多级复用
|
||||
2. 丰富的知识库预处理
|
||||
|
||||
`2` 丰富的知识库预处理
|
||||
- [x] 多库复用,混用
|
||||
- [x] chunk 记录修改和删除
|
||||
- [x] 支持手动输入,直接分段,QA 拆分导入
|
||||
@@ -70,15 +71,18 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
|
||||
- [x] 支持知识库单独设置向量模型
|
||||
- [x] 源文件存储
|
||||
- [ ] 文件学习 Agent
|
||||
3. 多种效果测试渠道
|
||||
|
||||
`3` 多种效果测试渠道
|
||||
- [x] 知识库单点搜索测试
|
||||
- [x] 对话时反馈引用并可修改与删除
|
||||
- [x] 完整上下文呈现
|
||||
- [x] 完整模块中间值呈现
|
||||
4. OpenAPI
|
||||
|
||||
`4` OpenAPI
|
||||
- [x] completions 接口 (对齐 GPT 接口)
|
||||
- [ ] 知识库 CRUD
|
||||
5. 运营功能
|
||||
|
||||
`5` 运营功能
|
||||
- [x] 免登录分享窗口
|
||||
- [x] Iframe 一键嵌入
|
||||
- [x] 统一查阅对话记录,并对数据进行标注
|
||||
@@ -93,7 +97,7 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
|
||||
|
||||
- **⚡ 快速部署**
|
||||
|
||||
> Sealos 的服务器在国外,不需要额外处理网络问题,无需服务器、无需魔法、无需域名,支持高并发 & 动态伸缩。点击以下按钮即可一键部署 👇
|
||||
> [Sealos](https://sealos.io) 的服务器在国外,不需要额外处理网络问题,无需服务器、无需魔法、无需域名,支持高并发 & 动态伸缩。点击以下按钮即可一键部署 👇
|
||||
|
||||
[](https://cloud.sealos.io/?openapp=system-fastdeploy%3FtemplateName%3Dfastgpt)
|
||||
|
||||
@@ -142,7 +146,7 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
|
||||
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">
|
||||
</a>
|
||||
|
||||
## 🤝 第三方生态
|
||||
## 🌿 第三方生态
|
||||
|
||||
- [OnWeChat 个人微信/企微机器人](https://doc.fastgpt.in/docs/use-cases/onwechat/)
|
||||
|
||||
@@ -150,9 +154,51 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
|
||||
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">
|
||||
</a>
|
||||
|
||||
## 🤝 参与贡献
|
||||
|
||||
我们非常欢迎各种形式的贡献。如果你对贡献代码感兴趣,可以查看我们的 GitHub [Issues](https://github.com/labring/FastGPT/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc),大展身手,向我们展示你的奇思妙想。
|
||||
|
||||
<a href="https://github.com/labring/FastGPT/graphs/contributors" target="_blank">
|
||||
<table>
|
||||
<tr>
|
||||
<th colspan="2">
|
||||
<br><img src="https://contrib.rocks/image?repo=labring/FastGPT"><br><br>
|
||||
</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://next.ossinsight.io/widgets/official/compose-org-active-contributors/thumbnail.png?activity=active&period=past_28_days&owner_id=102226726&repo_ids=605673387&image_size=2x3&color_scheme=dark">
|
||||
<img alt="Active participants of labring - past 28 days" src="https://next.ossinsight.io/widgets/official/compose-org-active-contributors/thumbnail.png?activity=active&period=past_28_days&owner_id=102226726&repo_ids=605673387&image_size=2x3&color_scheme=light">
|
||||
</picture>
|
||||
</td>
|
||||
<td rowspan="2">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://next.ossinsight.io/widgets/official/compose-org-participants-growth/thumbnail.png?activity=new&period=past_28_days&owner_id=102226726&repo_ids=605673387&image_size=4x7&color_scheme=dark">
|
||||
<img alt="New trends of labring" src="https://next.ossinsight.io/widgets/official/compose-org-participants-growth/thumbnail.png?activity=new&period=past_28_days&owner_id=102226726&repo_ids=605673387&image_size=4x7&color_scheme=light">
|
||||
</picture>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://next.ossinsight.io/widgets/official/compose-org-active-contributors/thumbnail.png?activity=new&period=past_28_days&owner_id=102226726&repo_ids=605673387&image_size=2x3&color_scheme=dark">
|
||||
<img alt="New participants of labring - past 28 days" src="https://next.ossinsight.io/widgets/official/compose-org-active-contributors/thumbnail.png?activity=new&period=past_28_days&owner_id=102226726&repo_ids=605673387&image_size=2x3&color_scheme=light">
|
||||
</picture>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</a>
|
||||
|
||||
## 🌟 Star History
|
||||
|
||||
[](https://star-history.com/#labring/FastGPT&Date)
|
||||
<a href="https://github.com/labring/FastGPT/stargazers" target="_blank" style="display: block" align="center">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=labring/FastGPT&type=Date&theme=dark" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=labring/FastGPT&type=Date" />
|
||||
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=labring/FastGPT&type=Date" />
|
||||
</picture>
|
||||
</a>
|
||||
|
||||
<a href="#readme">
|
||||
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">
|
||||
@@ -165,4 +211,4 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
|
||||
1. 允许作为后台服务直接商用,但不允许提供 SaaS 服务。
|
||||
2. 未经商业授权,任何形式的商用服务均需保留相关版权信息。
|
||||
3. 完整请查看 [FastGPT Open Source License](./LICENSE)
|
||||
4. 联系方式:yujinlong@sealos.io,[点击查看商业版定价策略](https://doc.fastgpt.in/docs/commercial)
|
||||
4. 联系方式:yujinlong@sealos.io,[点击查看商业版定价策略](https://doc.fastgpt.in/docs/commercial)
|
||||
@@ -156,4 +156,10 @@ Project tech stack: NextJs + TS + ChakraUI + Mongo + Postgres (Vector plugin)
|
||||
|
||||
## 🌟 Star History
|
||||
|
||||
[](https://star-history.com/#labring/FastGPT&Date)
|
||||
<a href="https://github.com/labring/FastGPT/stargazers" target="_blank" style="display: block" align="center">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=labring/FastGPT&type=Date&theme=dark" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=labring/FastGPT&type=Date" />
|
||||
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=labring/FastGPT&type=Date" />
|
||||
</picture>
|
||||
</a>
|
||||
|
||||
BIN
docSite/assets/imgs/coreferenceResolution1.png
Normal file
|
After Width: | Height: | Size: 136 KiB |
BIN
docSite/assets/imgs/coreferenceResolution2.png
Normal file
|
After Width: | Height: | Size: 1.2 MiB |
BIN
docSite/assets/imgs/coreferenceResolution3.png
Normal file
|
After Width: | Height: | Size: 1.0 MiB |
BIN
docSite/assets/imgs/customfeedback1.png
Normal file
|
After Width: | Height: | Size: 459 KiB |
BIN
docSite/assets/imgs/customfeedback2.png
Normal file
|
After Width: | Height: | Size: 599 KiB |
BIN
docSite/assets/imgs/customfeedback3.png
Normal file
|
After Width: | Height: | Size: 267 KiB |
BIN
docSite/assets/imgs/customfeedback4.png
Normal file
|
After Width: | Height: | Size: 252 KiB |
BIN
docSite/assets/imgs/data_search1.png
Normal file
|
After Width: | Height: | Size: 84 KiB |
|
Before Width: | Height: | Size: 485 KiB After Width: | Height: | Size: 2.4 MiB |
|
Before Width: | Height: | Size: 147 KiB After Width: | Height: | Size: 1.3 MiB |
|
Before Width: | Height: | Size: 110 KiB After Width: | Height: | Size: 2.1 MiB |
|
Before Width: | Height: | Size: 112 KiB After Width: | Height: | Size: 1.4 MiB |
|
Before Width: | Height: | Size: 120 KiB After Width: | Height: | Size: 353 KiB |
|
Before Width: | Height: | Size: 205 KiB After Width: | Height: | Size: 138 KiB |
|
Before Width: | Height: | Size: 39 KiB After Width: | Height: | Size: 178 KiB |
BIN
docSite/assets/imgs/demo-appointment8.png
Normal file
|
After Width: | Height: | Size: 118 KiB |
BIN
docSite/assets/imgs/demo-appointment9.png
Normal file
|
After Width: | Height: | Size: 291 KiB |
|
Before Width: | Height: | Size: 294 KiB |
|
Before Width: | Height: | Size: 175 KiB |
|
Before Width: | Height: | Size: 172 KiB |
|
Before Width: | Height: | Size: 336 KiB After Width: | Height: | Size: 1.3 MiB |
|
Before Width: | Height: | Size: 280 KiB After Width: | Height: | Size: 1.3 MiB |
|
Before Width: | Height: | Size: 23 KiB |
|
Before Width: | Height: | Size: 29 KiB After Width: | Height: | Size: 162 KiB |
BIN
docSite/assets/imgs/judgement1.png
Normal file
|
After Width: | Height: | Size: 33 KiB |
BIN
docSite/assets/imgs/onSealos1.png
Normal file
|
After Width: | Height: | Size: 174 KiB |
BIN
docSite/assets/imgs/onsealos10.png
Normal file
|
After Width: | Height: | Size: 130 KiB |
BIN
docSite/assets/imgs/onsealos12.png
Normal file
|
After Width: | Height: | Size: 82 KiB |
BIN
docSite/assets/imgs/onsealos2.png
Normal file
|
After Width: | Height: | Size: 247 KiB |
BIN
docSite/assets/imgs/onsealos3.png
Normal file
|
After Width: | Height: | Size: 286 KiB |
BIN
docSite/assets/imgs/onsealos4.png
Normal file
|
After Width: | Height: | Size: 95 KiB |
BIN
docSite/assets/imgs/onsealos5.png
Normal file
|
After Width: | Height: | Size: 97 KiB |
BIN
docSite/assets/imgs/onsealos6.png
Normal file
|
After Width: | Height: | Size: 147 KiB |
BIN
docSite/assets/imgs/onsealos7.png
Normal file
|
After Width: | Height: | Size: 97 KiB |
BIN
docSite/assets/imgs/onsealos8.png
Normal file
|
After Width: | Height: | Size: 116 KiB |
BIN
docSite/assets/imgs/onsealos9.png
Normal file
|
After Width: | Height: | Size: 120 KiB |
BIN
docSite/assets/imgs/onsealosl11.PNG
Normal file
|
After Width: | Height: | Size: 104 KiB |
BIN
docSite/assets/imgs/sealos13.png
Normal file
|
After Width: | Height: | Size: 119 KiB |
BIN
docSite/assets/imgs/string.png
Normal file
|
After Width: | Height: | Size: 32 KiB |
BIN
docSite/assets/imgs/webSync1.jpg
Normal file
|
After Width: | Height: | Size: 268 KiB |
BIN
docSite/assets/imgs/webSync10.jpg
Normal file
|
After Width: | Height: | Size: 1.7 MiB |
BIN
docSite/assets/imgs/webSync2.jpg
Normal file
|
After Width: | Height: | Size: 123 KiB |
BIN
docSite/assets/imgs/webSync3.jpg
Normal file
|
After Width: | Height: | Size: 190 KiB |
BIN
docSite/assets/imgs/webSync4.jpg
Normal file
|
After Width: | Height: | Size: 139 KiB |
BIN
docSite/assets/imgs/webSync5.jpg
Normal file
|
After Width: | Height: | Size: 322 KiB |
BIN
docSite/assets/imgs/webSync6.jpg
Normal file
|
After Width: | Height: | Size: 836 KiB |
BIN
docSite/assets/imgs/webSync7.jpg
Normal file
|
After Width: | Height: | Size: 1.5 MiB |
BIN
docSite/assets/imgs/webSync8.jpg
Normal file
|
After Width: | Height: | Size: 1.6 MiB |
BIN
docSite/assets/imgs/webSync9.jpg
Normal file
|
After Width: | Height: | Size: 1.5 MiB |
@@ -11,6 +11,6 @@ FastGPT 是一个由用户和贡献者参与推动的开源项目,如果您对
|
||||
|
||||
+ 📱 扫码加入社区微信交流群👇
|
||||
|
||||
<img width="400px" src="/wechat-fastgpt.webp" />
|
||||
<img width="400px" src="https://oss.laf.run/htr4n1-images/fastgpt-qr-code.jpg" />
|
||||
|
||||
+ 🐞 请将任何 FastGPT 的 Bug、问题和需求提交到 [GitHub Issue](https://github.com/labring/fastgpt/issues/new/choose)。
|
||||
40
docSite/content/docs/course/data_search.md
Normal file
@@ -0,0 +1,40 @@
|
||||
---
|
||||
title: '知识库搜索参数'
|
||||
description: '知识库搜索原理'
|
||||
icon: 'language'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 106
|
||||
---
|
||||
|
||||
在知识库搜索的方式上,FastGPT提供了三种方式,分别为“语义检索”“增强语义检索”“混合检索”。
|
||||
|
||||

|
||||
|
||||
## 语义检索
|
||||
|
||||
语义检索就是向量检索,同时把用户的问题和知识库内容向量化,然后通过“语义相关度匹配”的方式从知识库中查找到匹配的知识点。
|
||||
|
||||
优点:
|
||||
- 相近语义理解
|
||||
- 跨多语言理解(例如输入中文问题匹配英文知识点)
|
||||
- 多模态理解(文本,图片,音视频等)
|
||||
|
||||
## 增强语义检索
|
||||
|
||||
在语义检索的基础上,增强“语义相关度匹配”并在搜索结束后进行 Rerank(重排)。
|
||||
|
||||
Rerank(重排):把检索结果按“与用户问题语义”相关性,从高到低排序,简单的说就是把最匹配用户问题的检索结果排在前面。
|
||||
|
||||
## 混合检索(推荐)
|
||||
|
||||
|
||||
在向量检索的同时进行全文检索,并把两项检索的结果混合一起重排,以便选中匹配用户问题的最佳结果。
|
||||
|
||||
全文检索:理解为全文关键词检索,通过关键词查询知识库,并返回包含关键词的文本片段。
|
||||
|
||||
优点:
|
||||
- 精确匹配(姓名,编号,ID等)
|
||||
- 少量关键词匹配(当用户问题字数过少时向量检索效果非常不好)
|
||||
|
||||
混合检索结合了向量检索和全文检索的优点,并且对查询结果进行了重排,大大提高了命中率,推荐使用。
|
||||
78
docSite/content/docs/course/websync.md
Normal file
@@ -0,0 +1,78 @@
|
||||
---
|
||||
title: 'Web 站点同步'
|
||||
description: 'FastGPT Web 站点同步功能介绍和使用方式'
|
||||
icon: 'language'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 105
|
||||
---
|
||||
|
||||

|
||||
|
||||
## 什么是 Web 站点同步
|
||||
|
||||
Web 站点同步利用爬虫的技术,可以通过一个入口网站,自动捕获`同域名`下的所有网站,目前最多支持`200`个子页面。出于合规与安全角度,FastGPT 仅支持`静态站点`的爬取,主要用于各个文档站点快速构建知识库。
|
||||
|
||||
Tips: 国内的媒体站点基本不可用,公众号、csdn、知乎等。可以通过终端发送`curl`请求检测是否为静态站点,例如:
|
||||
|
||||
```bash
|
||||
curl ai.fastgpt.in
|
||||
```
|
||||
|
||||
## 如何使用
|
||||
|
||||
### 1. 新建知识库,选择 Web 站点同步
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
### 2. 点击配置站点信息
|
||||
|
||||

|
||||
|
||||
### 3. 填写网址和选择器
|
||||
|
||||

|
||||
|
||||
好了, 现在点击开始同步,静等系统自动抓取网站信息即可。
|
||||
|
||||
|
||||
## 创建应用,绑定知识库
|
||||
|
||||

|
||||
|
||||
## 选择器如何使用
|
||||
|
||||
选择器是 HTML CSS JS 的产物,你可以通过选择器来定位到你需要抓取的具体内容,而不是整个站点。使用方式为:
|
||||
|
||||
### 首先打开浏览器调试面板(通常是 F12,或者【右键 - 检查】)
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
### 输入对应元素的选择器
|
||||
|
||||
[菜鸟教程 css 选择器](https://www.runoob.com/cssref/css-selectors.html),具体选择器的使用方式可以参考菜鸟教程。
|
||||
|
||||
上图中,我们选中了一个区域,对应的是`div`标签,它有 `data-prismjs-copy`, `data-prismjs-copy-success`, `data-prismjs-copy-error` 三个属性,这里我们用到一个就够。所以选择器是:
|
||||
**`div[data-prismjs-copy]`**
|
||||
|
||||
除了属性选择器,常见的还有类和ID选择器。例如:
|
||||
|
||||

|
||||
|
||||
上图 class 里的是类名(可能包含多个类名,都是空格隔开的,选择一个即可),选择器可以为:**`.docs-content`**
|
||||
|
||||
### 多选择器使用
|
||||
|
||||
在开头的演示中,我们对 FastGPT 文档是使用了多选择器的方式来选择,通过逗号隔开了两个选择器。
|
||||
|
||||

|
||||
|
||||
我们希望选中上图两个标签中的内容,此时就需要两组选择器。一组是:`.docs-content .mb-0.d-flex`,含义是 `docs-content` 类下同时包含 `mb-0`和`d-flex` 两个类的子元素;
|
||||
|
||||
另一组是`.docs-content div[data-prismjs-copy]`,含义是`docs-content` 类下包含`data-prismjs-copy`属性的`div`元素。
|
||||
|
||||
把两组选择器用逗号隔开即可:`.docs-content .mb-0.d-flex, .docs-content div[data-prismjs-copy]`
|
||||
@@ -14,9 +14,7 @@ weight: 708
|
||||
这个配置文件中包含了系统级参数、AI 对话的模型、function 模型等……
|
||||
|
||||
|
||||
## 完整配置参数
|
||||
|
||||
**使用时,请务必去除注释!**
|
||||
## 旧版本配置文件
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -92,7 +90,7 @@ weight: 708
|
||||
"maxContext": 16000,
|
||||
"maxResponse": 4000,
|
||||
"price": 0,
|
||||
"functionCall": true, // 是否支持function call, 不支持的模型需要设置为 false,会走提示词生成
|
||||
"toolChoice": true, // 是否支持openai的 toolChoice, 不支持的模型需要设置为 false,会走提示词生成
|
||||
"functionPrompt": ""
|
||||
},
|
||||
{
|
||||
@@ -101,7 +99,7 @@ weight: 708
|
||||
"maxContext": 8000,
|
||||
"maxResponse": 8000,
|
||||
"price": 0,
|
||||
"functionCall": true,
|
||||
"toolChoice": true,
|
||||
"functionPrompt": ""
|
||||
}
|
||||
],
|
||||
@@ -112,7 +110,7 @@ weight: 708
|
||||
"maxContext": 16000,
|
||||
"maxResponse": 4000,
|
||||
"price": 0,
|
||||
"functionCall": true,
|
||||
"toolChoice": true,
|
||||
"functionPrompt": ""
|
||||
}
|
||||
],
|
||||
@@ -134,6 +132,7 @@ weight: 708
|
||||
"maxToken": 3000
|
||||
}
|
||||
],
|
||||
"ReRankModels": [], // 重排模型,暂时填空数组
|
||||
"AudioSpeechModels": [
|
||||
{
|
||||
"model": "tts-1",
|
||||
@@ -158,3 +157,151 @@ weight: 708
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 4.6.6-alpha 版本完整配置参数
|
||||
|
||||
**使用时,请务必去除注释!**
|
||||
|
||||
以下配置适用于V4.6.6-alpha版本以后
|
||||
|
||||
```json
|
||||
{
|
||||
"systemEnv": {
|
||||
"pluginBaseUrl": "", // 商业版接口地址
|
||||
"vectorMaxProcess": 15, // 向量生成最大进程,结合数据库性能和 key 来设置
|
||||
"qaMaxProcess": 15, // QA 生成最大进程,结合数据库性能和 key 来设置
|
||||
"pgHNSWEfSearch": 100 // pg vector 索引参数,越大精度高但速度慢
|
||||
},
|
||||
"chatModels": [ // 对话模型
|
||||
{
|
||||
"model": "gpt-3.5-turbo-1106",
|
||||
"name": "GPT35-1106",
|
||||
"price": 0, // 除以 100000 后等于1个token的价格
|
||||
"maxContext": 16000, // 最大上下文长度
|
||||
"maxResponse": 4000, // 最大回复长度
|
||||
"quoteMaxToken": 2000, // 最大引用内容长度
|
||||
"maxTemperature": 1.2, // 最大温度值
|
||||
"censor": false, // 是否开启敏感词过滤(商业版)
|
||||
"vision": false, // 支持图片输入
|
||||
"defaultSystemChatPrompt": ""
|
||||
},
|
||||
{
|
||||
"model": "gpt-3.5-turbo-16k",
|
||||
"name": "GPT35-16k",
|
||||
"maxContext": 16000,
|
||||
"maxResponse": 16000,
|
||||
"price": 0,
|
||||
"quoteMaxToken": 8000,
|
||||
"maxTemperature": 1.2,
|
||||
"censor": false,
|
||||
"vision": false,
|
||||
"defaultSystemChatPrompt": ""
|
||||
},
|
||||
{
|
||||
"model": "gpt-4",
|
||||
"name": "GPT4-8k",
|
||||
"maxContext": 8000,
|
||||
"maxResponse": 8000,
|
||||
"price": 0,
|
||||
"quoteMaxToken": 4000,
|
||||
"maxTemperature": 1.2,
|
||||
"censor": false,
|
||||
"vision": false,
|
||||
"defaultSystemChatPrompt": ""
|
||||
},
|
||||
{
|
||||
"model": "gpt-4-vision-preview",
|
||||
"name": "GPT4-Vision",
|
||||
"maxContext": 128000,
|
||||
"maxResponse": 4000,
|
||||
"price": 0,
|
||||
"quoteMaxToken": 100000,
|
||||
"maxTemperature": 1.2,
|
||||
"censor": false,
|
||||
"vision": true,
|
||||
"defaultSystemChatPrompt": ""
|
||||
}
|
||||
],
|
||||
"qaModels": [ // QA 生成模型
|
||||
{
|
||||
"model": "gpt-3.5-turbo-16k",
|
||||
"name": "GPT35-16k",
|
||||
"maxContext": 16000,
|
||||
"maxResponse": 16000,
|
||||
"price": 0
|
||||
}
|
||||
],
|
||||
"cqModels": [ // 问题分类模型
|
||||
{
|
||||
"model": "gpt-3.5-turbo-1106",
|
||||
"name": "GPT35-1106",
|
||||
"maxContext": 16000,
|
||||
"maxResponse": 4000,
|
||||
"price": 0,
|
||||
"toolChoice": true, // 是否支持openai的 toolChoice, 不支持的模型需要设置为 false,会走提示词生成
|
||||
"functionPrompt": ""
|
||||
},
|
||||
{
|
||||
"model": "gpt-4",
|
||||
"name": "GPT4-8k",
|
||||
"maxContext": 8000,
|
||||
"maxResponse": 8000,
|
||||
"price": 0,
|
||||
"toolChoice": true,
|
||||
"functionPrompt": ""
|
||||
}
|
||||
],
|
||||
"extractModels": [ // 内容提取模型
|
||||
{
|
||||
"model": "gpt-3.5-turbo-1106",
|
||||
"name": "GPT35-1106",
|
||||
"maxContext": 16000,
|
||||
"maxResponse": 4000,
|
||||
"price": 0,
|
||||
"toolChoice": true,
|
||||
"functionPrompt": ""
|
||||
}
|
||||
],
|
||||
"qgModels": [ // 生成下一步指引
|
||||
{
|
||||
"model": "gpt-3.5-turbo-1106",
|
||||
"name": "GPT35-1106",
|
||||
"maxContext": 16000,
|
||||
"maxResponse": 4000,
|
||||
"price": 0
|
||||
}
|
||||
],
|
||||
"vectorModels": [ // 向量模型
|
||||
{
|
||||
"model": "text-embedding-ada-002",
|
||||
"name": "Embedding-2",
|
||||
"price": 0.2,
|
||||
"defaultToken": 700,
|
||||
"maxToken": 3000
|
||||
}
|
||||
],
|
||||
"reRankModels": [], // 重排模型,暂时填空数组
|
||||
"audioSpeechModels": [
|
||||
{
|
||||
"model": "tts-1",
|
||||
"name": "OpenAI TTS1",
|
||||
"price": 0,
|
||||
"baseUrl": "",
|
||||
"key": "",
|
||||
"voices": [
|
||||
{ "label": "Alloy", "value": "alloy", "bufferId": "openai-Alloy" },
|
||||
{ "label": "Echo", "value": "echo", "bufferId": "openai-Echo" },
|
||||
{ "label": "Fable", "value": "fable", "bufferId": "openai-Fable" },
|
||||
{ "label": "Onyx", "value": "onyx", "bufferId": "openai-Onyx" },
|
||||
{ "label": "Nova", "value": "nova", "bufferId": "openai-Nova" },
|
||||
{ "label": "Shimmer", "value": "shimmer", "bufferId": "openai-Shimmer" }
|
||||
]
|
||||
}
|
||||
],
|
||||
"whisperModel": {
|
||||
"model": "whisper-1",
|
||||
"name": "Whisper1",
|
||||
"price": 0
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -107,4 +107,4 @@ docker build -t dockername/fastgpt --build-arg name=app .
|
||||
|
||||
遇到困难了吗?有任何问题吗? 加入微信群与开发者和用户保持沟通。
|
||||
|
||||
<center><image width="400px" src="/wechat-fastgpt.webp" /></center>
|
||||
<center><img width="400px" src="https://oss.laf.run/htr4n1-images/fastgpt-qr-code.jpg" /></center>
|
||||
|
||||
@@ -1,16 +1,15 @@
|
||||
---
|
||||
title: '接入微软、ChatGLM、本地模型等'
|
||||
description: '通过接入 One API 来实现对各种大模型的支持'
|
||||
description: '部署和接入 OneAPI,实现对各种大模型的支持'
|
||||
icon: 'Api'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 708
|
||||
---
|
||||
|
||||
* 默认情况下,FastGPT 只配置了 GPT 的 3 个模型,如果你需要接入其他模型,需要进行一些额外配置。
|
||||
* 默认情况下,FastGPT 只配置了 GPT 的模型,如果你需要接入其他模型,需要进行一些额外配置。
|
||||
* [One API](https://github.com/songquanpeng/one-api) 是一个 OpenAI 接口管理 & 分发系统,可以通过标准的 OpenAI API 格式访问所有的大模型,开箱即用。
|
||||
|
||||
FastGPT 可以通过接入 One API 来实现对各种大模型的支持。部署方法也很简单。
|
||||
* FastGPT 可以通过接入 OneAPI 来实现对不同大模型的支持。OneAPI 的部署方法也很简单。
|
||||
|
||||
## MySQL 版本
|
||||
|
||||
@@ -51,7 +50,7 @@ BATCH_UPDATE_ENABLED=true
|
||||
BATCH_UPDATE_INTERVAL=60
|
||||
```
|
||||
|
||||
## 使用步骤
|
||||
## One API使用步骤
|
||||
|
||||
### 1. 登录 One API
|
||||
|
||||
|
||||
@@ -179,7 +179,7 @@ curl --location --request POST 'https://fastgpt.run/api/core/dataset/searchTest'
|
||||
{{< tab tabName="响应示例" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
返回 top limit 结果
|
||||
返回 top k 结果, limit 为预估条数,会按每条数据 800 tokens 的长度进行预估,20条也就是返回 16000 tokens 长度的数据,最多测试 30000 tokens 的数据。
|
||||
|
||||
```bash
|
||||
{
|
||||
|
||||
@@ -1,94 +1,181 @@
|
||||
---
|
||||
title: '分享链接鉴权'
|
||||
description: 'FastGPT 分享链接鉴权'
|
||||
title: '分享链接身份鉴权'
|
||||
description: 'FastGPT 分享链接身份鉴权'
|
||||
icon: 'share'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 860
|
||||
---
|
||||
|
||||
## 介绍
|
||||
|
||||
在 FastGPT V4.6.4 中,我们修改了分享链接的数据读取方式,为每个用户生成一个 localId,用于标识用户,从云端拉取对话记录。但是这种方式仅能保障用户在同一设备同一浏览器中使用,如果切换设备或者清空浏览器缓存则会丢失这些记录。这种方式存在一定的风险,因此我们仅允许用户拉取近`30天`的`20条`记录。
|
||||
|
||||
分享链接身份鉴权设计的目的在于,将 FastGPT 的对话框快速、安全的接入到你现有的系统中,仅需 2 个接口即可实现。
|
||||
|
||||
## 使用说明
|
||||
|
||||
分享链接鉴权设计的目的在于,将 FastGPT 的对话框安全的接入你现有的系统中。
|
||||
免登录链接配置中,你可以选择填写`身份验证`栏。这是一个`POST`请求的根地址。在填写该地址后,分享链接的初始化、开始对话以及对话结束都会向该地址的特定接口发送一条请求。下面以`host`来表示`身份验证根地址`。服务器接口仅需返回是否校验成功即可,不需要返回其他数据,格式如下:
|
||||
|
||||
免登录链接配置中,增加了`凭证校验服务器`后,使用分享链接时会向服务器发起请求,校验链接是否可用,并在每次对话结束后,向服务器发送对话结果。下面以`host`来表示`凭证校验服务器`。服务器接口仅需返回是否校验成功即可,不需要返回其他数据,格式如下:
|
||||
### 接口统一响应格式
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"message": "错误提示",
|
||||
"msg": "同message, 错误提示"
|
||||
"msg": "同message, 错误提示",
|
||||
"data": {
|
||||
"uid": "用户唯一凭证"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`FastGPT` 将会判断`success`是否为`true`,以决定是否允许用户继续操作。`message`与`msg`是等同的,你可以选择返回其中一个,当`success`不为`true`时,将会提示这个错误。
|
||||
|
||||
`uid`是用户的唯一凭证,将会用于拉取对话记录以及保存对话记录。可参考下方实践案例。
|
||||
|
||||
### 触发流程
|
||||
|
||||

|
||||
|
||||
## 配置校验地址和校验token
|
||||
|
||||
### 1. 配置校验地址的`BaseURL`、
|
||||
## 配置教程
|
||||
### 1. 配置身份校验地址
|
||||
|
||||

|
||||
|
||||
配置校验地址后,在每次分享链接使用时,都会向对应的地址发起校验和上报请求。
|
||||
|
||||
{{% alert icon="🤖" %}}
|
||||
这里仅需配置根地址,无需具体到完整请求路径。
|
||||
{{% /alert %}}
|
||||
|
||||
### 2. 分享链接中增加额外 query
|
||||
|
||||
在分享链接的地址中,增加一个额外的参数: authToken。例如:
|
||||
|
||||
原始的链接:https://fastgpt.run/chat/share?shareId=648aaf5ae121349a16d62192
|
||||
完整链接: https://fastgpt.run/chat/share?shareId=648aaf5ae121349a16d62192&authToken=userid12345
|
||||
原始的链接:`https://fastgpt.run/chat/share?shareId=648aaf5ae121349a16d62192`
|
||||
|
||||
这个`token`通常是你系统生成的,在发出校验请求时,FastGPT 会在`body`中携带 token={{authToken}} 的参数。
|
||||
完整链接: `https://fastgpt.run/chat/share?shareId=648aaf5ae121349a16d62192&authToken=userid12345`
|
||||
|
||||
## 聊天初始化校验
|
||||
这个`authToken`通常是你系统生成的用户唯一凭证(Token之类的)。FastGPT 会在鉴权接口的`body`中携带 token={{authToken}} 的参数。
|
||||
|
||||
**FastGPT 发出的请求**
|
||||
### 3. 编写聊天初始化校验接口
|
||||
|
||||
{{< tabs tabTotal="3" >}}
|
||||
{{< tab tabName="请求示例" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```bash
|
||||
curl --location --request POST '{{host}}/shareAuth/init' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"token": "sintdolore"
|
||||
"token": "{{authToken}}"
|
||||
}'
|
||||
```
|
||||
|
||||
**响应示例**
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
|
||||
{{< tab tabName="鉴权成功" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"data": {
|
||||
"uid": "用户唯一凭证"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
系统会拉取该分享链接下,uid 为接口返回的 `uid`(例如 username123)的对话记录。
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
|
||||
{{< tab tabName="鉴权失败" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"message": "分享链接无效",
|
||||
"message": "身份错误"
|
||||
}
|
||||
```
|
||||
|
||||
## 对话前校验
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
{{< /tabs >}}
|
||||
|
||||
**FastGPT 发出的请求**
|
||||
|
||||
|
||||
### 4. 编写对话前校验接口
|
||||
|
||||
{{< tabs tabTotal="3" >}}
|
||||
{{< tab tabName="请求示例" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```bash
|
||||
curl --location --request POST '{{host}}/shareAuth/start' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"token": "sintdolore",
|
||||
"token": "{{authToken}}",
|
||||
"question": "用户问题"
|
||||
}'
|
||||
```
|
||||
|
||||
**响应示例**
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
|
||||
{{< tab tabName="鉴权成功" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true
|
||||
"success": true,
|
||||
"data": {
|
||||
"uid": "用户唯一凭证"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 对话结果上报
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
|
||||
{{< tab tabName="鉴权失败" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"message": "身份验证失败"
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"message": "存在违规词"
|
||||
}
|
||||
```
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
{{< /tabs >}}
|
||||
|
||||
### 5. 编写对话结果上报接口(可选)
|
||||
|
||||
该接口无规定返回值。
|
||||
|
||||
响应值与[chat 接口格式相同](/docs/development/openapi/chat/#响应),仅多了一个`token`。
|
||||
|
||||
可以重点关注`responseData`里的`price`值,`price`与实际价格的倍率为`100000`,即 100000=1元。
|
||||
|
||||
```bash
|
||||
curl --location --request POST '{{host}}/shareAuth/finish' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"token": "sint dolore",
|
||||
"token": "{{authToken}}",
|
||||
"responseData": [
|
||||
{
|
||||
"moduleName": "KB Search",
|
||||
@@ -156,18 +243,18 @@ curl --location --request POST '{{host}}/shareAuth/finish' \
|
||||
}'
|
||||
```
|
||||
|
||||
响应值与 chat 接口相同,增加了一个 token。可以重点关注`responseData`里的值,price 与实际价格的倍率为`100000`。
|
||||
|
||||
**此接口无需响应值**
|
||||
|
||||
## 使用示例
|
||||
## 实践案例
|
||||
|
||||
我们以[Laf作为服务器为例](https://laf.dev/),展示这 3 个接口的使用方式。
|
||||
我们以[Laf作为服务器为例](https://laf.dev/),简单展示这 3 个接口的使用方式。
|
||||
|
||||
### 1. 创建3个Laf接口
|
||||
|
||||

|
||||
|
||||
|
||||
|
||||
{{< tabs tabTotal="3" >}}
|
||||
{{< tab tabName="/shareAuth/init" >}}
|
||||
{{< markdownify >}}
|
||||
@@ -179,13 +266,15 @@ import cloud from '@lafjs/cloud'
|
||||
|
||||
export default async function (ctx: FunctionContext) {
|
||||
const { token } = ctx.body
|
||||
|
||||
|
||||
// 此处省略 token 解码过程
|
||||
if (token === 'fastgpt') {
|
||||
return { success: true }
|
||||
return { success: true, data: { uid: "user1" } }
|
||||
}
|
||||
|
||||
return { success: false,message: "身份错误" }
|
||||
return { success: false,message:"身份错误" }
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
{{< /markdownify >}}
|
||||
@@ -201,8 +290,8 @@ import cloud from '@lafjs/cloud'
|
||||
|
||||
export default async function (ctx: FunctionContext) {
|
||||
const { token, question } = ctx.body
|
||||
console.log(token, question, 'start')
|
||||
|
||||
// 此处省略 token 解码过程
|
||||
if (token !== 'fastgpt') {
|
||||
return { success: false, message: "身份错误" }
|
||||
|
||||
@@ -212,8 +301,9 @@ export default async function (ctx: FunctionContext) {
|
||||
return { success: false, message: "内容不合规" }
|
||||
}
|
||||
|
||||
return { success: true }
|
||||
return { success: true, data: { uid: "user1" } }
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
{{< /markdownify >}}
|
||||
@@ -229,7 +319,12 @@ import cloud from '@lafjs/cloud'
|
||||
|
||||
export default async function (ctx: FunctionContext) {
|
||||
const { token, responseData } = ctx.body
|
||||
console.log(token,responseData,'=====')
|
||||
|
||||
const total = responseData.reduce((sum,item) => sum + item.price,0)
|
||||
const amount = total / 100000
|
||||
|
||||
// 省略数据库操作
|
||||
|
||||
return { }
|
||||
}
|
||||
```
|
||||
@@ -241,17 +336,24 @@ export default async function (ctx: FunctionContext) {
|
||||
|
||||
### 2. 配置校验地址
|
||||
|
||||
我们随便复制3个地址中一个接口:https://d8dns0.laf.dev/shareAuth/finish , 去除 /shareAuth/finish 后填入 FastGPT 中: https://d8dns0.laf.dev
|
||||
我们随便复制3个地址中一个接口: `https://d8dns0.laf.dev/shareAuth/finish`, 去除`/shareAuth/finish`后填入`身份校验`:`https://d8dns0.laf.dev`
|
||||
|
||||

|
||||
|
||||
### 3. 修改分享链接参数
|
||||
|
||||
源分享链接:[https://fastgpt.run/chat/share?shareId=64be36376a438af0311e599c](https://fastgpt.run/chat/share?shareId=64be36376a438af0311e599c)
|
||||
源分享链接:`https://fastgpt.run/chat/share?shareId=64be36376a438af0311e599c`
|
||||
|
||||
修改后:[https://fastgpt.run/chat/share?shareId=64be36376a438af0311e599c&authToken=fastgpt](https://fastgpt.run/chat/share?shareId=64be36376a438af0311e599c&authToken=fastgpt)
|
||||
修改后:`https://fastgpt.run/chat/share?shareId=64be36376a438af0311e599c&authToken=fastgpt`
|
||||
|
||||
### 4. 测试效果
|
||||
|
||||
1. 打开源链接或者`authToken`不等于 `fastgpt`的链接会提示身份错误。
|
||||
2. 发送内容中包含你字,会提示内容不合规。
|
||||
1. 打开源链接或者`authToken`不等于`fastgpt`的链接会提示身份错误。
|
||||
2. 发送内容中包含`你`字,会提示内容不合规。
|
||||
|
||||
|
||||
## 使用场景
|
||||
|
||||
这个鉴权方式通常是帮助你直接嵌入`分享链接`到你的应用中,在你的应用打开分享链接前,应做`authToken`的拼接后再打开。
|
||||
|
||||
除了对接已有系统的用户外,你还可以对接`余额`功能,通过`结果上报`接口扣除用户余额,通过`对话前校验`接口检查用户的余额。
|
||||
@@ -41,4 +41,154 @@ Sealos 的服务器在国外,不需要额外处理网络问题,无需服务
|
||||
|
||||
## 部署架构图
|
||||
|
||||

|
||||

|
||||
|
||||
## Sealos 使用
|
||||
|
||||
### 简介
|
||||
|
||||
FastGPT 商业版共包含了3个应用(fastgpt, fastgpt-plus, fastgpt-admin)和2个数据库,使用多 Api Key 时候需要安装 OneAPI(一个应用和一个数据库),总计4个应用和3个数据库。
|
||||
|
||||

|
||||
|
||||
点击右侧的详情,可以查看对应应用的详细信息。
|
||||
|
||||
### 如何更新/升级 FastGPT
|
||||
请先查看[升级脚本文档](https://doc.fastgpt.in/docs/development/upgrading/),确认需要升级到哪个版本。注意:不要跨版本升级!
|
||||
|
||||
例如,目前是4.5 版本,要升级到4.5.1,就先把镜像版本改成v4.5.1,执行一下升级脚本,等待完成后再继续升级。如果目标版本不需要执行初始化,则可以跳过。
|
||||
|
||||
升级步骤:
|
||||
1. 打开sealos的应用管理
|
||||
2. 有3个应用 fastgpt , fastgpt-plus 和 fastgpt-admin
|
||||
3. 点击对应应用右边3个点,变更。或者点详情后右上角的变更。
|
||||
4. 修改镜像名栏
|
||||

|
||||
|
||||
5. 点击变更/重启,会自动拉取最新镜像进行更新
|
||||
6. 执行对应版本的初始化脚本
|
||||
|
||||
### 如何获取 FastGPT 访问链接
|
||||
|
||||
打开对应的应用,点击外网访问地址。
|
||||
|
||||

|
||||
|
||||
### 配置自定义域名
|
||||
|
||||
点击对应应用的变更->点击自定义域名->填写域名-> 操作域名 Cname -> 确认 -> 确认变更。
|
||||
|
||||

|
||||
|
||||
### 如何修改配置文件
|
||||
|
||||
打开 Sealos 的应用管理 -> 找到对应的应用 -> 变更 -> 往下拉到高级配置,里面有个配置文件 -> 新增或点击对应的配置文件可以进行编辑 -> 点击右上角确认变更。
|
||||
|
||||

|
||||
|
||||
[配置文件参考](https://doc.fastgpt.in/docs/development/configuration/)
|
||||
|
||||
FeConfig 参考下面(目前未做可视化)
|
||||
```
|
||||
"FeConfig": {
|
||||
"show_emptyChat": false, // 是否展示聊天时空白的内容
|
||||
"show_register": true, // 展示注册按键
|
||||
"show_appStore": false, // 应用市场(暂时不可用)
|
||||
"show_contact": false, // 联系方式(目前不可配置,直接false)
|
||||
"show_git": false, // 展示 github
|
||||
"show_doc": false, // 展示文档
|
||||
"show_pay": true, // 展示支付
|
||||
"show_openai_account": false, // 用户可自定义 openai key
|
||||
"show_promotion": false, // 邀请好友机制
|
||||
"docUrl": "https://doc.fastgpt.in", // 文档基本地址
|
||||
"systemTitle": "FastGPT", // 系统的 title
|
||||
"googleClientVerKey": "", // 谷歌 v3 校验前端凭证
|
||||
"isPlus": true, // 直接设置 true
|
||||
"oauth": { // oauth登录
|
||||
"github": "",
|
||||
"google": ""
|
||||
},
|
||||
"limit": {
|
||||
"exportLimitMinutes": 0 // 导出间隔限制
|
||||
},
|
||||
"scripts": [
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### 修改站点名称以及 favicon
|
||||
修改应用的环境变量,增加
|
||||
|
||||
```
|
||||
SYSTEM_NAME=FastGPT
|
||||
SYSTEM_FAVICON=/favicon.ico
|
||||
HOME_URL=/app/list
|
||||
```
|
||||
|
||||
SYSTEM_FAVICON 可以是一个网络地址
|
||||
|
||||

|
||||
|
||||
### 挂载logo
|
||||
目前暂时无法替换浏览器上的 logo。仅支持 svg,待后续可视化做了后可以全部替换。
|
||||
新增一个挂载文件,文件名为:/app/projects/app/public/icon/logo.svg ,值为 svg 对应的值。
|
||||
|
||||

|
||||

|
||||
|
||||
### 管理后台
|
||||
|
||||

|
||||
|
||||
|
||||
### 商业版镜像配置文件
|
||||
|
||||
```
|
||||
{
|
||||
"license": "",
|
||||
"system": {
|
||||
"title": "" // 系统名称
|
||||
},
|
||||
"censor": {
|
||||
"BAIDU_TEXT_CENSOR_CLIENTID": "", // 百度文本安全校验
|
||||
"BAIDU_TEXT_CENSOR_CLIENTSECRET": "" // 百度文本安全校验
|
||||
},
|
||||
"auth": {
|
||||
"googleServiceVerKey": "", // 谷歌 v3 校验
|
||||
"github": { // github oauth
|
||||
"clientId": "",
|
||||
"secret": ""
|
||||
},
|
||||
"google": { // google oauth
|
||||
"clientId": "",
|
||||
"secret": ""
|
||||
},
|
||||
"email": { // 注册邮箱配置
|
||||
"service": "qq",
|
||||
"user": "",
|
||||
"pass": ""
|
||||
},
|
||||
"phone": { // 阿里短信配置
|
||||
"SNED_PHONE_ACCESSKEYID": "",
|
||||
"SNED_PHONE_ACCESSSECRET": "",
|
||||
"SNED_PHONE_SIGNNAME": "",
|
||||
"SNED_PHONE_TEMPLATE": ""
|
||||
}
|
||||
},
|
||||
"pay": { // 微信支付配置
|
||||
"wx": {
|
||||
"WX_APPID": "",
|
||||
"WX_MCHID": "",
|
||||
"WX_V3_CODE": "",
|
||||
"WX_NOTIFY_URL": "",
|
||||
"WX_SERIAL_NO": "",
|
||||
"WX_PRIVATE_KEY": ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
### One API 使用
|
||||
|
||||
[参考 OneAPI 使用步骤](/docs/development/one-api/)
|
||||
43
docSite/content/docs/development/upgrading/464.md
Normal file
@@ -0,0 +1,43 @@
|
||||
---
|
||||
title: 'V4.6.4(需要初始化)'
|
||||
description: 'FastGPT V4.6.4'
|
||||
icon: 'upgrade'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 832
|
||||
---
|
||||
|
||||
## 1. 执行初始化 API
|
||||
|
||||
发起 1 个 HTTP 请求 ({{rootkey}} 替换成环境变量里的 `rootkey`,{{host}} 替换成自己域名)
|
||||
|
||||
1. https://xxxxx/api/admin/initv464
|
||||
|
||||
```bash
|
||||
curl --location --request POST 'https://{{host}}/api/admin/initv464' \
|
||||
--header 'rootkey: {{rootkey}}' \
|
||||
--header 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
初始化说明:
|
||||
1. 初始化 PG 的createTime字段
|
||||
2. 初始化 Mongo 中 chat 的 feedback 字段
|
||||
|
||||
|
||||
## V4.6.4 功能介绍
|
||||
|
||||
1. 重写 - 分享链接身份逻辑,采用 localID 记录用户的ID。
|
||||
2. 商业版新增 - 分享链接 SSO 方案,通过`身份鉴权`地址,仅需`3个接口`即可完全接入已有用户系统。具体参考[分享链接身份鉴权](/docs/development/openapi/share/)
|
||||
3. 新增 - 分享链接更多嵌入方式提示,更多DIY方式。
|
||||
4. 优化 - 历史记录模块。弃用旧的历史记录模块,直接在对应地方填写数值即可。
|
||||
5. 调整 - 知识库搜索模块 topk 逻辑,采用 MaxToken 计算,兼容不同长度的文本块
|
||||
6. 调整鉴权顺序,提高 apikey 的优先级,避免cookie抢占 apikey 的鉴权。
|
||||
7. 链接读取支持多选择器。参考[Web 站点同步用法](/docs/course/websync)
|
||||
8. 修复 - 分享链接图片上传鉴权问题
|
||||
9. 修复 - Mongo 连接池未释放问题。
|
||||
10. 修复 - Dataset Intro 无法更新
|
||||
11. 修复 - md 代码块问题
|
||||
12. 修复 - root 权限问题
|
||||
13. 优化 docker file
|
||||
|
||||
|
||||
33
docSite/content/docs/development/upgrading/465.md
Normal file
@@ -0,0 +1,33 @@
|
||||
---
|
||||
title: 'V4.6.5(需要改配置文件)'
|
||||
description: 'FastGPT V4.6.5'
|
||||
icon: 'upgrade'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 831
|
||||
---
|
||||
|
||||
## 配置文件变更
|
||||
|
||||
由于 openai 已开始弃用 function call,改为 toolChoice。FastGPT 同步修改了对应的配置和调用方式,需要对配置文件做一些修改:
|
||||
|
||||
[点击查看最新的配置文件](/docs/development/configuration/)
|
||||
|
||||
1. 主要是修改模型的`functionCall`字段,改成`toolChoice`即可。设置为`true`的模型,会默认走 openai 的 tools 模式;未设置或设置为`false`的,会走提示词生成模式。
|
||||
|
||||
问题补全模型与内容提取模型使用同一组配置。
|
||||
|
||||
2. 增加 `"ReRankModels": []`
|
||||
|
||||
## V4.6.5 功能介绍
|
||||
|
||||
1. 新增 - [问题补全模块](/docs/workflow/modules/coreferenceresolution/)
|
||||
2. 新增 - [文本编辑模块](/docs/workflow/modules/text_editor/)
|
||||
3. 新增 - [判断器模块](/docs/workflow/modules/tfswitch/)
|
||||
4. 新增 - [自定义反馈模块](/docs/workflow/modules/custom_feedback/)
|
||||
5. 新增 - 【内容提取】模块支持选择模型,以及字段枚举
|
||||
6. 优化 - docx读取,兼容表格(表格转markdown)
|
||||
7. 优化 - 高级编排连接线交互
|
||||
8. 优化 - 由于 html2md 导致的 cpu密集计算,阻断线程问题
|
||||
9. 修复 - 高级编排提示词提取描述
|
||||
|
||||
22
docSite/content/docs/development/upgrading/466.md
Normal file
@@ -0,0 +1,22 @@
|
||||
---
|
||||
title: 'V4.6.6(需要改配置文件)'
|
||||
description: 'FastGPT V4.6.6'
|
||||
icon: 'upgrade'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 831
|
||||
---
|
||||
|
||||
**版本仍在开发中……**
|
||||
|
||||
## 配置文件变更
|
||||
|
||||
为了减少代码重复度,我们对配置文件做了一些修改:[点击查看最新的配置文件](/docs/development/configuration/)
|
||||
|
||||
|
||||
|
||||
## V4.6.6 即将更新
|
||||
|
||||
1. UI 优化,未来将逐步替换新的UI设计。
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ weight: -10
|
||||
FastGPT 是一个基于 LLM 大语言模型的知识库问答系统,提供开箱即用的数据处理、模型调用等能力。同时可以通过 Flow 可视化进行工作流编排,从而实现复杂的问答场景!
|
||||
|
||||
{{% alert icon="🤖 " context="success" %}}
|
||||
FastGPT 在线体验:[https://fastgpt.run](https://fastgpt.run)
|
||||
FastGPT 在线使用:[https://ai.fastgpt.in](https://ai.fastgpt.in)
|
||||
{{% /alert %}}
|
||||
|
||||
| | |
|
||||
|
||||
@@ -43,7 +43,6 @@ weight: 506
|
||||
AIBOTK_KEY=微秘书 APIKEY
|
||||
AIBOTK_SECRET=微秘书 APISECRET
|
||||
WORK_PRO_TOKEN=你申请的企微 token (企业微信需要填写,私人微信不需要)
|
||||
WECHATY_PUPPET_SERVICE_AUTHORITY=token-service-discovery-test.juzibot.com(企业微信需要填写,私人微信不需要)
|
||||
```
|
||||
|
||||
这里最后两个变量只有部署企业微信才需要,私人微信只需要填写前两个即可。
|
||||
@@ -56,7 +55,7 @@ WECHATY_PUPPET_SERVICE_AUTHORITY=token-service-discovery-test.juzibot.com(企
|
||||
|
||||

|
||||
|
||||
`WORK_PRO_TOKEN` [点击这里](https://tss.juzibot.com?aff=aibotk)申请 token 然后填入即可。
|
||||
`WORK_PRO_TOKEN` [点击这里](https://tss.rpachat.com/?aff=aibotk)申请 token 然后填入即可。
|
||||
|
||||
`WECHATY_PUPPET_SERVICE_AUTHORITY`的值复制过去就可以。
|
||||
|
||||
|
||||
@@ -1,512 +0,0 @@
|
||||
---
|
||||
title: '优化知识库搜索词'
|
||||
description: '利用 GPT 优化和完善知识库搜索词,实现上下文关联搜索'
|
||||
icon: 'search'
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 404
|
||||
---
|
||||
|
||||

|
||||
|
||||
| 优化前 | 优化后 |
|
||||
| --------------------- | --------------------- |
|
||||
|  |  |
|
||||
|
||||
如上图,优化后的搜索可以针对【自动数据预处理】进行搜索,从而找到其相关的内容,一定程度上弥补了向量搜索的上下文缺失问题。
|
||||
|
||||
## 模块编排
|
||||
|
||||
复制下面配置,点击「高级编排」右上角的导入按键,导入该配置。
|
||||
|
||||
{{% details title="编排配置" closed="true" %}}
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"moduleId": "userChatInput",
|
||||
"name": "用户问题(对话入口)",
|
||||
"flowType": "questionInput",
|
||||
"position": {
|
||||
"x": 585.750318069507,
|
||||
"y": 1597.4127130315183
|
||||
},
|
||||
"inputs": [
|
||||
{
|
||||
"key": "userChatInput",
|
||||
"type": "systemInput",
|
||||
"label": "用户问题",
|
||||
"connected": true
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"key": "userChatInput",
|
||||
"label": "用户问题",
|
||||
"type": "source",
|
||||
"valueType": "string",
|
||||
"targets": [
|
||||
{
|
||||
"moduleId": "ssdd86",
|
||||
"key": "content"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"moduleId": "history",
|
||||
"name": "聊天记录",
|
||||
"flowType": "historyNode",
|
||||
"position": {
|
||||
"x": 567.49877916803,
|
||||
"y": 1289.3453864378014
|
||||
},
|
||||
"inputs": [
|
||||
{
|
||||
"key": "maxContext",
|
||||
"type": "numberInput",
|
||||
"label": "最长记录数",
|
||||
"value": 6,
|
||||
"min": 0,
|
||||
"max": 50,
|
||||
"connected": true
|
||||
},
|
||||
{
|
||||
"key": "history",
|
||||
"type": "hidden",
|
||||
"label": "聊天记录",
|
||||
"connected": true
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"key": "history",
|
||||
"label": "聊天记录",
|
||||
"valueType": "chatHistory",
|
||||
"type": "source",
|
||||
"targets": [
|
||||
{
|
||||
"moduleId": "ssdd86",
|
||||
"key": "history"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"moduleId": "nkxlso",
|
||||
"name": "知识库搜索",
|
||||
"flowType": "datasetSearchNode",
|
||||
"showStatus": true,
|
||||
"position": {
|
||||
"x": 1542.6434554710224,
|
||||
"y": 1153.7853815737192
|
||||
},
|
||||
"inputs": [
|
||||
{
|
||||
"key": "kbList",
|
||||
"type": "custom",
|
||||
"label": "关联的知识库",
|
||||
"value": [],
|
||||
"list": [],
|
||||
"connected": true
|
||||
},
|
||||
{
|
||||
"key": "similarity",
|
||||
"type": "slider",
|
||||
"label": "相似度",
|
||||
"value": 0.8,
|
||||
"min": 0,
|
||||
"max": 1,
|
||||
"step": 0.01,
|
||||
"markList": [
|
||||
{
|
||||
"label": "100",
|
||||
"value": 100
|
||||
},
|
||||
{
|
||||
"label": "1",
|
||||
"value": 1
|
||||
}
|
||||
],
|
||||
"connected": true
|
||||
},
|
||||
{
|
||||
"key": "limit",
|
||||
"type": "slider",
|
||||
"label": "单次搜索上限",
|
||||
"description": "最多取 n 条记录作为本次问题引用",
|
||||
"value": 7,
|
||||
"min": 1,
|
||||
"max": 20,
|
||||
"step": 1,
|
||||
"markList": [
|
||||
{
|
||||
"label": "1",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"label": "20",
|
||||
"value": 20
|
||||
}
|
||||
],
|
||||
"connected": true
|
||||
},
|
||||
{
|
||||
"key": "switch",
|
||||
"type": "target",
|
||||
"label": "触发器",
|
||||
"valueType": "any",
|
||||
"connected": false
|
||||
},
|
||||
{
|
||||
"key": "userChatInput",
|
||||
"type": "target",
|
||||
"label": "用户问题",
|
||||
"required": true,
|
||||
"valueType": "string",
|
||||
"connected": true
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"key": "isEmpty",
|
||||
"label": "搜索结果为空",
|
||||
"type": "source",
|
||||
"valueType": "boolean",
|
||||
"targets": []
|
||||
},
|
||||
{
|
||||
"key": "unEmpty",
|
||||
"label": "搜索结果不为空",
|
||||
"type": "source",
|
||||
"valueType": "boolean",
|
||||
"targets": []
|
||||
},
|
||||
{
|
||||
"key": "quoteQA",
|
||||
"label": "引用内容",
|
||||
"description": "始终返回数组,如果希望搜索结果为空时执行额外操作,需要用到上面的两个输入以及目标模块的触发器",
|
||||
"type": "source",
|
||||
"valueType": "datasetQuote",
|
||||
"targets": [
|
||||
{
|
||||
"moduleId": "ol82hp",
|
||||
"key": "quoteQA"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"moduleId": "ol82hp",
|
||||
"name": "AI 对话",
|
||||
"flowType": "chatNode",
|
||||
"showStatus": true,
|
||||
"position": {
|
||||
"x": 2207.4577044902126,
|
||||
"y": 1079.6308003796544
|
||||
},
|
||||
"inputs": [
|
||||
{
|
||||
"key": "model",
|
||||
"type": "custom",
|
||||
"label": "对话模型",
|
||||
"value": "gpt-3.5-turbo",
|
||||
"list": [],
|
||||
"connected": true
|
||||
},
|
||||
{
|
||||
"key": "temperature",
|
||||
"type": "slider",
|
||||
"label": "温度",
|
||||
"value": 0,
|
||||
"min": 0,
|
||||
"max": 10,
|
||||
"step": 1,
|
||||
"markList": [
|
||||
{
|
||||
"label": "严谨",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"label": "发散",
|
||||
"value": 10
|
||||
}
|
||||
],
|
||||
"connected": true
|
||||
},
|
||||
{
|
||||
"key": "maxToken",
|
||||
"type": "custom",
|
||||
"label": "回复上限",
|
||||
"value": 2000,
|
||||
"min": 100,
|
||||
"max": 4000,
|
||||
"step": 50,
|
||||
"markList": [
|
||||
{
|
||||
"label": "100",
|
||||
"value": 100
|
||||
},
|
||||
{
|
||||
"label": "4000",
|
||||
"value": 4000
|
||||
}
|
||||
],
|
||||
"connected": true
|
||||
},
|
||||
{
|
||||
"key": "systemPrompt",
|
||||
"type": "textarea",
|
||||
"label": "系统提示词",
|
||||
"max": 300,
|
||||
"valueType": "string",
|
||||
"description": "模型固定的引导词,通过调整该内容,可以引导模型聊天方向。该内容会被固定在上下文的开头。可使用变量,例如 {{language}}",
|
||||
"placeholder": "模型固定的引导词,通过调整该内容,可以引导模型聊天方向。该内容会被固定在上下文的开头。可使用变量,例如 {{language}}",
|
||||
"value": "我会向你询问三引号引用中提及的内容,你仅使用提供的引用内容来回答我的问题,不要做额外的扩展补充。",
|
||||
"connected": true
|
||||
},
|
||||
{
|
||||
"key": "limitPrompt",
|
||||
"type": "textarea",
|
||||
"valueType": "string",
|
||||
"label": "限定词",
|
||||
"max": 500,
|
||||
"description": "限定模型对话范围,会被放置在本次提问前,拥有强引导和限定性。不建议内容太长,会影响上下文,可使用变量,例如 {{language}}。可在文档中找到对应的限定例子",
|
||||
"placeholder": "限定模型对话范围,会被放置在本次提问前,拥有强引导和限定性。不建议内容太长,会影响上下文,可使用变量,例如 {{language}}。可在文档中找到对应的限定例子",
|
||||
"value": "",
|
||||
"connected": true
|
||||
},
|
||||
{
|
||||
"key": "switch",
|
||||
"type": "target",
|
||||
"label": "触发器",
|
||||
"valueType": "any",
|
||||
"connected": false
|
||||
},
|
||||
{
|
||||
"key": "quoteQA",
|
||||
"type": "target",
|
||||
"label": "引用内容",
|
||||
"description": "对象数组格式,结构:\n [{q:'问题',a:'回答'}]",
|
||||
"valueType": "datasetQuote",
|
||||
"connected": true
|
||||
},
|
||||
{
|
||||
"key": "history",
|
||||
"type": "target",
|
||||
"label": "聊天记录",
|
||||
"valueType": "chatHistory",
|
||||
"connected": true
|
||||
},
|
||||
{
|
||||
"key": "userChatInput",
|
||||
"type": "target",
|
||||
"label": "用户问题",
|
||||
"required": true,
|
||||
"valueType": "string",
|
||||
"connected": true
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"key": "answerText",
|
||||
"label": "AI回复",
|
||||
"description": "将在 stream 回复完毕后触发",
|
||||
"valueType": "string",
|
||||
"type": "source",
|
||||
"targets": []
|
||||
},
|
||||
{
|
||||
"key": "finish",
|
||||
"label": "回复结束",
|
||||
"description": "AI 回复完成后触发",
|
||||
"valueType": "boolean",
|
||||
"type": "source",
|
||||
"targets": []
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"moduleId": "o62kns",
|
||||
"name": "用户问题(对话入口)",
|
||||
"flowType": "questionInput",
|
||||
"position": {
|
||||
"x": 1696.5940057372968,
|
||||
"y": 2270.5070479742435
|
||||
},
|
||||
"inputs": [
|
||||
{
|
||||
"key": "userChatInput",
|
||||
"type": "systemInput",
|
||||
"label": "用户问题",
|
||||
"connected": true
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"key": "userChatInput",
|
||||
"label": "用户问题",
|
||||
"type": "source",
|
||||
"valueType": "string",
|
||||
"targets": [
|
||||
{
|
||||
"moduleId": "ol82hp",
|
||||
"key": "userChatInput"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"moduleId": "he7013",
|
||||
"name": "聊天记录",
|
||||
"flowType": "historyNode",
|
||||
"position": {
|
||||
"x": 1636.793907221069,
|
||||
"y": 1952.7122387165764
|
||||
},
|
||||
"inputs": [
|
||||
{
|
||||
"key": "maxContext",
|
||||
"type": "numberInput",
|
||||
"label": "最长记录数",
|
||||
"value": 6,
|
||||
"min": 0,
|
||||
"max": 50,
|
||||
"connected": true
|
||||
},
|
||||
{
|
||||
"key": "history",
|
||||
"type": "hidden",
|
||||
"label": "聊天记录",
|
||||
"connected": true
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"key": "history",
|
||||
"label": "聊天记录",
|
||||
"valueType": "chatHistory",
|
||||
"type": "source",
|
||||
"targets": [
|
||||
{
|
||||
"moduleId": "ol82hp",
|
||||
"key": "history"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"moduleId": "ssdd86",
|
||||
"name": "文本内容提取",
|
||||
"flowType": "contentExtract",
|
||||
"showStatus": true,
|
||||
"position": {
|
||||
"x": 1031.822028231947,
|
||||
"y": 1231.9793566344022
|
||||
},
|
||||
"inputs": [
|
||||
{
|
||||
"key": "switch",
|
||||
"type": "target",
|
||||
"label": "触发器",
|
||||
"valueType": "any",
|
||||
"connected": false
|
||||
},
|
||||
{
|
||||
"key": "description",
|
||||
"type": "textarea",
|
||||
"valueType": "string",
|
||||
"value": "结合上下文,优化用户的问题,要求不能包含\"它\"、\"第几个\"等代名词,需将他们替换成具体的名词。",
|
||||
"label": "提取要求描述",
|
||||
"description": "写一段提取要求,告诉 AI 需要提取哪些内容",
|
||||
"required": true,
|
||||
"placeholder": "例如: \n1. 你是一个实验室预约助手。根据用户问题,提取出姓名、实验室号和预约时间",
|
||||
"connected": true
|
||||
},
|
||||
{
|
||||
"key": "history",
|
||||
"type": "target",
|
||||
"label": "聊天记录",
|
||||
"valueType": "chatHistory",
|
||||
"connected": true
|
||||
},
|
||||
{
|
||||
"key": "content",
|
||||
"type": "target",
|
||||
"label": "需要提取的文本",
|
||||
"required": true,
|
||||
"valueType": "string",
|
||||
"connected": true
|
||||
},
|
||||
{
|
||||
"key": "extractKeys",
|
||||
"type": "custom",
|
||||
"label": "目标字段",
|
||||
"description": "由 '描述' 和 'key' 组成一个目标字段,可提取多个目标字段",
|
||||
"value": [
|
||||
{
|
||||
"desc": "优化后的问题",
|
||||
"key": "q",
|
||||
"required": true
|
||||
}
|
||||
],
|
||||
"connected": true
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"key": "success",
|
||||
"label": "字段完全提取",
|
||||
"valueType": "boolean",
|
||||
"type": "source",
|
||||
"targets": []
|
||||
},
|
||||
{
|
||||
"key": "failed",
|
||||
"label": "提取字段缺失",
|
||||
"valueType": "boolean",
|
||||
"type": "source",
|
||||
"targets": []
|
||||
},
|
||||
{
|
||||
"key": "fields",
|
||||
"label": "完整提取结果",
|
||||
"description": "一个 JSON 字符串,例如:{\"name:\":\"YY\",\"Time\":\"2023/7/2 18:00\"}",
|
||||
"valueType": "string",
|
||||
"type": "source",
|
||||
"targets": []
|
||||
},
|
||||
{
|
||||
"key": "q",
|
||||
"label": "提取结果-优化后的问题",
|
||||
"description": "无法提取时不会返回",
|
||||
"valueType": "string",
|
||||
"type": "source",
|
||||
"targets": [
|
||||
{
|
||||
"moduleId": "nkxlso",
|
||||
"key": "userChatInput"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
{{% /details %}}
|
||||
|
||||
## 流程说明
|
||||
|
||||
1. 利用内容提取模块,将用户的问题进行优化。
|
||||
2. 将优化后的问题传递到知识库搜索模块进行搜索。
|
||||
3. 搜索内容传递到 AI 对话模块,进行回答。
|
||||
|
||||
## Tips
|
||||
|
||||
内容提取模块可以将自然语言提取成结构化数据,可以使用其进行一些神奇的操作。
|
||||
@@ -5,4 +5,6 @@ description: "介绍 FastGPT 的常用模块"
|
||||
icon: "apps"
|
||||
draft: false
|
||||
images: []
|
||||
---
|
||||
---
|
||||
|
||||
<!-- 350 ~ 400 -->
|
||||
@@ -0,0 +1,39 @@
|
||||
---
|
||||
title: "问题补全"
|
||||
description: "问题补全模块介绍和使用"
|
||||
icon: "input"
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 364
|
||||
---
|
||||
|
||||
## 特点
|
||||
|
||||
- 可重复添加
|
||||
- 有外部输入
|
||||
- 触发执行
|
||||
|
||||

|
||||
|
||||
## 背景
|
||||
|
||||
在 RAG 中,我们需要根据输入的问题去数据库里执行 embedding 搜索,查找相关的内容,从而查找到相似的内容(简称知识库搜索)。
|
||||
|
||||
在搜索的过程中,尤其是连续对话的搜索,我们通常会发现后续的问题难以搜索到合适的内容,其中一个原因是知识库搜索只会使用“当前”的问题去执行。看下面的例子:
|
||||
|
||||

|
||||
|
||||
用户在提问“第二点是什么”的时候,只会去知识库里查找“第二点是什么”,压根查不到内容。实际上需要查询的是“QA结构是什么”。因此我们需要引入一个【问题补全】模块,来对用户当前的问题进行补全,从而使得知识库搜索能够搜索到合适的内容。使用补全后效果如下:
|
||||
|
||||

|
||||
|
||||
|
||||
## 功能
|
||||
|
||||
调用 AI 去对用户当前的问题进行补全。目前主要是补全“指代”词,使得检索词更加的完善可靠,从而增强上下文连续对话的知识库搜索能力。
|
||||
|
||||
遇到最大的难题在于:模型对于【补全】的概念可能不清晰,且对于长上下文往往无法准确的知道应该如何补全。
|
||||
|
||||
## 示例
|
||||
|
||||
- [接入谷歌搜索](/docs/workflow/examples/google_search/)
|
||||
35
docSite/content/docs/workflow/modules/custom_feedback.md
Normal file
@@ -0,0 +1,35 @@
|
||||
---
|
||||
title: "自定义反馈"
|
||||
description: "自定义反馈模块介绍"
|
||||
icon: "feedback"
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 354
|
||||
---
|
||||
|
||||
该模块为临时模块,后续会针对该模块进行更全面的设计。
|
||||
|
||||
## 特点
|
||||
|
||||
- 可重复添加
|
||||
- 无外部输入
|
||||
- 自动执行
|
||||
|
||||
|
||||
| | |
|
||||
| --------------------- | --------------------- |
|
||||
|  |  |
|
||||
|  |  |
|
||||
|
||||
|
||||
## 介绍
|
||||
|
||||
自定义反馈模块,可以为你的对话增加一个反馈标记,从而方便在后台更好的分析对话的数据。
|
||||
|
||||
在调试模式下,不会记录反馈内容,而是直接提示: `自动反馈测试: 反馈内容`。
|
||||
|
||||
在对话模式(对话、分享窗口、带 chatId 的 API 调用)时,会将反馈内容记录到对话日志中。(会延迟60s记录)
|
||||
|
||||
## 作用
|
||||
|
||||
自定义反馈模块的功能类似于程序开发的`埋点`,便于你观测对话中的数据。
|
||||
@@ -1,19 +0,0 @@
|
||||
---
|
||||
title: "历史记录"
|
||||
description: "FastGPT 历史记录模块介绍"
|
||||
icon: "history"
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 354
|
||||
---
|
||||
|
||||
# 特点
|
||||
|
||||
- 可重复添加(防止复杂编排时线太乱,重复添加可以更美观)
|
||||
- 无外部输入
|
||||
- 流程入口
|
||||
- 自动执行
|
||||
|
||||
每次对话时,会从数据库取最多 n 条聊天记录作为上下文。注意,不是指本轮对话最多 n 条上下文,本轮对话还包括:提示词、限定词、引用内容和问题。
|
||||
|
||||

|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: "HTTP 模块"
|
||||
title: "新 HTTP 模块"
|
||||
description: "FastGPT HTTP 模块介绍"
|
||||
icon: "http"
|
||||
draft: false
|
||||
@@ -19,86 +19,233 @@ weight: 355
|
||||
|
||||
## 介绍
|
||||
|
||||
HTTP 模块会向对应的地址发送一个 POST 请求(Body 中携带 JSON 类型的参数,具体的参数可自定义),并接收一个 JSON 响应值,字段也是自定义。如上图中,我们定义了一个入参:「提取的字段」(定义的 key 为 appointment,类型为 string)和一个出参:「提取结果」(定义的 key 为 response,类型为 string)。
|
||||
HTTP 模块会向对应的地址发送一个 `POST/GET` 请求,携带部分`系统参数`及`自定义参数`,并接收一个 JSON 响应值,字段也是自定义。
|
||||
|
||||
那么,这个请求的命令为:
|
||||
- 你还可以通过 JSON 传入自定义的请求头。
|
||||
- POST 请求中,数据会被放置在 `body` 中。
|
||||
- GET 请求中,数据会被放置在 `query` 中。
|
||||
- 在出入参数中,你都可以通过 xxx.xxx 来代表嵌套的对象。
|
||||
|
||||
```bash
|
||||
curl --location --request POST 'https://xxxx.laf.dev/appointment-lab' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"appointment":"{\"name\":\"小明\",\"time\":\"2023/08/16 15:00\",\"labname\":\"子良A323\"}"
|
||||
}'
|
||||
```
|
||||
## 参数结构
|
||||
|
||||
响应为:
|
||||
### 系统参数说明
|
||||
|
||||
- appId: 应用的ID
|
||||
- chatId: 当前对话的ID,测试模式下不存在。
|
||||
- responseChatItemId: 当前对话中,响应的消息ID,测试模式下不存在。
|
||||
- variables: 当前对话的全局变量。
|
||||
- data: 自定义传递的参数。
|
||||
|
||||
### 嵌套对象使用
|
||||
|
||||
**入参**
|
||||
|
||||
假设我们设计了`3个`输入。
|
||||
|
||||
- user.name (string)
|
||||
- user.age (number)
|
||||
- type (string)
|
||||
|
||||
最终组成的对象为:
|
||||
|
||||
```json
|
||||
{
|
||||
"response": "您已经有一个预约记录了,每人仅能同时预约一个实验室:\n 姓名:小明\n 时间: 2023/08/15 15:00\n 实验室: 子良A323\n "
|
||||
"user": {
|
||||
"name": "",
|
||||
"age": ""
|
||||
},
|
||||
"type": ""
|
||||
}
|
||||
```
|
||||
|
||||
**出参**
|
||||
|
||||
假设接口的输出结构为:
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "测试",
|
||||
"data":{
|
||||
"name": "name",
|
||||
"age": 10
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
那么,自定义出参的`key`可以设置为:
|
||||
|
||||
- message (string)
|
||||
- data.name (string)
|
||||
- data.age (number)
|
||||
|
||||
|
||||
## POST 示例
|
||||
|
||||
**自定义入参**
|
||||
|
||||
- user.name (string)
|
||||
- user.age (number)
|
||||
- type (string)
|
||||
|
||||
**自定义出参**
|
||||
|
||||
- message (string)
|
||||
- data.name (string)
|
||||
- data.age (number)
|
||||
|
||||
那么,这个模块发出的请求则是:
|
||||
|
||||
{{< tabs tabTotal="2" >}}
|
||||
{{< tab tabName="POST 请求示例" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```bash
|
||||
curl --location --request POST 'http://xxxx.com' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"appId": "65782f7ffae5f7854ed4498b",
|
||||
"chatId": "xxxx",
|
||||
"responseChatItemId": "xxxx",
|
||||
"variables": {
|
||||
"cTime": "2023-12-18 13:45:46"
|
||||
},
|
||||
"data": {
|
||||
"user": {
|
||||
"name": "",
|
||||
"age": ""
|
||||
},
|
||||
"type": ""
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
|
||||
{{< tab tabName="POST响应" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "message",
|
||||
"data": {
|
||||
"name": "name",
|
||||
"age": 10
|
||||
}
|
||||
}
|
||||
```
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
{{< /tabs >}}
|
||||
|
||||
## GET 示例
|
||||
|
||||
GET 中,不推荐使用嵌套参数,否则会出现奇怪的问题。此外,GET 请求中,FastGPT 会将参数扁平化,不会将自定义参数单独抽到 data 中,同时全局变量也会扁平化,因此需要注意字段 key 是否冲突。
|
||||
|
||||
**自定义入参**
|
||||
|
||||
- name (string)
|
||||
- age (number)
|
||||
- type (string)
|
||||
|
||||
**自定义出参**
|
||||
|
||||
- message (string)
|
||||
- name (string)
|
||||
- age (number)
|
||||
|
||||
那么,这个模块发出的请求则是:
|
||||
|
||||
{{< tabs tabTotal="2" >}}
|
||||
{{< tab tabName="GET 请求示例" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```bash
|
||||
curl --location --request GET 'http://xxx.com/test?name&age&type&appId=65782f7ffae5f7854ed4498b&chatId=xxxx&responseChatItemId=xxxx&cTime=2023-12-18 13:45:46'
|
||||
```
|
||||
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
|
||||
{{< tab tabName="GET 响应" >}}
|
||||
{{< markdownify >}}
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "message",
|
||||
"data": {
|
||||
"name": "name",
|
||||
"age": 10
|
||||
}
|
||||
}
|
||||
```
|
||||
{{< /markdownify >}}
|
||||
{{< /tab >}}
|
||||
{{< /tabs >}}
|
||||
|
||||
|
||||
## laf 对接 HTTP 示例
|
||||
|
||||
{{% alert context="warning" %}}
|
||||
如果你不想额外部署服务,可以使用 [Laf](https://laf.dev/) 来快速开发上线接口,即写即发,无需部署。
|
||||
|
||||
下面是在 Laf 上编写的一个请求示例:
|
||||
{{% /alert %}}
|
||||
|
||||
下面是在 Laf 编写的 POST 请求示例:
|
||||
|
||||
```ts
|
||||
import cloud from '@lafjs/cloud';
|
||||
const db = cloud.database();
|
||||
import cloud from '@lafjs/cloud'
|
||||
const db = cloud.database()
|
||||
|
||||
type RequestType = {
|
||||
appId: string;
|
||||
data: {
|
||||
appointment: string;
|
||||
action: 'post' | 'delete' | 'put' | 'get'
|
||||
}
|
||||
}
|
||||
|
||||
export default async function (ctx: FunctionContext) {
|
||||
const { appointment } = ctx.body;
|
||||
const { name, time, labname } = JSON.parse(appointment);
|
||||
try {
|
||||
// 从 body 中获取参数
|
||||
const { appId, data: { appointment, action } } = ctx.body as RequestType
|
||||
|
||||
const parseBody = JSON.parse(appointment)
|
||||
if (action === 'get') {
|
||||
return await getRecord(parseBody)
|
||||
}
|
||||
if (action === 'post') {
|
||||
return await createRecord(parseBody)
|
||||
}
|
||||
if (action === 'put') {
|
||||
return await putRecord(parseBody)
|
||||
}
|
||||
if (action === 'delete') {
|
||||
return await removeRecord(parseBody)
|
||||
}
|
||||
|
||||
const missData = [];
|
||||
if (!name) missData.push('你的姓名');
|
||||
if (!time) missData.push('需要预约的时间');
|
||||
if (!labname) missData.push('实验室名称');
|
||||
|
||||
if (missData.length > 0) {
|
||||
return {
|
||||
response: `请提供: ${missData.join('、')}`
|
||||
};
|
||||
}
|
||||
|
||||
const { data: record } = await db
|
||||
.collection('LabAppointment')
|
||||
.where({
|
||||
name,
|
||||
status: 'unStart'
|
||||
})
|
||||
.getOne();
|
||||
|
||||
if (record) {
|
||||
response: "异常"
|
||||
}
|
||||
} catch (err) {
|
||||
return {
|
||||
response: `您已经有一个预约记录了,每人仅能同时预约一个实验室:
|
||||
姓名:${record.name}
|
||||
时间: ${record.time}
|
||||
实验室: ${record.labname}
|
||||
`
|
||||
};
|
||||
response: "异常"
|
||||
}
|
||||
}
|
||||
|
||||
await db.collection('LabAppointment').add({
|
||||
name,
|
||||
time,
|
||||
labname,
|
||||
status: 'unStart'
|
||||
});
|
||||
|
||||
return {
|
||||
response: `预约成功。
|
||||
姓名:${name}
|
||||
时间: ${time}
|
||||
实验室: ${labname}
|
||||
`
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
## 作用
|
||||
|
||||
基于 HTTP 模块可以无限扩展,比如操作数据库、执行联网搜索、发送邮箱等等。如果你有有趣的案例,欢迎提交 PR 到 [编排案例](/docs/workflow/examples)
|
||||
通过 HTTP 模块你可以无限扩展,比如:
|
||||
- 操作数据库
|
||||
- 调用外部数据源
|
||||
- 执行联网搜索
|
||||
- 发送邮件
|
||||
- ....
|
||||
|
||||
|
||||
## 相关示例
|
||||
|
||||
- [谷歌搜索](/docs/workflow/examples/google_search/)
|
||||
- [实验室预约(操作数据库)](/docs/workflow/examples/lab_appointment/)
|
||||
32
docSite/content/docs/workflow/modules/text_editor.md
Normal file
@@ -0,0 +1,32 @@
|
||||
---
|
||||
title: "文本加工"
|
||||
description: "FastGPT 文本加工模块介绍"
|
||||
icon: "input"
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 363
|
||||
---
|
||||
|
||||
## 特点
|
||||
|
||||
- 可重复添加
|
||||
- 有外部输入
|
||||
- 触发执行
|
||||
- 手动配置
|
||||
|
||||

|
||||
|
||||
|
||||
## 功能
|
||||
对输入文本进行固定加工处理,入参仅支持字符串和数字格式,入参以变量形式使用在文本编辑区域。
|
||||
|
||||
根据上方示例图的处理方式,对任何输入都会在前面拼接“我的问题是:”。
|
||||
|
||||
|
||||
## 作用
|
||||
|
||||
给任意模块输入自定义格式文本,或处理 AI 模块系统提示词。
|
||||
|
||||
## 示例
|
||||
|
||||
- [接入谷歌搜索](/docs/workflow/examples/google_search/)
|
||||
29
docSite/content/docs/workflow/modules/tfswitch.md
Normal file
@@ -0,0 +1,29 @@
|
||||
---
|
||||
title: "判断器"
|
||||
description: "FastGPT 判断器模块介绍"
|
||||
icon: "input"
|
||||
draft: false
|
||||
toc: true
|
||||
weight: 362
|
||||
---
|
||||
|
||||
## 特点
|
||||
|
||||
- 可重复添加
|
||||
- 有外部输入
|
||||
- 触发执行
|
||||
|
||||

|
||||
|
||||
## 功能
|
||||
|
||||
对任意输入内容进行 True False 输出,默认情况下,当传入的内容为 false、undefined、null、0、none 时,会输出 false。
|
||||
|
||||
也可以增加自定义规则来补充输出 false 的内容,每行代表一个匹配规则,支持正则表达式。
|
||||
|
||||
根据上方示例图的匹配规则,当我们输入`123` `hi` `你好` 和任意手机号码时(正则匹配)同样也会输出 False 。
|
||||
|
||||
## 作用
|
||||
|
||||
适用场景有:让大模型做判断后输出固定内容,根据大模型回复内容判断是否触发后续模块。
|
||||
|
||||
@@ -4,5 +4,5 @@ go 1.21
|
||||
|
||||
require (
|
||||
github.com/colinwilson/lotusdocs v0.1.0 // indirect
|
||||
github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20003 // indirect
|
||||
github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200 // indirect
|
||||
)
|
||||
|
||||
@@ -10,5 +10,8 @@ github.com/colinwilson/lotusdocs v0.1.0 h1:oTC8pAYQp9XDNaUwE4SEY+id3ByNELxIIFrkt
|
||||
github.com/colinwilson/lotusdocs v0.1.0/go.mod h1:9zu2REJDi+zdPRcR5/bRYSUR7gkNF4NQLvV38SEoCP8=
|
||||
github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20003 h1:pt/JGVD5YYRsVVijOHPZI6YKTUvbR4e0hgV9B0S6rbI=
|
||||
github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20003/go.mod h1:mvM05r93HiefwoaxQTaYiJxtJAhTebwQtU1Xh/J+Okk=
|
||||
github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200 h1:SmpwwN3DNzJWbV+IT8gaFu07ENUFpCvKou5BHYUKuVs=
|
||||
github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200/go.mod h1:kx8MBj9T7SFR8ZClWvKZPmmUxBaltkoXvnWlZZcSnYA=
|
||||
github.com/gohugoio/hugo-mod-jslibs-dist/popperjs/v2 v2.21100.20000/go.mod h1:mFberT6ZtcchrsDtfvJM7aAH2bDKLdOnruUHl0hlapI=
|
||||
github.com/twbs/bootstrap v5.3.0+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0=
|
||||
github.com/twbs/bootstrap v5.3.2+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0=
|
||||
|
||||
@@ -68,7 +68,7 @@ defaultContentLanguage = 'zh-cn'
|
||||
# twitter = "" # YOUR_TWITTER_ID
|
||||
# instagram = "colinwilson" # YOUR_INSTAGRAM_ID
|
||||
# rss = true # show rss icon with link
|
||||
wechat = "/wechat-fastgpt.webp"
|
||||
wechat = "https://oss.laf.run/htr4n1-images/fastgpt-qr-code.jpg"
|
||||
|
||||
[params.docs] # Parameters for the /docs 'template'
|
||||
title = "" # default html title for documentation pages/sections
|
||||
@@ -109,6 +109,11 @@ defaultContentLanguage = 'zh-cn'
|
||||
|
||||
listDescTrunc = 100 # Number of characters by which to truncate the list card description
|
||||
|
||||
# Link behaviour
|
||||
intLinkTooltip = true # Enable a tooltip for internal links that displays info about the destination? default false
|
||||
# extLinkNewTab = false # Open external links in a new Tab? default true
|
||||
# logoLinkURL = "" # Set a custom URL destination for the top header logo link.
|
||||
|
||||
[params.flexsearch] # Parameters for FlexSearch
|
||||
# enabled = true
|
||||
# tokenize = "full"
|
||||
|
||||
@@ -11,9 +11,9 @@
|
||||
{{ end -}}
|
||||
|
||||
{{ if .Page.Store.Get "hasMermaid" }}
|
||||
{{ $mermaid := resources.Get (printf "%s/%s" ($.Scratch.Get "pathName") "js/mermaid.js") }}
|
||||
{{ $mermaid := resources.Get (printf "%s/%s" ($.Scratch.Get "pathName") "js/mermaid.min.js") }}
|
||||
{{ if hugo.IsProduction }}
|
||||
{{ $mermaid = $mermaid | minify | fingerprint "sha384" }}
|
||||
{{ $mermaid = $mermaid | fingerprint "sha384" }}
|
||||
{{ end }}
|
||||
<script src="{{ $mermaid.RelPermalink }}" {{ if hugo.IsProduction }}integrity="{{ $mermaid.Data.Integrity }}"{{ end }}></script>
|
||||
<script>
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
<nav id="sidebar" class="sidebar-wrapper">
|
||||
<div class="sidebar-brand">
|
||||
<!-- change -->
|
||||
<a href='{{ relLangURL "" }}' aria-label="HomePage" alt="HomePage" style="text-transform: unset;">
|
||||
<a href='{{ with .Site.Params.docs.logoLinkURL }}{{ . }}{{ else }}{{ relLangURL "" }}{{ end }}' aria-label="HomePage" alt="HomePage" style="text-transform: unset;">
|
||||
{{ with resources.Get "images/logos/logo.svg" }}
|
||||
{{ .Content | safeHTML }}
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
<div id="top-header" class="top-header d-print-none">
|
||||
<div class="header-bar d-flex justify-content-between">
|
||||
<div class="d-flex align-items-center">
|
||||
<a href='{{ relLangURL "" }}' class="logo-icon me-3" aria-label="HomePage" alt="HomePage">
|
||||
<a href='{{ with .Site.Params.docs.logoLinkURL }}{{ . }}{{ else }}{{ relLangURL "" }}{{ end }}' class="logo-icon me-3" aria-label="HomePage" alt="HomePage">
|
||||
<div class="small">
|
||||
{{ with resources.Get "images/logos/mark.svg" }}
|
||||
{{ .Content | safeHTML }}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
# 非 host 版本, 不使用本机代理
|
||||
# (不懂 Docker 的,只需要关心 OPENAI_BASE_URL 和 CHAT_API_KEY 即可!)
|
||||
version: '3.3'
|
||||
services:
|
||||
pg:
|
||||
@@ -47,7 +48,7 @@ services:
|
||||
environment:
|
||||
# root 密码,用户名为: root
|
||||
- DEFAULT_ROOT_PSW=1234
|
||||
# 中转地址,如果是用官方号,不需要管
|
||||
# 中转地址,如果是用官方号,不需要管。务必加 /v1
|
||||
- OPENAI_BASE_URL=https://api.openai.com/v1
|
||||
- CHAT_API_KEY=sk-xxxx
|
||||
- DB_MAX_LINK=5 # database max link
|
||||
|
||||
13
package.json
@@ -5,10 +5,12 @@
|
||||
"scripts": {
|
||||
"prepare": "husky install",
|
||||
"format-code": "prettier --config \"./.prettierrc.js\" --write \"./**/src/**/*.{ts,tsx,scss}\"",
|
||||
"format-doc": "zhlint --dir ./docSite *.md --fix"
|
||||
"format-doc": "zhlint --dir ./docSite *.md --fix",
|
||||
"gen:theme-typings": "chakra-cli tokens projects/app/src/web/styles/theme.ts --out node_modules/.pnpm/node_modules/@chakra-ui/styled-system/dist/theming.types.d.ts",
|
||||
"postinstall": "sh ./scripts/postinstall.sh"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/multer": "^1.4.10",
|
||||
"@chakra-ui/cli": "^2.4.1",
|
||||
"husky": "^8.0.3",
|
||||
"i18next": "^22.5.1",
|
||||
"lint-staged": "^13.2.1",
|
||||
@@ -22,10 +24,7 @@
|
||||
"./**/**/*.md": "npm run format-doc"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"multer": "1.4.5-lts.1",
|
||||
"openai": "4.16.1"
|
||||
"node": ">=18.0.0",
|
||||
"pnpm": ">=8.6.0"
|
||||
}
|
||||
}
|
||||
|
||||
24
packages/global/common/error/code/common.ts
Normal file
@@ -0,0 +1,24 @@
|
||||
import { ErrType } from '../errorCode';
|
||||
|
||||
/* common: 507000 */
|
||||
const startCode = 507000;
|
||||
export enum CommonErrEnum {
|
||||
fileNotFound = 'fileNotFound'
|
||||
}
|
||||
const datasetErr = [
|
||||
{
|
||||
statusText: CommonErrEnum.fileNotFound,
|
||||
message: 'error.fileNotFound'
|
||||
}
|
||||
];
|
||||
export default datasetErr.reduce((acc, cur, index) => {
|
||||
return {
|
||||
...acc,
|
||||
[cur.statusText]: {
|
||||
code: startCode + index,
|
||||
statusText: cur.statusText,
|
||||
message: cur.message,
|
||||
data: null
|
||||
}
|
||||
};
|
||||
}, {} as ErrType<`${CommonErrEnum}`>);
|
||||
@@ -13,23 +13,23 @@ export enum DatasetErrEnum {
|
||||
const datasetErr = [
|
||||
{
|
||||
statusText: DatasetErrEnum.unAuthDataset,
|
||||
message: '无权操作该知识库'
|
||||
message: 'core.dataset.error.unAuthDataset'
|
||||
},
|
||||
{
|
||||
statusText: DatasetErrEnum.unAuthDatasetCollection,
|
||||
message: '无权操作该数据集'
|
||||
message: 'core.dataset.error.unAuthDatasetCollection'
|
||||
},
|
||||
{
|
||||
statusText: DatasetErrEnum.unAuthDatasetData,
|
||||
message: '无权操作该数据'
|
||||
message: 'core.dataset.error.unAuthDatasetData'
|
||||
},
|
||||
{
|
||||
statusText: DatasetErrEnum.unAuthDatasetFile,
|
||||
message: '无权操作该文件'
|
||||
message: 'core.dataset.error.unAuthDatasetFile'
|
||||
},
|
||||
{
|
||||
statusText: DatasetErrEnum.unCreateCollection,
|
||||
message: '无权创建数据集'
|
||||
message: 'core.dataset.error.unCreateCollection'
|
||||
},
|
||||
{
|
||||
statusText: DatasetErrEnum.unLinkCollection,
|
||||
|
||||
@@ -4,7 +4,9 @@ import { ErrType } from '../errorCode';
|
||||
export enum OutLinkErrEnum {
|
||||
unExist = 'unExist',
|
||||
unAuthLink = 'unAuthLink',
|
||||
linkUnInvalid = 'linkUnInvalid'
|
||||
linkUnInvalid = 'linkUnInvalid',
|
||||
|
||||
unAuthUser = 'unAuthUser'
|
||||
}
|
||||
const errList = [
|
||||
{
|
||||
@@ -19,6 +21,10 @@ const errList = [
|
||||
code: 501,
|
||||
statusText: OutLinkErrEnum.linkUnInvalid,
|
||||
message: '分享链接无效'
|
||||
},
|
||||
{
|
||||
statusText: OutLinkErrEnum.unAuthUser,
|
||||
message: '身份校验失败'
|
||||
}
|
||||
];
|
||||
export default errList.reduce((acc, cur, index) => {
|
||||
|
||||
@@ -6,6 +6,7 @@ import pluginErr from './code/plugin';
|
||||
import outLinkErr from './code/outLink';
|
||||
import teamErr from './code/team';
|
||||
import userErr from './code/user';
|
||||
import commonErr from './code/common';
|
||||
|
||||
export const ERROR_CODE: { [key: number]: string } = {
|
||||
400: '请求失败',
|
||||
@@ -96,5 +97,6 @@ export const ERROR_RESPONSE: Record<
|
||||
...outLinkErr,
|
||||
...teamErr,
|
||||
...userErr,
|
||||
...pluginErr
|
||||
...pluginErr,
|
||||
...commonErr
|
||||
};
|
||||
|
||||
7
packages/global/common/file/api.d.ts
vendored
@@ -1,3 +1,10 @@
|
||||
export type UploadImgProps = {
|
||||
base64Img: string;
|
||||
expiredTime?: Date;
|
||||
metadata?: Record<string, any>;
|
||||
shareId?: string;
|
||||
};
|
||||
|
||||
export type UrlFetchParams = {
|
||||
urlList: string[];
|
||||
selector?: string;
|
||||
|
||||
62
packages/global/common/file/read/index.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
/* read file to txt */
|
||||
import * as pdfjsLib from 'pdfjs-dist';
|
||||
|
||||
export const readPdfFile = async ({ pdf }: { pdf: string | URL | ArrayBuffer }) => {
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = '/js/pdf.worker.js';
|
||||
|
||||
type TokenType = {
|
||||
str: string;
|
||||
dir: string;
|
||||
width: number;
|
||||
height: number;
|
||||
transform: number[];
|
||||
fontName: string;
|
||||
hasEOL: boolean;
|
||||
};
|
||||
|
||||
const readPDFPage = async (doc: any, pageNo: number) => {
|
||||
const page = await doc.getPage(pageNo);
|
||||
const tokenizedText = await page.getTextContent();
|
||||
|
||||
const viewport = page.getViewport({ scale: 1 });
|
||||
const pageHeight = viewport.height;
|
||||
const headerThreshold = pageHeight * 0.07; // 假设页头在页面顶部7%的区域内
|
||||
const footerThreshold = pageHeight * 0.93; // 假设页脚在页面底部7%的区域内
|
||||
|
||||
const pageTexts: TokenType[] = tokenizedText.items.filter((token: TokenType) => {
|
||||
return (
|
||||
!token.transform ||
|
||||
(token.transform[5] > headerThreshold && token.transform[5] < footerThreshold)
|
||||
);
|
||||
});
|
||||
|
||||
// merge each empty-string token's 'hasEOL' flag into the previous token, then drop the empty token
|
||||
for (let i = 0; i < pageTexts.length; i++) {
|
||||
const item = pageTexts[i];
|
||||
if (item.str === '' && pageTexts[i - 1]) {
|
||||
pageTexts[i - 1].hasEOL = item.hasEOL;
|
||||
pageTexts.splice(i, 1);
|
||||
i--;
|
||||
}
|
||||
}
|
||||
|
||||
page.cleanup();
|
||||
|
||||
return pageTexts
|
||||
.map((token) => {
|
||||
const paragraphEnd = token.hasEOL && /([。?!.?!\n\r]|(\r\n))$/.test(token.str);
|
||||
|
||||
return paragraphEnd ? `${token.str}\n` : token.str;
|
||||
})
|
||||
.join('');
|
||||
};
|
||||
|
||||
const doc = await pdfjsLib.getDocument(pdf).promise;
|
||||
const pageTextPromises = [];
|
||||
for (let pageNo = 1; pageNo <= doc.numPages; pageNo++) {
|
||||
pageTextPromises.push(readPDFPage(doc, pageNo));
|
||||
}
|
||||
const pageTexts = await Promise.all(pageTextPromises);
|
||||
|
||||
return pageTexts.join('');
|
||||
};
|
||||
@@ -1,8 +1,3 @@
|
||||
import axios from 'axios';
|
||||
import { UrlFetchParams, UrlFetchResponse } from './api.d';
|
||||
import { htmlToMarkdown } from '../string/markdown';
|
||||
import * as cheerio from 'cheerio';
|
||||
|
||||
export const formatFileSize = (bytes: number): string => {
|
||||
if (bytes === 0) return '0 B';
|
||||
|
||||
@@ -12,84 +7,3 @@ export const formatFileSize = (bytes: number): string => {
|
||||
|
||||
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
|
||||
};
|
||||
|
||||
export const cheerioToHtml = ({
|
||||
fetchUrl,
|
||||
$,
|
||||
selector
|
||||
}: {
|
||||
fetchUrl: string;
|
||||
$: cheerio.CheerioAPI;
|
||||
selector?: string;
|
||||
}) => {
|
||||
// get origin url
|
||||
const originUrl = new URL(fetchUrl).origin;
|
||||
|
||||
// remove i element
|
||||
$('i,script').remove();
|
||||
|
||||
// remove empty a element
|
||||
$('a')
|
||||
.filter((i, el) => {
|
||||
return $(el).text().trim() === '' && $(el).children().length === 0;
|
||||
})
|
||||
.remove();
|
||||
|
||||
// if link,img startWith /, add origin url
|
||||
$('a').each((i, el) => {
|
||||
const href = $(el).attr('href');
|
||||
if (href && href.startsWith('/')) {
|
||||
$(el).attr('href', originUrl + href);
|
||||
}
|
||||
});
|
||||
$('img').each((i, el) => {
|
||||
const src = $(el).attr('src');
|
||||
if (src && src.startsWith('/')) {
|
||||
$(el).attr('src', originUrl + src);
|
||||
}
|
||||
});
|
||||
|
||||
return $(selector || 'body').html();
|
||||
};
|
||||
export const urlsFetch = async ({
|
||||
urlList,
|
||||
selector
|
||||
}: UrlFetchParams): Promise<UrlFetchResponse> => {
|
||||
urlList = urlList.filter((url) => /^(http|https):\/\/[^ "]+$/.test(url));
|
||||
|
||||
const response = (
|
||||
await Promise.all(
|
||||
urlList.map(async (url) => {
|
||||
try {
|
||||
const fetchRes = await axios.get(url, {
|
||||
timeout: 30000
|
||||
});
|
||||
|
||||
const $ = cheerio.load(fetchRes.data);
|
||||
|
||||
const md = htmlToMarkdown(
|
||||
cheerioToHtml({
|
||||
fetchUrl: url,
|
||||
$,
|
||||
selector
|
||||
})
|
||||
);
|
||||
|
||||
return {
|
||||
url,
|
||||
content: md
|
||||
};
|
||||
} catch (error) {
|
||||
console.log(error, 'fetch error');
|
||||
|
||||
return {
|
||||
url,
|
||||
content: ''
|
||||
};
|
||||
}
|
||||
})
|
||||
)
|
||||
).filter((item) => item.content);
|
||||
|
||||
return response;
|
||||
};
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import { simpleText } from './tools';
|
||||
import { NodeHtmlMarkdown } from 'node-html-markdown';
|
||||
|
||||
/* Delete redundant text in markdown */
|
||||
export const simpleMarkdownText = (rawText: string) => {
|
||||
@@ -26,72 +25,50 @@ export const simpleMarkdownText = (rawText: string) => {
|
||||
rawText = rawText.replace(/\\\\n/g, '\\n');
|
||||
|
||||
// Remove headings and code blocks front spaces
|
||||
['####', '###', '##', '#', '```', '~~~'].forEach((item) => {
|
||||
['####', '###', '##', '#', '```', '~~~'].forEach((item, i) => {
|
||||
const reg = new RegExp(`\\n\\s*${item}`, 'g');
|
||||
if (reg.test(rawText)) {
|
||||
rawText = rawText.replace(new RegExp(`\\n\\s*(${item})`, 'g'), '\n$1');
|
||||
rawText = rawText.replace(new RegExp(`(\\n)( *)(${item})`, 'g'), '$1$3');
|
||||
}
|
||||
});
|
||||
|
||||
return rawText.trim();
|
||||
};
|
||||
|
||||
/* html string to markdown */
|
||||
export const htmlToMarkdown = (html?: string | null) => {
|
||||
if (!html) return '';
|
||||
/**
|
||||
* format markdown
|
||||
* 1. upload base64
|
||||
* 2. replace \
|
||||
*/
|
||||
export const uploadMarkdownBase64 = async ({
|
||||
rawText,
|
||||
uploadImgController
|
||||
}: {
|
||||
rawText: string;
|
||||
uploadImgController: (base64: string) => Promise<string>;
|
||||
}) => {
|
||||
// match base64, upload and replace it
|
||||
const base64Regex = /data:image\/.*;base64,([^\)]+)/g;
|
||||
const base64Arr = rawText.match(base64Regex) || [];
|
||||
// upload base64 and replace it
|
||||
await Promise.all(
|
||||
base64Arr.map(async (base64Img) => {
|
||||
try {
|
||||
const str = await uploadImgController(base64Img);
|
||||
|
||||
const surround = (source: string, surroundStr: string) => `${surroundStr}${source}${surroundStr}`;
|
||||
|
||||
const nhm = new NodeHtmlMarkdown(
|
||||
{
|
||||
codeFence: '```',
|
||||
codeBlockStyle: 'fenced',
|
||||
ignore: ['i', 'script']
|
||||
},
|
||||
{
|
||||
code: ({ node, parent, options: { codeFence, codeBlockStyle }, visitor }) => {
|
||||
const isCodeBlock = ['PRE', 'WRAPPED-PRE'].includes(parent?.tagName!);
|
||||
|
||||
if (!isCodeBlock) {
|
||||
return {
|
||||
spaceIfRepeatingChar: true,
|
||||
noEscape: true,
|
||||
postprocess: ({ content }) => {
|
||||
// Find longest occurring sequence of running backticks and add one more (so content is escaped)
|
||||
const delimiter =
|
||||
'`' + (content.match(/`+/g)?.sort((a, b) => b.length - a.length)?.[0] || '');
|
||||
const padding = delimiter.length > 1 ? ' ' : '';
|
||||
|
||||
return surround(surround(content, padding), delimiter);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/* Handle code block */
|
||||
if (codeBlockStyle === 'fenced') {
|
||||
const language =
|
||||
node.getAttribute('class')?.match(/language-(\S+)/)?.[1] ||
|
||||
parent?.getAttribute('class')?.match(/language-(\S+)/)?.[1] ||
|
||||
'';
|
||||
|
||||
return {
|
||||
noEscape: true,
|
||||
prefix: `${codeFence}${language}\n`,
|
||||
postfix: `\n${codeFence}\n`,
|
||||
childTranslators: visitor.instance.codeBlockTranslators
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
noEscape: true,
|
||||
postprocess: ({ content }) => content.replace(/^/gm, ' '),
|
||||
childTranslators: visitor.instance.codeBlockTranslators
|
||||
};
|
||||
rawText = rawText.replace(base64Img, str);
|
||||
} catch (error) {
|
||||
rawText = rawText.replace(base64Img, '');
|
||||
rawText = rawText.replace(/!\[.*\]\(\)/g, '');
|
||||
}
|
||||
}
|
||||
})
|
||||
);
|
||||
|
||||
const markdown = nhm.translate(html).trim();
|
||||
// Remove white space after the picture
|
||||
const trimReg = /(!\[.*\]\(.*\))\s*/g;
|
||||
if (trimReg.test(rawText)) {
|
||||
rawText = rawText.replace(trimReg, '$1');
|
||||
}
|
||||
|
||||
return simpleMarkdownText(markdown);
|
||||
return simpleMarkdownText(rawText);
|
||||
};
|
||||
|
||||
@@ -12,12 +12,14 @@ export const splitText2Chunks = (props: {
|
||||
text: string;
|
||||
chunkLen: number;
|
||||
overlapRatio?: number;
|
||||
customReg?: string[];
|
||||
countTokens?: boolean;
|
||||
}): {
|
||||
chunks: string[];
|
||||
tokens: number;
|
||||
overlapRatio?: number;
|
||||
} => {
|
||||
let { text = '', chunkLen, overlapRatio = 0.2 } = props;
|
||||
let { text = '', chunkLen, overlapRatio = 0.2, customReg = [], countTokens = true } = props;
|
||||
const splitMarker = 'SPLIT_HERE_SPLIT_HERE';
|
||||
const codeBlockMarker = 'CODE_BLOCK_LINE_MARKER';
|
||||
const overlapLen = Math.round(chunkLen * overlapRatio);
|
||||
@@ -29,22 +31,29 @@ export const splitText2Chunks = (props: {
|
||||
|
||||
// The larger maxLen is, the next sentence is less likely to trigger splitting
|
||||
const stepReges: { reg: RegExp; maxLen: number }[] = [
|
||||
{ reg: /^(#\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 },
|
||||
{ reg: /^(##\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 },
|
||||
{ reg: /^(###\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 },
|
||||
{ reg: /^(####\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 },
|
||||
...customReg.map((text) => ({ reg: new RegExp(`(${text})`, 'g'), maxLen: chunkLen * 1.4 })),
|
||||
{ reg: /^(#\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
|
||||
{ reg: /^(##\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
|
||||
{ reg: /^(###\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
|
||||
{ reg: /^(####\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
|
||||
|
||||
{ reg: /([\n](`))/g, maxLen: chunkLen * 4 }, // code block
|
||||
{ reg: /([\n](?![\*\-|>0-9]))/g, maxLen: chunkLen * 1.8 }, // (?![\*\-|>`0-9]): markdown special char
|
||||
{ reg: /([\n])/g, maxLen: chunkLen * 1.4 },
|
||||
{ reg: /([\n]([`~]))/g, maxLen: chunkLen * 4 }, // code block
|
||||
{ reg: /([\n](?!\s*[\*\-|>0-9]))/g, maxLen: chunkLen * 2 }, // (?!\s*[\*\-|>0-9]): newline not followed by a markdown special char
|
||||
{ reg: /([\n])/g, maxLen: chunkLen * 1.2 },
|
||||
|
||||
{ reg: /([。]|([a-zA-Z])\.\s)/g, maxLen: chunkLen * 1.4 },
|
||||
{ reg: /([!]|!\s)/g, maxLen: chunkLen * 1.4 },
|
||||
{ reg: /([?]|\?\s)/g, maxLen: chunkLen * 1.6 },
|
||||
{ reg: /([;]|;\s)/g, maxLen: chunkLen * 1.8 },
|
||||
{ reg: /([。]|([a-zA-Z])\.\s)/g, maxLen: chunkLen * 1.2 },
|
||||
{ reg: /([!]|!\s)/g, maxLen: chunkLen * 1.2 },
|
||||
{ reg: /([?]|\?\s)/g, maxLen: chunkLen * 1.4 },
|
||||
{ reg: /([;]|;\s)/g, maxLen: chunkLen * 1.6 },
|
||||
{ reg: /([,]|,\s)/g, maxLen: chunkLen * 2 }
|
||||
];
|
||||
|
||||
const customRegLen = customReg.length;
|
||||
const checkIsCustomStep = (step: number) => step < customRegLen;
|
||||
const checkIsMarkdownSplit = (step: number) => step >= customRegLen && step <= 3 + customRegLen;
|
||||
const checkIndependentChunk = (step: number) => step >= customRegLen && step <= 4 + customRegLen;
|
||||
const checkForbidOverlap = (step: number) => step <= 6 + customRegLen;
|
||||
|
||||
// if using markdown title split, record the title separately
|
||||
const getSplitTexts = ({ text, step }: { text: string; step: number }) => {
|
||||
if (step >= stepReges.length) {
|
||||
@@ -55,11 +64,22 @@ export const splitText2Chunks = (props: {
|
||||
}
|
||||
];
|
||||
}
|
||||
const isMarkdownSplit = step <= 3;
|
||||
|
||||
const isCustomSteep = checkIsCustomStep(step);
|
||||
const isMarkdownSplit = checkIsMarkdownSplit(step);
|
||||
const independentChunk = checkIndependentChunk(step);
|
||||
|
||||
const { reg } = stepReges[step];
|
||||
|
||||
const splitTexts = text
|
||||
.replace(reg, isMarkdownSplit ? `${splitMarker}$1` : `$1${splitMarker}`)
|
||||
.replace(
|
||||
reg,
|
||||
(() => {
|
||||
if (isCustomSteep) return splitMarker;
|
||||
if (independentChunk) return `${splitMarker}$1`;
|
||||
return `$1${splitMarker}`;
|
||||
})()
|
||||
)
|
||||
.split(`${splitMarker}`)
|
||||
.filter((part) => part.trim());
|
||||
|
||||
@@ -76,7 +96,7 @@ export const splitText2Chunks = (props: {
|
||||
};
|
||||
|
||||
const getOneTextOverlapText = ({ text, step }: { text: string; step: number }): string => {
|
||||
const forbidOverlap = step <= 6;
|
||||
const forbidOverlap = checkForbidOverlap(step);
|
||||
const maxOverlapLen = chunkLen * 0.4;
|
||||
|
||||
// step >= stepReges.length: Do not overlap incomplete sentences
|
||||
@@ -114,12 +134,8 @@ export const splitText2Chunks = (props: {
|
||||
lastText: string;
|
||||
mdTitle: string;
|
||||
}): string[] => {
|
||||
const isMarkdownSplit = step <= 3;
|
||||
|
||||
// mini text
|
||||
if (text.length <= chunkLen) {
|
||||
return [text];
|
||||
}
|
||||
const independentChunk = checkIndependentChunk(step);
|
||||
const isCustomStep = checkIsCustomStep(step);
|
||||
|
||||
// oversize
|
||||
if (step >= stepReges.length) {
|
||||
@@ -134,12 +150,14 @@ export const splitText2Chunks = (props: {
|
||||
return chunks;
|
||||
}
|
||||
|
||||
const { maxLen } = stepReges[step];
|
||||
const minChunkLen = chunkLen * 0.7;
|
||||
|
||||
// split text by special char
|
||||
const splitTexts = getSplitTexts({ text, step });
|
||||
|
||||
const maxLen = splitTexts.length > 1 ? stepReges[step].maxLen : chunkLen;
|
||||
const minChunkLen = chunkLen * 0.7;
|
||||
const miniChunkLen = 30;
|
||||
// console.log(splitTexts, stepReges[step].reg);
|
||||
|
||||
const chunks: string[] = [];
|
||||
for (let i = 0; i < splitTexts.length; i++) {
|
||||
const item = splitTexts[i];
|
||||
@@ -170,8 +188,8 @@ export const splitText2Chunks = (props: {
|
||||
mdTitle: currentTitle
|
||||
});
|
||||
const lastChunk = innerChunks[innerChunks.length - 1];
|
||||
// last chunk is too small, concat it to lastText
|
||||
if (!isMarkdownSplit && lastChunk.length < minChunkLen) {
|
||||
// last chunk is too small, concat it to lastText(next chunk start)
|
||||
if (!independentChunk && lastChunk.length < minChunkLen) {
|
||||
chunks.push(...innerChunks.slice(0, -1));
|
||||
lastText = lastChunk;
|
||||
} else {
|
||||
@@ -189,10 +207,14 @@ export const splitText2Chunks = (props: {
|
||||
lastText = newText;
|
||||
|
||||
// markdown paragraph block: Direct addition; If the chunk size reaches, add a chunk
|
||||
if (isMarkdownSplit || newTextLen >= chunkLen) {
|
||||
if (
|
||||
isCustomStep ||
|
||||
(independentChunk && newTextLen > miniChunkLen) ||
|
||||
newTextLen >= chunkLen
|
||||
) {
|
||||
chunks.push(`${currentTitle}${lastText}`);
|
||||
|
||||
lastText = isMarkdownSplit ? '' : getOneTextOverlapText({ text: lastText, step });
|
||||
lastText = getOneTextOverlapText({ text: lastText, step });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -203,6 +225,8 @@ export const splitText2Chunks = (props: {
|
||||
} else {
|
||||
chunks.push(`${mdTitle}${lastText}`);
|
||||
}
|
||||
} else if (lastText && chunks.length === 0) {
|
||||
chunks.push(lastText);
|
||||
}
|
||||
|
||||
return chunks;
|
||||
@@ -214,9 +238,11 @@ export const splitText2Chunks = (props: {
|
||||
step: 0,
|
||||
lastText: '',
|
||||
mdTitle: ''
|
||||
}).map((chunk) => chunk.replaceAll(codeBlockMarker, '\n')); // restore code block
|
||||
}).map((chunk) => chunk?.replaceAll(codeBlockMarker, '\n') || ''); // restore code block
|
||||
|
||||
const tokens = chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0);
|
||||
const tokens = countTokens
|
||||
? chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0)
|
||||
: 0;
|
||||
|
||||
return {
|
||||
chunks,
|
||||
|
||||
13
packages/global/common/system/config/constants.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
export enum SystemConfigsTypeEnum {
|
||||
fastgpt = 'fastgpt',
|
||||
fastgptPro = 'fastgptPro'
|
||||
}
|
||||
|
||||
export const SystemConfigsTypeMap = {
|
||||
[SystemConfigsTypeEnum.fastgpt]: {
|
||||
label: 'fastgpt'
|
||||
},
|
||||
[SystemConfigsTypeEnum.fastgptPro]: {
|
||||
label: 'fastgptPro'
|
||||
}
|
||||
};
|
||||
8
packages/global/common/system/config/type.d.ts
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
import { SystemConfigsTypeEnum } from "./constants";
|
||||
|
||||
export type SystemConfigsType = {
|
||||
_id: string;
|
||||
type: `${SystemConfigsTypeEnum}`;
|
||||
value: Record<string, any>;
|
||||
createTime: Date;
|
||||
};
|
||||
2
packages/global/common/system/constants.ts
Normal file
@@ -0,0 +1,2 @@
|
||||
export const HUMAN_ICON = `/icon/human.svg`;
|
||||
export const LOGO_ICON = `/icon/logo.svg`;
|
||||