Compare commits

..

15 Commits

Author SHA1 Message Date
archer
6d4776b3aa action 2025-03-06 22:56:00 +08:00
archer
2d351c3654 perf: http body check 2025-03-06 18:22:42 +08:00
Finley Ge
662a4a4671 fix: remove defaultTeam (#3989) 2025-03-06 00:26:32 +08:00
lgphone
3fadabd28b fix: 优化读取mongo文件性能,解决因执行Buffer.concat过多造成等待导致整个服务hang住的 (#3985)
如果 Buffer.concat() 被频繁调用,尤其是在处理大量数据时,可能会导致内存分配压力增大,从而影响性能。
测试大于100M的PDF文件上传解析会导致/api/core/dataset/collection/create/fileId接口长时间无响应,其他接口也处于hang住的状态,导致整个服务不可用。
使用一次性拼接后问题解决
2025-03-05 15:08:40 +08:00
heheer
dbf25cef88 fix ai proxy logs pagination (#3954)
* fix ai proxy logs pagination

* delete unused code
2025-03-05 14:08:38 +08:00
heheer
b2e2fa6b76 fix loop child nodes variables (#3980) 2025-03-05 13:53:28 +08:00
heheer
576c60bd55 plugin customize author (#3963)
* plugin customize author

* fix name
2025-03-04 16:23:12 +08:00
Archer
33617ab5dc Create SECURITY.md (#3967) 2025-03-04 14:51:35 +08:00
Archer
b4dda6a41b fix: Check the url to avoid ssrf attacks (#3965)
* fix: Check the url to avoid ssrf attacks

* Delete docSite/content/zh-cn/docs/development/upgrading/490.md
2025-03-04 14:45:29 +08:00
Archer
e860c56b77 perf: delete dataset (#3949)
* fix: collection list count

* fix: collection list count

* ai proxy ui

* perf: delete dataset

* perf: add dataset text index

* update doc
2025-03-03 12:49:13 +08:00
Archer
efac5312b4 fix: rerank model cannot use ai proxy (#3945)
* fix: collection list count

* fix: collection list count

* fix: rerank model cannot use ai proxy

* mongo init
2025-03-03 11:49:35 +08:00
Finley Ge
4bc7f21182 fix: add order:true to all create transactions (#3948) 2025-03-03 11:37:51 +08:00
gggaaallleee
113e8f711f add env proxypool (#3939) 2025-03-02 17:50:03 +08:00
Archer
abc6dffb41 4.8.23 dev (#3932)
* fix: collection list count

* fix: collection list count

* update doc

* perf: init log

* yml
2025-02-28 19:18:12 +08:00
gggaaallleee
f7b2a57ca3 1 (#3924) 2025-02-28 19:00:58 +08:00
83 changed files with 8117 additions and 209 deletions

View File

@@ -6,8 +6,6 @@ on:
- 'docSite/**'
branches:
- 'main'
tags:
- 'v*.*.*'
jobs:
build-fastgpt-docs-images:

View File

@@ -7,8 +7,6 @@ on:
- 'docSite/**'
branches:
- 'main'
tags:
- 'v*.*.*'
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:

View File

@@ -4,8 +4,6 @@ on:
pull_request_target:
paths:
- 'docSite/**'
branches:
- 'main'
workflow_dispatch:
# A workflow run is made up of one or more jobs that can run sequentially or in parallel

View File

@@ -26,7 +26,7 @@ jobs:
with:
driver-opts: network=host
- name: Cache Docker layers
uses: actions/cache@v2
uses: actions/cache@v4
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ github.sha }}
@@ -108,7 +108,7 @@ jobs:
with:
driver-opts: network=host
- name: Cache Docker layers
uses: actions/cache@v2
uses: actions/cache@v4
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ github.sha }}
@@ -191,7 +191,7 @@ jobs:
with:
driver-opts: network=host
- name: Cache Docker layers
uses: actions/cache@v2
uses: actions/cache@v4
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ github.sha }}

View File

@@ -25,7 +25,7 @@ jobs:
with:
driver-opts: network=host
- name: Cache Docker layers
uses: actions/cache@v2
uses: actions/cache@v4
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ github.sha }}

26
SECURITY.md Normal file
View File

@@ -0,0 +1,26 @@
# 安全策略
## 漏洞报告
如果您发现了 FastGPT 的安全漏洞,请按照以下步骤进行报告:
1. **报告方式**
发送邮件至yujinlong@sealos.io
请备注版本以及您的 GitHub 账号
2. **响应时间**
- 我们会在 48 小时内确认收到您的报告
- 一般在 3 个工作日内给出初步评估结果
3. **漏洞处理流程**
- 确认漏洞:我们会验证漏洞的存在性和影响范围
- 修复开发:针对已确认的漏洞进行修复
- 版本发布:在下一个版本更新中发布安全补丁
- 公开披露:在修复完成后,我们会在更新日志中公布相关信息
4. **注意事项**
- 在漏洞未修复前,请勿公开披露漏洞详情
- 我们欢迎负责任的漏洞披露
- 对于重大贡献者,我们会在项目致谢名单中提及
感谢您为 FastGPT 的安全性做出贡献!

View File

@@ -114,15 +114,15 @@ services:
# fastgpt
sandbox:
container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.8.22 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.8.22 # 阿里云
image: ghcr.io/labring/fastgpt-sandbox:v4.8.23-fix # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.8.23-fix # 阿里云
networks:
- fastgpt
restart: always
fastgpt:
container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.8.22 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.22 # 阿里云
image: ghcr.io/labring/fastgpt:v4.8.23-fix # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.23-fix # 阿里云
ports:
- 3000:3000
networks:

View File

@@ -72,15 +72,15 @@ services:
# fastgpt
sandbox:
container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.8.22 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.8.22 # 阿里云
image: ghcr.io/labring/fastgpt-sandbox:v4.8.23-fix # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.8.23-fix # 阿里云
networks:
- fastgpt
restart: always
fastgpt:
container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.8.22 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.22 # 阿里云
image: ghcr.io/labring/fastgpt:v4.8.23-fix # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.23-fix # 阿里云
ports:
- 3000:3000
networks:

View File

@@ -53,15 +53,15 @@ services:
wait $$!
sandbox:
container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.8.22 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.8.22 # 阿里云
image: ghcr.io/labring/fastgpt-sandbox:v4.8.23-fix # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.8.23-fix # 阿里云
networks:
- fastgpt
restart: always
fastgpt:
container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.8.22 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.22 # 阿里云
image: ghcr.io/labring/fastgpt:v4.8.23-fix # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.8.23-fix # 阿里云
ports:
- 3000:3000
networks:

View File

@@ -1,5 +1,5 @@
---
title: 'V4.8.23(进行中)'
title: 'V4.8.23'
description: 'FastGPT V4.8.23 更新说明'
icon: 'upgrade'
draft: false
@@ -7,6 +7,28 @@ toc: true
weight: 802
---
## 更新指南
### 1. 做好数据库备份
### 2. 更新镜像:
- 更新 fastgpt 镜像 tag: v4.8.23-fix
- 更新 fastgpt-pro 商业版镜像 tag: v4.8.23-fix
- Sandbox 镜像无需更新
### 3. 运行升级脚本
从任意终端,发起 1 个 HTTP 请求。其中 {{rootkey}} 替换成环境变量里的 `rootkey`;{{host}} 替换成 **FastGPT 域名**。
```bash
curl --location --request POST 'https://{{host}}/api/admin/initv4823' \
--header 'rootkey: {{rootkey}}' \
--header 'Content-Type: application/json'
```
脚本会清理一些知识库脏数据,主要是多余的全文索引。
## 🚀 新增内容
1. 增加默认“知识库文本理解模型”配置
@@ -28,4 +50,5 @@ weight: 802
2. 暂时移除 md 阅读优化,避免链接分割错误。
3. 离开团队时,未刷新成员列表。
4. PPTX 编码错误,导致解析失败。
5. 删除知识库单条数据时,全文索引未跟随删除。
5. 删除知识库单条数据时,全文索引未跟随删除。
6. 修复 Mongo Dataset text 索引在查询数据时未生效。

View File

@@ -10,7 +10,6 @@ export type AuthTeamRoleProps = {
export type CreateTeamProps = {
name: string;
avatar?: string;
defaultTeam?: boolean;
memberName?: string;
memberAvatar?: string;
notificationAccount?: string;

View File

@@ -47,7 +47,6 @@ export type TeamMemberSchema = {
role: `${TeamMemberRoleEnum}`;
status: `${TeamMemberStatusEnum}`;
avatar: string;
defaultTeam: boolean;
};
export type TeamMemberWithTeamAndUserSchema = TeamMemberSchema & {
@@ -65,7 +64,6 @@ export type TeamTmbItemType = {
balance?: number;
tmbId: string;
teamDomain: string;
defaultTeam: boolean;
role: `${TeamMemberRoleEnum}`;
status: `${TeamMemberStatusEnum}`;
notificationAccount?: string;

View File

@@ -16,7 +16,7 @@
"nodeId": "lmpb9v2lo2lk",
"name": "插件开始",
"intro": "自定义配置外部输入,使用插件时,仅暴露自定义配置的输入",
"avatar": "/imgs/workflow/input.png",
"avatar": "core/workflow/template/workflowStart",
"flowNodeType": "pluginInput",
"showStatus": false,
"position": {
@@ -26,14 +26,16 @@
"version": "481",
"inputs": [
{
"renderTypeList": ["reference"],
"renderTypeList": ["input", "reference"],
"selectedTypeIndex": 0,
"valueType": "string",
"key": "url",
"label": "url",
"description": "需要读取的网页链接",
"required": true,
"toolDescription": "需要读取的网页链接"
"toolDescription": "需要读取的网页链接",
"list": [],
"defaultValue": ""
}
],
"outputs": [
@@ -50,12 +52,12 @@
"nodeId": "i7uow4wj2wdp",
"name": "插件输出",
"intro": "自定义配置外部输出,使用插件时,仅暴露自定义配置的输出",
"avatar": "/imgs/workflow/output.png",
"avatar": "core/workflow/template/pluginOutput",
"flowNodeType": "pluginOutput",
"showStatus": false,
"position": {
"x": 1607.7142331269129,
"y": -150.8808596935447
"x": 1853.935047606551,
"y": -154.13661665265613
},
"version": "481",
"inputs": [
@@ -81,12 +83,12 @@
"nodeId": "ebLCxU43hHuZ",
"name": "HTTP 请求",
"intro": "可以发出一个 HTTP 请求,实现更为复杂的操作(联网搜索、数据库查询等)",
"avatar": "/imgs/workflow/http.png",
"avatar": "core/workflow/template/httpRequest",
"flowNodeType": "httpRequest468",
"showStatus": true,
"position": {
"x": 1050.9890727421412,
"y": -415.2085119990912
"x": 1054.2940501177068,
"y": -503.13661665265613
},
"version": "481",
"inputs": [
@@ -96,7 +98,7 @@
"valueType": "dynamic",
"label": "",
"required": false,
"description": "core.module.input.description.HTTP Dynamic Input",
"description": "common:core.module.input.description.HTTP Dynamic Input",
"customInputConfig": {
"selectValueTypeList": [
"string",
@@ -107,16 +109,19 @@
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
"selectDataset",
"selectApp"
],
"showDescription": false,
"showDefaultValue": true
}
},
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpMethod",
@@ -124,17 +129,33 @@
"valueType": "string",
"label": "",
"value": "POST",
"required": true
"required": true,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpTimeout",
"renderTypeList": ["custom"],
"valueType": "number",
"label": "",
"value": 30,
"min": 5,
"max": 600,
"required": true,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpReqUrl",
"renderTypeList": ["hidden"],
"valueType": "string",
"label": "",
"description": "core.module.input.description.Http Request Url",
"description": "common:core.module.input.description.Http Request Url",
"placeholder": "https://api.ai.com/getInventory",
"required": false,
"value": "fetchUrl"
"value": "fetchUrl",
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpHeader",
@@ -142,9 +163,11 @@
"valueType": "any",
"value": [],
"label": "",
"description": "core.module.input.description.Http Request Header",
"placeholder": "core.module.input.description.Http Request Header",
"required": false
"description": "common:core.module.input.description.Http Request Header",
"placeholder": "common:core.module.input.description.Http Request Header",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpParams",
@@ -152,7 +175,9 @@
"valueType": "any",
"value": [],
"label": "",
"required": false
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpJsonBody",
@@ -160,7 +185,29 @@
"valueType": "any",
"value": "{\n \"url\": \"{{url}}\"\n}",
"label": "",
"required": false
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpFormBody",
"renderTypeList": ["hidden"],
"valueType": "any",
"value": [],
"label": "",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"key": "system_httpContentType",
"renderTypeList": ["hidden"],
"valueType": "string",
"value": "json",
"label": "",
"required": false,
"debugLabel": "",
"toolDescription": ""
},
{
"renderTypeList": ["reference"],
@@ -178,12 +225,13 @@
"arrayNumber",
"arrayBoolean",
"arrayObject",
"arrayAny",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
"selectDataset",
"selectApp"
],
"showDescription": false,
"showDefaultValue": true
@@ -193,6 +241,23 @@
}
],
"outputs": [
{
"id": "error",
"key": "error",
"label": "workflow:request_error",
"description": "HTTP请求错误信息成功时返回空",
"valueType": "object",
"type": "static"
},
{
"id": "httpRawResponse",
"key": "httpRawResponse",
"required": true,
"label": "workflow:raw_response",
"description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。",
"valueType": "any",
"type": "static"
},
{
"id": "system_addOutputParam",
"key": "system_addOutputParam",
@@ -220,23 +285,6 @@
"showDefaultValue": true
}
},
{
"id": "error",
"key": "error",
"label": "请求错误",
"description": "HTTP请求错误信息成功时返回空",
"valueType": "object",
"type": "static"
},
{
"id": "httpRawResponse",
"key": "httpRawResponse",
"label": "原始响应",
"required": true,
"description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。",
"valueType": "any",
"type": "static"
},
{
"id": "rH4tMV02robs",
"valueType": "string",
@@ -260,6 +308,34 @@
"sourceHandle": "ebLCxU43hHuZ-source-right",
"targetHandle": "i7uow4wj2wdp-target-left"
}
]
],
"chatConfig": {
"welcomeText": "",
"variables": [],
"questionGuide": {
"open": false,
"model": "gpt-4o-mini",
"customPrompt": "You are an AI assistant tasked with predicting the user's next question based on the conversation history. Your goal is to generate 3 potential questions that will guide the user to continue the conversation. When generating these questions, adhere to the following rules:\n\n1. Use the same language as the user's last question in the conversation history.\n2. Keep each question under 20 characters in length.\n\nAnalyze the conversation history provided to you and use it as context to generate relevant and engaging follow-up questions. Your predictions should be logical extensions of the current topic or related areas that the user might be interested in exploring further.\n\nRemember to maintain consistency in tone and style with the existing conversation while providing diverse options for the user to choose from. Your goal is to keep the conversation flowing naturally and help the user delve deeper into the subject matter or explore related topics."
},
"ttsConfig": {
"type": "web"
},
"whisperConfig": {
"open": false,
"autoSend": false,
"autoTTSResponse": false
},
"chatInputGuide": {
"open": false,
"textList": [],
"customUrl": ""
},
"instruction": "",
"autoExecute": {
"open": false,
"defaultPrompt": ""
},
"_id": "677b59849d672185a5671b45"
}
}
}

View File

@@ -3,13 +3,16 @@ import { PassThrough } from 'stream';
export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
return new Promise<Buffer>((resolve, reject) => {
let tmpBuffer: Buffer = Buffer.from([]);
const chunks: Buffer[] = [];
let totalLength = 0;
stream.on('data', (chunk) => {
tmpBuffer = Buffer.concat([tmpBuffer, chunk]);
chunks.push(chunk);
totalLength += chunk.length;
});
stream.on('end', () => {
resolve(tmpBuffer);
const resultBuffer = Buffer.concat(chunks, totalLength); // 一次性拼接
resolve(resultBuffer);
});
stream.on('error', (err) => {
reject(err);

View File

@@ -118,7 +118,7 @@ export async function delImgByRelatedId({
}: {
teamId: string;
relateIds: string[];
session: ClientSession;
session?: ClientSession;
}) {
if (relateIds.length === 0) return;

View File

@@ -16,16 +16,30 @@ export async function connectMongo(): Promise<Mongoose> {
console.log('mongo start connect');
try {
// Remove existing listeners to prevent duplicates
connectionMongo.connection.removeAllListeners('error');
connectionMongo.connection.removeAllListeners('disconnected');
connectionMongo.set('strictQuery', false);
connectionMongo.connection.on('error', async (error) => {
console.log('mongo error', error);
await connectionMongo.disconnect();
await delay(1000);
connectMongo();
try {
if (connectionMongo.connection.readyState !== 0) {
await connectionMongo.disconnect();
await delay(1000);
await connectMongo();
}
} catch (error) {}
});
connectionMongo.connection.on('disconnected', () => {
connectionMongo.connection.on('disconnected', async () => {
console.log('mongo disconnected');
try {
if (connectionMongo.connection.readyState !== 0) {
await connectionMongo.disconnect();
await delay(1000);
await connectMongo();
}
} catch (error) {}
});
await connectionMongo.connect(process.env.MONGODB_URI as string, {

View File

@@ -2,6 +2,7 @@ import { UrlFetchParams, UrlFetchResponse } from '@fastgpt/global/common/file/ap
import * as cheerio from 'cheerio';
import axios from 'axios';
import { htmlToMarkdown } from './utils';
import { isInternalAddress } from '../system/utils';
export const cheerioToHtml = ({
fetchUrl,
@@ -75,6 +76,16 @@ export const urlsFetch = async ({
const response = await Promise.all(
urlList.map(async (url) => {
const isInternal = isInternalAddress(url);
if (isInternal) {
return {
url,
title: '',
content: 'Cannot fetch internal url',
selector: ''
};
}
try {
const fetchRes = await axios.get(url, {
timeout: 30000

View File

@@ -0,0 +1,63 @@
import { SERVICE_LOCAL_HOST } from './tools';
/**
 * SSRF guard: report whether `url` targets a destination that must not be
 * fetched on behalf of a user.
 *
 * Returns `true` (internal — block it) when:
 * - the URL cannot be parsed (fail closed);
 * - the host is this service itself, `localhost`, or the IPv6 loopback literal;
 * - the URL starts with a known cloud metadata endpoint;
 * - the host is an IPv4 literal inside a private / loopback / link-local /
 *   CGNAT / multicast / reserved range.
 *
 * Returns `false` for public IPv4 literals and for ordinary domain names.
 *
 * NOTE(review): domain names are not resolved here, so a public name that
 * resolves to a private address is not detected, and IPv6 literals other than
 * `[::1]` are not range-checked.
 *
 * @param url Absolute URL supplied by the user.
 * @returns `true` if the URL should be treated as internal, otherwise `false`.
 */
export const isInternalAddress = (url: string): boolean => {
  try {
    const parsedUrl = new URL(url);
    // `hostname` excludes the port, `host` includes it.
    const { hostname, host } = parsedUrl;
    const fullUrl = parsedUrl.toString();

    // The service's own address. SERVICE_LOCAL_HOST is defined outside this
    // block; compare both forms in case it carries a port — TODO confirm.
    if (hostname === SERVICE_LOCAL_HOST || host === SERVICE_LOCAL_HOST) {
      return true;
    }

    // Loopback names (the URL parser already lower-cases the hostname).
    if (hostname === 'localhost' || hostname.endsWith('.localhost') || hostname === '[::1]') {
      return true;
    }

    // Cloud metadata endpoints that must never be reachable (blocklist).
    const metadataEndpoints = [
      // AWS / Huawei Cloud
      'http://169.254.169.254/latest/meta-data/',
      // Azure
      'http://169.254.169.254/metadata/instance?api-version=2021-02-01',
      // GCP
      'http://metadata.google.internal/computeMetadata/v1/',
      // Alibaba Cloud
      'http://100.100.100.200/latest/meta-data/',
      // Tencent Cloud
      'http://metadata.tencentyun.com/latest/meta-data/'
    ];
    if (metadataEndpoints.some((endpoint) => fullUrl.startsWith(endpoint))) {
      return true;
    }

    // Anything that is not an IPv4 literal is a domain name (or an IPv6
    // literal); it cannot be range-checked without DNS, so treat it as external.
    const ipv4Pattern = /^(\d{1,3}\.){3}\d{1,3}$/;
    if (!ipv4Pattern.test(hostname)) {
      return false;
    }

    const octets = hostname.split('.').map(Number);
    // URL parsing should already reject out-of-range octets; fail closed if
    // one slips through anyway.
    if (octets.length !== 4 || octets.some((octet) => octet < 0 || octet > 255)) {
      return true;
    }

    const [first = 0, second = 0] = octets;

    // Internal when the address falls in any non-public range.
    return (
      first === 0 || // "this" network
      first === 10 || // private 10.0.0.0/8
      first === 127 || // loopback
      (first === 169 && second === 254) || // link-local (incl. metadata IP)
      (first === 172 && second >= 16 && second <= 31) || // private 172.16.0.0/12
      (first === 192 && second === 168) || // private 192.168.0.0/16
      first >= 224 || // multicast (224-239) and reserved (240-255)
      (first === 100 && second >= 64 && second <= 127) || // CGNAT 100.64.0.0/10
      // The two ranges below are carried over unchanged from the original list.
      (first === 9 && second === 0) ||
      (first === 11 && second === 0)
    );
  } catch {
    // Unparseable input: fail closed and report it as internal.
    return true;
  }
};

View File

@@ -35,7 +35,7 @@ export const getAxiosConfig = (props?: { userKey?: OpenaiAccountType }) => {
const { userKey } = props || {};
const baseUrl = userKey?.baseUrl || global?.systemEnv?.oneapiUrl || openaiBaseUrl;
const apiKey = userKey?.key || global?.systemEnv?.chatApiKey || process.env.CHAT_API_KEY || '';
const apiKey = userKey?.key || global?.systemEnv?.chatApiKey || openaiBaseKey;
return {
baseUrl,

View File

@@ -1,4 +1,10 @@
{
"provider": "AliCloud",
"list": []
}
"list": [
{
"model": "SenseVoiceSmall",
"name": "SenseVoiceSmall",
"type": "stt"
}
]
}

View File

@@ -25,6 +25,7 @@ export type SystemPluginConfigSchemaType = {
templateType: string;
associatedPluginId: string;
userGuide: string;
author?: string;
};
};

View File

@@ -18,6 +18,9 @@ const AppTemplateSchema = new Schema({
avatar: {
type: String
},
author: {
type: String
},
tags: {
type: [String],
default: undefined

View File

@@ -25,6 +25,7 @@ import { MongoImage } from '../../../common/file/image/schema';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { addDays } from 'date-fns';
import { MongoDatasetDataText } from '../data/dataTextSchema';
import { delay, retryFn } from '@fastgpt/global/common/system/utils';
export const createCollectionAndInsertData = async ({
dataset,
@@ -216,7 +217,7 @@ export async function createOneCollection({
nextSyncTime
}
],
{ session }
{ session, ordered: true }
);
return collection;
@@ -234,7 +235,7 @@ export const delCollectionRelatedSource = async ({
relatedImgId?: string;
};
}[];
session: ClientSession;
session?: ClientSession;
}) => {
if (collections.length === 0) return;
@@ -282,47 +283,55 @@ export async function delCollection({
const datasetIds = Array.from(new Set(collections.map((item) => String(item.datasetId))));
const collectionIds = collections.map((item) => String(item._id));
// Delete training data
await MongoDatasetTraining.deleteMany({
teamId,
datasetId: { $in: datasetIds },
collectionId: { $in: collectionIds }
await retryFn(async () => {
await Promise.all([
// Delete training data
MongoDatasetTraining.deleteMany({
teamId,
datasetId: { $in: datasetIds },
collectionId: { $in: collectionIds }
}),
// Delete dataset_data_texts
MongoDatasetDataText.deleteMany({
teamId,
datasetId: { $in: datasetIds },
collectionId: { $in: collectionIds }
}),
// Delete dataset_datas
MongoDatasetData.deleteMany({
teamId,
datasetId: { $in: datasetIds },
collectionId: { $in: collectionIds }
}),
...(delImg
? [
delImgByRelatedId({
teamId,
relateIds: collections
.map((item) => item?.metadata?.relatedImgId || '')
.filter(Boolean)
})
]
: []),
...(delFile
? [
delFileByFileIdList({
bucketName: BucketNameEnum.dataset,
fileIdList: collections.map((item) => item?.fileId || '').filter(Boolean)
})
]
: []),
// Delete vector data
deleteDatasetDataVector({ teamId, datasetIds, collectionIds })
]);
// delete collections
await MongoDatasetCollection.deleteMany(
{
teamId,
_id: { $in: collectionIds }
},
{ session }
);
});
if (delImg) {
await delImgByRelatedId({
teamId,
relateIds: collections.map((item) => item?.metadata?.relatedImgId || '').filter(Boolean),
session
});
}
if (delFile) {
await delFileByFileIdList({
bucketName: BucketNameEnum.dataset,
fileIdList: collections.map((item) => item?.fileId || '').filter(Boolean)
});
}
// Delete dataset_datas
await MongoDatasetData.deleteMany(
{ teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
{ session }
);
// Delete dataset_data_texts
await MongoDatasetDataText.deleteMany(
{ teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
{ session }
);
// delete collections
await MongoDatasetCollection.deleteMany(
{
teamId,
_id: { $in: collectionIds }
},
{ session }
);
// no session delete: delete files, vector data
await deleteDatasetDataVector({ teamId, datasetIds, collectionIds });
}

View File

@@ -97,7 +97,7 @@ export const createOrGetCollectionTags = async ({
datasetId,
tag: tagContent
})),
{ session }
{ session, ordered: true }
);
return [...existingTags.map((tag) => tag._id), ...newTags.map((tag) => tag._id)];

View File

@@ -8,6 +8,7 @@ import { MongoDatasetData } from './data/schema';
import { deleteDatasetDataVector } from '../../common/vectorStore/controller';
import { MongoDatasetDataText } from './data/dataTextSchema';
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
import { retryFn } from '@fastgpt/global/common/system/utils';
/* ============= dataset ========== */
/* find all datasetId by top datasetId */
@@ -78,40 +79,39 @@ export async function delDatasetRelevantData({
const datasetIds = datasets.map((item) => item._id);
// delete training data
await MongoDatasetTraining.deleteMany({
teamId,
datasetId: { $in: datasetIds }
});
// Get _id, teamId, fileId, metadata.relatedImgId for all collections
const collections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds }
},
'_id teamId datasetId fileId metadata',
{ session }
'_id teamId datasetId fileId metadata'
).lean();
// Delete Image and file
await delCollectionRelatedSource({ collections, session });
await retryFn(async () => {
await Promise.all([
// delete training data
MongoDatasetTraining.deleteMany({
teamId,
datasetId: { $in: datasetIds }
}),
//Delete dataset_data_texts
MongoDatasetDataText.deleteMany({
teamId,
datasetId: { $in: datasetIds }
}),
//delete dataset_datas
MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } }),
// Delete Image and file
delCollectionRelatedSource({ collections }),
// Delete vector data
deleteDatasetDataVector({ teamId, datasetIds })
]);
});
// delete collections
await MongoDatasetCollection.deleteMany({
teamId,
datasetId: { $in: datasetIds }
}).session(session);
// No session delete:
// Delete dataset_data_texts
await MongoDatasetDataText.deleteMany({
teamId,
datasetId: { $in: datasetIds }
});
// delete dataset_datas
await MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } });
// Delete vector data
await deleteDatasetDataVector({ teamId, datasetIds });
}

View File

@@ -40,6 +40,7 @@ try {
default_language: 'none'
}
);
DatasetDataTextSchema.index({ teamId: 1, datasetId: 1, collectionId: 1 });
DatasetDataTextSchema.index({ dataId: 1 }, { unique: true });
} catch (error) {
console.log(error);

View File

@@ -62,6 +62,7 @@ export const dispatchLoop = async (props: Props): Promise<Response> => {
const response = await dispatchWorkFlow({
...props,
variables: newVariables,
runtimeEdges: cloneDeep(runtimeEdges)
});

View File

@@ -120,27 +120,144 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
2. Replace newline strings
*/
const replaceJsonBodyString = (text: string) => {
const valToStr = (val: any) => {
// Check if the variable is in quotes
const isVariableInQuotes = (text: string, variable: string) => {
const index = text.indexOf(variable);
if (index === -1) return false;
// 计算变量前面的引号数量
const textBeforeVar = text.substring(0, index);
const matches = textBeforeVar.match(/"/g) || [];
// 如果引号数量为奇数,则变量在引号内
return matches.length % 2 === 1;
};
const valToStr = (val: any, isQuoted = false) => {
if (val === undefined) return 'null';
if (val === null) return 'null';
if (typeof val === 'object') return JSON.stringify(val);
if (typeof val === 'string') {
if (isQuoted) {
return val.replace(/(?<!\\)"/g, '\\"');
}
try {
const parsed = JSON.parse(val);
if (typeof parsed === 'object') {
return JSON.stringify(parsed);
}
JSON.parse(val);
return val;
} catch (error) {
const str = JSON.stringify(val);
return str.startsWith('"') && str.endsWith('"') ? str.slice(1, -1) : str;
}
}
return String(val);
};
// Test cases for variable replacement in JSON body
// const bodyTest = () => {
// const testData = [
// // 基本字符串替换
// {
// body: `{"name":"{{name}}","age":"18"}`,
// variables: [{ key: '{{name}}', value: '测试' }],
// result: `{"name":"测试","age":"18"}`
// },
// // 特殊字符处理
// {
// body: `{"text":"{{text}}"}`,
// variables: [{ key: '{{text}}', value: '包含"引号"和\\反斜杠' }],
// result: `{"text":"包含\\"引号\\"和\\反斜杠"}`
// },
// // 数字类型处理
// {
// body: `{"count":{{count}},"price":{{price}}}`,
// variables: [
// { key: '{{count}}', value: '42' },
// { key: '{{price}}', value: '99.99' }
// ],
// result: `{"count":42,"price":99.99}`
// },
// // 布尔值处理
// {
// body: `{"isActive":{{isActive}},"hasData":{{hasData}}}`,
// variables: [
// { key: '{{isActive}}', value: 'true' },
// { key: '{{hasData}}', value: 'false' }
// ],
// result: `{"isActive":true,"hasData":false}`
// },
// // 对象类型处理
// {
// body: `{"user":{{user}},"user2":"{{user2}}"}`,
// variables: [
// { key: '{{user}}', value: `{"id":1,"name":"张三"}` },
// { key: '{{user2}}', value: `{"id":1,"name":"张三"}` }
// ],
// result: `{"user":{"id":1,"name":"张三"},"user2":"{\\"id\\":1,\\"name\\":\\"张三\\"}"}`
// },
// // 数组类型处理
// {
// body: `{"items":{{items}}}`,
// variables: [{ key: '{{items}}', value: '[1, 2, 3]' }],
// result: `{"items":[1,2,3]}`
// },
// // null 和 undefined 处理
// {
// body: `{"nullValue":{{nullValue}},"undefinedValue":{{undefinedValue}}}`,
// variables: [
// { key: '{{nullValue}}', value: 'null' },
// { key: '{{undefinedValue}}', value: 'undefined' }
// ],
// result: `{"nullValue":null,"undefinedValue":null}`
// },
// // 嵌套JSON结构
// {
// body: `{"data":{"nested":{"value":"{{nestedValue}}"}}}`,
// variables: [{ key: '{{nestedValue}}', value: '嵌套值' }],
// result: `{"data":{"nested":{"value":"嵌套值"}}}`
// },
// // 多变量替换
// {
// body: `{"first":"{{first}}","second":"{{second}}","third":{{third}}}`,
// variables: [
// { key: '{{first}}', value: '第一' },
// { key: '{{second}}', value: '第二' },
// { key: '{{third}}', value: '3' }
// ],
// result: `{"first":"第一","second":"第二","third":3}`
// },
// // JSON字符串作为变量值
// {
// body: `{"config":{{config}}}`,
// variables: [{ key: '{{config}}', value: '{"setting":"enabled","mode":"advanced"}' }],
// result: `{"config":{"setting":"enabled","mode":"advanced"}}`
// }
// ];
// for (let i = 0; i < testData.length; i++) {
// const item = testData[i];
// let bodyStr = item.body;
// for (const variable of item.variables) {
// const isQuote = isVariableInQuotes(bodyStr, variable.key);
// bodyStr = bodyStr.replace(variable.key, valToStr(variable.value, isQuote));
// }
// bodyStr = bodyStr.replace(/(".*?")\s*:\s*undefined\b/g, '$1:null');
// console.log(bodyStr === item.result, i);
// if (bodyStr !== item.result) {
// console.log(bodyStr);
// console.log(item.result);
// } else {
// try {
// JSON.parse(item.result);
// } catch (error) {
// console.log('反序列化异常', i, item.result);
// }
// }
// }
// };
// bodyTest();
// 1. Replace {{key.key}} variables
const regex1 = /\{\{\$([^.]+)\.([^$]+)\$\}\}/g;
@@ -148,6 +265,10 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
matches1.forEach((match) => {
const nodeId = match[1];
const id = match[2];
const fullMatch = match[0];
// 检查变量是否在引号内
const isInQuotes = isVariableInQuotes(text, fullMatch);
const variableVal = (() => {
if (nodeId === VARIABLE_NODE_ID) {
@@ -165,9 +286,9 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
return getReferenceVariableValue({ value: input.value, nodes: runtimeNodes, variables });
})();
const formatVal = valToStr(variableVal);
const formatVal = valToStr(variableVal, isInQuotes);
const regex = new RegExp(`\\{\\{\\$(${nodeId}\\.${id})\\$\\}\\}`, 'g');
const regex = new RegExp(`\\{\\{\\$(${nodeId}\\.${id})\\$\\}\\}`, '');
text = text.replace(regex, () => formatVal);
});
@@ -176,10 +297,16 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
const matches2 = text.match(regex2) || [];
const uniqueKeys2 = [...new Set(matches2.map((match) => match.slice(2, -2)))];
for (const key of uniqueKeys2) {
text = text.replace(new RegExp(`{{(${key})}}`, 'g'), () => valToStr(allVariables[key]));
const fullMatch = `{{${key}}}`;
// 检查变量是否在引号内
const isInQuotes = isVariableInQuotes(text, fullMatch);
text = text.replace(new RegExp(`{{(${key})}}`, ''), () =>
valToStr(allVariables[key], isInQuotes)
);
}
return text.replace(/(".*?")\s*:\s*undefined\b/g, '$1: null');
return text.replace(/(".*?")\s*:\s*undefined\b/g, '$1:null');
};
httpReqUrl = replaceStringVariables(httpReqUrl);

View File

@@ -196,7 +196,8 @@ export async function syncCollaborators({
permission: item.permission
})),
{
session
session,
ordered: true
}
);
}

View File

@@ -43,7 +43,6 @@ async function getTeamMember(match: Record<string, any>): Promise<TeamTmbItemTyp
teamDomain: tmb.team?.teamDomain,
role: tmb.role,
status: tmb.status,
defaultTeam: tmb.defaultTeam,
permission: new TeamPermission({
per: Per ?? TeamDefaultPermissionVal,
isOwner: tmb.role === TeamMemberRoleEnum.owner
@@ -71,8 +70,7 @@ export async function getUserDefaultTeam({ userId }: { userId: string }) {
return Promise.reject('tmbId or userId is required');
}
return getTeamMember({
userId: new Types.ObjectId(userId),
defaultTeam: true
userId: new Types.ObjectId(userId)
});
}

View File

@@ -39,14 +39,14 @@ const TeamMemberSchema = new Schema({
updateTime: {
type: Date
},
defaultTeam: {
type: Boolean,
default: false
},
// Abandoned
role: {
type: String
},
// Abandoned
defaultTeam: {
type: Boolean
}
});

View File

@@ -100,7 +100,7 @@ export const initTeamFreePlan = async ({
surplusPoints: freePoints
}
],
{ session }
{ session, ordered: true }
);
};

View File

@@ -160,7 +160,7 @@ export const createTrainingUsage = async ({
]
}
],
{ session }
{ session, ordered: true }
);
return { billId: String(_id) };

View File

@@ -214,10 +214,15 @@ export function useScrollPagination<
async (init = false, ScrollContainerRef?: RefObject<HTMLDivElement>) => {
if (noMore && !init) return;
if (init) {
setData([]);
setTotal(0);
}
const offset = init ? 0 : data.length;
setTrue();
console.log(offset);
try {
const res = await api({
offset,
@@ -288,7 +293,7 @@ export function useScrollPagination<
// Watch scroll position
useThrottleEffect(
() => {
if (!ref?.current || noMore) return;
if (!ref?.current || noMore || isLoading || data.length === 0) return;
const { scrollTop, scrollHeight, clientHeight } = ref.current;
if (

View File

@@ -24,6 +24,7 @@
"key_type": "API key format:",
"log": "Call log",
"log_detail": "Log details",
"log_request_id_search": "Search by requestId",
"log_status": "Status",
"mapping": "Model Mapping",
"mapping_tip": "A valid Json is required. \nThe model can be mapped when sending a request to the actual address. \nFor example:\n{\n \n \"gpt-4o\": \"gpt-4o-test\"\n\n}\n\nWhen FastGPT requests the gpt-4o model, the gpt-4o-test model is sent to the actual address, instead of gpt-4o.",

View File

@@ -24,6 +24,7 @@
"key_type": "API key 格式: ",
"log": "调用日志",
"log_detail": "日志详情",
"log_request_id_search": "根据 requestId 搜索",
"log_status": "状态",
"mapping": "模型映射",
"mapping_tip": "需填写一个有效 Json。可在向实际地址发送请求时对模型进行映射。例如\n{\n \"gpt-4o\": \"gpt-4o-test\"\n}\n当 FastGPT 请求 gpt-4o 模型时,会向实际地址发送 gpt-4o-test 的模型,而不是 gpt-4o。",

View File

@@ -22,6 +22,7 @@
"key_type": "API key 格式:",
"log": "調用日誌",
"log_detail": "日誌詳情",
"log_request_id_search": "根據 requestId 搜索",
"log_status": "狀態",
"mapping": "模型映射",
"mapping_tip": "需填寫一個有效 Json。\n可在向實際地址發送請求時對模型進行映射。\n例如\n{\n \n \"gpt-4o\": \"gpt-4o-test\"\n\n}\n\n當 FastGPT 請求 gpt-4o 模型時,會向實際地址發送 gpt-4o-test 的模型,而不是 gpt-4o。",

View File

@@ -0,0 +1,3 @@
# 忽略 .git 目录及其内容
.git
.gitignore

25
plugins/webcrawler/.gitignore vendored Normal file
View File

@@ -0,0 +1,25 @@
*~
searxng-docker.service
caddy
srv
searxng/uwsgi.ini
.env
SPIDER/.env
# 忽略 node_modules 文件夹
SPIDER/node_modules/
# 忽略构建输出文件夹
SPIDER/dist/
# 忽略日志文件
*.log
# 忽略操作系统生成的文件
.DS_Store
Thumbs.db
# 忽略 IDE/编辑器生成的文件
.vscode/
.idea/

View File

@@ -0,0 +1,14 @@
# By default listen on https://localhost
# To change this:
# * uncomment SEARXNG_HOSTNAME, and replace <host> by the SearXNG hostname
# * uncomment LETSENCRYPT_EMAIL, and replace <email> by your email (require to create a Let's Encrypt certificate)
# SEARXNG_HOSTNAME=<host>
# LETSENCRYPT_EMAIL=<email>
# Optional:
# If you run a very small or a very large instance, you might want to change the amount of used uwsgi workers and threads per worker
# More workers (= processes) means that more search requests can be handled at the same time, but it also causes more resource usage
SEARXNG_UWSGI_WORKERS=4
SEARXNG_UWSGI_THREADS=4

View File

@@ -0,0 +1,91 @@
{
admin off
log {
output stderr
format filter {
# Preserves first 8 bits from IPv4 and 32 bits from IPv6
request>remote_ip ip_mask 8 32
request>client_ip ip_mask 8 32
# Remove identifiable information
request>remote_port delete
request>headers delete
request>uri query {
delete url
delete h
delete q
}
}
}
}
{$SEARXNG_HOSTNAME}
tls {$SEARXNG_TLS}
encode zstd gzip
@api {
path /config
path /healthz
path /stats/errors
path /stats/checker
}
@search {
path /search
}
@imageproxy {
path /image_proxy
}
@static {
path /static/*
}
header {
# CSP (https://content-security-policy.com)
Content-Security-Policy "upgrade-insecure-requests; default-src 'none'; script-src 'self'; style-src 'self' 'unsafe-inline'; form-action 'self' https://github.com/searxng/searxng/issues/new; font-src 'self'; frame-ancestors 'self'; base-uri 'self'; connect-src 'self' https://overpass-api.de; img-src * data:; frame-src https://www.youtube-nocookie.com https://player.vimeo.com https://www.dailymotion.com https://www.deezer.com https://www.mixcloud.com https://w.soundcloud.com https://embed.spotify.com;"
# Disable some browser features
Permissions-Policy "accelerometer=(),camera=(),geolocation=(),gyroscope=(),magnetometer=(),microphone=(),payment=(),usb=()"
# Set referrer policy
Referrer-Policy "no-referrer"
# Force clients to use HTTPS
Strict-Transport-Security "max-age=31536000"
# Prevent MIME type sniffing from the declared Content-Type
X-Content-Type-Options "nosniff"
# X-Robots-Tag (comment to allow site indexing)
X-Robots-Tag "noindex, noarchive, nofollow"
# Remove "Server" header
-Server
}
header @api {
Access-Control-Allow-Methods "GET, OPTIONS"
Access-Control-Allow-Origin "*"
}
route {
# Cache policy
header Cache-Control "max-age=0, no-store"
header @search Cache-Control "max-age=5, private"
header @imageproxy Cache-Control "max-age=604800, public"
header @static Cache-Control "max-age=31536000, public, immutable"
}
# SearXNG (uWSGI)
reverse_proxy localhost:8080 {
header_up X-Forwarded-Port ""
header_up X-Real-IP ""
# https://github.com/searx/searx-docker/issues/24
header_up Connection "close"
}

View File

@@ -0,0 +1,57 @@
FROM node:20.10.0-slim
WORKDIR /app

# Install Chromium, the shared libraries it needs at runtime, and a CJK font
# (fonts-wqy-microhei) so Chinese pages render correctly.
# Everything is installed in ONE layer and the apt package lists are removed in
# that same layer, so they never end up in the final image.
RUN apt-get update && apt-get install -y \
    ca-certificates \
    fonts-liberation \
    libasound2 \
    libatk-bridge2.0-0 \
    libatk1.0-0 \
    libc6 \
    libcairo2 \
    libcups2 \
    libdbus-1-3 \
    libexpat1 \
    libfontconfig1 \
    libgbm1 \
    libgcc1 \
    libglib2.0-0 \
    libgtk-3-0 \
    libnspr4 \
    libnss3 \
    libpango-1.0-0 \
    libpangocairo-1.0-0 \
    libstdc++6 \
    libx11-6 \
    libx11-xcb1 \
    libxcb1 \
    libxcomposite1 \
    libxcursor1 \
    libxdamage1 \
    libxext6 \
    libxfixes3 \
    libxi6 \
    libxrandr2 \
    libxrender1 \
    libxss1 \
    libxtst6 \
    lsb-release \
    wget \
    xdg-utils \
    chromium \
    fonts-wqy-microhei \
    && fc-cache -f -v \
    && rm -rf /var/lib/apt/lists/*

# Copy the SPIDER service sources into the image.
# NOTE(review): there is no dependency-install step in this file, so node_modules
# is presumably expected to be present in SPIDER/ before the image is built - confirm.
COPY SPIDER/. .

# Fail the build early when required files are missing from the build context.
RUN test -f package.json || (echo "package.json missing" && exit 1)
RUN test -f .env || (echo ".env file missing in SPIDER directory" && exit 1)

RUN npm run build

EXPOSE 3000
CMD ["npm", "start"]

View File

@@ -0,0 +1,73 @@
# webcrawler
## docker版快速部署
## 代码版部署
0. 按照 https://github.com/searxng/searxng-docker 的方式处理docker
1. 参考SPIDER文件夹下的.env.example添加.env文件
2. 进入SPIDER文件夹进行pnpm install
3. 回到根目录运行docker compose up -d
## 代码版开发
1. 将docker-compose.yml中与SPIDER相关的部分(即nodeapp)注释掉
2. .env文件中的URL参照注释修改
3. 注释掉启动puppeteer部分里面指定浏览器地址的代码
4. pnpm run dev
## 测试样例:
Auth的Bearer Token记得填,也就是.env里的ACCESS_TOKEN
### 读取单页面(content以HTML形式返回)
```
http://localhost:3000/api/read?queryUrl=<url>
```
返回结构
```json
{
"status": 200,
"data": {
"title": "something here",
"content": "something here"
}
}
{
"status": 400,
"error": {
"code": "MISSING_PARAM",
"message": "缺少必要参数: queryUrl"
}
}
```
### 搜索(content以HTML形式返回)
```
http://localhost:3000/api/search?query=<something>&pageCount=5&needDetails=true&engine=baidu
```
```json
{
"status": 200,
"data": {
"results": [
{
"title": "string",
"url": "string",
"snippet": "string",
"source": "string",
"crawlStatus": "string",
"score": 0,
"content": "string"
}
]
}
}
{
"status": 400,
"error": {
"code": "MISSING_PARAM",
"message": "缺少必要参数: query"
}
}
```

View File

@@ -0,0 +1,23 @@
ACCESS_TOKEN=114514
DETECT_WEBSITES = zhuanlan.zhihu.com
STRATEGIES=[{"waitUntil":"networkidle0","timeout":5000},{"waitUntil":"networkidle2","timeout":10000},{"waitUntil":"load","timeout":15000}]
PORT=3000
MAX_CONCURRENCY=10
NODE_ENV=development
ENGINE = [
]
ENGINE_BAIDUURL=https://www.baidu.com/s
#ENGINE_SEARCHXNGURL=http://localhost:8080/search
ENGINE_SEARCHXNGURL=http://searxng:8080/search
#MONGODB_URI=mongodb://root:example@localhost:27017
MONGODB_URI=mongodb://root:example@mongodb:27017
BLACKLIST = [".gov.cn",".edu.cn"]
STD_TTL=3600
EXPIRE_AFTER_SECONDS=9000
#VALIDATE_PROXY=[{"ip":"","port":},{"ip":"","port":}]

5804
plugins/webcrawler/SPIDER/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,62 @@
{
"name": "spider",
"version": "1.0.0",
"description": "",
"main": "/dist/index.ts",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"start": "ts-node src/index.ts",
"build": "webpack",
"dev": "ts-node-dev --respawn src/index.ts"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"@types/node-fetch": "^2.6.12",
"assert": "^2.1.0",
"axios": "^1.7.9",
"body-parser": "^1.20.3",
"browserify-zlib": "^0.2.0",
"buffer": "^6.0.3",
"cheerio": "^1.0.0",
"crypto-browserify": "^3.12.1",
"dotenv": "^16.4.7",
"express": "^4.21.2",
"https-proxy-agent": "^7.0.6",
"jsdom": "^26.0.0",
"mongodb": "^6.13.1",
"node-cache": "^5.1.2",
"node-fetch": "^2.7.0",
"os-browserify": "^0.3.0",
"path-browserify": "^1.0.1",
"puppeteer": "^24.2.1",
"puppeteer-cluster": "^0.24.0",
"querystring-es3": "^0.2.1",
"random-useragent": "^0.5.0",
"spider": "file:",
"stream-browserify": "^3.0.0",
"stream-http": "^3.2.0",
"string_decoder": "^1.3.0",
"turndown": "^7.2.0",
"turndown-plugin-gfm": "^1.0.2",
"url": "^0.11.4",
"user-agents": "^1.1.454",
"util": "^0.12.5",
"vm-browserify": "^1.1.2"
},
"devDependencies": {
"@types/body-parser": "^1.19.5",
"@types/express": "^5.0.0",
"@types/jsdom": "^21.1.7",
"@types/node": "^22.13.4",
"@types/random-useragent": "^0.3.3",
"@types/user-agents": "^1.0.4",
"ts-loader": "^9.5.2",
"ts-node-dev": "^2.0.0",
"typescript": "^5.7.3",
"webpack": "^5.98.0",
"webpack-cli": "^6.0.1",
"webpack-node-externals": "^3.0.0"
}
}

View File

@@ -0,0 +1,60 @@
import { Request, Response } from 'express';
import fetch from 'node-fetch';
import dotenv from 'dotenv';
dotenv.config();
// Pool of desktop browser User-Agent strings; quickFetch picks one at random per request.
const userAgents = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0'
];
/**
 * GET /api/quickFetch - fetch a page with a plain HTTP request and return its raw body.
 *
 * Query parameters:
 *   - `url`: absolute http(s) URL of the page to fetch (required, single value).
 *
 * Responses:
 *   - 200 `{ status, data: { content } }` on success
 *   - 400 `MISSING_PARAM` when `url` is absent
 *   - 400 `INVALID_PARAM` when `url` is not a single absolute http(s) URL
 *   - 500 `INTERNAL_SERVER_ERROR` when the request fails or the upstream status is not 2xx
 */
export const quickFetch = async (req: Request, res: Response): Promise<void> => {
  const { url } = req.query;

  if (!url) {
    res.status(400).json({
      status: 400,
      error: {
        code: 'MISSING_PARAM',
        message: '缺少必要参数: url'
      }
    });
    return;
  }

  // Express turns repeated or bracketed query keys into arrays/objects, so the value
  // must be checked rather than cast. Only http(s) targets are accepted.
  // NOTE(review): this does not block internal/private addresses; add an allow/deny
  // list here if this endpoint is reachable by untrusted callers.
  let target: URL | null = null;
  if (typeof url === 'string') {
    try {
      target = new URL(url);
    } catch {
      target = null;
    }
  }
  if (!target || (target.protocol !== 'http:' && target.protocol !== 'https:')) {
    res.status(400).json({
      status: 400,
      error: {
        code: 'INVALID_PARAM',
        message: '参数 url 必须是合法的 http(s) 地址'
      }
    });
    return;
  }

  try {
    const response = await fetch(target.toString(), {
      headers: {
        // Rotate the User-Agent per request.
        'User-Agent': userAgents[Math.floor(Math.random() * userAgents.length)],
        Referer: 'https://www.google.com/',
        'Accept-Language': 'en-US,en;q=0.9',
        Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        Connection: 'keep-alive',
        'Cache-Control': 'no-cache'
      }
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    const data = await response.text();
    res.status(200).json({
      status: 200,
      data: {
        content: data
      }
    });
  } catch (error) {
    console.error('Error fetching the page:', error);
    res.status(500).json({
      status: 500,
      error: {
        code: 'INTERNAL_SERVER_ERROR',
        message: '发生错误'
      }
    });
  }
};
export default { quickFetch };

View File

@@ -0,0 +1,148 @@
import { Request, Response } from 'express';
import puppeteer, { Page } from 'puppeteer';
import * as cheerio from 'cheerio';
import UserAgent from 'user-agents';
import { setupPage } from '../utils/setupPage'; // 导入 setupPage 模块
import dotenv from 'dotenv'; // 导入 dotenv 模块
import { URL } from 'url'; // 导入 URL 模块
import { handleSpecialWebsite } from '../specialHandlers'; // 导入 handleSpecialWebsite 模块
import fetch from 'node-fetch';
import { getCachedPage, updateCacheAsync } from '../utils/cacheUpdater'; // 导入缓存相关模块
dotenv.config(); // Load environment variables
// Comma-separated substrings from DETECT_WEBSITES; readPage calls setupPage() for URLs containing any of them.
const detectWebsites = process.env.DETECT_WEBSITES?.split(',') || [];
// JSON array of hostname suffixes from BLACKLIST; readPage answers 403 for hostnames ending with any of them.
const blacklistDomains = process.env.BLACKLIST ? JSON.parse(process.env.BLACKLIST) : [];
/**
 * GET /api/read - fetch a single page and return its title and `<body>` HTML.
 *
 * Strategy:
 *   1. Try a plain HTTP fetch (no browser involved).
 *   2. If that fails, load the page in headless Chromium through puppeteer.
 * After a successful read the content is written to the page cache on a
 * best-effort basis; a cache failure never affects the HTTP response.
 *
 * Responses:
 *   - 200 `{ status, data: { title, content } }`
 *   - 400 `MISSING_PARAM` when `queryUrl` is absent
 *   - 400 `INVALID_PARAM` when `queryUrl` is not a single absolute http(s) URL
 *   - 403 `BLACKLISTED_DOMAIN` when the hostname ends with a blacklisted suffix
 *   - 500 `INTERNAL_SERVER_ERROR` when both strategies fail
 */
export const readPage = async (req: Request, res: Response): Promise<void> => {
  const { queryUrl } = req.query;
  console.log('readPage queryUrl:', queryUrl);

  if (!queryUrl) {
    res.status(400).json({
      status: 400,
      error: {
        code: 'MISSING_PARAM',
        message: '缺少必要参数: queryUrl'
      }
    });
    return;
  }

  const rejectInvalidUrl = (): void => {
    res.status(400).json({
      status: 400,
      error: {
        code: 'INVALID_PARAM',
        message: '参数 queryUrl 必须是合法的 http(s) 地址'
      }
    });
  };

  // Express may hand back arrays/objects for repeated or bracketed keys.
  if (typeof queryUrl !== 'string') {
    rejectInvalidUrl();
    return;
  }

  // Parse up front: an unparsable URL must produce a 400, not an unhandled rejection.
  let parsedUrl: URL;
  try {
    parsedUrl = new URL(queryUrl);
  } catch {
    rejectInvalidUrl();
    return;
  }
  if (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:') {
    rejectInvalidUrl();
    return;
  }

  const urlDomain = parsedUrl.hostname;
  if (blacklistDomains.some((domain: string) => urlDomain.endsWith(domain))) {
    res.status(403).json({
      status: 403,
      error: {
        code: 'BLACKLISTED_DOMAIN',
        message: '该域名受到保护中'
      }
    });
    return;
  }

  // Cache writes happen after the response is sent, so they must never throw:
  // a thrown error would otherwise be mistaken for a failed read.
  const cachePage = async (content: string): Promise<void> => {
    try {
      await updateCacheAsync(queryUrl, content);
    } catch (error) {
      console.error('更新页面缓存时发生错误:', error);
    }
  };

  // --- Strategy 1: plain HTTP fetch -------------------------------------------
  try {
    const response = await fetch(queryUrl, {
      headers: {
        'User-Agent': new UserAgent({
          deviceCategory: 'desktop',
          platform: 'Linux x86_64'
        }).toString(),
        Referer: 'https://www.google.com/',
        'Accept-Language': 'en-US,en;q=0.9',
        Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        Connection: 'keep-alive',
        'Cache-Control': 'no-cache'
      }
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    const html = await response.text();
    const $ = cheerio.load(html);
    const cleanedContent = $('body').html();

    res.status(200).json({
      status: 200,
      data: {
        title: $('title').text(),
        content: cleanedContent
      }
    });
    await cachePage(cleanedContent || '');
    console.log('Page read successfully');
    return;
  } catch (error) {
    console.error('快速抓取页面时发生错误:', error);
  }

  // Never attempt a second response if the first strategy already answered.
  if (res.headersSent) {
    return;
  }

  // --- Strategy 2: headless Chromium ------------------------------------------
  let browser: Awaited<ReturnType<typeof puppeteer.launch>> | undefined;
  try {
    browser = await puppeteer.launch({
      ignoreDefaultArgs: ['--enable-automation'],
      headless: true,
      executablePath: '/usr/bin/chromium', // Explicit Chromium path
      pipe: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu'
        // '--single-process'
      ]
    });
    const page = await browser.newPage();

    // Sites listed in DETECT_WEBSITES get the full setupPage() treatment;
    // everything else only gets a desktop User-Agent.
    if (detectWebsites.some((website) => queryUrl.includes(website))) {
      await setupPage(page);
    } else {
      const userAgent = new UserAgent({ deviceCategory: 'desktop', platform: 'Linux x86_64' });
      await page.setUserAgent(userAgent.toString());
    }

    await page.goto(parsedUrl.toString(), { waitUntil: 'load' });
    await page.waitForSelector('body');

    const title = await page.title();
    let cleanedContent = await handleSpecialWebsite(page, queryUrl);
    if (!cleanedContent) {
      const content = await page.content();
      const $ = cheerio.load(content);
      cleanedContent = $('body').html();
    }

    res.status(200).json({
      status: 200,
      data: {
        title,
        content: cleanedContent
      }
    });
    await cachePage(cleanedContent || '');
    console.log('Page read successfully');
  } catch (error) {
    console.error(error);
    if (!res.headersSent) {
      res.status(500).json({
        status: 500,
        error: {
          code: 'INTERNAL_SERVER_ERROR',
          message: '读取页面时发生内部服务器错误'
        }
      });
    }
  } finally {
    // Always release Chromium, including when navigation or parsing failed.
    if (browser) {
      await browser.close().catch((closeError: unknown) => {
        console.error('关闭浏览器时发生错误:', closeError);
      });
    }
  }
};

View File

@@ -0,0 +1,132 @@
import { Request, Response } from 'express';
import { Cluster } from 'puppeteer-cluster';
import dotenv from 'dotenv';
import { performDeepSearch } from '../utils/deepSearch';
import { fetchSearchResults as fetchBaiduResults } from '../engines/baiduEngine';
import { fetchSearchResults as fetchSearchxngResults } from '../engines/searchxngEngine';
dotenv.config();
// Page-load strategies parsed from the STRATEGIES env var (JSON array; empty when unset); passed to performDeepSearch.
const strategies = JSON.parse(process.env.STRATEGIES || '[]');
// Comma-separated list from DETECT_WEBSITES; passed to performDeepSearch.
const detectWebsites = process.env.DETECT_WEBSITES?.split(',') || [];
// maxConcurrency option for the puppeteer cluster, from MAX_CONCURRENCY (default 10).
const maxConcurrency = parseInt(process.env.MAX_CONCURRENCY || '10', 10);
/**
 * GET /api/search - run a web search and optionally crawl each hit.
 *
 * Query parameters:
 *   - `query` (required): search terms.
 *   - `pageCount` (default 10): number of results wanted; values that are not a
 *     number >= 1 fall back to 10.
 *   - `needDetails` (default 'false'): when 'true', each hit is crawled and its
 *     content is attached to the result.
 *   - `engine` (default 'baidu'): 'baidu' or 'searchxng'.
 *   - `categories` (default 'general'): forwarded to the engine.
 *
 * Responses:
 *   - 200 `{ status, data: { results } }`
 *   - 400 `MISSING_PARAM` / `INVALID_PARAM` / `INVALID_ENGINE`
 *   - 500 `INTERNAL_SERVER_ERROR`
 */
export const search = async (req: Request, res: Response): Promise<void> => {
  const {
    query,
    pageCount = 10,
    needDetails = 'false',
    engine = 'baidu',
    categories = 'general'
  } = req.query;
  const needDetailsBool = needDetails === 'true';

  if (!query) {
    res.status(400).json({
      status: 400,
      error: {
        code: 'MISSING_PARAM',
        message: '缺少必要参数: query'
      }
    });
    return;
  }
  // Express may hand back arrays/objects for repeated or bracketed keys.
  if (typeof query !== 'string') {
    res.status(400).json({
      status: 400,
      error: {
        code: 'INVALID_PARAM',
        message: '参数 query 必须是字符串'
      }
    });
    return;
  }

  // A non-numeric pageCount would become NaN and silently produce an empty result list.
  const requestedCount = Number(pageCount);
  const resultCount =
    Number.isFinite(requestedCount) && requestedCount >= 1 ? Math.floor(requestedCount) : 10;
  const categoryList = typeof categories === 'string' ? categories : 'general';

  let fetchSearchResults;
  let searchUrlBase;

  try {
    if (engine === 'baidu') {
      fetchSearchResults = fetchBaiduResults;
      searchUrlBase = process.env.ENGINE_BAIDUURL;
    } else if (engine === 'searchxng') {
      fetchSearchResults = fetchSearchxngResults;
      searchUrlBase = process.env.ENGINE_SEARCHXNGURL;
    } else {
      res.status(400).json({
        status: 400,
        error: {
          code: 'INVALID_ENGINE',
          message: '无效的搜索引擎'
        }
      });
      return;
    }

    const { resultUrls, results } = await fetchSearchResults(
      query,
      resultCount,
      searchUrlBase || '',
      categoryList
    );

    // Nothing found: answer with an empty list.
    if (results.size === 0) {
      console.log('No results found');
      res.status(200).json({
        status: 200,
        data: {
          results: []
        }
      });
      return;
    }

    if (!needDetailsBool) {
      console.log('Need details is false');
      // No crawl is performed in this mode, so nothing is left pending.
      results.forEach((value: any) => {
        if (value.crawlStatus === 'Pending') {
          value.crawlStatus = 'Success';
        }
      });
      res.status(200).json({
        status: 200,
        data: {
          results: Array.from(results.values())
        }
      });
      return;
    }

    console.log('Need details is true');
    const clusterInstance = await Cluster.launch({
      concurrency: Cluster.CONCURRENCY_CONTEXT,
      maxConcurrency: maxConcurrency,
      puppeteerOptions: {
        ignoreDefaultArgs: ['--enable-automation'],
        headless: true,
        executablePath: '/usr/bin/chromium', // Explicit Chromium path
        pipe: true,
        args: [
          '--no-sandbox',
          '--disable-setuid-sandbox',
          '--disable-dev-shm-usage',
          '--disable-gpu'
        ]
      }
    });

    let sortedResults: Awaited<ReturnType<typeof performDeepSearch>>;
    try {
      sortedResults = await performDeepSearch(
        clusterInstance,
        resultUrls,
        results,
        strategies,
        detectWebsites,
        resultCount
      );
    } catch (error) {
      // If the deep search threw, the cluster may still be running; close it so
      // Chromium is not leaked. Errors from a repeated close are ignored.
      await clusterInstance.close().catch(() => undefined);
      throw error;
    }

    res.status(200).json({
      status: 200,
      data: {
        results: sortedResults.slice(0, resultCount)
      }
    });
  } catch (error) {
    console.error('搜索时发生错误:', error);
    if (!res.headersSent) {
      res.status(500).json({
        status: 500,
        error: {
          code: 'INTERNAL_SERVER_ERROR',
          message: '发生错误'
        }
      });
    }
  }
};
export default { search };

View File

@@ -0,0 +1,207 @@
import { URL } from 'url';
import { JSDOM } from 'jsdom';
import puppeteer from 'puppeteer';
import { setupPage } from '../utils/setupPage';
import { Cluster } from 'puppeteer-cluster';
/**
 * Pause for a random number of milliseconds.
 * For integer bounds the pause is a whole number between `min` and `max`, both inclusive.
 */
async function randomWait(min: number, max: number) {
  const span = max - min + 1;
  const pauseMs = Math.floor(Math.random() * span) + min;
  return new Promise((wake) => {
    setTimeout(wake, pauseMs);
  });
}
/**
 * Scrape Baidu result pages and resolve Baidu's redirect links to the real target URLs.
 *
 * Steps:
 *   1. Load ceil(pageCount / 10) result pages in headless Chromium and collect
 *      title / link / snippet for each hit (up to 5 attempts per page).
 *   2. Resolve every collected link with a plain HTTP request and take the final URL.
 *   3. Links that could not be resolved that way are opened in a puppeteer cluster
 *      (up to 3 attempts each). The cluster is only started when it is needed.
 *
 * @param query         Search terms.
 * @param pageCount     Number of results wanted.
 * @param searchUrlBase Baidu search endpoint; when empty, an empty result is returned.
 * @param categories    Unused by this engine; present so both engines share one signature.
 * @returns `resultUrls` (resolved URLs, at most `pageCount`) and `results`, a Map keyed by
 *          resolved URL whose values carry title, url, snippet, source, crawlStatus, score.
 */
export const fetchSearchResults = async (
  query: string,
  pageCount: number,
  searchUrlBase: string,
  categories: string
) => {
  console.log(`Fetching Baidu search results for query: ${query}`);

  // Without an endpoint there is nothing to query.
  if (!searchUrlBase) {
    return { resultUrls: [], results: new Map() };
  }

  const resultUrls: string[] = [];
  const results = new Map<string, any>();
  const pagesToFetch = Math.ceil(pageCount / 10);

  const browser = await puppeteer.launch({
    ignoreDefaultArgs: ['--enable-automation'],
    headless: true,
    executablePath: '/usr/bin/chromium', // Explicit Chromium path
    pipe: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-gpu'
      // '--single-process'
    ]
  });

  try {
    const page = await browser.newPage();
    await setupPage(page);

    for (let i = 0; i < pagesToFetch; i++) {
      const searchUrl = new URL(`${searchUrlBase}?wd=${encodeURIComponent(query)}&pn=${i * 10}`);
      console.log(`Fetching page ${i + 1} from Baidu: ${searchUrl.toString()}`);

      let retryCount = 0;
      let success = false;

      while (retryCount < 5 && !success) {
        try {
          const loadLabel = `Page Load Time for page ${i + 1}`;
          console.time(loadLabel);
          try {
            await page.goto(searchUrl.toString(), { waitUntil: 'load' });
          } finally {
            // End the timer even when navigation fails, so the label can be reused on retry.
            console.timeEnd(loadLabel);
          }

          const html = await page.content();
          const document = new JSDOM(html).window.document;
          console.log(document.title);

          // Baidu served its security-verification page: re-prepare the page and retry.
          if (document.title.includes('百度安全验证')) {
            console.log('Detected Baidu security verification, retrying...');
            await setupPage(page);
            retryCount++;
            await randomWait(1000, 3000);
            continue;
          }

          // Parse the hits on this result page.
          console.time(`Link Retrieval Time for page ${i + 1}`);
          const resultContainers = document.querySelectorAll('.result.c-container');
          for (const result of resultContainers) {
            if (resultUrls.length > pageCount + 5) {
              break;
            }
            const titleElement = result.querySelector('h3 a');
            const title = titleElement ? titleElement.textContent : '';
            const url = titleElement ? titleElement.getAttribute('href') : '';
            const contentElement = result.querySelector('[class^="content"]');
            const snippet = contentElement ? contentElement.textContent : '';

            if (url) {
              resultUrls.push(url);
              results.set(url, {
                title,
                url,
                snippet,
                source: 'baidu',
                crawlStatus: 'Pending',
                score: 0
              });
            }
          }
          console.timeEnd(`Link Retrieval Time for page ${i + 1}`);
          success = true;
        } catch (error) {
          console.error(`Error fetching page ${i + 1}:`, error);
          retryCount++;
        }
      }
    }
  } finally {
    // Release Chromium even when page setup or scraping threw.
    await browser.close();
  }

  console.log('fetch all fake urls');

  // Fast path: follow each redirect link with a plain HTTP request.
  const urlsToProcessWithPuppeteer: string[] = [];
  for (const url of resultUrls) {
    try {
      const response = await fetch(url, {
        headers: {
          'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
          Referer: 'https://www.google.com/',
          'Accept-Language': 'en-US,en;q=0.9',
          Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
          Connection: 'keep-alive',
          'Cache-Control': 'no-cache'
        }
      });

      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }

      const realUrl = response.url;
      console.log('realurl:', realUrl);
      const result = results.get(url);
      if (result) {
        result.url = realUrl;
        result.crawlStatus = 'Success';
      }
    } catch (error) {
      console.error(`Error fetching original URL for ${url}:`, error);
      urlsToProcessWithPuppeteer.push(url);
    }
  }

  console.log('pass quickfetch');

  // Slow path: resolve the remaining links in a browser cluster.
  let failedUrlCount = 0;
  if (urlsToProcessWithPuppeteer.length > 0) {
    const cluster = await Cluster.launch({
      concurrency: Cluster.CONCURRENCY_CONTEXT,
      maxConcurrency: 10,
      puppeteerOptions: {
        ignoreDefaultArgs: ['--enable-automation'],
        headless: true,
        executablePath: '/usr/bin/chromium', // Explicit Chromium path
        pipe: true,
        args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-gpu']
      }
    });

    try {
      await cluster.task(async ({ page, data: url }) => {
        let retryUrlCount = 0;
        let urlSuccess = false;

        while (retryUrlCount < 3 && !urlSuccess) {
          console.log(`Fetching original URL for ${url}, attempt ${retryUrlCount + 1}`);
          try {
            await page.goto(url, { waitUntil: 'load' });

            if (page.isClosed()) {
              throw new Error('Page has been closed');
            }

            const realUrl = page.url(); // URL after redirects
            const result = results.get(url);
            if (result) {
              result.url = realUrl;
              result.crawlStatus = 'Success';
            }
            urlSuccess = true;
          } catch (error) {
            console.error(`Error fetching original URL, retrying...`, error);
            retryUrlCount++;
            await randomWait(1000, 3000);
          }
        }

        if (!urlSuccess) {
          failedUrlCount++;
        }
      });

      for (const url of urlsToProcessWithPuppeteer) {
        void cluster.queue(url);
      }

      await cluster.idle();
    } finally {
      await cluster.close();
    }
  }

  console.log(`Number of URLs that failed to return a real URL: ${failedUrlCount}`);

  // Keep only the first pageCount results and re-key the map by resolved URL.
  const filteredResults = Array.from(results.values()).slice(0, pageCount);

  return {
    resultUrls: filteredResults.map((result) => result.url),
    results: new Map(filteredResults.map((result) => [result.url, result]))
  };
};

View File

@@ -0,0 +1,64 @@
import axios from 'axios';
import { URL } from 'url';
import dotenv from 'dotenv';
dotenv.config();
// JSON array of hostname suffixes from BLACKLIST; results whose hostname ends with any of them are skipped.
const blacklistDomains = process.env.BLACKLIST ? JSON.parse(process.env.BLACKLIST) : [];
/**
 * Query a SearXNG instance through its JSON API and collect up to `pageCount` unique results.
 *
 * Results are skipped when their URL cannot be parsed, when the hostname ends with a
 * blacklisted suffix or contains 'zhihu', or when the same URL was already collected.
 *
 * @param query         Search terms.
 * @param pageCount     Number of results wanted.
 * @param searchUrlBase SearXNG search endpoint; when empty, an empty result is returned.
 * @param categories    SearXNG categories value, sent as the `categories` query parameter.
 * @returns `resultUrls` in discovery order and `results`, a Map keyed by URL whose values
 *          carry title, url, snippet, source, crawlStatus, score.
 */
export const fetchSearchResults = async (
  query: string,
  pageCount: number,
  searchUrlBase: string,
  categories: string
) => {
  // Upper bound on the number of result pages requested.
  const MAX_PAGES = (pageCount / 10 + 1) * 2 + 1;

  // Without an endpoint there is nothing to query.
  if (!searchUrlBase) {
    return { resultUrls: [], results: new Map() };
  }

  const resultUrls: string[] = [];
  const results = new Map<string, any>();
  let pageIndex = 0;

  while (results.size < pageCount && pageIndex < MAX_PAGES) {
    // Build the query through searchParams so every value is encoded.
    const searchUrl = new URL(searchUrlBase);
    searchUrl.searchParams.set('q', query);
    searchUrl.searchParams.set('pageno', String(pageIndex + 1));
    searchUrl.searchParams.set('format', 'json');
    searchUrl.searchParams.set('categories', categories);

    console.log(`Fetching page ${pageIndex + 1} from SearchXNG: ${searchUrl.toString()}`);

    const response = await axios.get(searchUrl.toString());
    const pageResults: any[] = Array.isArray(response.data?.results) ? response.data.results : [];

    // No more results: stop paging.
    if (pageResults.length === 0) {
      break;
    }

    for (const result of pageResults) {
      if (results.size >= pageCount) {
        break;
      }

      // One malformed URL in the payload must not abort the whole search.
      let resultDomain: string;
      try {
        resultDomain = new URL(result.url).hostname;
      } catch {
        continue;
      }

      const isBlocked =
        blacklistDomains.some((domain: string) => resultDomain.endsWith(domain)) ||
        resultDomain.includes('zhihu');
      // The same URL can come back on several pages; count it only once.
      if (isBlocked || results.has(result.url)) {
        continue;
      }

      resultUrls.push(result.url);
      results.set(result.url, {
        title: result.title,
        url: result.url,
        snippet: result.content,
        source: result.engine,
        crawlStatus: 'Pending',
        score: result.score
      });
    }

    pageIndex++;
  }

  return { resultUrls, results };
};

View File

@@ -0,0 +1,18 @@
import express, { Application } from 'express';
import bodyParser from 'body-parser';
import searchRoutes from './routes/searchRoutes';
import readRoutes from './routes/readRoutes';
import quickfetchRoutes from './routes/quickfetchRoutes';
import dotenv from 'dotenv';
dotenv.config();
// Build the Express application: JSON body parsing first, then every router under /api.
const app: Application = express();
app.use(bodyParser.json());

for (const router of [searchRoutes, readRoutes, quickfetchRoutes]) {
  app.use('/api', router);
}

// Listen on PORT from the environment, falling back to 3000.
const PORT = process.env.PORT || 3000;
app.listen(PORT, () => {
  console.log(`Server running on port ${PORT}`);
});

View File

@@ -0,0 +1,21 @@
import { Request, Response, NextFunction } from 'express';
/**
 * Express middleware guarding the API with a shared token.
 *
 * The second space-separated part of the `Authorization` header must equal the
 * ACCESS_TOKEN environment variable.
 *   - no Authorization header            -> 401
 *   - token missing or not matching      -> 403
 *   - ACCESS_TOKEN not configured        -> 403 (nothing can authenticate)
 */
const authMiddleware = (req: Request, res: Response, next: NextFunction) => {
  const bearerHeader = req.headers['authorization'];

  if (!bearerHeader) {
    res.status(401).json({ message: 'Bearer token not found' });
    return;
  }

  // The header carries the secret, so it is deliberately never logged.
  const bearerToken = bearerHeader.split(' ')[1];
  const expectedToken = process.env.ACCESS_TOKEN;

  // Both sides must be non-empty: otherwise a header without a token would match
  // an unset ACCESS_TOKEN (undefined === undefined) and be let through.
  if (expectedToken && bearerToken && bearerToken === expectedToken) {
    next();
  } else {
    res.status(403).json({ message: 'Invalid token' });
  }
};
export default authMiddleware;

View File

@@ -0,0 +1,9 @@
import express from 'express';
import { quickFetch } from '../controllers/quickfetchController';
import authMiddleware from '../middleware/authMiddleware';
// Router exposing GET /quickFetch; authMiddleware runs before the quickFetch handler.
const quickfetchRoutes = express.Router();
quickfetchRoutes.get('/quickFetch', authMiddleware, quickFetch);
export default quickfetchRoutes;

View File

@@ -0,0 +1,9 @@
import express from 'express';
import { readPage } from '../controllers/readController';
import authMiddleware from '../middleware/authMiddleware';
// Router exposing GET /read; authMiddleware runs before the readPage handler.
const readRoutes = express.Router();
readRoutes.get('/read', authMiddleware, readPage);
export default readRoutes;

View File

@@ -0,0 +1,9 @@
import express from 'express';
import searchController from '../controllers/searchController';
import authMiddleware from '../middleware/authMiddleware';
// Router exposing GET /search; authMiddleware runs before searchController.search.
const searchRoutes = express.Router();
searchRoutes.get('/search', authMiddleware, searchController.search);
export default searchRoutes;

View File

@@ -0,0 +1,26 @@
import { Page } from 'puppeteer';
/**
 * Site-specific content extraction for pages that need more than "take the body".
 *
 * @param page An already-navigated puppeteer page.
 * @param url  The URL that was requested; matched by substring.
 * @returns The inner HTML of the page's `<article>` element for supported sites, or
 *          `null` when the site has no special handling or zhihu answered with its
 *          rate-limit error payload. The caller is expected to fall back to generic
 *          extraction on `null`.
 */
export const handleSpecialWebsite = async (page: Page, url: string): Promise<string | null> => {
  if (url.includes('blog.csdn.net')) {
    await page.waitForSelector('article');
    return page.$eval('article', (el) => el.innerHTML);
  }

  if (url.includes('zhuanlan.zhihu.com')) {
    console.log('是知乎,需要点击按掉!');

    // Serialize the page once; the full HTML is intentionally not written to the log.
    const html = await page.content();
    if (
      html.includes(
        '{"error":{"message":"您当前请求存在异常,暂时限制本次访问。如有疑问,您可以通过手机摇一摇或登录后私信知乎小管家反馈。","code":40362}}'
      )
    ) {
      return null;
    }

    // Dismiss the dialog via its aria-label="关闭" button before reading the article.
    await page.waitForSelector('button[aria-label="关闭"]');
    await page.click('button[aria-label="关闭"]');
    await page.waitForSelector('article');
    return page.$eval('article', (el) => el.innerHTML);
  }

  // Handlers for further sites can be added here.
  return null;
};

View File

@@ -0,0 +1,77 @@
import NodeCache from 'node-cache';
import { MongoClient } from 'mongodb';
import crypto from 'crypto';
import dotenv from 'dotenv';
dotenv.config();
// In-process cache; its stdTTL option is taken from STD_TTL ('3600' when unset).
const cache = new NodeCache({ stdTTL: parseInt(process.env.STD_TTL || '3600') });
// Shared MongoDB client, pointed at MONGODB_URI or a local default.
const mongoClient = new MongoClient(process.env.MONGODB_URI || 'mongodb://localhost:27017');
// Database and collection used for the persistent page cache.
const dbName = 'pageCache';
const collectionName = 'pages';
/** Connect the shared client and return a handle to the page-cache database. */
const connectToMongo = async () => {
  const client = await mongoClient.connect();
  return client.db(dbName);
};
/**
 * Create an index on `updatedAt` with `expireAfterSeconds` taken from
 * EXPIRE_AFTER_SECONDS ('9000' when unset). Failures are logged, never thrown.
 */
const createTTLIndex = async () => {
  const ttlSeconds = parseInt(process.env.EXPIRE_AFTER_SECONDS || '9000');
  try {
    const database = await connectToMongo();
    const pages = database.collection(collectionName);
    await pages.createIndex({ updatedAt: 1 }, { expireAfterSeconds: ttlSeconds });
    console.log('TTL index created successfully');
  } catch (error) {
    console.error('Error creating TTL index:', error);
  }
};
/** Return the hex-encoded MD5 digest of `content`. */
const getPageHash = (content: string) => {
  const hasher = crypto.createHash('md5');
  hasher.update(content);
  return hasher.digest('hex');
};
/**
 * Look a page up by URL: first in the in-process cache, then in MongoDB.
 * A MongoDB hit is copied into the in-process cache before being returned.
 * Database errors are logged and rethrown.
 */
export const getCachedPage = async (url: string) => {
  const inMemory = cache.get(url);
  if (inMemory) {
    return inMemory;
  }

  try {
    const database = await connectToMongo();
    const stored = await database.collection(collectionName).findOne({ url });
    if (stored) {
      cache.set(url, stored);
    }
    return stored;
  } catch (error) {
    console.error('Error getting cached page:', error);
    throw error;
  }
};
/**
 * Store a page in the in-process cache and upsert it into MongoDB, keyed by URL.
 * Database errors are logged and rethrown.
 */
const savePageToCache = async (url: string, content: string) => {
  const page = { url, content, hash: getPageHash(content), updatedAt: new Date() };

  // In-process cache first ...
  cache.set(url, page);

  // ... then the persistent copy.
  try {
    const database = await connectToMongo();
    const pages = database.collection(collectionName);
    await pages.updateOne({ url }, { $set: page }, { upsert: true });
  } catch (error) {
    console.error('Error saving page to cache:', error);
    throw error;
  }
};
/** Public entry point for cache writes; resolves once savePageToCache has finished. */
export const updateCacheAsync = async (url: string, content: string) =>
  savePageToCache(url, content);
// On SIGINT, close the shared Mongo client and then exit with status 0.
process.on('SIGINT', async () => {
await mongoClient.close();
process.exit(0);
});
// Create the TTL index when this module is loaded at application start-up.
createTTLIndex();

View File

@@ -0,0 +1,158 @@
import { Cluster } from 'puppeteer-cluster';
import * as cheerio from 'cheerio';
import UserAgent from 'user-agents';
import { setupPage } from './setupPage';
import { getCachedPage, updateCacheAsync } from './cacheUpdater';
import { handleSpecialWebsite } from '../specialHandlers';
import fetch from 'node-fetch';
// Shape that performDeepSearch assumes for values returned by getCachedPage.
interface CachedPage {
url: string;
content: string;
hash: string;
updatedAt: Date;
}
/**
 * Crawl every search hit and attach its page content to the matching entry in `results`.
 *
 * For each URL the cluster task tries, in order:
 *   1. the page cache,
 *   2. a plain HTTP fetch,
 *   3. a browser page load, trying each entry of `strategies` until one succeeds.
 * Entries are updated in place (`content` + crawlStatus 'Success', or `error` +
 * crawlStatus 'Failed'), so title / snippet / score from the search step are kept.
 * The cluster is always closed before this function settles.
 *
 * @param clusterInstance puppeteer-cluster instance; closed by this function.
 * @param resultUrls      URLs to crawl; at most `pageCount + 10` are queued.
 * @param results         Map of URL -> result entry, mutated in place.
 * @param strategies      Objects with `waitUntil` and `timeout`, tried in order for page loads.
 * @param detectWebsites  Substrings; matching URLs get setupPage() instead of a plain User-Agent.
 * @param pageCount       Number of results requested by the caller.
 * @returns All entries of `results`, sorted by `score` descending (missing score counts as 0).
 */
export const performDeepSearch = async (
  clusterInstance: Cluster,
  resultUrls: string[],
  results: Map<string, any>,
  strategies: any[],
  detectWebsites: string[],
  pageCount: number
) => {
  // Attach crawled content to an existing entry.
  const markSuccess = (searchUrl: string, content: string): void => {
    const entry = results.get(searchUrl);
    if (entry) {
      entry.content = content;
      entry.crawlStatus = 'Success';
    }
  };

  // Record a failure without discarding what the search step already collected.
  // The message is stored as a string: an Error object would serialize to `{}` in JSON.
  const markFailed = (searchUrl: string, error: unknown): void => {
    const message = error instanceof Error ? error.message : String(error);
    const entry = results.get(searchUrl);
    if (entry) {
      entry.error = message;
      entry.crawlStatus = 'Failed';
    } else {
      results.set(searchUrl, { url: searchUrl, error: message, crawlStatus: 'Failed' });
    }
  };

  // Cache writes are best-effort: a failed write must not undo a successful crawl.
  const storeInCache = async (searchUrl: string, content: string): Promise<void> => {
    try {
      await updateCacheAsync(searchUrl, content);
    } catch (error) {
      console.error(`更新页面 ${searchUrl} 缓存时发生错误:`, error);
    }
  };

  await clusterInstance.task(async ({ page, data: { searchUrl } }) => {
    // 1. Page cache.
    try {
      const cachedPage = (await getCachedPage(searchUrl)) as CachedPage | null;
      if (cachedPage) {
        markSuccess(searchUrl, cachedPage.content);
        return;
      }
    } catch (error) {
      console.error(`从缓存获取页面 ${searchUrl} 时发生错误:`, error);
      markFailed(searchUrl, error);
      return;
    }

    // 2. Plain HTTP fetch.
    try {
      const response = await fetch(searchUrl, {
        headers: {
          'User-Agent': new UserAgent({
            deviceCategory: 'desktop',
            platform: 'Linux x86_64'
          }).toString(),
          Referer: 'https://www.google.com/',
          'Accept-Language': 'en-US,en;q=0.9',
          Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
          Connection: 'keep-alive',
          'Cache-Control': 'no-cache'
        }
      });

      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }

      const html = await response.text();
      const cleanedContent = cheerio.load(html)('body').html() || '';
      markSuccess(searchUrl, cleanedContent);
      await storeInCache(searchUrl, cleanedContent);
      return;
    } catch (error) {
      console.error(`快速抓取页面 ${searchUrl} 时发生错误:`, error);
    }

    // 3. Browser page load.
    try {
      try {
        if (detectWebsites.some((website) => searchUrl.includes(website))) {
          await setupPage(page);
        } else {
          const userAgent = new UserAgent({ deviceCategory: 'desktop', platform: 'Linux x86_64' });
          await page.setUserAgent(userAgent.toString());
        }
      } catch (error) {
        console.error(`访问页面 ${searchUrl} 设置用户代理时发生错误:`, error);
      }

      let pageLoaded = false;
      let pageLoadError: unknown = null;

      for (const strategy of strategies) {
        try {
          await page.goto(searchUrl, { waitUntil: strategy.waitUntil, timeout: strategy.timeout });
          pageLoaded = true;
          break;
        } catch (error) {
          pageLoadError = error;
          // Only a timeout is retried with the next strategy; any other error ends the attempt.
          if (!(error instanceof Error) || error.name !== 'TimeoutError') {
            break;
          }
        }
      }

      if (!pageLoaded) {
        markFailed(searchUrl, pageLoadError ?? new Error('页面加载失败'));
        return;
      }

      let cleanedContent = await handleSpecialWebsite(page, searchUrl);
      if (!cleanedContent) {
        const content = await page.content();
        const $ = cheerio.load(content);
        cleanedContent = $('body').html() || '';
      }

      markSuccess(searchUrl, cleanedContent);
      await storeInCache(searchUrl, cleanedContent);
    } catch (error) {
      markFailed(searchUrl, error);
    } finally {
      await page.close().catch(() => {});
    }
  });

  const tasks: Promise<unknown>[] = [];
  for (const url of resultUrls) {
    if (tasks.length >= pageCount + 10) {
      break;
    }
    tasks.push(clusterInstance.queue({ searchUrl: url }));
  }

  try {
    await Promise.all(tasks);
    await clusterInstance.idle();
  } finally {
    // Release the browsers even when queueing or waiting failed.
    await clusterInstance.close();
  }

  // Entries without a numeric score count as 0 so the comparator never yields NaN.
  return Array.from(results.values()).sort((a, b) => (b.score ?? 0) - (a.score ?? 0));
};

View File

@@ -0,0 +1,81 @@
import { Page } from 'puppeteer';
import randomUseragent from 'random-useragent';
import dotenv from 'dotenv';
dotenv.config();
/** Returns whatever `randomUseragent.getRandom()` yields, i.e. a randomly chosen user-agent string. */
const getRandomUserAgent = () => randomUseragent.getRandom();
/** Returns one of the three candidate platform strings, chosen with `Math.random()`. */
const getRandomPlatform = () => {
  const candidates = ['Win32', 'MacIntel', 'Linux x86_64'];
  const pick = Math.floor(Math.random() * candidates.length);
  return candidates[pick];
};
// Proxy pool

/** Shape of one proxy-pool entry as read by `setupPage` (`ip` and `port`). */
type ProxyEntry = { ip: string; port: string | number };

/** Runtime check that a parsed JSON value carries a usable `ip` and `port`. */
const isProxyEntry = (value: unknown): value is ProxyEntry => {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  return (
    typeof candidate.ip === 'string' &&
    (typeof candidate.port === 'string' || typeof candidate.port === 'number')
  );
};

/**
 * Parses the raw VALIDATE_PROXY value into a list of proxy entries.
 *
 * An unset or empty value yields an empty pool. Malformed JSON or a non-array
 * value is logged and also yields an empty pool, so a bad environment variable
 * cannot crash the module at import time. Entries lacking `ip`/`port` are dropped.
 *
 * @param raw - The raw environment variable value (expected: a JSON array).
 * @returns The well-formed proxy entries; possibly empty.
 */
const parseProxyPool = (raw: string | undefined): ProxyEntry[] => {
  if (!raw) {
    return [];
  }
  try {
    const parsed: unknown = JSON.parse(raw);
    if (!Array.isArray(parsed)) {
      console.error('VALIDATE_PROXY must be a JSON array; proxy pool disabled.');
      return [];
    }
    return parsed.filter(isProxyEntry);
  } catch (error) {
    console.error('Failed to parse VALIDATE_PROXY; proxy pool disabled:', error);
    return [];
  }
};

const validateproxy = parseProxyPool(process.env.VALIDATE_PROXY);

/** Returns a random entry from the proxy pool, or `null` when the pool is empty. */
const getRandomProxy = () => {
  return validateproxy.length > 0
    ? validateproxy[Math.floor(Math.random() * validateproxy.length)]
    : null;
};
/** Returns one of the predefined language-preference lists, chosen with `Math.random()`. */
const getRandomLanguages = () => {
  const languageSets = [
    ['zh-CN', 'zh', 'en'],
    ['en-US', 'en', 'fr'],
    ['es-ES', 'es', 'en']
  ];
  const pick = Math.floor(Math.random() * languageSets.length);
  return languageSets[pick];
};
/**
 * Prepares a Puppeteer page before navigation by masking common automation signals.
 *
 * If the proxy pool yields an entry, its values are supplied to `page.authenticate`.
 * A script is then registered to run on every new document; it removes
 * `navigator.webdriver`, stubs `window.chrome`, overrides `navigator.userAgent`,
 * `platform`, `plugins` and `languages`, and patches `permissions.query` for
 * the `notifications` permission.
 *
 * @param page - The Puppeteer page to configure.
 */
export const setupPage = async (page: Page): Promise<void> => {
  const proxy = getRandomProxy();
  if (proxy) {
    // NOTE(review): the pool entry's ip/port are sent as the username/password for
    // HTTP authentication; this does not by itself route traffic through the proxy.
    // Confirm this is the intended use of the pool.
    await page.authenticate({
      username: proxy.ip,
      password: proxy.port.toString()
    });
  }

  // The callback passed to evaluateOnNewDocument is serialized and executed inside
  // the browser, where Node-side helpers are not defined. Choose the spoofed values
  // here and hand them over as a serializable argument instead.
  const fingerprint = {
    userAgent: getRandomUserAgent(),
    platform: getRandomPlatform(),
    languages: getRandomLanguages()
  };

  await page.evaluateOnNewDocument((fp: typeof fingerprint) => {
    const newProto = (navigator as any).__proto__;
    delete newProto.webdriver;
    (navigator as any).__proto__ = newProto;

    (window as any).chrome = {};
    (window as any).chrome.app = {
      InstallState: 'testt',
      RunningState: 'estt',
      getDetails: 'stte',
      getIsInstalled: 'ttes'
    };
    (window as any).chrome.csi = function () {};
    (window as any).chrome.loadTimes = function () {};
    (window as any).chrome.runtime = function () {};

    Object.defineProperty(navigator, 'userAgent', {
      get: () => fp.userAgent
    });
    Object.defineProperty(navigator, 'platform', {
      get: () => fp.platform
    });
    Object.defineProperty(navigator, 'plugins', {
      get: () => [
        {
          description: 'Shockwave Flash',
          filename: 'pepflashplayer.dll',
          length: 1,
          name: 'Shockwave Flash'
        }
      ]
    });
    Object.defineProperty(navigator, 'languages', {
      get: () => fp.languages
    });

    const permissions = window.navigator.permissions as any;
    if (permissions && typeof permissions.query === 'function') {
      // Keep the original method bound to its receiver; calling it detached
      // throws "Illegal invocation".
      const originalQuery = permissions.query.bind(permissions);
      permissions.query = (parameters: any) =>
        parameters.name === 'notifications'
          ? Promise.resolve({ state: Notification.permission } as PermissionStatus)
          : originalQuery(parameters);
    }
  }, fingerprint);
};

View File

@@ -0,0 +1,113 @@
{
"compilerOptions": {
/* Visit https://aka.ms/tsconfig to read more about this file */
/* Projects */
// "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
// "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
// "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
// "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
// "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
"types": ["node"],
/* Language and Environment */
"target": "es6", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
// "jsx": "preserve", /* Specify what JSX code is generated. */
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
/* Modules */
//"module": "es6", /* Specify what module code is generated. */
"rootDir": "./src", /* Specify the root folder within your source files. */
"moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. */
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */ /* Specify type package names to be included without being referenced in a source file. */
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
// "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
// "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */
// "rewriteRelativeImportExtensions": true, /* Rewrite '.ts', '.tsx', '.mts', and '.cts' file extensions in relative import paths to their JavaScript equivalent in output files. */
// "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */
// "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */
// "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
// "noUncheckedSideEffectImports": true, /* Check side effect imports. */
// "resolveJsonModule": true, /* Enable importing .json files. */
// "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
/* JavaScript Support */
// "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
// "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
// "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
/* Emit */
// "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
// "declarationMap": true, /* Create sourcemaps for d.ts files. */
// "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
// "sourceMap": true, /* Create source map files for emitted JavaScript files. */
// "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
// "noEmit": true, /* Disable emitting files from a compilation. */
// "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
"outDir": "./dist", /* Specify an output folder for all emitted files. */
// "removeComments": true, /* Disable emitting comments. */
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
// "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
// "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
// "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
// "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
// "newLine": "crlf", /* Set the newline character for emitting files. */
// "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
// "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
// "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
// "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
// "declarationDir": "./", /* Specify the output directory for generated declaration files. */
/* Interop Constraints */
// "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
// "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
// "isolatedDeclarations": true, /* Require sufficient annotation on exports so other tools can trivially generate declaration files. */
// "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
// "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
"forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */
/* Type Checking */
"typeRoots": ["./node_modules/@types"],
"strict": true, /* Enable all strict type-checking options. */
// "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
// "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
// "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
// "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
// "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
// "strictBuiltinIteratorReturn": true, /* Built-in iterators are instantiated with a 'TReturn' type of 'undefined' instead of 'any'. */
// "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
// "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
// "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
// "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
// "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
// "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
// "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
// "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
// "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
// "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
// "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
// "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
// "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */
/* Completeness */
// "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
"skipLibCheck": true/* Skip type checking all .d.ts files. */
},
"include": ["src/**/*.ts"],
"exclude": ["node_modules"]
}

View File

@@ -0,0 +1,55 @@
// 引入path包
const path = require('path')
require('dotenv').config();
const mode = process.env.NODE_ENV || 'development'
const nodeExternals = require('webpack-node-externals');
module.exports = {
target: 'node', // 指定构建目标为 Node.js
externals: [nodeExternals()], // 排除 node_modules
// 指定入口文件
entry: "./src/index.ts",
// 指定打包文件所在目录
output: {
path: path.resolve(__dirname, 'dist'),
// 打包后文件的名称
filename: "bundle.js"
},
resolve: {
extensions: ['.ts', '.tsx', '.js', '.json'],
fallback: {
"zlib": require.resolve("browserify-zlib"),
"querystring": require.resolve("querystring-es3"),
"path": require.resolve("path-browserify"),
"crypto": require.resolve("crypto-browserify"),
"stream": require.resolve("stream-browserify"),
"os": require.resolve("os-browserify/browser"),
"http": require.resolve("stream-http"),
"net": false,
"string_decoder": require.resolve("string_decoder/"),
"url": require.resolve("url/"),
"buffer": require.resolve("buffer/"),
"util": require.resolve("util/"),
// 新增 assert 的 fallback
"assert": require.resolve("assert/"),
// 处理新出现的 vm 警告
"vm": require.resolve("vm-browserify"),
"fs": false
}
},
// 指定webpack打包的时候要使用的模块
module: {
// 指定要价在的规则
rules: [
{
// test指定的是规则生效的文件,意思是用ts-loader来处理以ts为结尾的文件
test: /\.ts$/,
use: 'ts-loader',
exclude: /node_modules/
}
]
},
mode,
}

View File

@@ -0,0 +1,124 @@
name: spider
version: "0.0.1"
services:
caddy:
container_name: caddy
image: docker.io/library/caddy:2-alpine
network_mode: host
restart: unless-stopped
volumes:
- ./Caddyfile:/etc/caddy/Caddyfile:ro
- caddy-data:/data:rw
- caddy-config:/config:rw
environment:
- SEARXNG_HOSTNAME=${SEARXNG_HOSTNAME:-http://localhost}
- SEARXNG_TLS=${LETSENCRYPT_EMAIL:-internal}
cap_add:
- NET_BIND_SERVICE
cap_drop:
- ALL
logging:
driver: "json-file"
options:
max-size: "1m"
max-file: "1"
redis:
container_name: redis
image: docker.io/valkey/valkey:8-alpine
command: valkey-server --save 30 1 --loglevel warning
restart: unless-stopped
networks:
- searxng
volumes:
- valkey-data2:/data
cap_drop:
- ALL
cap_add:
- SETGID
- SETUID
- DAC_OVERRIDE
logging:
driver: "json-file"
options:
max-size: "1m"
max-file: "1"
searxng:
container_name: searxng
image: docker.io/searxng/searxng:latest
restart: unless-stopped
networks:
- searxng
ports:
- "127.0.0.1:8080:8080"
volumes:
- ./searxng:/etc/searxng:rw
environment:
- SEARXNG_BASE_URL=https://${SEARXNG_HOSTNAME:-localhost}/
- UWSGI_WORKERS=${SEARXNG_UWSGI_WORKERS:-4}
- UWSGI_THREADS=${SEARXNG_UWSGI_THREADS:-4}
env_file:
- .searchxng.env
cap_drop:
- ALL
cap_add:
- CHOWN
- SETGID
- SETUID
logging:
driver: "json-file"
options:
max-size: "1m"
max-file: "1"
# MongoDB service; the nodeapp service declares a dependency on it.
# NOTE(review): the root username/password below are fixed values committed to the
# file, and the port mapping publishes 27017 without a host-IP restriction (the
# searxng service, by contrast, binds to 127.0.0.1). Change the credentials and
# restrict or remove the published port before running this on a reachable host.
mongodb:
container_name: mongodb
image: mongo:4.4
restart: unless-stopped
networks:
- searxng
ports:
- "27017:27017"
volumes:
- mongo-data:/data/db
environment:
MONGO_INITDB_ROOT_USERNAME: root
MONGO_INITDB_ROOT_PASSWORD: example
logging:
driver: "json-file"
options:
max-size: "1m"
max-file: "1"
nodeapp:
container_name: main
build:
context: .
ports:
- "3000:3000"
networks:
- searxng
depends_on:
- mongodb
logging:
driver: "json-file"
options:
max-size: "1m"
max-file: "1"
volumes:
- /dev/shm:/dev/shm
deploy:
resources:
limits:
memory: 4G
cpus: '2.0'
networks:
searxng:
volumes:
caddy-data:
caddy-config:
valkey-data2:
mongo-data:

View File

@@ -0,0 +1,16 @@
[Unit]
Description=SearXNG service
Requires=docker.service
After=docker.service
[Service]
Restart=on-failure
Environment=SEARXNG_DOCKERCOMPOSEFILE=docker-compose.yaml
WorkingDirectory=/usr/local/searxng-docker
ExecStart=/usr/local/bin/docker compose -f ${SEARXNG_DOCKERCOMPOSEFILE} up --remove-orphans
ExecStop=/usr/local/bin/docker compose -f ${SEARXNG_DOCKERCOMPOSEFILE} down
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,6 @@
# This configuration file updates the default configuration file
# See https://github.com/searxng/searxng/blob/master/searx/limiter.toml
[botdetection.ip_limit]
# activate link_token method in the ip_limit method
link_token = true

View File

@@ -0,0 +1,38 @@
# see https://docs.searxng.org/admin/settings/settings.html#settings-use-default-settings
use_default_settings: true
server:
# base_url is defined in the SEARXNG_BASE_URL environment variable, see .env and docker-compose.yml
secret_key: "01042f00ae8bb522a9c03d3e7e1910318208a2c9fbdd23a6315577a9c98553a8" # change this!
limiter: false # can be disabled for a private instance
image_proxy: true
ui:
static_use_hash: true
# 启用 cn 分类
enabled_categories: [cn, general, images] # 按需添加其他分类
# 或者定义分类显示顺序
categories_order: [cn, general, images]
redis:
url: redis://redis:6379/0
engines:
- name: bing
disabled: false
categories: cn
#- name: bilibili
# engine: bilibili
# shortcut: bil
# disabled: false
# categories: cn
- name : baidu
engine : json_engine
paging : True
first_page_num : 0
search_url : https://www.baidu.com/s?tn=json&wd={query}&pn={pageno}&rn=50
url_query : url
title_query : title
content_query : abs
categories : cn
search:
formats:
- html
- json

View File

@@ -32,6 +32,7 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
import { formatTime2YMDHMS } from '@fastgpt/global/common/string/time';
import MyModal from '@fastgpt/web/components/common/MyModal';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
import SearchInput from '@fastgpt/web/components/common/Input/SearchInput';
type LogDetailType = {
id: number;
@@ -55,11 +56,13 @@ const ChannelLog = ({ Tab }: { Tab: React.ReactNode }) => {
const isRoot = userInfo?.username === 'root';
const [filterProps, setFilterProps] = useState<{
request_id?: string;
channelId?: string;
model?: string;
code_type: 'all' | 'success' | 'error';
dateRange: DateRangeType;
}>({
request_id: '',
code_type: 'all',
dateRange: {
from: (() => {
@@ -125,6 +128,7 @@ const ChannelLog = ({ Tab }: { Tab: React.ReactNode }) => {
pageSize: 20,
refreshDeps: [filterProps],
params: {
request_id: filterProps.request_id,
channel: filterProps.channelId,
model_name: filterProps.model,
code_type: filterProps.code_type,
@@ -162,7 +166,7 @@ const ChannelLog = ({ Tab }: { Tab: React.ReactNode }) => {
content: item.content
};
});
}, [data]);
}, [channelList, data, systemModelList]);
const [logDetail, setLogDetail] = useState<LogDetailType>();
@@ -172,6 +176,13 @@ const ChannelLog = ({ Tab }: { Tab: React.ReactNode }) => {
<Flex alignItems={'center'}>
{Tab}
<Box flex={1} />
<Box flex={'0 0 200px'}>
<SearchInput
placeholder={t('account_model:log_request_id_search')}
defaultValue={filterProps.request_id}
onBlur={(e) => setFilterProps({ ...filterProps, request_id: e.target.value })}
/>
</Box>
</Flex>
)}
<HStack spacing={4}>
@@ -244,8 +255,8 @@ const ChannelLog = ({ Tab }: { Tab: React.ReactNode }) => {
</Tr>
</Thead>
<Tbody>
{formatData.map((item) => (
<Tr key={item.id}>
{formatData.map((item, index) => (
<Tr key={index}>
<Td>{item.channelName}</Td>
<Td>{item.model}</Td>
<Td>
@@ -393,7 +404,7 @@ const LogDetail = ({ data, onClose }: { data: LogDetailType; onClose: () => void
</GridItem>
)}
{detailData?.response_body && (
<GridItem display={'flex'} borderBottomWidth="1px" borderRightWidth="1px" colSpan={2}>
<GridItem display={'flex'} colSpan={2}>
<Title>Response Body</Title>
<Container>{detailData?.response_body}</Container>
</GridItem>

View File

@@ -93,7 +93,7 @@ function MemberTable({ Tabs }: { Tabs: React.ReactNode }) {
const { runAsync: onLeaveTeam } = useRequest2(
async () => {
const defaultTeam = myTeams.find((item) => item.defaultTeam) || myTeams[0];
const defaultTeam = myTeams[0];
// change to personal team
onSwitchTeam(defaultTeam.teamId);
return delLeaveTeam();

View File

@@ -143,7 +143,7 @@ const checkInvalidData = async () => {
console.log(`检测集合完成`);
} catch (error) {
console.log(error);
console.log('checkInvalidData error', error);
}
};
@@ -166,7 +166,9 @@ const checkInvalidDataText = async () => {
await MongoDatasetDataText.deleteMany({
dataId: { $in: unExistsSet }
});
} catch (error) {}
} catch (error) {
console.log('checkInvalidDataText error', error);
}
};
/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */

View File

@@ -63,7 +63,7 @@ async function initHttp(teamId?: string): Promise<any> {
}
}
],
{ session }
{ session, ordered: true }
);
/* 批量创建子插件 */
@@ -88,7 +88,7 @@ async function initHttp(teamId?: string): Promise<any> {
}
}
],
{ session }
{ session, ordered: true }
);
if (item.version === 'v2') {
await MongoAppVersion.create(
@@ -100,7 +100,7 @@ async function initHttp(teamId?: string): Promise<any> {
edges: item.edges
}
],
{ session }
{ session, ordered: true }
);
}
}
@@ -160,7 +160,7 @@ async function initPlugin(teamId?: string): Promise<any> {
}
}
],
{ session }
{ session, ordered: true }
);
if (plugin.version === 'v2') {
@@ -173,7 +173,7 @@ async function initPlugin(teamId?: string): Promise<any> {
edges: plugin.edges
}
],
{ session }
{ session, ordered: true }
);
}

View File

@@ -98,7 +98,8 @@ async function handler(
}
],
{
session
session,
ordered: true
}
);
}

View File

@@ -126,7 +126,7 @@ export const onCreateApp = async ({
'pluginData.nodeVersion': defaultNodeVersion
}
],
{ session }
{ session, ordered: true }
);
if (!AppFolderTypeList.includes(type!)) {
@@ -144,7 +144,7 @@ export const onCreateApp = async ({
isPublish: true
}
],
{ session }
{ session, ordered: true }
);
}

View File

@@ -89,7 +89,8 @@ async function handler(req: ApiRequestProps<CreateAppFolderBody>) {
}
],
{
session
session,
ordered: true
}
);
}

View File

@@ -1,18 +1,23 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { loadOpenAPISchemaFromUrl } from '@fastgpt/global/common/string/swagger';
import { NextAPI } from '@/service/middleware/entry';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { isInternalAddress } from '@fastgpt/service/common/system/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
const apiURL = req.body.url as string;
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const apiURL = req.body.url as string;
return jsonRes(res, {
data: await loadOpenAPISchemaFromUrl(apiURL)
});
} catch (err) {
jsonRes(res, {
code: 500,
error: err
});
if (!apiURL) {
return Promise.reject(CommonErrEnum.missingParams);
}
const isInternal = isInternalAddress(apiURL);
if (isInternal) {
return Promise.reject('Invalid url');
}
return await loadOpenAPISchemaFromUrl(apiURL);
}
export default NextAPI(handler);

View File

@@ -45,7 +45,7 @@ async function handler(req: ApiRequestProps<PostPublishAppProps>, res: NextApiRe
tmbId
}
],
{ session }
{ session, ordered: true }
);
// update app

View File

@@ -88,7 +88,7 @@ async function handler(
yuqueServer
}
],
{ session }
{ session, ordered: true }
);
await refreshSourceAvatar(avatar, undefined, session);

View File

@@ -34,17 +34,17 @@ async function handler(req: NextApiRequest) {
});
const datasetIds = datasets.map((d) => d._id);
// delete collection.tags
await MongoDatasetCollectionTags.deleteMany({
teamId,
datasetId: { $in: datasetIds }
});
// delete all dataset.data and pg data
await mongoSessionRun(async (session) => {
// delete dataset data
await delDatasetRelevantData({ datasets, session });
// delete collection.tags
await MongoDatasetCollectionTags.deleteMany({
teamId,
datasetId: { $in: datasetIds }
}).session(session);
// delete dataset
await MongoDataset.deleteMany(
{

View File

@@ -87,7 +87,7 @@ async function handler(
permission: OwnerPermissionVal
}
],
{ session }
{ session, ordered: true }
);
}
});

View File

@@ -122,7 +122,8 @@ async function handler(req: ApiRequestProps<rebuildEmbeddingBody>): Promise<Resp
}
],
{
session
session,
ordered: true
}
);
}

View File

@@ -98,7 +98,7 @@ export async function insertData2Dataset({
}))
}
],
{ session }
{ session, ordered: true }
);
// 3. Create mongo data text
@@ -112,7 +112,7 @@ export async function insertData2Dataset({
fullTextToken: jiebaSplit({ text: qaStr })
}
],
{ session }
{ session, ordered: true }
);
return {

View File

@@ -192,7 +192,7 @@ const rebuildData = async ({
retryCount: 50
}
],
{ session }
{ session, ordered: true }
);
}
});

View File

@@ -37,7 +37,7 @@ export async function initRootUser(retry = 3): Promise<any> {
password: hashStr(psw)
}
],
{ session }
{ session, ordered: true }
);
rootId = _id;
}

View File

@@ -152,6 +152,7 @@ export const putChannel = (data: ChannelInfoType) =>
export const deleteChannel = (id: number) => DELETE(`/channel/${id}`);
export const getChannelLog = (params: {
request_id?: string;
channel?: string;
model_name?: string;
code_type?: 'all' | 'success' | 'error';
@@ -164,6 +165,7 @@ export const getChannelLog = (params: {
logs: ChannelLogListItemType[];
total: number;
}>(`/logs/search`, {
request_id: params.request_id,
channel: params.channel,
model_name: params.model_name,
code_type: params.code_type,