Compare commits
1 Commits
gru/projec
...
gru/projec
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
18ddccb709 |
@@ -645,7 +645,7 @@ data 为集合的 ID。
|
||||
{{< /tab >}}
|
||||
{{< /tabs >}}
|
||||
|
||||
### 创建一个外部文件库集合(弃用)
|
||||
### 创建一个外部文件库集合(商业版)
|
||||
|
||||
{{< tabs tabTotal="3" >}}
|
||||
{{< tab tabName="请求示例" >}}
|
||||
|
||||
@@ -40,6 +40,5 @@ export function getSourceNameIcon({
|
||||
export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {
|
||||
if (mode === TrainingModeEnum.qa) return data.length * 20;
|
||||
if (mode === TrainingModeEnum.auto) return data.length * 5;
|
||||
if (mode === TrainingModeEnum.image) return data.length * 2;
|
||||
return data.length;
|
||||
};
|
||||
|
||||
@@ -13,7 +13,6 @@ const staticPluginList = [
|
||||
'WeWorkWebhook',
|
||||
'google',
|
||||
'bing',
|
||||
'bocha',
|
||||
'delay'
|
||||
];
|
||||
// Run in worker thread (Have npm packages)
|
||||
|
||||
@@ -1,677 +0,0 @@
|
||||
{
|
||||
"author": "",
|
||||
"name": "博查搜索",
|
||||
"avatar": "core/workflow/template/bocha",
|
||||
"intro": "使用博查AI搜索引擎进行网络搜索。",
|
||||
"showStatus": true,
|
||||
"weight": 10,
|
||||
"courseUrl": "",
|
||||
"isTool": true,
|
||||
"templateType": "search",
|
||||
"workflow": {
|
||||
"nodes": [
|
||||
{
|
||||
"nodeId": "pluginInput",
|
||||
"name": "workflow:template.plugin_start",
|
||||
"intro": "workflow:intro_plugin_input",
|
||||
"avatar": "core/workflow/template/workflowStart",
|
||||
"flowNodeType": "pluginInput",
|
||||
"showStatus": false,
|
||||
"position": {
|
||||
"x": 636.3048409085379,
|
||||
"y": -238.61714728578016
|
||||
},
|
||||
"version": "481",
|
||||
"inputs": [
|
||||
{
|
||||
"renderTypeList": [
|
||||
"input"
|
||||
],
|
||||
"selectedTypeIndex": 0,
|
||||
"valueType": "string",
|
||||
"canEdit": true,
|
||||
"key": "apiKey",
|
||||
"label": "apiKey",
|
||||
"description": "博查API密钥",
|
||||
"defaultValue": "",
|
||||
"required": true
|
||||
},
|
||||
{
|
||||
"renderTypeList": [
|
||||
"input",
|
||||
"reference"
|
||||
],
|
||||
"selectedTypeIndex": 0,
|
||||
"valueType": "string",
|
||||
"canEdit": true,
|
||||
"key": "query",
|
||||
"label": "query",
|
||||
"description": "搜索查询词",
|
||||
"defaultValue": "",
|
||||
"required": true,
|
||||
"toolDescription": "搜索查询词"
|
||||
},
|
||||
{
|
||||
"renderTypeList": [
|
||||
"input",
|
||||
"reference"
|
||||
],
|
||||
"selectedTypeIndex": 0,
|
||||
"valueType": "string",
|
||||
"canEdit": true,
|
||||
"key": "freshness",
|
||||
"label": "freshness",
|
||||
"description": "搜索指定时间范围内的网页。可填值:oneDay(一天内)、oneWeek(一周内)、oneMonth(一个月内)、oneYear(一年内)、noLimit(不限,默认)、YYYY-MM-DD..YYYY-MM-DD(日期范围)、YYYY-MM-DD(指定日期)",
|
||||
"defaultValue": "noLimit",
|
||||
"required": false,
|
||||
"toolDescription": "搜索时间范围"
|
||||
},
|
||||
{
|
||||
"renderTypeList": [
|
||||
"input",
|
||||
"reference"
|
||||
],
|
||||
"selectedTypeIndex": 0,
|
||||
"valueType": "boolean",
|
||||
"canEdit": true,
|
||||
"key": "summary",
|
||||
"label": "summary",
|
||||
"description": "是否显示文本摘要。true显示,false不显示(默认)",
|
||||
"defaultValue": false,
|
||||
"required": false,
|
||||
"toolDescription": "是否显示文本摘要"
|
||||
},
|
||||
{
|
||||
"renderTypeList": [
|
||||
"input",
|
||||
"reference"
|
||||
],
|
||||
"selectedTypeIndex": 0,
|
||||
"valueType": "string",
|
||||
"canEdit": true,
|
||||
"key": "include",
|
||||
"label": "include",
|
||||
"description": "指定搜索的site范围。多个域名使用|或,分隔,最多20个。例如:qq.com|m.163.com",
|
||||
"defaultValue": "",
|
||||
"required": false,
|
||||
"toolDescription": "指定搜索的site范围"
|
||||
},
|
||||
{
|
||||
"renderTypeList": [
|
||||
"input",
|
||||
"reference"
|
||||
],
|
||||
"selectedTypeIndex": 0,
|
||||
"valueType": "string",
|
||||
"canEdit": true,
|
||||
"key": "exclude",
|
||||
"label": "exclude",
|
||||
"description": "排除搜索的网站范围。多个域名使用|或,分隔,最多20个。例如:qq.com|m.163.com",
|
||||
"defaultValue": "",
|
||||
"required": false,
|
||||
"toolDescription": "排除搜索的网站范围"
|
||||
},
|
||||
{
|
||||
"renderTypeList": [
|
||||
"input",
|
||||
"reference"
|
||||
],
|
||||
"selectedTypeIndex": 0,
|
||||
"valueType": "number",
|
||||
"canEdit": true,
|
||||
"key": "count",
|
||||
"label": "count",
|
||||
"description": "返回结果的条数。可填范围:1-50,默认为10",
|
||||
"defaultValue": 10,
|
||||
"required": false,
|
||||
"min": 1,
|
||||
"max": 50,
|
||||
"toolDescription": "返回结果条数"
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"id": "apiKey",
|
||||
"valueType": "string",
|
||||
"key": "apiKey",
|
||||
"label": "apiKey",
|
||||
"type": "hidden"
|
||||
},
|
||||
{
|
||||
"id": "query",
|
||||
"valueType": "string",
|
||||
"key": "query",
|
||||
"label": "query",
|
||||
"type": "hidden"
|
||||
},
|
||||
{
|
||||
"id": "freshness",
|
||||
"valueType": "string",
|
||||
"key": "freshness",
|
||||
"label": "freshness",
|
||||
"type": "hidden"
|
||||
},
|
||||
{
|
||||
"id": "summary",
|
||||
"valueType": "boolean",
|
||||
"key": "summary",
|
||||
"label": "summary",
|
||||
"type": "hidden"
|
||||
},
|
||||
{
|
||||
"id": "include",
|
||||
"valueType": "string",
|
||||
"key": "include",
|
||||
"label": "include",
|
||||
"type": "hidden"
|
||||
},
|
||||
{
|
||||
"id": "exclude",
|
||||
"valueType": "string",
|
||||
"key": "exclude",
|
||||
"label": "exclude",
|
||||
"type": "hidden"
|
||||
},
|
||||
{
|
||||
"id": "count",
|
||||
"valueType": "number",
|
||||
"key": "count",
|
||||
"label": "count",
|
||||
"type": "hidden"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"nodeId": "pluginOutput",
|
||||
"name": "common:core.module.template.self_output",
|
||||
"intro": "workflow:intro_custom_plugin_output",
|
||||
"avatar": "core/workflow/template/pluginOutput",
|
||||
"flowNodeType": "pluginOutput",
|
||||
"showStatus": false,
|
||||
"position": {
|
||||
"x": 2764.1105686698083,
|
||||
"y": -30.617147285780163
|
||||
},
|
||||
"version": "481",
|
||||
"inputs": [
|
||||
{
|
||||
"renderTypeList": [
|
||||
"reference"
|
||||
],
|
||||
"valueType": "object",
|
||||
"canEdit": true,
|
||||
"key": "result",
|
||||
"label": "result",
|
||||
"isToolOutput": true,
|
||||
"description": "",
|
||||
"value": [
|
||||
"nyA6oA8mF1iW",
|
||||
"httpRawResponse"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"nodeId": "pluginConfig",
|
||||
"name": "common:core.module.template.system_config",
|
||||
"intro": "",
|
||||
"avatar": "core/workflow/template/systemConfig",
|
||||
"flowNodeType": "pluginConfig",
|
||||
"position": {
|
||||
"x": 184.66337662472682,
|
||||
"y": -216.05298493910115
|
||||
},
|
||||
"version": "4811",
|
||||
"inputs": [],
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"nodeId": "nyA6oA8mF1iW",
|
||||
"name": "HTTP 请求",
|
||||
"intro": "调用博查搜索API",
|
||||
"avatar": "core/workflow/template/httpRequest",
|
||||
"flowNodeType": "httpRequest468",
|
||||
"showStatus": true,
|
||||
"position": {
|
||||
"x": 1335.0647252518884,
|
||||
"y": -455.9043948565971
|
||||
},
|
||||
"version": "481",
|
||||
"inputs": [
|
||||
{
|
||||
"key": "system_addInputParam",
|
||||
"renderTypeList": [
|
||||
"addInputParam"
|
||||
],
|
||||
"valueType": "dynamic",
|
||||
"label": "",
|
||||
"required": false,
|
||||
"description": "common:core.module.input.description.HTTP Dynamic Input",
|
||||
"customInputConfig": {
|
||||
"selectValueTypeList": [
|
||||
"string",
|
||||
"number",
|
||||
"boolean",
|
||||
"object",
|
||||
"arrayString",
|
||||
"arrayNumber",
|
||||
"arrayBoolean",
|
||||
"arrayObject",
|
||||
"arrayAny",
|
||||
"any",
|
||||
"chatHistory",
|
||||
"datasetQuote",
|
||||
"dynamic",
|
||||
"selectDataset",
|
||||
"selectApp"
|
||||
],
|
||||
"showDescription": false,
|
||||
"showDefaultValue": true
|
||||
},
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpMethod",
|
||||
"renderTypeList": [
|
||||
"custom"
|
||||
],
|
||||
"valueType": "string",
|
||||
"label": "",
|
||||
"value": "POST",
|
||||
"required": true,
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpTimeout",
|
||||
"renderTypeList": [
|
||||
"custom"
|
||||
],
|
||||
"valueType": "number",
|
||||
"label": "",
|
||||
"value": 30,
|
||||
"min": 5,
|
||||
"max": 600,
|
||||
"required": true,
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpReqUrl",
|
||||
"renderTypeList": [
|
||||
"hidden"
|
||||
],
|
||||
"valueType": "string",
|
||||
"label": "",
|
||||
"description": "common:core.module.input.description.Http Request Url",
|
||||
"placeholder": "https://api.ai.com/getInventory",
|
||||
"required": false,
|
||||
"value": "https://api.bochaai.com/v1/web-search",
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpHeader",
|
||||
"renderTypeList": [
|
||||
"custom"
|
||||
],
|
||||
"valueType": "any",
|
||||
"value": [
|
||||
{
|
||||
"key": "Authorization",
|
||||
"type": "string",
|
||||
"value": "Bearer {{$pluginInput.apiKey$}}"
|
||||
},
|
||||
{
|
||||
"key": "Content-Type",
|
||||
"type": "string",
|
||||
"value": "application/json"
|
||||
}
|
||||
],
|
||||
"label": "",
|
||||
"description": "common:core.module.input.description.Http Request Header",
|
||||
"placeholder": "common:core.module.input.description.Http Request Header",
|
||||
"required": false,
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpParams",
|
||||
"renderTypeList": [
|
||||
"hidden"
|
||||
],
|
||||
"valueType": "any",
|
||||
"value": [],
|
||||
"label": "",
|
||||
"required": false,
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpJsonBody",
|
||||
"renderTypeList": [
|
||||
"hidden"
|
||||
],
|
||||
"valueType": "any",
|
||||
"value": "{\n \"query\": \"{{query}}\",\n \"freshness\": \"{{freshness}}\",\n \"summary\": {{summary}},\n \"include\": \"{{include}}\",\n \"exclude\": \"{{exclude}}\",\n \"count\": {{count}}\n}",
|
||||
"label": "",
|
||||
"required": false,
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpFormBody",
|
||||
"renderTypeList": [
|
||||
"hidden"
|
||||
],
|
||||
"valueType": "any",
|
||||
"value": [],
|
||||
"label": "",
|
||||
"required": false,
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"key": "system_httpContentType",
|
||||
"renderTypeList": [
|
||||
"hidden"
|
||||
],
|
||||
"valueType": "string",
|
||||
"value": "json",
|
||||
"label": "",
|
||||
"required": false,
|
||||
"debugLabel": "",
|
||||
"toolDescription": ""
|
||||
},
|
||||
{
|
||||
"valueType": "string",
|
||||
"renderTypeList": [
|
||||
"reference"
|
||||
],
|
||||
"key": "query",
|
||||
"label": "query",
|
||||
"toolDescription": "博查搜索检索词",
|
||||
"required": true,
|
||||
"canEdit": true,
|
||||
"editField": {
|
||||
"key": true,
|
||||
"description": true
|
||||
},
|
||||
"customInputConfig": {
|
||||
"selectValueTypeList": [
|
||||
"string",
|
||||
"number",
|
||||
"boolean",
|
||||
"object",
|
||||
"arrayString",
|
||||
"arrayNumber",
|
||||
"arrayBoolean",
|
||||
"arrayObject",
|
||||
"arrayAny",
|
||||
"any",
|
||||
"chatHistory",
|
||||
"datasetQuote",
|
||||
"dynamic",
|
||||
"selectApp",
|
||||
"selectDataset"
|
||||
],
|
||||
"showDescription": false,
|
||||
"showDefaultValue": true
|
||||
},
|
||||
"value": [
|
||||
"pluginInput",
|
||||
"query"
|
||||
]
|
||||
},
|
||||
{
|
||||
"valueType": "string",
|
||||
"renderTypeList": [
|
||||
"reference"
|
||||
],
|
||||
"key": "freshness",
|
||||
"label": "freshness",
|
||||
"toolDescription": "搜索时间范围",
|
||||
"required": false,
|
||||
"canEdit": true,
|
||||
"editField": {
|
||||
"key": true,
|
||||
"description": true
|
||||
},
|
||||
"customInputConfig": {
|
||||
"selectValueTypeList": [
|
||||
"string",
|
||||
"number",
|
||||
"boolean",
|
||||
"object",
|
||||
"arrayString",
|
||||
"arrayNumber",
|
||||
"arrayBoolean",
|
||||
"arrayObject",
|
||||
"arrayAny",
|
||||
"any",
|
||||
"chatHistory",
|
||||
"datasetQuote",
|
||||
"dynamic",
|
||||
"selectApp",
|
||||
"selectDataset"
|
||||
],
|
||||
"showDescription": false,
|
||||
"showDefaultValue": true
|
||||
},
|
||||
"value": [
|
||||
"pluginInput",
|
||||
"freshness"
|
||||
]
|
||||
},
|
||||
{
|
||||
"valueType": "boolean",
|
||||
"renderTypeList": [
|
||||
"reference"
|
||||
],
|
||||
"key": "summary",
|
||||
"label": "summary",
|
||||
"toolDescription": "是否显示文本摘要",
|
||||
"required": false,
|
||||
"canEdit": true,
|
||||
"editField": {
|
||||
"key": true,
|
||||
"description": true
|
||||
},
|
||||
"customInputConfig": {
|
||||
"selectValueTypeList": [
|
||||
"string",
|
||||
"number",
|
||||
"boolean",
|
||||
"object",
|
||||
"arrayString",
|
||||
"arrayNumber",
|
||||
"arrayBoolean",
|
||||
"arrayObject",
|
||||
"arrayAny",
|
||||
"any",
|
||||
"chatHistory",
|
||||
"datasetQuote",
|
||||
"dynamic",
|
||||
"selectApp",
|
||||
"selectDataset"
|
||||
],
|
||||
"showDescription": false,
|
||||
"showDefaultValue": true
|
||||
},
|
||||
"value": [
|
||||
"pluginInput",
|
||||
"summary"
|
||||
]
|
||||
},
|
||||
{
|
||||
"valueType": "string",
|
||||
"renderTypeList": [
|
||||
"reference"
|
||||
],
|
||||
"key": "include",
|
||||
"label": "include",
|
||||
"toolDescription": "指定搜索的site范围",
|
||||
"required": false,
|
||||
"canEdit": true,
|
||||
"editField": {
|
||||
"key": true,
|
||||
"description": true
|
||||
},
|
||||
"customInputConfig": {
|
||||
"selectValueTypeList": [
|
||||
"string",
|
||||
"number",
|
||||
"boolean",
|
||||
"object",
|
||||
"arrayString",
|
||||
"arrayNumber",
|
||||
"arrayBoolean",
|
||||
"arrayObject",
|
||||
"arrayAny",
|
||||
"any",
|
||||
"chatHistory",
|
||||
"datasetQuote",
|
||||
"dynamic",
|
||||
"selectApp",
|
||||
"selectDataset"
|
||||
],
|
||||
"showDescription": false,
|
||||
"showDefaultValue": true
|
||||
},
|
||||
"value": [
|
||||
"pluginInput",
|
||||
"include"
|
||||
]
|
||||
},
|
||||
{
|
||||
"valueType": "string",
|
||||
"renderTypeList": [
|
||||
"reference"
|
||||
],
|
||||
"key": "exclude",
|
||||
"label": "exclude",
|
||||
"toolDescription": "排除搜索的网站范围",
|
||||
"required": false,
|
||||
"canEdit": true,
|
||||
"editField": {
|
||||
"key": true,
|
||||
"description": true
|
||||
},
|
||||
"customInputConfig": {
|
||||
"selectValueTypeList": [
|
||||
"string",
|
||||
"number",
|
||||
"boolean",
|
||||
"object",
|
||||
"arrayString",
|
||||
"arrayNumber",
|
||||
"arrayBoolean",
|
||||
"arrayObject",
|
||||
"arrayAny",
|
||||
"any",
|
||||
"chatHistory",
|
||||
"datasetQuote",
|
||||
"dynamic",
|
||||
"selectApp",
|
||||
"selectDataset"
|
||||
],
|
||||
"showDescription": false,
|
||||
"showDefaultValue": true
|
||||
},
|
||||
"value": [
|
||||
"pluginInput",
|
||||
"exclude"
|
||||
]
|
||||
},
|
||||
{
|
||||
"valueType": "number",
|
||||
"renderTypeList": [
|
||||
"reference"
|
||||
],
|
||||
"key": "count",
|
||||
"label": "count",
|
||||
"toolDescription": "返回结果条数",
|
||||
"required": false,
|
||||
"canEdit": true,
|
||||
"editField": {
|
||||
"key": true,
|
||||
"description": true
|
||||
},
|
||||
"customInputConfig": {
|
||||
"selectValueTypeList": [
|
||||
"string",
|
||||
"number",
|
||||
"boolean",
|
||||
"object",
|
||||
"arrayString",
|
||||
"arrayNumber",
|
||||
"arrayBoolean",
|
||||
"arrayObject",
|
||||
"arrayAny",
|
||||
"any",
|
||||
"chatHistory",
|
||||
"datasetQuote",
|
||||
"dynamic",
|
||||
"selectApp",
|
||||
"selectDataset"
|
||||
],
|
||||
"showDescription": false,
|
||||
"showDefaultValue": true
|
||||
},
|
||||
"value": [
|
||||
"pluginInput",
|
||||
"count"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"id": "error",
|
||||
"key": "error",
|
||||
"label": "workflow:request_error",
|
||||
"description": "HTTP请求错误信息,成功时返回空",
|
||||
"valueType": "object",
|
||||
"type": "static"
|
||||
},
|
||||
{
|
||||
"id": "httpRawResponse",
|
||||
"key": "httpRawResponse",
|
||||
"required": true,
|
||||
"label": "workflow:raw_response",
|
||||
"description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。",
|
||||
"valueType": "any",
|
||||
"type": "static"
|
||||
},
|
||||
{
|
||||
"id": "system_addOutputParam",
|
||||
"key": "system_addOutputParam",
|
||||
"type": "dynamic",
|
||||
"valueType": "dynamic",
|
||||
"label": "",
|
||||
"editField": {
|
||||
"key": true,
|
||||
"valueType": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"edges": [
|
||||
{
|
||||
"source": "pluginInput",
|
||||
"target": "nyA6oA8mF1iW",
|
||||
"sourceHandle": "pluginInput-source-right",
|
||||
"targetHandle": "nyA6oA8mF1iW-target-left"
|
||||
},
|
||||
{
|
||||
"source": "nyA6oA8mF1iW",
|
||||
"target": "pluginOutput",
|
||||
"sourceHandle": "nyA6oA8mF1iW-source-right",
|
||||
"targetHandle": "pluginOutput-target-left"
|
||||
}
|
||||
]
|
||||
},
|
||||
"chatConfig": {}
|
||||
}
|
||||
@@ -223,7 +223,7 @@ export const readFileContentFromMongo = async ({
|
||||
rawText: string;
|
||||
filename: string;
|
||||
}> => {
|
||||
const bufferId = `${String(fileId)}-${customPdfParse}`;
|
||||
const bufferId = `${fileId}-${customPdfParse}`;
|
||||
// read buffer
|
||||
const fileBuffer = await getRawTextBuffer(bufferId);
|
||||
if (fileBuffer) {
|
||||
|
||||
@@ -1,57 +1,5 @@
|
||||
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
|
||||
import { PassThrough } from 'stream';
|
||||
import { getGridBucket } from './controller';
|
||||
import { type BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { retryFn } from '@fastgpt/global/common/system/utils';
|
||||
|
||||
export const createFileFromText = async ({
|
||||
bucket,
|
||||
filename,
|
||||
text,
|
||||
metadata
|
||||
}: {
|
||||
bucket: `${BucketNameEnum}`;
|
||||
filename: string;
|
||||
text: string;
|
||||
metadata: Record<string, any>;
|
||||
}) => {
|
||||
const gridBucket = getGridBucket(bucket);
|
||||
|
||||
const buffer = Buffer.from(text);
|
||||
|
||||
const fileSize = buffer.length;
|
||||
// 单块大小:尽可能大,但不超过 14MB,不小于128KB
|
||||
const chunkSizeBytes = (() => {
|
||||
// 计算理想块大小:文件大小 ÷ 目标块数(10)。 并且每个块需要小于 14MB
|
||||
const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);
|
||||
|
||||
// 确保块大小至少为128KB
|
||||
const minChunkSize = 128 * 1024; // 128KB
|
||||
|
||||
// 取理想块大小和最小块大小中的较大值
|
||||
let chunkSize = Math.max(idealChunkSize, minChunkSize);
|
||||
|
||||
// 将块大小向上取整到最接近的64KB的倍数,使其更整齐
|
||||
chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
|
||||
|
||||
return chunkSize;
|
||||
})();
|
||||
|
||||
const uploadStream = gridBucket.openUploadStream(filename, {
|
||||
metadata,
|
||||
chunkSizeBytes
|
||||
});
|
||||
|
||||
return retryFn(async () => {
|
||||
return new Promise<{ fileId: string }>((resolve, reject) => {
|
||||
uploadStream.end(buffer);
|
||||
uploadStream.on('finish', () => {
|
||||
resolve({ fileId: String(uploadStream.id) });
|
||||
});
|
||||
uploadStream.on('error', reject);
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
|
||||
return new Promise<Buffer>((resolve, reject) => {
|
||||
|
||||
@@ -287,7 +287,6 @@ export const iconPaths = {
|
||||
'core/workflow/template/aiChat': () => import('./icons/core/workflow/template/aiChat.svg'),
|
||||
'core/workflow/template/baseChart': () => import('./icons/core/workflow/template/baseChart.svg'),
|
||||
'core/workflow/template/bing': () => import('./icons/core/workflow/template/bing.svg'),
|
||||
'core/workflow/template/bocha': () => import('./icons/core/workflow/template/bocha.svg'),
|
||||
'core/workflow/template/codeRun': () => import('./icons/core/workflow/template/codeRun.svg'),
|
||||
'core/workflow/template/customFeedback': () =>
|
||||
import('./icons/core/workflow/template/customFeedback.svg'),
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
<svg width="113" height="97" viewBox="0 0 113 97" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M0 31.7259C1.80046 29.9255 3.82784 28.3872 5.96621 27.1988C8.10469 26.0103 10.3126 25.1947 12.4634 24.7992C14.6143 24.4037 16.6664 24.4361 18.5022 24.8938C20.2678 25.334 21.7994 26.1604 23.0183 27.3272L23.021 27.3245L47.189 51.4924L33.4778 65.2037L0 31.7259Z" fill="#C4DEFE"/>
|
||||
<path d="M9.15662 11.5625C11.3617 10.2893 13.7181 9.32825 16.0912 8.73374C18.4645 8.13923 20.8082 7.92284 22.9882 8.09751C25.1681 8.27217 27.1419 8.83457 28.7966 9.75182C30.3881 10.6341 31.6537 11.8287 32.529 13.2712L32.5316 13.2697L32.6082 13.4025C32.6162 13.4162 32.6251 13.4297 32.633 13.4435L49.886 43.3286L33.0941 53.0234L9.15662 11.5625Z" fill="#A6CBFF"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M31.1377 0C33.6839 4.40811e-05 36.2052 0.345872 38.5576 1.01758C40.9099 1.68929 43.0472 2.67394 44.8477 3.91504C46.6482 5.15627 48.0773 6.63021 49.0518 8.25195C49.9888 9.81168 50.4867 11.4792 50.5234 13.166H50.5273V21.4072C56.6623 17.6586 63.874 15.498 71.5898 15.498C93.9304 15.4984 112.042 33.6087 112.042 55.9492C112.042 78.29 93.9305 96.401 71.5898 96.4014C49.3907 96.4014 31.3704 78.5193 31.1426 56.374H31.1377V0ZM71.9473 35.0439C60.1187 35.0441 50.5295 44.6334 50.5293 56.4619C50.5293 63.5338 53.9569 69.8057 59.2412 73.7061C66.4989 79.0625 76.5515 75.3841 85.3955 77.1592C92.613 78.608 97.2369 82.6827 98.3652 83.7686C97.3562 82.731 93.791 78.7138 92.2715 72.3291C89.8011 61.9479 94.8744 49.6043 87.5771 41.8184C83.6695 37.6493 78.1122 35.0441 71.9473 35.0439Z" fill="#006EFF"/>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 1.6 KiB |
7
plugins/webcrawler/SPIDER/package-lock.json
generated
7
plugins/webcrawler/SPIDER/package-lock.json
generated
@@ -4992,10 +4992,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/tar-fs": {
|
||||
"version": "3.0.9",
|
||||
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.0.9.tgz",
|
||||
"integrity": "sha512-XF4w9Xp+ZQgifKakjZYmFdkLoSWd34VGKcsTCwlNWM7QG3ZbaxnTsaBwnjFZqHRf/rROxaR8rXnbtwdvaDI+lA==",
|
||||
"license": "MIT",
|
||||
"version": "3.0.8",
|
||||
"resolved": "https://registry.npmmirror.com/tar-fs/-/tar-fs-3.0.8.tgz",
|
||||
"integrity": "sha512-ZoROL70jptorGAlgAYiLoBLItEKw/fUxg9BSYK/dF/GAGYFJOJJJMvjPAKDJraCXFwadD456FCuvLWgfhMsPwg==",
|
||||
"dependencies": {
|
||||
"pump": "^3.0.0",
|
||||
"tar-stream": "^3.1.5"
|
||||
|
||||
@@ -49,7 +49,7 @@ const CustomTextInput = () => {
|
||||
createStatus: 'waiting',
|
||||
rawText: data.value,
|
||||
sourceName: data.name,
|
||||
icon: 'file/fill/txt'
|
||||
icon: 'file/fill/manual'
|
||||
}
|
||||
]);
|
||||
goToNext();
|
||||
|
||||
@@ -6,7 +6,6 @@ import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
|
||||
import { type CreateCollectionResponse } from '@/global/core/dataset/api';
|
||||
import { createFileFromText } from '@fastgpt/service/common/file/gridfs/utils';
|
||||
|
||||
async function handler(req: NextApiRequest): CreateCollectionResponse {
|
||||
const { name, text, ...body } = req.body as TextCreateDatasetCollectionParams;
|
||||
@@ -19,18 +18,6 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
|
||||
per: WritePermissionVal
|
||||
});
|
||||
|
||||
// 1. Create file from text
|
||||
const filename = `${name}.txt`;
|
||||
const { fileId } = await createFileFromText({
|
||||
bucket: 'dataset',
|
||||
filename,
|
||||
text,
|
||||
metadata: {
|
||||
teamId,
|
||||
uid: tmbId
|
||||
}
|
||||
});
|
||||
|
||||
const { collectionId, insertResults } = await createCollectionAndInsertData({
|
||||
dataset,
|
||||
rawText: text,
|
||||
@@ -38,9 +25,9 @@ async function handler(req: NextApiRequest): CreateCollectionResponse {
|
||||
...body,
|
||||
teamId,
|
||||
tmbId,
|
||||
type: DatasetCollectionTypeEnum.file,
|
||||
fileId,
|
||||
name: filename
|
||||
type: DatasetCollectionTypeEnum.virtual,
|
||||
|
||||
name
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { generateQA } from '@/service/core/dataset/queues/generateQA';
|
||||
import { generateVector } from '@/service/core/dataset/queues/generateVector';
|
||||
import { generateQA } from '@/service/events/generateQA';
|
||||
import { generateVector } from '@/service/events/generateVector';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { type DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
|
||||
|
||||
@@ -2,7 +2,7 @@ import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/sch
|
||||
import { pushQAUsage } from '@/service/support/wallet/usage/push';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { createChatCompletion } from '@fastgpt/service/core/ai/config';
|
||||
import type { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type.d';
|
||||
import type { ChatCompletionMessageParam, StreamChatType } from '@fastgpt/global/core/ai/type.d';
|
||||
import { addLog } from '@fastgpt/service/common/system/log';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { replaceVariable } from '@fastgpt/global/common/string/tools';
|
||||
@@ -1,6 +1,6 @@
|
||||
import { TeamErrEnum } from '@fastgpt/global/common/error/code/team';
|
||||
import { checkTeamAIPoints } from '@fastgpt/service/support/permission/teamLimit';
|
||||
import { sendOneInform } from '../../../support/user/inform/api';
|
||||
import { sendOneInform } from '../support/user/inform/api';
|
||||
import { lockTrainingDataByTeamId } from '@fastgpt/service/core/dataset/training/controller';
|
||||
import { InformLevelEnum } from '@fastgpt/global/support/user/inform/constants';
|
||||
|
||||
@@ -18,7 +18,7 @@ export const checkTeamAiPointsAndLock = async (teamId: string) => {
|
||||
templateParam: {},
|
||||
teamId
|
||||
});
|
||||
console.log('余额不足,暂停训练生成任务');
|
||||
console.log('余额不足,暂停【向量】生成任务');
|
||||
await lockTrainingDataByTeamId(teamId);
|
||||
} catch (error) {}
|
||||
}
|
||||
@@ -1,128 +0,0 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { processChatTimeFilter } from '@/service/core/chat/utils';
|
||||
import type { DatasetCiteItemType } from '@fastgpt/global/core/dataset/type';
|
||||
|
||||
describe('processChatTimeFilter', () => {
|
||||
const baseTime = new Date('2025-01-01');
|
||||
|
||||
it('should return original item if no history', () => {
|
||||
const item: DatasetCiteItemType = {
|
||||
id: '1',
|
||||
q: 'original q',
|
||||
a: 'original a',
|
||||
updateTime: baseTime.getTime(),
|
||||
history: undefined
|
||||
};
|
||||
|
||||
const result = processChatTimeFilter([item], baseTime);
|
||||
expect(result).toEqual([item]);
|
||||
});
|
||||
|
||||
it('should return original item if updateTime <= chatTime', () => {
|
||||
const item: DatasetCiteItemType = {
|
||||
id: '1',
|
||||
q: 'original q',
|
||||
a: 'original a',
|
||||
updateTime: baseTime.getTime() - 1000,
|
||||
history: [
|
||||
{
|
||||
q: 'history q',
|
||||
a: 'history a',
|
||||
updateTime: baseTime.getTime() - 2000
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
const result = processChatTimeFilter([item], baseTime);
|
||||
expect(result).toEqual([item]);
|
||||
});
|
||||
|
||||
it('should return history item if one exists before chatTime', () => {
|
||||
const item: DatasetCiteItemType = {
|
||||
id: '1',
|
||||
q: 'original q',
|
||||
a: 'original a',
|
||||
updateTime: baseTime.getTime() + 2000,
|
||||
history: [
|
||||
{
|
||||
q: 'history q',
|
||||
a: 'history a',
|
||||
updateTime: baseTime.getTime() - 1000
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
const result = processChatTimeFilter([item], baseTime);
|
||||
expect(result).toEqual([
|
||||
{
|
||||
...item,
|
||||
q: 'history q',
|
||||
a: 'history a',
|
||||
updateTime: baseTime.getTime() - 1000,
|
||||
updated: true
|
||||
}
|
||||
]);
|
||||
});
|
||||
|
||||
it('should return original item if no history before chatTime', () => {
|
||||
const item: DatasetCiteItemType = {
|
||||
id: '1',
|
||||
q: 'original q',
|
||||
a: 'original a',
|
||||
updateTime: baseTime.getTime() + 2000,
|
||||
history: [
|
||||
{
|
||||
q: 'history q',
|
||||
a: 'history a',
|
||||
updateTime: baseTime.getTime() + 1000
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
const result = processChatTimeFilter([item], baseTime);
|
||||
expect(result).toEqual([item]);
|
||||
});
|
||||
|
||||
it('should handle multiple items', () => {
|
||||
const items: DatasetCiteItemType[] = [
|
||||
{
|
||||
id: '1',
|
||||
q: 'original q1',
|
||||
a: 'original a1',
|
||||
updateTime: baseTime.getTime() + 2000,
|
||||
history: [
|
||||
{
|
||||
q: 'history q1',
|
||||
a: 'history a1',
|
||||
updateTime: baseTime.getTime() - 1000
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
id: '2',
|
||||
q: 'original q2',
|
||||
a: 'original a2',
|
||||
updateTime: baseTime.getTime() - 1000,
|
||||
history: [
|
||||
{
|
||||
q: 'history q2',
|
||||
a: 'history a2',
|
||||
updateTime: baseTime.getTime() - 2000
|
||||
}
|
||||
]
|
||||
}
|
||||
];
|
||||
|
||||
const result = processChatTimeFilter(items, baseTime);
|
||||
expect(result).toEqual([
|
||||
{
|
||||
...items[0],
|
||||
q: 'history q1',
|
||||
a: 'history a1',
|
||||
updateTime: baseTime.getTime() - 1000,
|
||||
updated: true
|
||||
},
|
||||
items[1]
|
||||
]);
|
||||
});
|
||||
});
|
||||
128
test/cases/service/core/dataset/training/utils.test.ts
Normal file
128
test/cases/service/core/dataset/training/utils.test.ts
Normal file
@@ -0,0 +1,128 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import {
|
||||
createDatasetTrainingMongoWatch,
|
||||
startTrainingQueue
|
||||
} from '@/service/core/dataset/training/utils';
|
||||
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
|
||||
import { generateQA } from '@/service/core/dataset/queues/generateQA';
|
||||
import { generateVector } from '@/service/core/dataset/queues/generateVector';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
|
||||
vi.mock('@/service/core/dataset/queues/generateQA', () => ({
|
||||
generateQA: vi.fn()
|
||||
}));
|
||||
|
||||
vi.mock('@/service/core/dataset/queues/generateVector', () => ({
|
||||
generateVector: vi.fn()
|
||||
}));
|
||||
|
||||
vi.mock('@fastgpt/service/core/dataset/training/schema', () => ({
|
||||
MongoDatasetTraining: {
|
||||
watch: vi.fn().mockReturnValue({
|
||||
on: vi.fn()
|
||||
})
|
||||
}
|
||||
}));
|
||||
|
||||
describe('dataset training utils', () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
describe('createDatasetTrainingMongoWatch', () => {
|
||||
it('should setup mongo watch and handle qa mode', () => {
|
||||
const mockOn = vi.fn();
|
||||
vi.mocked(MongoDatasetTraining.watch).mockReturnValue({
|
||||
on: mockOn
|
||||
});
|
||||
|
||||
createDatasetTrainingMongoWatch();
|
||||
|
||||
expect(MongoDatasetTraining.watch).toHaveBeenCalled();
|
||||
expect(mockOn).toHaveBeenCalledWith('change', expect.any(Function));
|
||||
|
||||
// Simulate change event for QA mode
|
||||
const changeHandler = mockOn.mock.calls[0][1];
|
||||
changeHandler({
|
||||
operationType: 'insert',
|
||||
fullDocument: {
|
||||
mode: TrainingModeEnum.qa
|
||||
}
|
||||
});
|
||||
|
||||
expect(generateQA).toHaveBeenCalled();
|
||||
expect(generateVector).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle chunk mode', () => {
|
||||
const mockOn = vi.fn();
|
||||
vi.mocked(MongoDatasetTraining.watch).mockReturnValue({
|
||||
on: mockOn
|
||||
});
|
||||
|
||||
createDatasetTrainingMongoWatch();
|
||||
|
||||
const changeHandler = mockOn.mock.calls[0][1];
|
||||
changeHandler({
|
||||
operationType: 'insert',
|
||||
fullDocument: {
|
||||
mode: TrainingModeEnum.chunk
|
||||
}
|
||||
});
|
||||
|
||||
expect(generateVector).toHaveBeenCalled();
|
||||
expect(generateQA).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should ignore non-insert operations', () => {
|
||||
const mockOn = vi.fn();
|
||||
vi.mocked(MongoDatasetTraining.watch).mockReturnValue({
|
||||
on: mockOn
|
||||
});
|
||||
|
||||
createDatasetTrainingMongoWatch();
|
||||
|
||||
const changeHandler = mockOn.mock.calls[0][1];
|
||||
changeHandler({
|
||||
operationType: 'update',
|
||||
fullDocument: {
|
||||
mode: TrainingModeEnum.qa
|
||||
}
|
||||
});
|
||||
|
||||
expect(generateQA).not.toHaveBeenCalled();
|
||||
expect(generateVector).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('startTrainingQueue', () => {
|
||||
beforeEach(() => {
|
||||
global.systemEnv = {
|
||||
qaMaxProcess: 3
|
||||
};
|
||||
});
|
||||
|
||||
it('should start single process by default', () => {
|
||||
startTrainingQueue();
|
||||
|
||||
expect(generateQA).toHaveBeenCalledTimes(1);
|
||||
expect(generateVector).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('should start max processes when fast mode enabled', () => {
|
||||
startTrainingQueue(true);
|
||||
|
||||
expect(generateQA).toHaveBeenCalledTimes(3);
|
||||
expect(generateVector).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
|
||||
it('should use default max process when not configured', () => {
|
||||
global.systemEnv = undefined;
|
||||
|
||||
startTrainingQueue(true);
|
||||
|
||||
expect(generateQA).toHaveBeenCalledTimes(10);
|
||||
expect(generateVector).toHaveBeenCalledTimes(10);
|
||||
});
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user