4.6.2-production (#518)
This commit is contained in:
@@ -26,7 +26,7 @@ weight: 520
|
|||||||
"qaMaxProcess": 15, // QA 生成最大进程,结合数据库性能和 key 来设置
|
"qaMaxProcess": 15, // QA 生成最大进程,结合数据库性能和 key 来设置
|
||||||
"pgHNSWEfSearch": 100 // pg vector 索引参数,越大精度高但速度慢
|
"pgHNSWEfSearch": 100 // pg vector 索引参数,越大精度高但速度慢
|
||||||
},
|
},
|
||||||
"ChatModels": [
|
"ChatModels": [ // 对话模型
|
||||||
{
|
{
|
||||||
"model": "gpt-3.5-turbo-1106",
|
"model": "gpt-3.5-turbo-1106",
|
||||||
"name": "GPT35-1106",
|
"name": "GPT35-1106",
|
||||||
@@ -76,7 +76,7 @@ weight: 520
|
|||||||
"defaultSystemChatPrompt": ""
|
"defaultSystemChatPrompt": ""
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"QAModels": [
|
"QAModels": [ // QA 生成模型
|
||||||
{
|
{
|
||||||
"model": "gpt-3.5-turbo-16k",
|
"model": "gpt-3.5-turbo-16k",
|
||||||
"name": "GPT35-16k",
|
"name": "GPT35-16k",
|
||||||
@@ -85,14 +85,14 @@ weight: 520
|
|||||||
"price": 0
|
"price": 0
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"CQModels": [
|
"CQModels": [ // 问题分类模型
|
||||||
{
|
{
|
||||||
"model": "gpt-3.5-turbo-1106",
|
"model": "gpt-3.5-turbo-1106",
|
||||||
"name": "GPT35-1106",
|
"name": "GPT35-1106",
|
||||||
"maxContext": 16000,
|
"maxContext": 16000,
|
||||||
"maxResponse": 4000,
|
"maxResponse": 4000,
|
||||||
"price": 0,
|
"price": 0,
|
||||||
"functionCall": true,
|
"functionCall": true, // 是否支持function call, 不支持的模型需要设置为 false,会走提示词生成
|
||||||
"functionPrompt": ""
|
"functionPrompt": ""
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -105,7 +105,7 @@ weight: 520
|
|||||||
"functionPrompt": ""
|
"functionPrompt": ""
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"ExtractModels": [
|
"ExtractModels": [ // 内容提取模型
|
||||||
{
|
{
|
||||||
"model": "gpt-3.5-turbo-1106",
|
"model": "gpt-3.5-turbo-1106",
|
||||||
"name": "GPT35-1106",
|
"name": "GPT35-1106",
|
||||||
@@ -116,7 +116,7 @@ weight: 520
|
|||||||
"functionPrompt": ""
|
"functionPrompt": ""
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"QGModels": [
|
"QGModels": [ // 生成下一步指引
|
||||||
{
|
{
|
||||||
"model": "gpt-3.5-turbo-1106",
|
"model": "gpt-3.5-turbo-1106",
|
||||||
"name": "GPT35-1106",
|
"name": "GPT35-1106",
|
||||||
@@ -125,7 +125,7 @@ weight: 520
|
|||||||
"price": 0
|
"price": 0
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"VectorModels": [
|
"VectorModels": [ // 向量模型
|
||||||
{
|
{
|
||||||
"model": "text-embedding-ada-002",
|
"model": "text-embedding-ada-002",
|
||||||
"name": "Embedding-2",
|
"name": "Embedding-2",
|
||||||
|
|||||||
31
docSite/content/docs/installation/upgrading/462.md
Normal file
31
docSite/content/docs/installation/upgrading/462.md
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
---
|
||||||
|
title: 'V4.6.2(需要初始化)'
|
||||||
|
description: 'FastGPT V4.6.2'
|
||||||
|
icon: 'upgrade'
|
||||||
|
draft: false
|
||||||
|
toc: true
|
||||||
|
weight: 834
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 执行初始化 API
|
||||||
|
|
||||||
|
发起 1 个 HTTP 请求 ({{rootkey}} 替换成环境变量里的 `rootkey`,{{host}} 替换成自己域名)
|
||||||
|
|
||||||
|
1. https://xxxxx/api/admin/initv462
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location --request POST 'https://{{host}}/api/admin/initv462' \
|
||||||
|
--header 'rootkey: {{rootkey}}' \
|
||||||
|
--header 'Content-Type: application/json'
|
||||||
|
```
|
||||||
|
|
||||||
|
初始化说明:
|
||||||
|
1. 初始化全文索引
|
||||||
|
|
||||||
|
## V4.6.2 功能介绍
|
||||||
|
|
||||||
|
1. 新增 - 全文索引(需配合 Rerank 模型,在看怎么放到开源版,模型接口比较特殊)
|
||||||
|
2. 新增 - 插件来源(预计4.7/4.8版本会正式使用)
|
||||||
|
3. 优化 - PDF读取
|
||||||
|
4. 优化 - docx文件读取,转成 markdown 并保留其图片内容
|
||||||
|
5. 修复和优化 TextSplitter 函数
|
||||||
@@ -3,126 +3,184 @@ import { countPromptTokens } from './tiktoken';
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* text split into chunks
|
* text split into chunks
|
||||||
* maxLen - one chunk len. max: 3500
|
* chunkLen - one chunk len. max: 3500
|
||||||
* overlapLen - The size of the before and after Text
|
* overlapRatio - overlap between adjacent chunks as a fraction of chunkLen (default 0.2); overlapLen = round(chunkLen * overlapRatio)
|
||||||
* maxLen > overlapLen
|
* chunkLen > overlapLen
|
||||||
* markdown
|
* markdown
|
||||||
*/
|
*/
|
||||||
export const splitText2Chunks = (props: { text: string; maxLen: number; overlapLen?: number }) => {
|
export const splitText2Chunks = (props: {
|
||||||
const { text = '', maxLen, overlapLen = Math.floor(maxLen * 0.2) } = props;
|
text: string;
|
||||||
const tempMarker = 'SPLIT_HERE_SPLIT_HERE';
|
chunkLen: number;
|
||||||
|
overlapRatio?: number;
|
||||||
|
}): {
|
||||||
|
chunks: string[];
|
||||||
|
tokens: number;
|
||||||
|
} => {
|
||||||
|
const { text = '', chunkLen, overlapRatio = 0.2 } = props;
|
||||||
|
const splitMarker = 'SPLIT_HERE_SPLIT_HERE';
|
||||||
|
const overlapLen = Math.round(chunkLen * overlapRatio);
|
||||||
|
|
||||||
const stepReg: Record<number, RegExp> = {
|
// The larger maxLen is, the next sentence is less likely to trigger splitting
|
||||||
0: /^(#\s[^\n]+)\n/gm,
|
const stepReges: { reg: RegExp; maxLen: number }[] = [
|
||||||
1: /^(##\s[^\n]+)\n/gm,
|
{ reg: /^(#\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 },
|
||||||
2: /^(###\s[^\n]+)\n/gm,
|
{ reg: /^(##\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 },
|
||||||
3: /^(####\s[^\n]+)\n/gm,
|
{ reg: /^(###\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 },
|
||||||
|
{ reg: /^(####\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 },
|
||||||
|
|
||||||
4: /(\n\n)/g,
|
{ reg: /([\n]{2})/g, maxLen: chunkLen * 1.4 },
|
||||||
5: /([\n])/g,
|
{ reg: /([\n](?![\*\-|>`0-9]))/g, maxLen: chunkLen * 1.8 }, // (?![\*\-|>`0-9]): markdown special char
|
||||||
6: /([。]|(?!<[^a-zA-Z])\.\s)/g,
|
{ reg: /([\n])/g, maxLen: chunkLen * 1.4 },
|
||||||
7: /([!?]|!\s|\?\s)/g,
|
|
||||||
8: /([;]|;\s)/g,
|
{ reg: /([。]|([a-zA-Z])\.\s)/g, maxLen: chunkLen * 1.4 },
|
||||||
9: /([,]|,\s)/g
|
{ reg: /([!]|!\s)/g, maxLen: chunkLen * 1.4 },
|
||||||
|
{ reg: /([?]|\?\s)/g, maxLen: chunkLen * 1.6 },
|
||||||
|
{ reg: /([;]|;\s)/g, maxLen: chunkLen * 1.8 },
|
||||||
|
{ reg: /([,]|,\s)/g, maxLen: chunkLen * 2 }
|
||||||
|
];
|
||||||
|
|
||||||
|
const getSplitTexts = ({ text, step }: { text: string; step: number }) => {
|
||||||
|
if (step >= stepReges.length) {
|
||||||
|
return [text];
|
||||||
|
}
|
||||||
|
const isMarkdownSplit = step <= 3;
|
||||||
|
const { reg } = stepReges[step];
|
||||||
|
|
||||||
|
const splitTexts = text
|
||||||
|
.replace(reg, isMarkdownSplit ? `${splitMarker}$1` : `$1${splitMarker}`)
|
||||||
|
.split(`${splitMarker}`)
|
||||||
|
.filter((part) => part.trim());
|
||||||
|
return splitTexts;
|
||||||
|
};
|
||||||
|
|
||||||
|
const getOneTextOverlapText = ({ text, step }: { text: string; step: number }): string => {
|
||||||
|
const forbidOverlap = step <= 6;
|
||||||
|
const maxOverlapLen = chunkLen * 0.4;
|
||||||
|
|
||||||
|
// step >= stepReges.length: Do not overlap incomplete sentences
|
||||||
|
if (forbidOverlap || overlapLen === 0 || step >= stepReges.length) return '';
|
||||||
|
|
||||||
|
const splitTexts = getSplitTexts({ text, step });
|
||||||
|
let overlayText = '';
|
||||||
|
|
||||||
|
for (let i = splitTexts.length - 1; i >= 0; i--) {
|
||||||
|
const currentText = splitTexts[i];
|
||||||
|
const newText = currentText + overlayText;
|
||||||
|
const newTextLen = newText.length;
|
||||||
|
|
||||||
|
if (newTextLen > overlapLen) {
|
||||||
|
if (newTextLen > maxOverlapLen) {
|
||||||
|
const text = getOneTextOverlapText({ text: newText, step: step + 1 });
|
||||||
|
return text || overlayText;
|
||||||
|
}
|
||||||
|
return newText;
|
||||||
|
}
|
||||||
|
|
||||||
|
overlayText = newText;
|
||||||
|
}
|
||||||
|
return overlayText;
|
||||||
};
|
};
|
||||||
|
|
||||||
const splitTextRecursively = ({
|
const splitTextRecursively = ({
|
||||||
text = '',
|
text = '',
|
||||||
step,
|
step,
|
||||||
lastChunk,
|
lastText
|
||||||
overlayChunk
|
|
||||||
}: {
|
}: {
|
||||||
text: string;
|
text: string;
|
||||||
step: number;
|
step: number;
|
||||||
lastChunk: string;
|
lastText: string;
|
||||||
overlayChunk: string;
|
}): string[] => {
|
||||||
}) => {
|
// mini text
|
||||||
if (text.length <= maxLen) {
|
if (text.length <= chunkLen) {
|
||||||
return [text];
|
return [text];
|
||||||
}
|
}
|
||||||
const reg = stepReg[step];
|
|
||||||
const isMarkdownSplit = step < 4;
|
|
||||||
|
|
||||||
if (!reg) {
|
// oversize
|
||||||
// use slice-maxLen to split text
|
if (step >= stepReges.length) {
|
||||||
|
if (text.length < chunkLen * 3) {
|
||||||
|
return [text];
|
||||||
|
}
|
||||||
|
// use slice-chunkLen to split text
|
||||||
const chunks: string[] = [];
|
const chunks: string[] = [];
|
||||||
let chunk = '';
|
for (let i = 0; i < text.length; i += chunkLen - overlapLen) {
|
||||||
for (let i = 0; i < text.length; i += maxLen - overlapLen) {
|
chunks.push(text.slice(i, i + chunkLen));
|
||||||
chunk = text.slice(i, i + maxLen);
|
|
||||||
chunks.push(chunk);
|
|
||||||
}
|
}
|
||||||
return chunks;
|
return chunks;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const { maxLen } = stepReges[step];
|
||||||
|
const minChunkLen = chunkLen * 0.7;
|
||||||
|
|
||||||
// split text by special char
|
// split text by special char
|
||||||
const splitTexts = (() => {
|
const splitTexts = getSplitTexts({ text, step });
|
||||||
if (!reg.test(text)) {
|
|
||||||
return [text];
|
|
||||||
}
|
|
||||||
return text
|
|
||||||
.replace(reg, isMarkdownSplit ? `${tempMarker}$1` : `$1${tempMarker}`)
|
|
||||||
.split(`${tempMarker}`)
|
|
||||||
.filter((part) => part);
|
|
||||||
})();
|
|
||||||
|
|
||||||
let chunks: string[] = [];
|
const chunks: string[] = [];
|
||||||
for (let i = 0; i < splitTexts.length; i++) {
|
for (let i = 0; i < splitTexts.length; i++) {
|
||||||
let text = splitTexts[i];
|
const currentText = splitTexts[i];
|
||||||
let chunkToken = lastChunk.length;
|
const currentTextLen = currentText.length;
|
||||||
const textToken = text.length;
|
const lastTextLen = lastText.length;
|
||||||
|
const newText = lastText + currentText;
|
||||||
|
const newTextLen = lastTextLen + currentTextLen;
|
||||||
|
|
||||||
// next chunk is too large / new chunk is too large(The current chunk must be smaller than maxLen)
|
// newText is too large(now, The lastText must be smaller than chunkLen)
|
||||||
if (textToken >= maxLen || chunkToken + textToken > maxLen * 1.4) {
|
if (newTextLen > maxLen) {
|
||||||
// last chunk is too large, push it to chunks, not add to next chunk
|
// lastText is longer than minChunkLen: push it to chunks directly instead of merging it into the next chunk (large lastText)
|
||||||
if (chunkToken > maxLen * 0.7) {
|
if (lastTextLen > minChunkLen) {
|
||||||
chunks.push(lastChunk);
|
chunks.push(lastText);
|
||||||
lastChunk = '';
|
lastText = getOneTextOverlapText({ text: lastText, step }); // next chunk will start with overlayText
|
||||||
overlayChunk = '';
|
i--;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
// chunk is small, insert to next chunks
|
|
||||||
|
// split new Text, split chunks must will greater 1 (small lastText)
|
||||||
const innerChunks = splitTextRecursively({
|
const innerChunks = splitTextRecursively({
|
||||||
text,
|
text: newText,
|
||||||
step: step + 1,
|
step: step + 1,
|
||||||
lastChunk,
|
lastText: ''
|
||||||
overlayChunk
|
|
||||||
});
|
});
|
||||||
if (innerChunks.length === 0) continue;
|
const lastChunk = innerChunks[innerChunks.length - 1];
|
||||||
chunks = chunks.concat(innerChunks);
|
// last chunk is too small, concat it to lastText
|
||||||
lastChunk = '';
|
if (lastChunk.length < minChunkLen) {
|
||||||
overlayChunk = '';
|
chunks.push(...innerChunks.slice(0, -1));
|
||||||
|
lastText = lastChunk;
|
||||||
|
} else {
|
||||||
|
chunks.push(...innerChunks);
|
||||||
|
// compute new overlapText
|
||||||
|
lastText = getOneTextOverlapText({
|
||||||
|
text: lastChunk,
|
||||||
|
step
|
||||||
|
});
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// size less than maxLen, push text to last chunk
|
// newText does not exceed maxLen: append the current text to lastText
|
||||||
lastChunk += text;
|
lastText = newText;
|
||||||
chunkToken += textToken; // Definitely less than 1.4 * maxLen
|
|
||||||
|
|
||||||
// size over lapLen, push it to next chunk
|
// If the chunk size reaches, add a chunk
|
||||||
if (
|
if (newTextLen >= chunkLen) {
|
||||||
overlapLen !== 0 &&
|
chunks.push(lastText);
|
||||||
!isMarkdownSplit &&
|
lastText = getOneTextOverlapText({ text: lastText, step });
|
||||||
chunkToken >= maxLen - overlapLen &&
|
|
||||||
textToken < overlapLen
|
|
||||||
) {
|
|
||||||
overlayChunk += text;
|
|
||||||
}
|
|
||||||
if (chunkToken >= maxLen) {
|
|
||||||
chunks.push(lastChunk);
|
|
||||||
lastChunk = overlayChunk;
|
|
||||||
overlayChunk = '';
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If the last chunk is independent, it needs to be push chunks. */
|
/* If the last chunk is independent, it needs to be pushed to chunks. */
|
||||||
if (lastChunk && chunks[chunks.length - 1] && !chunks[chunks.length - 1].endsWith(lastChunk)) {
|
if (lastText && chunks[chunks.length - 1] && !chunks[chunks.length - 1].endsWith(lastText)) {
|
||||||
chunks.push(lastChunk);
|
if (lastText.length < chunkLen * 0.4) {
|
||||||
|
chunks[chunks.length - 1] = chunks[chunks.length - 1] + lastText;
|
||||||
|
} else {
|
||||||
|
chunks.push(lastText);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return chunks;
|
return chunks;
|
||||||
};
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const chunks = splitTextRecursively({ text, step: 0, lastChunk: '', overlayChunk: '' });
|
const chunks = splitTextRecursively({
|
||||||
|
text,
|
||||||
|
step: 0,
|
||||||
|
lastText: ''
|
||||||
|
});
|
||||||
|
|
||||||
const tokens = chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0);
|
const tokens = chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0);
|
||||||
|
|
||||||
|
|||||||
@@ -102,13 +102,13 @@ export function responseWriteController({
|
|||||||
readStream: any;
|
readStream: any;
|
||||||
}) {
|
}) {
|
||||||
res.on('drain', () => {
|
res.on('drain', () => {
|
||||||
readStream.resume();
|
readStream?.resume?.();
|
||||||
});
|
});
|
||||||
|
|
||||||
return (text: string | Buffer) => {
|
return (text: string | Buffer) => {
|
||||||
const writeResult = res.write(text);
|
const writeResult = res.write(text);
|
||||||
if (!writeResult) {
|
if (!writeResult) {
|
||||||
readStream?.pause();
|
readStream?.pause?.();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
2
pnpm-lock.yaml
generated
2
pnpm-lock.yaml
generated
@@ -219,7 +219,7 @@ importers:
|
|||||||
specifier: ^4.17.21
|
specifier: ^4.17.21
|
||||||
version: registry.npmmirror.com/lodash@4.17.21
|
version: registry.npmmirror.com/lodash@4.17.21
|
||||||
mammoth:
|
mammoth:
|
||||||
specifier: ^1.5.1
|
specifier: ^1.6.0
|
||||||
version: registry.npmmirror.com/mammoth@1.6.0
|
version: registry.npmmirror.com/mammoth@1.6.0
|
||||||
mermaid:
|
mermaid:
|
||||||
specifier: ^10.2.3
|
specifier: ^10.2.3
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "app",
|
"name": "app",
|
||||||
"version": "4.6.1",
|
"version": "4.6.2",
|
||||||
"private": false,
|
"private": false,
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"dev": "next dev",
|
"dev": "next dev",
|
||||||
@@ -38,7 +38,7 @@
|
|||||||
"jsdom": "^22.1.0",
|
"jsdom": "^22.1.0",
|
||||||
"jsonwebtoken": "^9.0.2",
|
"jsonwebtoken": "^9.0.2",
|
||||||
"lodash": "^4.17.21",
|
"lodash": "^4.17.21",
|
||||||
"mammoth": "^1.5.1",
|
"mammoth": "^1.6.0",
|
||||||
"mermaid": "^10.2.3",
|
"mermaid": "^10.2.3",
|
||||||
"multer": "1.4.5-lts.1",
|
"multer": "1.4.5-lts.1",
|
||||||
"nanoid": "^4.0.1",
|
"nanoid": "^4.0.1",
|
||||||
|
|||||||
@@ -1,12 +1,13 @@
|
|||||||
### Fast GPT V4.6
|
### Fast GPT V4.6.2
|
||||||
|
|
||||||
1. 新增 - 团队空间
|
1. 新增 - 团队空间
|
||||||
2. 新增 - 多路向量(多个向量映射一组数据)
|
2. 新增 - 多路向量(多个向量映射一组数据)
|
||||||
3. 新增 - tts语音
|
3. 新增 - tts语音
|
||||||
4. 线上环境新增 - ReRank向量召回,提高召回精度
|
4. 新增 - 语音输入
|
||||||
5. 优化 - 知识库导出,可直接触发流下载,无需等待转圈圈
|
5. 新增 - 增强召回方式,提高召回精度
|
||||||
6. [知识库结构详解](https://doc.fastgpt.in/docs/use-cases/datasetengine/)
|
6. 优化 - 知识库导出,可直接触发流下载,无需等待转圈圈
|
||||||
7. [知识库提示词详解](https://doc.fastgpt.in/docs/use-cases/ai_settings/#引用模板--引用提示词)
|
7. [知识库结构详解](https://doc.fastgpt.in/docs/use-cases/datasetengine/)
|
||||||
8. [使用文档](https://doc.fastgpt.in/docs/intro/)
|
8. [知识库提示词详解](https://doc.fastgpt.in/docs/use-cases/ai_settings/#引用模板--引用提示词)
|
||||||
9. [点击查看高级编排介绍文档](https://doc.fastgpt.in/docs/workflow)
|
9. [使用文档](https://doc.fastgpt.in/docs/intro/)
|
||||||
10. [点击查看商业版](https://doc.fastgpt.in/docs/commercial/)
|
10. [点击查看高级编排介绍文档](https://doc.fastgpt.in/docs/workflow)
|
||||||
|
11. [点击查看商业版](https://doc.fastgpt.in/docs/commercial/)
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1700745751866" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="36975" xmlns:xlink="http://www.w3.org/1999/xlink" width="128" height="128"><path d="M220.150923 700.501366l167.543301-167.5433a73.062148 73.062148 0 0 1 103.327242 103.327242l-365.310741 365.310741a73.062148 73.062148 0 1 1-103.327242-103.286288l94.481152-94.481152-64.257012-64.257013a73.062148 73.062148 0 1 1 103.327242-103.286288l64.216058 64.216058z" fill="#CCDAFF" p-id="36976"></path><path d="M475.909397 49.003242a365.310741 365.310741 0 1 1 365.310741 632.782092 365.310741 365.310741 0 0 1-365.310741-632.741138z m118.685036 205.589677a127.981622 127.981622 0 1 0 127.981622 221.643692 127.981622 127.981622 0 0 0-127.981622-221.684646z" fill="#244DD5" p-id="36977"></path></svg>
|
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1700983497588" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="6628" xmlns:xlink="http://www.w3.org/1999/xlink" width="128" height="128"><path d="M698.483573 594.905936A287.506808 287.506808 0 1 1 984.611923 306.020671v1.181535a287.309885 287.309885 0 0 1-286.12835 287.70373z" fill="#FFFFFF" p-id="6629"></path><path d="M698.483573 39.387645A267.814561 267.814561 0 1 1 433.229005 308.777585v-1.575379A267.420716 267.420716 0 0 1 698.483573 39.387645m0-39.384494A307.199055 307.199055 0 1 0 1004.30417 308.580663v-1.378457A306.411365 306.411365 0 0 0 698.680495 0.003151z" fill="#007FB7" p-id="6630"></path><path d="M787.689452 236.310116m-78.768988 0a78.768988 78.768988 0 1 0 157.537977 0 78.768988 78.768988 0 1 0-157.537977 0Z" fill="#D1EBF2" p-id="6631"></path><path d="M787.689452 177.233375a59.076741 59.076741 0 1 1-59.076741 59.076741 59.076741 59.076741 0 0 1 59.076741-59.076741m0-39.384495a98.461236 98.461236 0 1 0 98.461236 98.461236 98.461236 98.461236 0 0 0-98.461236-98.461236z" fill="#007FB7" p-id="6632"></path><path d="M39.384062 974.57246v-113.033499l390.300338-392.466484 162.067194 108.701204-116.381181 58.682896v124.455002l-135.876505 5.316906v131.150366l-127.014993 4.923062-65.772106 99.248925L39.384062 974.57246z" fill="#D1EBF2" p-id="6633"></path><path d="M433.229005 494.475475l120.713474 80.935136-75.421306 38.006037-21.661472 10.830736v118.153482l-98.461235 3.741527-38.793727 2.166148v131.347288l-98.461236 3.741527h-19.692247l-11.224581 16.73841L137.845298 979.101677l-78.768988-19.692247v-89.796647l374.152695-374.152695m-5.119985-50.805998L19.691815 853.268218v136.467272L155.56832 1024l67.938253-102.399685 135.876505-5.316907v-131.150365l135.876505-5.316907v-131.347288L630.151476 580.333673l-203.027068-136.664195z" fill="#007FB7" p-id="6634"></path></svg>
|
||||||
|
Before Width: | Height: | Size: 939 B After Width: | Height: | Size: 1.9 KiB |
@@ -193,6 +193,9 @@
|
|||||||
"unKnow": "There was an accident"
|
"unKnow": "There was an accident"
|
||||||
},
|
},
|
||||||
"export": "",
|
"export": "",
|
||||||
|
"file": {
|
||||||
|
"Select file amount limit 100": "You can select a maximum of 100 files at a time"
|
||||||
|
},
|
||||||
"folder": {
|
"folder": {
|
||||||
"Drag Tip": "Click and move",
|
"Drag Tip": "Click and move",
|
||||||
"Move Success": "Move Success",
|
"Move Success": "Move Success",
|
||||||
@@ -260,10 +263,22 @@
|
|||||||
"Similarity": "Similarity",
|
"Similarity": "Similarity",
|
||||||
"data": {
|
"data": {
|
||||||
"Edit": "Edit Data",
|
"Edit": "Edit Data",
|
||||||
|
"data is deleted": "Data is deleted",
|
||||||
"id": "Data ID"
|
"id": "Data ID"
|
||||||
},
|
},
|
||||||
|
"import": {
|
||||||
|
"Ideal chunk length": "Ideal chunk length",
|
||||||
|
"Ideal chunk length Tips": "Segment by end symbol. We recommend that your document should be properly punctuated to ensure that each complete sentence length does not exceed this value \n Chinese document recommended 400~1000\n English document recommended 600~1200"
|
||||||
|
},
|
||||||
"test": {
|
"test": {
|
||||||
"Test Result": "Results"
|
"Test": "Start",
|
||||||
|
"Test Result": "Results",
|
||||||
|
"Test Text": "Text",
|
||||||
|
"Test Text Placeholder": "Enter the text you want to test",
|
||||||
|
"delete test history": "Delete the test result",
|
||||||
|
"test history": "Test History",
|
||||||
|
"test result placeholder": "The test results will be presented here",
|
||||||
|
"test result tip": "The contents of the knowledge base are sorted according to their similarity to the test text, and you can adjust the corresponding text according to the test results. Note: The data in the test record may have been modified, clicking on a test data will show the latest data."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"module": {
|
"module": {
|
||||||
|
|||||||
@@ -193,6 +193,9 @@
|
|||||||
"unKnow": "出现了点意外~"
|
"unKnow": "出现了点意外~"
|
||||||
},
|
},
|
||||||
"export": "",
|
"export": "",
|
||||||
|
"file": {
|
||||||
|
"Select file amount limit 100": "每次最多选择100个文件"
|
||||||
|
},
|
||||||
"folder": {
|
"folder": {
|
||||||
"Drag Tip": "点我可拖动",
|
"Drag Tip": "点我可拖动",
|
||||||
"Move Success": "移动成功",
|
"Move Success": "移动成功",
|
||||||
@@ -260,10 +263,22 @@
|
|||||||
"Similarity": "相似度",
|
"Similarity": "相似度",
|
||||||
"data": {
|
"data": {
|
||||||
"Edit": "编辑数据",
|
"Edit": "编辑数据",
|
||||||
|
"data is deleted": "该数据已被删除",
|
||||||
"id": "数据ID"
|
"id": "数据ID"
|
||||||
},
|
},
|
||||||
|
"import": {
|
||||||
|
"Ideal chunk length": "理想分块长度",
|
||||||
|
"Ideal chunk length Tips": "按结束符号进行分段。我们建议您的文档应合理的使用标点符号,以确保每个完整的句子长度不要超过该值\n中文文档建议400~1000\n英文文档建议600~1200"
|
||||||
|
},
|
||||||
"test": {
|
"test": {
|
||||||
"Test Result": "测试结果"
|
"Test": "测试",
|
||||||
|
"Test Result": "测试结果",
|
||||||
|
"Test Text": "测试文本",
|
||||||
|
"Test Text Placeholder": "输入需要测试的文本",
|
||||||
|
"delete test history": "删除该测试结果",
|
||||||
|
"test history": "测试历史",
|
||||||
|
"test result placeholder": "测试结果将在这里展示",
|
||||||
|
"test result tip": "根据知识库内容与测试文本的相似度进行排序,你可以根据测试结果调整对应的文本。\n注意:测试记录中的数据可能已经被修改过,点击某条测试数据后将展示最新的数据。"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"module": {
|
"module": {
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import MyIcon from '../Icon';
|
|||||||
import styles from './index.module.scss';
|
import styles from './index.module.scss';
|
||||||
import { useRouter } from 'next/router';
|
import { useRouter } from 'next/router';
|
||||||
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
||||||
import { compressImgAndUpload } from '@/web/common/file/controller';
|
import { compressImgFileAndUpload } from '@/web/common/file/controller';
|
||||||
import { useToast } from '@/web/common/hooks/useToast';
|
import { useToast } from '@/web/common/hooks/useToast';
|
||||||
import { customAlphabet } from 'nanoid';
|
import { customAlphabet } from 'nanoid';
|
||||||
import { IMG_BLOCK_KEY } from '@fastgpt/global/core/chat/constants';
|
import { IMG_BLOCK_KEY } from '@fastgpt/global/core/chat/constants';
|
||||||
@@ -72,7 +72,7 @@ const MessageInput = ({
|
|||||||
const uploadFile = async (file: FileItemType) => {
|
const uploadFile = async (file: FileItemType) => {
|
||||||
if (file.type === FileTypeEnum.image) {
|
if (file.type === FileTypeEnum.image) {
|
||||||
try {
|
try {
|
||||||
const src = await compressImgAndUpload({
|
const src = await compressImgFileAndUpload({
|
||||||
file: file.rawFile,
|
file: file.rawFile,
|
||||||
maxW: 1000,
|
maxW: 1000,
|
||||||
maxH: 1000,
|
maxH: 1000,
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import React, { useState } from 'react';
|
import React, { useState } from 'react';
|
||||||
import {
|
import {
|
||||||
|
Box,
|
||||||
Image,
|
Image,
|
||||||
Modal,
|
Modal,
|
||||||
ModalCloseButton,
|
ModalCloseButton,
|
||||||
@@ -8,6 +9,7 @@ import {
|
|||||||
Skeleton,
|
Skeleton,
|
||||||
useDisclosure
|
useDisclosure
|
||||||
} from '@chakra-ui/react';
|
} from '@chakra-ui/react';
|
||||||
|
import MyModal from '@/components/MyModal';
|
||||||
|
|
||||||
const MdImage = ({ src }: { src?: string }) => {
|
const MdImage = ({ src }: { src?: string }) => {
|
||||||
const [isLoading, setIsLoading] = useState(true);
|
const [isLoading, setIsLoading] = useState(true);
|
||||||
@@ -43,17 +45,21 @@ const MdImage = ({ src }: { src?: string }) => {
|
|||||||
onOpen();
|
onOpen();
|
||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
<Modal isOpen={isOpen} onClose={onClose}>
|
<Modal isOpen={isOpen} onClose={onClose} isCentered>
|
||||||
<ModalOverlay />
|
<ModalOverlay />
|
||||||
<ModalContent m={'auto'}>
|
<ModalContent maxW={'80vw'} maxH={'auto'}>
|
||||||
<Image
|
<Box>
|
||||||
src={src}
|
<Image
|
||||||
alt={''}
|
borderRadius={'md'}
|
||||||
fallbackSrc={'/imgs/errImg.png'}
|
src={src}
|
||||||
fallbackStrategy={'onError'}
|
alt={''}
|
||||||
loading="eager"
|
w={'auto'}
|
||||||
objectFit={'contain'}
|
h={'auto'}
|
||||||
/>
|
fallbackSrc={'/imgs/errImg.png'}
|
||||||
|
fallbackStrategy={'onError'}
|
||||||
|
objectFit={'contain'}
|
||||||
|
/>
|
||||||
|
</Box>
|
||||||
</ModalContent>
|
</ModalContent>
|
||||||
<ModalCloseButton bg={'myWhite.500'} zIndex={999999} />
|
<ModalCloseButton bg={'myWhite.500'} zIndex={999999} />
|
||||||
</Modal>
|
</Modal>
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import React, { useCallback, useState } from 'react';
|
|||||||
import { useForm } from 'react-hook-form';
|
import { useForm } from 'react-hook-form';
|
||||||
import { useTranslation } from 'next-i18next';
|
import { useTranslation } from 'next-i18next';
|
||||||
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
||||||
import { compressImgAndUpload } from '@/web/common/file/controller';
|
import { compressImgFileAndUpload } from '@/web/common/file/controller';
|
||||||
import { useToast } from '@/web/common/hooks/useToast';
|
import { useToast } from '@/web/common/hooks/useToast';
|
||||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||||
import { useRequest } from '@/web/common/hooks/useRequest';
|
import { useRequest } from '@/web/common/hooks/useRequest';
|
||||||
@@ -49,7 +49,7 @@ function EditModal({
|
|||||||
const file = e[0];
|
const file = e[0];
|
||||||
if (!file) return;
|
if (!file) return;
|
||||||
try {
|
try {
|
||||||
const src = await compressImgAndUpload({
|
const src = await compressImgFileAndUpload({
|
||||||
file,
|
file,
|
||||||
maxW: 100,
|
maxW: 100,
|
||||||
maxH: 100
|
maxH: 100
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ export const Prompt_QuotePromptList: PromptTemplateItem[] = [
|
|||||||
1. 背景知识是最新的实时的信息,使用背景知识回答问题。
|
1. 背景知识是最新的实时的信息,使用背景知识回答问题。
|
||||||
2. 优先使用背景知识的内容回答我的问题,答案应与背景知识严格一致。
|
2. 优先使用背景知识的内容回答我的问题,答案应与背景知识严格一致。
|
||||||
3. 背景知识无法回答我的问题时,可以忽略背景知识,根据你的知识来自由回答。
|
3. 背景知识无法回答我的问题时,可以忽略背景知识,根据你的知识来自由回答。
|
||||||
4. 使用对话的风格,自然的回答问题。
|
4. 使用对话的风格,自然的回答问题。包含markdown内容,需按markdown格式返回。
|
||||||
我的问题是:"{{question}}"`
|
我的问题是:"{{question}}"`
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -49,7 +49,7 @@ export const Prompt_QuotePromptList: PromptTemplateItem[] = [
|
|||||||
1. 背景知识是最新的实时的信息,使用背景知识回答问题,其中 instruction 是相关介绍,output 是预期回答或补充。
|
1. 背景知识是最新的实时的信息,使用背景知识回答问题,其中 instruction 是相关介绍,output 是预期回答或补充。
|
||||||
2. 优先使用背景知识的内容回答我的问题,答案应与背景知识严格一致。
|
2. 优先使用背景知识的内容回答我的问题,答案应与背景知识严格一致。
|
||||||
3. 背景知识无法回答我的问题时,可以忽略背景知识,根据你的知识来自由回答。
|
3. 背景知识无法回答我的问题时,可以忽略背景知识,根据你的知识来自由回答。
|
||||||
4. 使用对话的风格,自然的回答问题。
|
4. 使用对话的风格,自然的回答问题。包含markdown内容,需按markdown格式返回。
|
||||||
我的问题是:"{{question}}"`
|
我的问题是:"{{question}}"`
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -63,7 +63,7 @@ export const Prompt_QuotePromptList: PromptTemplateItem[] = [
|
|||||||
1. 背景知识是最新的实时的信息,是你的唯一信息来源,使用背景知识回答问题。
|
1. 背景知识是最新的实时的信息,是你的唯一信息来源,使用背景知识回答问题。
|
||||||
2. 优先使用背景知识回答我的问题,答案与背景知识完全一致,无需做其他回答。
|
2. 优先使用背景知识回答我的问题,答案与背景知识完全一致,无需做其他回答。
|
||||||
3. 背景知识与问题无关,或背景知识无法回答本次问题时,则拒绝回答本次问题:“我不太清除xxx”。
|
3. 背景知识与问题无关,或背景知识无法回答本次问题时,则拒绝回答本次问题:“我不太清除xxx”。
|
||||||
4. 使用对话的风格,自然的回答问题。
|
4. 使用对话的风格,自然的回答问题。包含markdown内容,需按markdown格式返回。
|
||||||
我的问题是:"{{question}}"`
|
我的问题是:"{{question}}"`
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,18 +1,17 @@
|
|||||||
export const Prompt_AgentQA = {
|
export const Prompt_AgentQA = {
|
||||||
prompt: `我会给你一段文本,{{theme}},学习它们,并整理学习成果,要求为:
|
description: `我会给你一段文本,学习它们,并整理学习成果,要求为:
|
||||||
1. 提出问题并给出每个问题的答案。
|
1. 提出问题并给出每个问题的答案。
|
||||||
2. 每个答案都要详细完整,给出相关原文描述,答案可以包含普通文字、链接、代码、表格、公示、媒体链接等 markdown 元素。
|
2. 每个答案都要详细完整,给出相关原文描述,答案可以包含普通文字、链接、代码、表格、公示、媒体链接等 markdown 元素。
|
||||||
3. 最多提出 30 个问题。
|
3. 最多提出 30 个问题。
|
||||||
4. 按格式返回多个问题和答案:
|
`,
|
||||||
|
fixedText: `最后,你需要按下面的格式返回多个问题和答案:
|
||||||
Q1: 问题。
|
Q1: 问题。
|
||||||
A1: 答案。
|
A1: 答案。
|
||||||
Q2:
|
Q2:
|
||||||
A2:
|
A2:
|
||||||
……
|
……
|
||||||
|
|
||||||
我的文本:"""{{text}}"""`,
|
我的文本:"""{{text}}"""`
|
||||||
defaultTheme: '它们可能包含多个主题内容'
|
|
||||||
};
|
};
|
||||||
|
|
||||||
export const Prompt_ExtractJson = `你可以从 "对话记录" 中提取指定信息,并返回一个 JSON 对象,JSON 对象要求:
|
export const Prompt_ExtractJson = `你可以从 "对话记录" 中提取指定信息,并返回一个 JSON 对象,JSON 对象要求:
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ import type { UserType } from '@fastgpt/global/support/user/type.d';
|
|||||||
import { useQuery } from '@tanstack/react-query';
|
import { useQuery } from '@tanstack/react-query';
|
||||||
import dynamic from 'next/dynamic';
|
import dynamic from 'next/dynamic';
|
||||||
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
||||||
import { compressImgAndUpload } from '@/web/common/file/controller';
|
import { compressImgFileAndUpload } from '@/web/common/file/controller';
|
||||||
import { feConfigs, systemVersion } from '@/web/common/system/staticData';
|
import { feConfigs, systemVersion } from '@/web/common/system/staticData';
|
||||||
import { useTranslation } from 'next-i18next';
|
import { useTranslation } from 'next-i18next';
|
||||||
import { timezoneList } from '@fastgpt/global/common/time/timezone';
|
import { timezoneList } from '@fastgpt/global/common/time/timezone';
|
||||||
@@ -94,7 +94,7 @@ const UserInfo = () => {
|
|||||||
const file = e[0];
|
const file = e[0];
|
||||||
if (!file || !userInfo) return;
|
if (!file || !userInfo) return;
|
||||||
try {
|
try {
|
||||||
const src = await compressImgAndUpload({
|
const src = await compressImgFileAndUpload({
|
||||||
file,
|
file,
|
||||||
maxW: 100,
|
maxW: 100,
|
||||||
maxH: 100
|
maxH: 100
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ import { useForm } from 'react-hook-form';
|
|||||||
import { AppSchema } from '@fastgpt/global/core/app/type.d';
|
import { AppSchema } from '@fastgpt/global/core/app/type.d';
|
||||||
import { useToast } from '@/web/common/hooks/useToast';
|
import { useToast } from '@/web/common/hooks/useToast';
|
||||||
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
||||||
import { compressImgAndUpload } from '@/web/common/file/controller';
|
import { compressImgFileAndUpload } from '@/web/common/file/controller';
|
||||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||||
import { useRequest } from '@/web/common/hooks/useRequest';
|
import { useRequest } from '@/web/common/hooks/useRequest';
|
||||||
import Avatar from '@/components/Avatar';
|
import Avatar from '@/components/Avatar';
|
||||||
@@ -101,7 +101,7 @@ const InfoModal = ({
|
|||||||
const file = e[0];
|
const file = e[0];
|
||||||
if (!file) return;
|
if (!file) return;
|
||||||
try {
|
try {
|
||||||
const src = await compressImgAndUpload({
|
const src = await compressImgFileAndUpload({
|
||||||
file,
|
file,
|
||||||
maxW: 100,
|
maxW: 100,
|
||||||
maxH: 100
|
maxH: 100
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ import {
|
|||||||
} from '@chakra-ui/react';
|
} from '@chakra-ui/react';
|
||||||
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
||||||
import { useForm } from 'react-hook-form';
|
import { useForm } from 'react-hook-form';
|
||||||
import { compressImgAndUpload } from '@/web/common/file/controller';
|
import { compressImgFileAndUpload } from '@/web/common/file/controller';
|
||||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||||
import { useToast } from '@/web/common/hooks/useToast';
|
import { useToast } from '@/web/common/hooks/useToast';
|
||||||
import { postCreateApp } from '@/web/core/app/api';
|
import { postCreateApp } from '@/web/core/app/api';
|
||||||
@@ -58,7 +58,7 @@ const CreateModal = ({ onClose, onSuccess }: { onClose: () => void; onSuccess: (
|
|||||||
const file = e[0];
|
const file = e[0];
|
||||||
if (!file) return;
|
if (!file) return;
|
||||||
try {
|
try {
|
||||||
const src = await compressImgAndUpload({
|
const src = await compressImgFileAndUpload({
|
||||||
file,
|
file,
|
||||||
maxW: 100,
|
maxW: 100,
|
||||||
maxH: 100
|
maxH: 100
|
||||||
|
|||||||
@@ -16,10 +16,12 @@ import { QuestionOutlineIcon } from '@chakra-ui/icons';
|
|||||||
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
|
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
|
||||||
|
|
||||||
import { useImportStore, SelectorContainer, PreviewFileOrChunk } from './Provider';
|
import { useImportStore, SelectorContainer, PreviewFileOrChunk } from './Provider';
|
||||||
|
import { useTranslation } from 'next-i18next';
|
||||||
|
|
||||||
const fileExtension = '.txt, .doc, .docx, .pdf, .md';
|
const fileExtension = '.txt, .docx, .pdf, .md';
|
||||||
|
|
||||||
const ChunkImport = () => {
|
const ChunkImport = () => {
|
||||||
|
const { t } = useTranslation();
|
||||||
const { datasetDetail } = useDatasetStore();
|
const { datasetDetail } = useDatasetStore();
|
||||||
const vectorModel = datasetDetail.vectorModel;
|
const vectorModel = datasetDetail.vectorModel;
|
||||||
const unitPrice = vectorModel?.price || 0.2;
|
const unitPrice = vectorModel?.price || 0.2;
|
||||||
@@ -48,13 +50,8 @@ const ChunkImport = () => {
|
|||||||
{/* chunk size */}
|
{/* chunk size */}
|
||||||
<Flex py={4} alignItems={'center'}>
|
<Flex py={4} alignItems={'center'}>
|
||||||
<Box>
|
<Box>
|
||||||
段落长度
|
{t('core.dataset.import.Ideal chunk length')}
|
||||||
<MyTooltip
|
<MyTooltip label={t('core.dataset.import.Ideal chunk length Tips')} forceShow>
|
||||||
label={
|
|
||||||
'按结束标点符号进行分段。前后段落会有 20% 的内容重叠。\n中文文档建议不要超过1000,英文不要超过1500'
|
|
||||||
}
|
|
||||||
forceShow
|
|
||||||
>
|
|
||||||
<QuestionOutlineIcon ml={1} />
|
<QuestionOutlineIcon ml={1} />
|
||||||
</MyTooltip>
|
</MyTooltip>
|
||||||
</Box>
|
</Box>
|
||||||
|
|||||||
@@ -48,6 +48,7 @@ export interface Props extends BoxProps {
|
|||||||
onPushFiles: (files: FileItemType[]) => void;
|
onPushFiles: (files: FileItemType[]) => void;
|
||||||
tipText?: string;
|
tipText?: string;
|
||||||
chunkLen?: number;
|
chunkLen?: number;
|
||||||
|
overlapRatio?: number;
|
||||||
fileTemplate?: {
|
fileTemplate?: {
|
||||||
type: string;
|
type: string;
|
||||||
filename: string;
|
filename: string;
|
||||||
@@ -63,6 +64,7 @@ const FileSelect = ({
|
|||||||
onPushFiles,
|
onPushFiles,
|
||||||
tipText,
|
tipText,
|
||||||
chunkLen = 500,
|
chunkLen = 500,
|
||||||
|
overlapRatio,
|
||||||
fileTemplate,
|
fileTemplate,
|
||||||
showUrlFetch = true,
|
showUrlFetch = true,
|
||||||
showCreateFile = true,
|
showCreateFile = true,
|
||||||
@@ -97,6 +99,13 @@ const FileSelect = ({
|
|||||||
// select file
|
// select file
|
||||||
const onSelectFile = useCallback(
|
const onSelectFile = useCallback(
|
||||||
async (files: File[]) => {
|
async (files: File[]) => {
|
||||||
|
if (files.length >= 100) {
|
||||||
|
return toast({
|
||||||
|
status: 'warning',
|
||||||
|
title: t('common.file.Select file amount limit 100')
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
for await (let file of files) {
|
for await (let file of files) {
|
||||||
const extension = file?.name?.split('.')?.pop()?.toLowerCase();
|
const extension = file?.name?.split('.')?.pop()?.toLowerCase();
|
||||||
@@ -165,7 +174,6 @@ const FileSelect = ({
|
|||||||
return readTxtContent(file);
|
return readTxtContent(file);
|
||||||
case 'pdf':
|
case 'pdf':
|
||||||
return readPdfContent(file);
|
return readPdfContent(file);
|
||||||
case 'doc':
|
|
||||||
case 'docx':
|
case 'docx':
|
||||||
return readDocContent(file);
|
return readDocContent(file);
|
||||||
}
|
}
|
||||||
@@ -176,7 +184,8 @@ const FileSelect = ({
|
|||||||
text = simpleText(text);
|
text = simpleText(text);
|
||||||
const splitRes = splitText2Chunks({
|
const splitRes = splitText2Chunks({
|
||||||
text,
|
text,
|
||||||
maxLen: chunkLen
|
chunkLen,
|
||||||
|
overlapRatio
|
||||||
});
|
});
|
||||||
|
|
||||||
const fileItem: FileItemType = {
|
const fileItem: FileItemType = {
|
||||||
@@ -206,7 +215,7 @@ const FileSelect = ({
|
|||||||
}
|
}
|
||||||
setSelectingText(undefined);
|
setSelectingText(undefined);
|
||||||
},
|
},
|
||||||
[chunkLen, datasetDetail._id, onPushFiles, t, toast]
|
[chunkLen, datasetDetail._id, onPushFiles, overlapRatio, t, toast]
|
||||||
);
|
);
|
||||||
// link fetch
|
// link fetch
|
||||||
const onUrlFetch = useCallback(
|
const onUrlFetch = useCallback(
|
||||||
@@ -214,7 +223,8 @@ const FileSelect = ({
|
|||||||
const result: FileItemType[] = e.map(({ url, content }) => {
|
const result: FileItemType[] = e.map(({ url, content }) => {
|
||||||
const splitRes = splitText2Chunks({
|
const splitRes = splitText2Chunks({
|
||||||
text: content,
|
text: content,
|
||||||
maxLen: chunkLen
|
chunkLen,
|
||||||
|
overlapRatio
|
||||||
});
|
});
|
||||||
return {
|
return {
|
||||||
id: nanoid(),
|
id: nanoid(),
|
||||||
@@ -234,7 +244,7 @@ const FileSelect = ({
|
|||||||
});
|
});
|
||||||
onPushFiles(result);
|
onPushFiles(result);
|
||||||
},
|
},
|
||||||
[chunkLen, onPushFiles]
|
[chunkLen, onPushFiles, overlapRatio]
|
||||||
);
|
);
|
||||||
// manual create file and copy data
|
// manual create file and copy data
|
||||||
const onCreateFile = useCallback(
|
const onCreateFile = useCallback(
|
||||||
@@ -255,7 +265,8 @@ const FileSelect = ({
|
|||||||
|
|
||||||
const splitRes = splitText2Chunks({
|
const splitRes = splitText2Chunks({
|
||||||
text: content,
|
text: content,
|
||||||
maxLen: chunkLen
|
chunkLen,
|
||||||
|
overlapRatio
|
||||||
});
|
});
|
||||||
|
|
||||||
onPushFiles([
|
onPushFiles([
|
||||||
@@ -276,7 +287,7 @@ const FileSelect = ({
|
|||||||
}
|
}
|
||||||
]);
|
]);
|
||||||
},
|
},
|
||||||
[chunkLen, datasetDetail._id, onPushFiles]
|
[chunkLen, datasetDetail._id, onPushFiles, overlapRatio]
|
||||||
);
|
);
|
||||||
|
|
||||||
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
|
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
|
||||||
|
|||||||
@@ -41,16 +41,19 @@ const ImportData = ({
|
|||||||
const map = {
|
const map = {
|
||||||
[ImportTypeEnum.chunk]: {
|
[ImportTypeEnum.chunk]: {
|
||||||
defaultChunkLen: vectorModel?.defaultToken || 500,
|
defaultChunkLen: vectorModel?.defaultToken || 500,
|
||||||
|
chunkOverlapRatio: 0.2,
|
||||||
unitPrice: vectorModel?.price || 0.2,
|
unitPrice: vectorModel?.price || 0.2,
|
||||||
mode: TrainingModeEnum.chunk
|
mode: TrainingModeEnum.chunk
|
||||||
},
|
},
|
||||||
[ImportTypeEnum.qa]: {
|
[ImportTypeEnum.qa]: {
|
||||||
defaultChunkLen: agentModel?.maxContext * 0.6 || 9000,
|
defaultChunkLen: agentModel?.maxContext * 0.6 || 8000,
|
||||||
|
chunkOverlapRatio: 0,
|
||||||
unitPrice: agentModel?.price || 3,
|
unitPrice: agentModel?.price || 3,
|
||||||
mode: TrainingModeEnum.qa
|
mode: TrainingModeEnum.qa
|
||||||
},
|
},
|
||||||
[ImportTypeEnum.csv]: {
|
[ImportTypeEnum.csv]: {
|
||||||
defaultChunkLen: vectorModel?.defaultToken || 500,
|
defaultChunkLen: vectorModel?.defaultToken || 500,
|
||||||
|
chunkOverlapRatio: 0,
|
||||||
unitPrice: vectorModel?.price || 0.2,
|
unitPrice: vectorModel?.price || 0.2,
|
||||||
mode: TrainingModeEnum.chunk
|
mode: TrainingModeEnum.chunk
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ type useImportStoreType = {
|
|||||||
price: number;
|
price: number;
|
||||||
uploading: boolean;
|
uploading: boolean;
|
||||||
chunkLen: number;
|
chunkLen: number;
|
||||||
|
chunkOverlapRatio: number;
|
||||||
setChunkLen: Dispatch<number>;
|
setChunkLen: Dispatch<number>;
|
||||||
showRePreview: boolean;
|
showRePreview: boolean;
|
||||||
setReShowRePreview: Dispatch<SetStateAction<boolean>>;
|
setReShowRePreview: Dispatch<SetStateAction<boolean>>;
|
||||||
@@ -66,6 +67,7 @@ const StateContext = createContext<useImportStoreType>({
|
|||||||
},
|
},
|
||||||
price: 0,
|
price: 0,
|
||||||
chunkLen: 0,
|
chunkLen: 0,
|
||||||
|
chunkOverlapRatio: 0,
|
||||||
setChunkLen: function (value: number): void {
|
setChunkLen: function (value: number): void {
|
||||||
throw new Error('Function not implemented.');
|
throw new Error('Function not implemented.');
|
||||||
},
|
},
|
||||||
@@ -93,6 +95,7 @@ const Provider = ({
|
|||||||
vectorModel,
|
vectorModel,
|
||||||
agentModel,
|
agentModel,
|
||||||
defaultChunkLen = 500,
|
defaultChunkLen = 500,
|
||||||
|
chunkOverlapRatio = 0.2,
|
||||||
importType,
|
importType,
|
||||||
onUploadSuccess,
|
onUploadSuccess,
|
||||||
children
|
children
|
||||||
@@ -104,6 +107,7 @@ const Provider = ({
|
|||||||
vectorModel: string;
|
vectorModel: string;
|
||||||
agentModel: string;
|
agentModel: string;
|
||||||
defaultChunkLen: number;
|
defaultChunkLen: number;
|
||||||
|
chunkOverlapRatio: number;
|
||||||
importType: `${ImportTypeEnum}`;
|
importType: `${ImportTypeEnum}`;
|
||||||
onUploadSuccess: () => void;
|
onUploadSuccess: () => void;
|
||||||
children: React.ReactNode;
|
children: React.ReactNode;
|
||||||
@@ -180,7 +184,8 @@ const Provider = ({
|
|||||||
state.map((file) => {
|
state.map((file) => {
|
||||||
const splitRes = splitText2Chunks({
|
const splitRes = splitText2Chunks({
|
||||||
text: file.text,
|
text: file.text,
|
||||||
maxLen: chunkLen
|
chunkLen,
|
||||||
|
overlapRatio: chunkOverlapRatio
|
||||||
});
|
});
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -228,6 +233,7 @@ const Provider = ({
|
|||||||
onclickUpload,
|
onclickUpload,
|
||||||
uploading,
|
uploading,
|
||||||
chunkLen,
|
chunkLen,
|
||||||
|
chunkOverlapRatio,
|
||||||
setChunkLen,
|
setChunkLen,
|
||||||
showRePreview,
|
showRePreview,
|
||||||
setReShowRePreview
|
setReShowRePreview
|
||||||
@@ -413,7 +419,8 @@ export const SelectorContainer = ({
|
|||||||
tip?: string;
|
tip?: string;
|
||||||
children: React.ReactNode;
|
children: React.ReactNode;
|
||||||
}) => {
|
}) => {
|
||||||
const { files, setPreviewFile, isUnselectedFile, setFiles, chunkLen } = useImportStore();
|
const { files, setPreviewFile, isUnselectedFile, setFiles, chunkLen, chunkOverlapRatio } =
|
||||||
|
useImportStore();
|
||||||
return (
|
return (
|
||||||
<Box
|
<Box
|
||||||
h={'100%'}
|
h={'100%'}
|
||||||
@@ -432,6 +439,7 @@ export const SelectorContainer = ({
|
|||||||
setFiles((state) => files.concat(state));
|
setFiles((state) => files.concat(state));
|
||||||
}}
|
}}
|
||||||
chunkLen={chunkLen}
|
chunkLen={chunkLen}
|
||||||
|
overlapRatio={chunkOverlapRatio}
|
||||||
showUrlFetch={showUrlFetch}
|
showUrlFetch={showUrlFetch}
|
||||||
showCreateFile={showCreateFile}
|
showCreateFile={showCreateFile}
|
||||||
fileTemplate={fileTemplate}
|
fileTemplate={fileTemplate}
|
||||||
|
|||||||
@@ -1,15 +1,14 @@
|
|||||||
import React, { useState, useMemo } from 'react';
|
import React, { useState } from 'react';
|
||||||
import { Box, Flex, Button, Input } from '@chakra-ui/react';
|
import { Box, Flex, Button, Textarea } from '@chakra-ui/react';
|
||||||
import { useConfirm } from '@/web/common/hooks/useConfirm';
|
import { useConfirm } from '@/web/common/hooks/useConfirm';
|
||||||
import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
|
import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
|
||||||
import MyTooltip from '@/components/MyTooltip';
|
import MyTooltip from '@/components/MyTooltip';
|
||||||
import { QuestionOutlineIcon, InfoOutlineIcon } from '@chakra-ui/icons';
|
import { QuestionOutlineIcon } from '@chakra-ui/icons';
|
||||||
import { Prompt_AgentQA } from '@/global/core/prompt/agent';
|
import { Prompt_AgentQA } from '@/global/core/prompt/agent';
|
||||||
import { replaceVariable } from '@fastgpt/global/common/string/tools';
|
|
||||||
import { useImportStore, SelectorContainer, PreviewFileOrChunk } from './Provider';
|
import { useImportStore, SelectorContainer, PreviewFileOrChunk } from './Provider';
|
||||||
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
|
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
|
||||||
|
|
||||||
const fileExtension = '.txt, .doc, .docx, .pdf, .md';
|
const fileExtension = '.txt, .docx, .pdf, .md';
|
||||||
|
|
||||||
const QAImport = () => {
|
const QAImport = () => {
|
||||||
const { datasetDetail } = useDatasetStore();
|
const { datasetDetail } = useDatasetStore();
|
||||||
@@ -31,36 +30,27 @@ const QAImport = () => {
|
|||||||
content: `该任务无法终止!导入后会自动调用大模型生成问答对,会有一些细节丢失,请确认!如果余额不足,未完成的任务会被暂停。`
|
content: `该任务无法终止!导入后会自动调用大模型生成问答对,会有一些细节丢失,请确认!如果余额不足,未完成的任务会被暂停。`
|
||||||
});
|
});
|
||||||
|
|
||||||
const [prompt, setPrompt] = useState('');
|
const [prompt, setPrompt] = useState(Prompt_AgentQA.description);
|
||||||
|
|
||||||
const previewQAPrompt = useMemo(() => {
|
|
||||||
return replaceVariable(Prompt_AgentQA.prompt, {
|
|
||||||
theme: prompt || Prompt_AgentQA.defaultTheme
|
|
||||||
});
|
|
||||||
}, [prompt]);
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Box display={['block', 'flex']} h={['auto', '100%']}>
|
<Box display={['block', 'flex']} h={['auto', '100%']}>
|
||||||
<SelectorContainer fileExtension={fileExtension}>
|
<SelectorContainer fileExtension={fileExtension}>
|
||||||
{/* prompt */}
|
{/* prompt */}
|
||||||
<Box py={5}>
|
<Box p={3} bg={'myWhite.600'} borderRadius={'md'}>
|
||||||
<Box mb={2}>
|
<Box mb={1} fontWeight={'bold'}>
|
||||||
QA 拆分引导词{' '}
|
QA 拆分引导词
|
||||||
<MyTooltip label={previewQAPrompt} forceShow>
|
|
||||||
<InfoOutlineIcon ml={1} />
|
|
||||||
</MyTooltip>
|
|
||||||
</Box>
|
</Box>
|
||||||
<Flex alignItems={'center'} fontSize={'sm'}>
|
<Box whiteSpace={'pre-wrap'} fontSize={'sm'}>
|
||||||
<Box mr={2}>文件主题</Box>
|
<Textarea
|
||||||
<Input
|
|
||||||
fontSize={'sm'}
|
|
||||||
flex={1}
|
|
||||||
placeholder={Prompt_AgentQA.defaultTheme}
|
|
||||||
bg={'myWhite.500'}
|
|
||||||
defaultValue={prompt}
|
defaultValue={prompt}
|
||||||
onChange={(e) => setPrompt(e.target.value || '')}
|
rows={8}
|
||||||
|
fontSize={'sm'}
|
||||||
|
onChange={(e) => {
|
||||||
|
setPrompt(e.target.value);
|
||||||
|
}}
|
||||||
/>
|
/>
|
||||||
</Flex>
|
<Box>{Prompt_AgentQA.fixedText}</Box>
|
||||||
|
</Box>
|
||||||
</Box>
|
</Box>
|
||||||
{/* price */}
|
{/* price */}
|
||||||
<Flex py={5} alignItems={'center'}>
|
<Flex py={5} alignItems={'center'}>
|
||||||
@@ -81,10 +71,7 @@ const QAImport = () => {
|
|||||||
重新生成预览
|
重新生成预览
|
||||||
</Button>
|
</Button>
|
||||||
)}
|
)}
|
||||||
<Button
|
<Button isDisabled={uploading} onClick={openConfirm(() => onclickUpload({ prompt }))}>
|
||||||
isDisabled={uploading}
|
|
||||||
onClick={openConfirm(() => onclickUpload({ prompt: previewQAPrompt }))}
|
|
||||||
>
|
|
||||||
{uploading ? <Box>{Math.round((successChunks / totalChunks) * 100)}%</Box> : '确认导入'}
|
{uploading ? <Box>{Math.round((successChunks / totalChunks) * 100)}%</Box> : '确认导入'}
|
||||||
</Button>
|
</Button>
|
||||||
</Flex>
|
</Flex>
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ import { useToast } from '@/web/common/hooks/useToast';
|
|||||||
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
|
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
|
||||||
import { useConfirm } from '@/web/common/hooks/useConfirm';
|
import { useConfirm } from '@/web/common/hooks/useConfirm';
|
||||||
import { UseFormReturn } from 'react-hook-form';
|
import { UseFormReturn } from 'react-hook-form';
|
||||||
import { compressImgAndUpload } from '@/web/common/file/controller';
|
import { compressImgFileAndUpload } from '@/web/common/file/controller';
|
||||||
import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d';
|
import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d';
|
||||||
import Avatar from '@/components/Avatar';
|
import Avatar from '@/components/Avatar';
|
||||||
import Tag from '@/components/Tag';
|
import Tag from '@/components/Tag';
|
||||||
@@ -95,7 +95,7 @@ const Info = (
|
|||||||
}
|
}
|
||||||
setBtnLoading(false);
|
setBtnLoading(false);
|
||||||
},
|
},
|
||||||
[updateDataset, datasetId, loadDatasetDetail, toast, loadDatasets]
|
[updateDataset, datasetId, toast, loadDatasets]
|
||||||
);
|
);
|
||||||
const saveSubmitError = useCallback(() => {
|
const saveSubmitError = useCallback(() => {
|
||||||
// deep search message
|
// deep search message
|
||||||
@@ -119,7 +119,7 @@ const Info = (
|
|||||||
const file = e[0];
|
const file = e[0];
|
||||||
if (!file) return;
|
if (!file) return;
|
||||||
try {
|
try {
|
||||||
const src = await compressImgAndUpload({
|
const src = await compressImgFileAndUpload({
|
||||||
file,
|
file,
|
||||||
maxW: 100,
|
maxW: 100,
|
||||||
maxH: 100
|
maxH: 100
|
||||||
|
|||||||
@@ -81,43 +81,45 @@ const Test = ({ datasetId }: { datasetId: string }) => {
|
|||||||
borderRight={['none', theme.borders.base]}
|
borderRight={['none', theme.borders.base]}
|
||||||
>
|
>
|
||||||
<Box border={'2px solid'} borderColor={'myBlue.600'} p={3} mx={4} borderRadius={'md'}>
|
<Box border={'2px solid'} borderColor={'myBlue.600'} p={3} mx={4} borderRadius={'md'}>
|
||||||
<Box fontSize={'sm'} fontWeight={'bold'}>
|
<Flex alignItems={'center'}>
|
||||||
<MyIcon mr={2} name={'text'} w={'18px'} h={'18px'} color={'myBlue.700'} />
|
<Box fontSize={'sm'} fontWeight={'bold'} flex={1}>
|
||||||
测试文本
|
<MyIcon mr={2} name={'text'} w={'18px'} h={'18px'} color={'myBlue.700'} />
|
||||||
</Box>
|
{t('core.dataset.test.Test Text')}
|
||||||
<Textarea
|
</Box>
|
||||||
rows={6}
|
|
||||||
resize={'none'}
|
|
||||||
variant={'unstyled'}
|
|
||||||
maxLength={datasetDetail.vectorModel.maxToken}
|
|
||||||
placeholder="输入需要测试的文本"
|
|
||||||
value={inputText}
|
|
||||||
onChange={(e) => setInputText(e.target.value)}
|
|
||||||
/>
|
|
||||||
<Flex alignItems={'center'} justifyContent={'flex-end'}>
|
|
||||||
{feConfigs?.isPlus && (
|
{feConfigs?.isPlus && (
|
||||||
<Flex alignItems={'center'}>
|
<Flex alignItems={'center'}>
|
||||||
{t('dataset.recall.rerank')}
|
{t('dataset.recall.rerank')}
|
||||||
<Switch ml={1} isChecked={rerank} onChange={(e) => setRerank(e.target.checked)} />
|
<Switch ml={1} isChecked={rerank} onChange={(e) => setRerank(e.target.checked)} />
|
||||||
</Flex>
|
</Flex>
|
||||||
)}
|
)}
|
||||||
|
</Flex>
|
||||||
|
<Textarea
|
||||||
|
rows={6}
|
||||||
|
resize={'none'}
|
||||||
|
variant={'unstyled'}
|
||||||
|
maxLength={datasetDetail.vectorModel.maxToken}
|
||||||
|
placeholder={t('core.dataset.test.Test Text Placeholder')}
|
||||||
|
value={inputText}
|
||||||
|
onChange={(e) => setInputText(e.target.value)}
|
||||||
|
/>
|
||||||
|
<Flex alignItems={'center'} justifyContent={'flex-end'}>
|
||||||
<Box mx={3} color={'myGray.500'}>
|
<Box mx={3} color={'myGray.500'}>
|
||||||
{inputText.length}
|
{inputText.length}
|
||||||
</Box>
|
</Box>
|
||||||
<Button isDisabled={inputText === ''} isLoading={isLoading} onClick={mutate}>
|
<Button isDisabled={inputText === ''} isLoading={isLoading} onClick={mutate}>
|
||||||
测试
|
{t('core.dataset.test.Test')}
|
||||||
</Button>
|
</Button>
|
||||||
</Flex>
|
</Flex>
|
||||||
</Box>
|
</Box>
|
||||||
<Box mt={5} flex={'1 0 0'} px={4} overflow={'overlay'} display={['none', 'block']}>
|
<Box mt={5} flex={'1 0 0'} px={4} overflow={'overlay'} display={['none', 'block']}>
|
||||||
<Flex alignItems={'center'} color={'myGray.600'}>
|
<Flex alignItems={'center'} color={'myGray.600'}>
|
||||||
<MyIcon mr={2} name={'history'} w={'16px'} h={'16px'} />
|
<MyIcon mr={2} name={'history'} w={'16px'} h={'16px'} />
|
||||||
<Box fontSize={'2xl'}>测试历史</Box>
|
<Box fontSize={'2xl'}>{t('core.dataset.test.test history')}</Box>
|
||||||
</Flex>
|
</Flex>
|
||||||
<Box mt={2}>
|
<Box mt={2}>
|
||||||
<Flex py={2} fontWeight={'bold'} borderBottom={theme.borders.sm}>
|
<Flex py={2} fontWeight={'bold'} borderBottom={theme.borders.sm}>
|
||||||
<Box flex={1}>测试文本</Box>
|
<Box flex={1}>{t('core.dataset.test.Test Text')}</Box>
|
||||||
<Box w={'80px'}>时间</Box>
|
<Box w={'80px'}>{t('common.Time')}</Box>
|
||||||
<Box w={'14px'}></Box>
|
<Box w={'14px'}></Box>
|
||||||
</Flex>
|
</Flex>
|
||||||
{kbTestHistory.map((item) => (
|
{kbTestHistory.map((item) => (
|
||||||
@@ -139,7 +141,7 @@ const Test = ({ datasetId }: { datasetId: string }) => {
|
|||||||
{item.text}
|
{item.text}
|
||||||
</Box>
|
</Box>
|
||||||
<Box w={'80px'}>{formatTimeToChatTime(item.time)}</Box>
|
<Box w={'80px'}>{formatTimeToChatTime(item.time)}</Box>
|
||||||
<MyTooltip label={'删除该测试记录'}>
|
<MyTooltip label={t('core.dataset.test.delete test history')}>
|
||||||
<Box w={'14px'} h={'14px'}>
|
<Box w={'14px'} h={'14px'}>
|
||||||
<MyIcon
|
<MyIcon
|
||||||
className="delete"
|
className="delete"
|
||||||
@@ -171,7 +173,7 @@ const Test = ({ datasetId }: { datasetId: string }) => {
|
|||||||
>
|
>
|
||||||
<MyIcon name={'empty'} color={'transparent'} w={'54px'} />
|
<MyIcon name={'empty'} color={'transparent'} w={'54px'} />
|
||||||
<Box mt={3} color={'myGray.600'}>
|
<Box mt={3} color={'myGray.600'}>
|
||||||
测试结果将在这里展示
|
{t('core.dataset.test.test result placeholder')}
|
||||||
</Box>
|
</Box>
|
||||||
</Flex>
|
</Flex>
|
||||||
) : (
|
) : (
|
||||||
@@ -180,12 +182,7 @@ const Test = ({ datasetId }: { datasetId: string }) => {
|
|||||||
<Box fontSize={'3xl'} color={'myGray.600'}>
|
<Box fontSize={'3xl'} color={'myGray.600'}>
|
||||||
{t('core.dataset.test.Test Result')}
|
{t('core.dataset.test.Test Result')}
|
||||||
</Box>
|
</Box>
|
||||||
<MyTooltip
|
<MyTooltip label={t('core.dataset.test.test result tip')} forceShow>
|
||||||
label={
|
|
||||||
'根据知识库内容与测试文本的相似度进行排序,你可以根据测试结果调整对应的文本。\n注意:测试记录中的数据可能已经被修改过,点击某条测试数据后将展示最新的数据。'
|
|
||||||
}
|
|
||||||
forceShow
|
|
||||||
>
|
|
||||||
<QuestionOutlineIcon
|
<QuestionOutlineIcon
|
||||||
mx={2}
|
mx={2}
|
||||||
color={'myGray.600'}
|
color={'myGray.600'}
|
||||||
@@ -221,7 +218,7 @@ const Test = ({ datasetId }: { datasetId: string }) => {
|
|||||||
const data = await getDatasetDataItemById(item.id);
|
const data = await getDatasetDataItemById(item.id);
|
||||||
|
|
||||||
if (!data) {
|
if (!data) {
|
||||||
throw new Error('该数据已被删除');
|
throw new Error(t('core.dataset.data.data is deleted'));
|
||||||
}
|
}
|
||||||
|
|
||||||
setEditInputData({
|
setEditInputData({
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ import {
|
|||||||
} from '@chakra-ui/react';
|
} from '@chakra-ui/react';
|
||||||
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
||||||
import { useForm } from 'react-hook-form';
|
import { useForm } from 'react-hook-form';
|
||||||
import { compressImgAndUpload } from '@/web/common/file/controller';
|
import { compressImgFileAndUpload } from '@/web/common/file/controller';
|
||||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||||
import { useToast } from '@/web/common/hooks/useToast';
|
import { useToast } from '@/web/common/hooks/useToast';
|
||||||
import { useRouter } from 'next/router';
|
import { useRouter } from 'next/router';
|
||||||
@@ -57,7 +57,7 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st
|
|||||||
const file = e[0];
|
const file = e[0];
|
||||||
if (!file) return;
|
if (!file) return;
|
||||||
try {
|
try {
|
||||||
const src = await compressImgAndUpload({
|
const src = await compressImgFileAndUpload({
|
||||||
file,
|
file,
|
||||||
maxW: 100,
|
maxW: 100,
|
||||||
maxH: 100
|
maxH: 100
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ import {
|
|||||||
} from '@chakra-ui/react';
|
} from '@chakra-ui/react';
|
||||||
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
|
||||||
import { useForm } from 'react-hook-form';
|
import { useForm } from 'react-hook-form';
|
||||||
import { compressImgAndUpload } from '@/web/common/file/controller';
|
import { compressImgFileAndUpload } from '@/web/common/file/controller';
|
||||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||||
import { useToast } from '@/web/common/hooks/useToast';
|
import { useToast } from '@/web/common/hooks/useToast';
|
||||||
import { useRouter } from 'next/router';
|
import { useRouter } from 'next/router';
|
||||||
@@ -136,7 +136,7 @@ const CreateModal = ({
|
|||||||
const file = e[0];
|
const file = e[0];
|
||||||
if (!file) return;
|
if (!file) return;
|
||||||
try {
|
try {
|
||||||
const src = await compressImgAndUpload({
|
const src = await compressImgFileAndUpload({
|
||||||
file,
|
file,
|
||||||
maxW: 100,
|
maxW: 100,
|
||||||
maxH: 100
|
maxH: 100
|
||||||
|
|||||||
@@ -118,17 +118,14 @@ export async function generateQA(): Promise<any> {
|
|||||||
try {
|
try {
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
const model = data.model ?? global.qaModels[0].model;
|
const model = data.model ?? global.qaModels[0].model;
|
||||||
|
const prompt = `${data.prompt || Prompt_AgentQA.description}
|
||||||
|
${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
|
||||||
|
|
||||||
// request LLM to get QA
|
// request LLM to get QA
|
||||||
const messages: ChatMessageItemType[] = [
|
const messages: ChatMessageItemType[] = [
|
||||||
{
|
{
|
||||||
role: 'user',
|
role: 'user',
|
||||||
content: data.prompt
|
content: prompt
|
||||||
? replaceVariable(data.prompt, { text })
|
|
||||||
: replaceVariable(Prompt_AgentQA.prompt, {
|
|
||||||
theme: Prompt_AgentQA.defaultTheme,
|
|
||||||
text
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|
||||||
@@ -244,7 +241,7 @@ function formatSplitText(text: string, rawText: string) {
|
|||||||
|
|
||||||
// empty result. direct split chunk
|
// empty result. direct split chunk
|
||||||
if (result.length === 0) {
|
if (result.length === 0) {
|
||||||
const splitRes = splitText2Chunks({ text: rawText, maxLen: 500 });
|
const splitRes = splitText2Chunks({ text: rawText, chunkLen: 512 });
|
||||||
splitRes.chunks.forEach((chunk) => {
|
splitRes.chunks.forEach((chunk) => {
|
||||||
result.push({
|
result.push({
|
||||||
q: chunk,
|
q: chunk,
|
||||||
|
|||||||
@@ -33,74 +33,98 @@ export const uploadFiles = ({
|
|||||||
* compress image. response base64
|
* compress image. response base64
|
||||||
* @param maxSize The max size of the compressed image
|
* @param maxSize The max size of the compressed image
|
||||||
*/
|
*/
|
||||||
export const compressImgAndUpload = ({
|
export const compressBase64ImgAndUpload = ({
|
||||||
file,
|
base64,
|
||||||
maxW = 200,
|
maxW = 200,
|
||||||
maxH = 200,
|
maxH = 200,
|
||||||
maxSize = 1024 * 100, // 100kb
|
maxSize = 1024 * 100, // 100kb
|
||||||
expiredTime
|
expiredTime
|
||||||
|
}: {
|
||||||
|
base64: string;
|
||||||
|
maxW?: number;
|
||||||
|
maxH?: number;
|
||||||
|
maxSize?: number;
|
||||||
|
expiredTime?: Date;
|
||||||
|
}) => {
|
||||||
|
return new Promise<string>((resolve, reject) => {
|
||||||
|
const fileType = /^data:([a-zA-Z0-9]+\/[a-zA-Z0-9-.+]+).*,/.exec(base64)?.[1] || 'image/jpeg';
|
||||||
|
|
||||||
|
const img = new Image();
|
||||||
|
img.src = base64;
|
||||||
|
img.onload = async () => {
|
||||||
|
let width = img.width;
|
||||||
|
let height = img.height;
|
||||||
|
|
||||||
|
if (width > height) {
|
||||||
|
if (width > maxW) {
|
||||||
|
height *= maxW / width;
|
||||||
|
width = maxW;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (height > maxH) {
|
||||||
|
width *= maxH / height;
|
||||||
|
height = maxH;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const canvas = document.createElement('canvas');
|
||||||
|
canvas.width = width;
|
||||||
|
canvas.height = height;
|
||||||
|
const ctx = canvas.getContext('2d');
|
||||||
|
|
||||||
|
if (!ctx) {
|
||||||
|
return reject('压缩图片异常');
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.drawImage(img, 0, 0, width, height);
|
||||||
|
const compressedDataUrl = canvas.toDataURL(fileType, 0.8);
|
||||||
|
// 移除 canvas 元素
|
||||||
|
canvas.remove();
|
||||||
|
|
||||||
|
if (compressedDataUrl.length > maxSize) {
|
||||||
|
return reject('图片太大了');
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const src = await postUploadImg(compressedDataUrl, expiredTime);
|
||||||
|
resolve(src);
|
||||||
|
} catch (error) {
|
||||||
|
reject(error);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
});
|
||||||
|
};
|
||||||
|
export const compressImgFileAndUpload = async ({
|
||||||
|
file,
|
||||||
|
maxW,
|
||||||
|
maxH,
|
||||||
|
maxSize,
|
||||||
|
expiredTime
|
||||||
}: {
|
}: {
|
||||||
file: File;
|
file: File;
|
||||||
maxW?: number;
|
maxW?: number;
|
||||||
maxH?: number;
|
maxH?: number;
|
||||||
maxSize?: number;
|
maxSize?: number;
|
||||||
expiredTime?: Date;
|
expiredTime?: Date;
|
||||||
}) =>
|
}) => {
|
||||||
new Promise<string>((resolve, reject) => {
|
const reader = new FileReader();
|
||||||
const reader = new FileReader();
|
reader.readAsDataURL(file);
|
||||||
reader.readAsDataURL(file);
|
|
||||||
|
const base64 = await new Promise<string>((resolve, reject) => {
|
||||||
reader.onload = async () => {
|
reader.onload = async () => {
|
||||||
const img = new Image();
|
resolve(reader.result as string);
|
||||||
// @ts-ignore
|
|
||||||
img.src = reader.result;
|
|
||||||
img.onload = async () => {
|
|
||||||
let width = img.width;
|
|
||||||
let height = img.height;
|
|
||||||
|
|
||||||
if (width > height) {
|
|
||||||
if (width > maxW) {
|
|
||||||
height *= maxW / width;
|
|
||||||
width = maxW;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (height > maxH) {
|
|
||||||
width *= maxH / height;
|
|
||||||
height = maxH;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const canvas = document.createElement('canvas');
|
|
||||||
canvas.width = width;
|
|
||||||
canvas.height = height;
|
|
||||||
const ctx = canvas.getContext('2d');
|
|
||||||
|
|
||||||
if (!ctx) {
|
|
||||||
return reject('压缩图片异常');
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx.drawImage(img, 0, 0, width, height);
|
|
||||||
const compressedDataUrl = canvas.toDataURL(file.type, 0.8);
|
|
||||||
// 移除 canvas 元素
|
|
||||||
canvas.remove();
|
|
||||||
|
|
||||||
if (compressedDataUrl.length > maxSize) {
|
|
||||||
return reject('图片太大了');
|
|
||||||
}
|
|
||||||
|
|
||||||
const src = await (async () => {
|
|
||||||
try {
|
|
||||||
const src = await postUploadImg(compressedDataUrl, expiredTime);
|
|
||||||
return src;
|
|
||||||
} catch (error) {
|
|
||||||
return compressedDataUrl;
|
|
||||||
}
|
|
||||||
})();
|
|
||||||
|
|
||||||
resolve(src);
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
reader.onerror = (err) => {
|
reader.onerror = (err) => {
|
||||||
console.log(err);
|
console.log(err);
|
||||||
reject('压缩图片异常');
|
reject('压缩图片异常');
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
|
return compressBase64ImgAndUpload({
|
||||||
|
base64,
|
||||||
|
maxW,
|
||||||
|
maxH,
|
||||||
|
maxSize,
|
||||||
|
expiredTime
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import mammoth from 'mammoth';
|
import mammoth from 'mammoth';
|
||||||
import Papa from 'papaparse';
|
import Papa from 'papaparse';
|
||||||
import { postUploadImg } from '@/web/common/file/api';
|
import { compressBase64ImgAndUpload } from './controller';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 读取 txt 文件内容
|
* 读取 txt 文件内容
|
||||||
@@ -51,16 +51,30 @@ export const readPdfContent = (file: File) =>
|
|||||||
const headerThreshold = pageHeight * 0.07; // 假设页头在页面顶部5%的区域内
|
const headerThreshold = pageHeight * 0.07; // 假设页头在页面顶部5%的区域内
|
||||||
const footerThreshold = pageHeight * 0.93; // 假设页脚在页面底部5%的区域内
|
const footerThreshold = pageHeight * 0.93; // 假设页脚在页面底部5%的区域内
|
||||||
|
|
||||||
const pageText = tokenizedText.items
|
const pageTexts: TokenType[] = tokenizedText.items.filter((token: TokenType) => {
|
||||||
.filter((token: TokenType) => {
|
return (
|
||||||
return (
|
!token.transform ||
|
||||||
!token.transform ||
|
(token.transform[5] > headerThreshold && token.transform[5] < footerThreshold)
|
||||||
(token.transform[5] > headerThreshold && token.transform[5] < footerThreshold)
|
);
|
||||||
);
|
});
|
||||||
|
|
||||||
|
// concat empty string 'hasEOL'
|
||||||
|
for (let i = 0; i < pageTexts.length; i++) {
|
||||||
|
const item = pageTexts[i];
|
||||||
|
if (item.str === '' && pageTexts[i - 1]) {
|
||||||
|
pageTexts[i - 1].hasEOL = item.hasEOL;
|
||||||
|
pageTexts.splice(i, 1);
|
||||||
|
i--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return pageTexts
|
||||||
|
.map((token) => {
|
||||||
|
const paragraphEnd = token.hasEOL && /([。?!.?!\n\r]|(\r\n))$/.test(token.str);
|
||||||
|
|
||||||
|
return paragraphEnd ? `${token.str}\n` : token.str;
|
||||||
})
|
})
|
||||||
.map((token: TokenType) => token.str)
|
|
||||||
.join('');
|
.join('');
|
||||||
return pageText;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let reader = new FileReader();
|
let reader = new FileReader();
|
||||||
@@ -100,10 +114,41 @@ export const readDocContent = (file: File) =>
|
|||||||
reader.onload = async ({ target }) => {
|
reader.onload = async ({ target }) => {
|
||||||
if (!target?.result) return reject('读取 doc 文件失败');
|
if (!target?.result) return reject('读取 doc 文件失败');
|
||||||
try {
|
try {
|
||||||
const res = await mammoth.extractRawText({
|
// @ts-ignore
|
||||||
|
const res = await mammoth.convertToMarkdown({
|
||||||
arrayBuffer: target.result as ArrayBuffer
|
arrayBuffer: target.result as ArrayBuffer
|
||||||
});
|
});
|
||||||
resolve(res?.value);
|
|
||||||
|
let rawText: string = res?.value || '';
|
||||||
|
|
||||||
|
// match base64, upload and replace it
|
||||||
|
const base64Regex = /data:image\/[a-zA-Z]+;base64,([^\)]+)/g;
|
||||||
|
const base64Arr = rawText.match(base64Regex) || [];
|
||||||
|
|
||||||
|
// upload base64 and replace it
|
||||||
|
await Promise.all(
|
||||||
|
base64Arr.map(async (base64) => {
|
||||||
|
try {
|
||||||
|
const str = await compressBase64ImgAndUpload({
|
||||||
|
base64,
|
||||||
|
maxW: 800,
|
||||||
|
maxH: 800,
|
||||||
|
maxSize: 1024 * 1024 * 2
|
||||||
|
});
|
||||||
|
rawText = rawText.replace(base64, str);
|
||||||
|
} catch (error) {
|
||||||
|
rawText = rawText.replace(base64, '');
|
||||||
|
rawText = rawText.replaceAll('![]()', '');
|
||||||
|
}
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
const trimReg = /\s*(!\[.*\]\(.*\))\s*/g;
|
||||||
|
if (trimReg.test(rawText)) {
|
||||||
|
rawText = rawText.replace(/\s*(!\[.*\]\(.*\))\s*/g, '$1');
|
||||||
|
}
|
||||||
|
|
||||||
|
resolve(rawText);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
window.umami?.track('wordReadError', {
|
window.umami?.track('wordReadError', {
|
||||||
err: error?.toString()
|
err: error?.toString()
|
||||||
|
|||||||
Reference in New Issue
Block a user