perf: max_token count; feat: support reasoner output; fix: member scroll (#3681)

* perf: supplement assistant empty response
* check array
* perf: max_token count
* feat: support reasoner output
* member scroll
* update provider order
* i18n
@@ -40,7 +40,7 @@ export async function uploadMongoImg({
expiredTime: forever ? undefined : addHours(new Date(), 1)
});

return `${process.env.FE_DOMAIN || ''}${process.env.NEXT_PUBLIC_BASE_URL || ''}${imageBaseUrl}${String(_id)}.${extension}`;
return `${process.env.NEXT_PUBLIC_BASE_URL || ''}${imageBaseUrl}${String(_id)}.${extension}`;
}

const getIdFromPath = (path?: string) => {

@@ -27,8 +27,9 @@
"maxContext": 64000,
"maxResponse": 4096,
"quoteMaxToken": 60000,
"maxTemperature": 1.5,
"maxTemperature": null,
"vision": false,
"reasoning": true,
"toolChoice": false,
"functionCall": false,
"defaultSystemChatPrompt": "",
@@ -39,11 +40,9 @@
"usedInQueryExtension": true,
"customExtractPrompt": "",
"usedInToolCall": true,
"defaultConfig": {
"temperature": null
},
"defaultConfig": {},
"fieldMap": {},
"type": "llm"
}
]
}
}
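Note on the model entry above: `maxTemperature` changes from `1.5` to `null` and a `reasoning: true` flag is added. Later hunks in this diff treat a non-numeric `maxTemperature` as "do not send temperature" (`computedTemperature` returns `undefined`) and only enable reasoner output when the model flag is set (`aiChatReasoning = !!aiChatReasoning && !!modelConstantsData.reasoning`). A small illustrative sketch of that gating, with simplified types (not the repo's actual helpers):

```ts
// Sketch only: simplified model config shape.
type ModelConfig = { maxTemperature: number | null; reasoning?: boolean };

const resolveChatOptions = (model: ModelConfig, wantReasoning: boolean, temperature: number) => ({
  // Reasoner output is only used when the model declares support for it.
  aiChatReasoning: wantReasoning && !!model.reasoning,
  // A null maxTemperature means the provider rejects the parameter, so omit it.
  temperature:
    typeof model.maxTemperature === 'number'
      ? Math.min(temperature, model.maxTemperature)
      : undefined
});
```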
@@ -50,10 +50,10 @@
"maxContext": 128000,
"maxResponse": 4000,
"quoteMaxToken": 120000,
"maxTemperature": 1.2,
"maxTemperature": null,
"vision": false,
"toolChoice": false,
"functionCall": true,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": true,
"usedInClassify": true,
@@ -63,8 +63,10 @@
"customExtractPrompt": "",
"usedInToolCall": true,
"defaultConfig": {
"temperature": 1,
"max_tokens": null
"stream": false
},
"fieldMap": {
"max_tokens": "max_completion_tokens"
},
"type": "llm"
},
@@ -74,10 +76,10 @@
"maxContext": 128000,
"maxResponse": 4000,
"quoteMaxToken": 120000,
"maxTemperature": 1.2,
"maxTemperature": null,
"vision": false,
"toolChoice": false,
"functionCall": true,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": true,
"usedInClassify": true,
@@ -87,10 +89,11 @@
"customExtractPrompt": "",
"usedInToolCall": true,
"defaultConfig": {
"temperature": 1,
"max_tokens": null,
"stream": false
},
"fieldMap": {
"max_tokens": "max_completion_tokens"
},
"type": "llm"
},
{
@@ -99,10 +102,10 @@
"maxContext": 195000,
"maxResponse": 8000,
"quoteMaxToken": 120000,
"maxTemperature": 1.2,
"maxTemperature": null,
"vision": false,
"toolChoice": false,
"functionCall": true,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": true,
"usedInClassify": true,
@@ -112,10 +115,11 @@
"customExtractPrompt": "",
"usedInToolCall": true,
"defaultConfig": {
"temperature": 1,
"max_tokens": null,
"stream": false
},
"fieldMap": {
"max_tokens": "max_completion_tokens"
},
"type": "llm"
},
{
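The remaining model entries add `"stream": false` to `defaultConfig` and a `fieldMap` that renames `max_tokens` to `max_completion_tokens`, the parameter newer reasoning-style chat APIs expect. A hedged sketch of how such a fieldMap could be applied to a request body before sending (`applyFieldMap` is a hypothetical helper, not a FastGPT export):

```ts
// Sketch: rename request-body keys according to a model's fieldMap.
const applyFieldMap = (
  body: Record<string, unknown>,
  fieldMap: Record<string, string>
): Record<string, unknown> => {
  const out: Record<string, unknown> = { ...body };
  for (const [from, to] of Object.entries(fieldMap)) {
    if (from in out) {
      out[to] = out[from];
      delete out[from];
    }
  }
  return out;
};

// applyFieldMap({ max_tokens: 4000, stream: false }, { max_tokens: 'max_completion_tokens' })
// -> { stream: false, max_completion_tokens: 4000 }
```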
@@ -2,10 +2,12 @@ import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { createChatCompletion } from '../config';
import { ChatItemType } from '@fastgpt/global/core/chat/type';
import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt';
import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt';
import { getLLMModel } from '../model';
import { llmCompletionsBodyFormat } from '../utils';
import { addLog } from '../../../common/system/log';
import { filterGPTMessageByMaxContext } from '../../chat/utils';
import json5 from 'json5';

/*
query extension - 问题扩展
@@ -13,72 +15,73 @@ import { addLog } from '../../../common/system/log';
*/
const title = global.feConfigs?.systemTitle || 'FastAI';
const defaultPrompt = `作为一个向量检索助手,你的任务是结合历史记录,从不同角度,为“原问题”生成个不同版本的“检索词”,从而提高向量检索的语义丰富度,提高向量检索的精度。
const defaultPrompt = `## 你的任务
你作为一个向量检索助手,你的任务是结合历史记录,从不同角度,为“原问题”生成个不同版本的“检索词”,从而提高向量检索的语义丰富度,提高向量检索的精度。
生成的问题要求指向对象清晰明确,并与“原问题语言相同”。

参考 <Example></Example> 标中的示例来完成任务。
## 参考示例

<Example>
历史记录:
"""
null
"""
原问题: 介绍下剧情。
检索词: ["介绍下故事的背景。","故事的主题是什么?","介绍下故事的主要人物。"]
----------------
历史记录:
"""
Q: 对话背景。
A: 当前对话是关于 Nginx 的介绍和使用等。
user: 对话背景。
assistant: 当前对话是关于 Nginx 的介绍和使用等。
"""
原问题: 怎么下载
检索词: ["Nginx 如何下载?","下载 Nginx 需要什么条件?","有哪些渠道可以下载 Nginx?"]
----------------
历史记录:
"""
Q: 对话背景。
A: 当前对话是关于 Nginx 的介绍和使用等。
Q: 报错 "no connection"
A: 报错"no connection"可能是因为……
user: 对话背景。
assistant: 当前对话是关于 Nginx 的介绍和使用等。
user: 报错 "no connection"
assistant: 报错"no connection"可能是因为……
"""
原问题: 怎么解决
检索词: ["Nginx报错"no connection"如何解决?","造成'no connection'报错的原因。","Nginx提示'no connection',要怎么办?"]
----------------
历史记录:
"""
Q: 护产假多少天?
A: 护产假的天数根据员工所在的城市而定。请提供您所在的城市,以便我回答您的问题。
user: How long is the maternity leave?
assistant: The number of days of maternity leave depends on the city in which the employee is located. Please provide your city so that I can answer your questions.
"""
原问题: 沈阳
检索词: ["沈阳的护产假多少天?","沈阳的护产假政策。","沈阳的护产假标准。"]
原问题: ShenYang
检索词: ["How many days is maternity leave in Shenyang?","Shenyang's maternity leave policy.","The standard of maternity leave in Shenyang."]
----------------
历史记录:
"""
Q: 作者是谁?
A: ${title} 的作者是 labring。
user: 作者是谁?
assistant: ${title} 的作者是 labring。
"""
原问题: Tell me about him
检索词: ["Introduce labring, the author of ${title}." ," Background information on author labring." "," Why does labring do ${title}?"]
----------------
历史记录:
"""
Q: 对话背景。
A: 关于 ${title} 的介绍和使用等问题。
user: 对话背景。
assistant: 关于 ${title} 的介绍和使用等问题。
"""
原问题: 你好。
检索词: ["你好"]
----------------
历史记录:
"""
Q: ${title} 如何收费?
A: ${title} 收费可以参考……
user: ${title} 如何收费?
assistant: ${title} 收费可以参考……
"""
原问题: 你知道 laf 么?
检索词: ["laf 的官网地址是多少?","laf 的使用教程。","laf 有什么特点和优势。"]
----------------
历史记录:
"""
Q: ${title} 的优势
A: 1. 开源
user: ${title} 的优势
assistant: 1. 开源
2. 简便
3. 扩展性强
"""
@@ -87,18 +90,20 @@ A: 1. 开源
----------------
历史记录:
"""
Q: 什么是 ${title}?
A: ${title} 是一个 RAG 平台。
Q: 什么是 Laf?
A: Laf 是一个云函数开发平台。
user: 什么是 ${title}?
assistant: ${title} 是一个 RAG 平台。
user: 什么是 Laf?
assistant: Laf 是一个云函数开发平台。
"""
原问题: 它们有什么关系?
检索词: ["${title}和Laf有什么关系?","介绍下${title}","介绍下Laf"]
</Example>

-----
## 输出要求

下面是正式的任务:
1. 输出格式为 JSON 数组,数组中每个元素为字符串。无需对输出进行任何解释。
2. 输出语言与原问题相同。原问题为中文则输出中文;原问题为英文则输出英文。

## 开始任务

历史记录:
"""
@@ -125,26 +130,39 @@ export const queryExtension = async ({
outputTokens: number;
}> => {
const systemFewShot = chatBg
? `Q: 对话背景。
A: ${chatBg}
? `user: 对话背景。
assistant: ${chatBg}
`
: '';
const historyFewShot = histories
.map((item) => {
const role = item.obj === 'Human' ? 'Q' : 'A';
return `${role}: ${chatValue2RuntimePrompt(item.value).text}`;
})
.join('\n');
const concatFewShot = `${systemFewShot}${historyFewShot}`.trim();

const modelData = getLLMModel(model);
const filterHistories = await filterGPTMessageByMaxContext({
messages: chats2GPTMessages({ messages: histories, reserveId: false }),
maxContext: modelData.maxContext - 1000
});

const historyFewShot = filterHistories
.map((item) => {
const role = item.role;
const content = item.content;
if ((role === 'user' || role === 'assistant') && content) {
if (typeof content === 'string') {
return `${role}: ${content}`;
} else {
return `${role}: ${content.map((item) => (item.type === 'text' ? item.text : '')).join('\n')}`;
}
}
})
.filter(Boolean)
.join('\n');
const concatFewShot = `${systemFewShot}${historyFewShot}`.trim();

const messages = [
{
role: 'user',
content: replaceVariable(defaultPrompt, {
query: `${query}`,
histories: concatFewShot
histories: concatFewShot || 'null'
})
}
] as any;
@@ -154,7 +172,7 @@ A: ${chatBg}
{
stream: false,
model: modelData.model,
temperature: 0.01,
temperature: 0.1,
messages
},
modelData
@@ -172,22 +190,41 @@ A: ${chatBg}
};
}

const start = answer.indexOf('[');
const end = answer.lastIndexOf(']');
if (start === -1 || end === -1) {
addLog.warn('Query extension failed, not a valid JSON', {
answer
});
return {
rawQuery: query,
extensionQueries: [],
model,
inputTokens: 0,
outputTokens: 0
};
}

// Intercept the content of [] and retain []
answer = answer.match(/\[.*?\]/)?.[0] || '';
answer = answer.replace(/\\"/g, '"');
const jsonStr = answer
.substring(start, end + 1)
.replace(/(\\n|\\)/g, '')
.replace(/ /g, '');

try {
const queries = JSON.parse(answer) as string[];
const queries = json5.parse(jsonStr) as string[];

return {
rawQuery: query,
extensionQueries: Array.isArray(queries) ? queries : [],
extensionQueries: (Array.isArray(queries) ? queries : []).slice(0, 5),
model,
inputTokens: await countGptMessagesTokens(messages),
outputTokens: await countPromptTokens(answer)
};
} catch (error) {
addLog.error(`Query extension error`, error);
addLog.warn('Query extension failed, not a valid JSON', {
answer
});
return {
rawQuery: query,
extensionQueries: [],
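The parsing hunks above replace a bare `JSON.parse(answer)` with a more tolerant flow: locate the first `[` and last `]`, strip escaped newlines, backslashes and spaces, parse the slice with `json5`, and cap the result at five queries. A rough standalone sketch of that flow (simplified; names other than `json5` are illustrative):

```ts
import json5 from 'json5';

// Sketch: pull a JSON-ish string array out of a free-form LLM answer.
const parseExtensionQueries = (answer: string): string[] => {
  const start = answer.indexOf('[');
  const end = answer.lastIndexOf(']');
  if (start === -1 || end === -1) return [];

  // Mirrors the cleanup in the diff: drop escaped newlines/backslashes and spaces
  // so json5 can cope with loosely formatted model output.
  const jsonStr = answer
    .substring(start, end + 1)
    .replace(/(\\n|\\)/g, '')
    .replace(/ /g, '');

  try {
    const queries = json5.parse(jsonStr);
    return (Array.isArray(queries) ? queries : []).slice(0, 5);
  } catch {
    return [];
  }
};
```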
@@ -2,33 +2,23 @@ import { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import {
ChatCompletionCreateParamsNonStreaming,
ChatCompletionCreateParamsStreaming,
ChatCompletionMessageParam,
StreamChatType
} from '@fastgpt/global/core/ai/type';
import { countGptMessagesTokens } from '../../common/string/tiktoken';
import { getLLMModel } from './model';

export const computedMaxToken = async ({
/*
Count response max token
*/
export const computedMaxToken = ({
maxToken,
model,
filterMessages = []
model
}: {
maxToken?: number;
model: LLMModelItemType;
filterMessages: ChatCompletionMessageParam[];
}) => {
if (maxToken === undefined) return;

maxToken = Math.min(maxToken, model.maxResponse);
const tokensLimit = model.maxContext;

/* count response max token */
const promptsToken = await countGptMessagesTokens(filterMessages);
maxToken = promptsToken + maxToken > tokensLimit ? tokensLimit - promptsToken : maxToken;

if (maxToken <= 0) {
maxToken = 200;
}
return maxToken;
};

@@ -40,6 +30,7 @@ export const computedTemperature = ({
model: LLMModelItemType;
temperature: number;
}) => {
if (typeof model.maxTemperature !== 'number') return undefined;
temperature = +(model.maxTemperature * (temperature / 10)).toFixed(2);
temperature = Math.max(temperature, 0.01);
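The `computedMaxToken` change above removes the prompt-token accounting (that responsibility moves to `filterGPTMessageByMaxContext`); the helper is now synchronous and only clamps the requested value to the model's response ceiling. Roughly, the new behaviour is:

```ts
// Sketch of the simplified helper; the real signature takes LLMModelItemType.
const computedMaxToken = ({ maxToken, model }: { maxToken?: number; model: { maxResponse: number } }) => {
  if (maxToken === undefined) return;
  // Only clamp to the model's maxResponse; context budgeting happens in the message filter.
  return Math.min(maxToken, model.maxResponse);
};
```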
@@ -14,36 +14,19 @@ import { serverRequestBaseUrl } from '../../common/api/serverRequest';
import { i18nT } from '../../../web/i18n/utils';
import { addLog } from '../../common/system/log';

export const filterGPTMessageByMaxTokens = async ({
export const filterGPTMessageByMaxContext = async ({
messages = [],
maxTokens
maxContext
}: {
messages: ChatCompletionMessageParam[];
maxTokens: number;
maxContext: number;
}) => {
if (!Array.isArray(messages)) {
return [];
}
const rawTextLen = messages.reduce((sum, item) => {
if (typeof item.content === 'string') {
return sum + item.content.length;
}
if (Array.isArray(item.content)) {
return (
sum +
item.content.reduce((sum, item) => {
if (item.type === 'text') {
return sum + item.text.length;
}
return sum;
}, 0)
);
}
return sum;
}, 0);

// If the text length is less than half of the maximum token, no calculation is required
if (rawTextLen < maxTokens * 0.5) {
if (messages.length < 4) {
return messages;
}

@@ -55,7 +38,7 @@ export const filterGPTMessageByMaxTokens = async ({
const chatPrompts: ChatCompletionMessageParam[] = messages.slice(chatStartIndex);

// reduce token of systemPrompt
maxTokens -= await countGptMessagesTokens(systemPrompts);
maxContext -= await countGptMessagesTokens(systemPrompts);

// Save the last chat prompt(question)
const question = chatPrompts.pop();
@@ -73,9 +56,9 @@ export const filterGPTMessageByMaxTokens = async ({
}

const tokens = await countGptMessagesTokens([assistant, user]);
maxTokens -= tokens;
maxContext -= tokens;
/* 整体 tokens 超出范围,截断 */
if (maxTokens < 0) {
if (maxContext < 0) {
break;
}
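After the rename, `filterGPTMessageByMaxContext` budgets against a context size: the character-length pre-check is replaced by a simple `messages.length < 4` early return, and the loop subtracts the tokens of each history pair from `maxContext` until the budget is exhausted. A self-contained sketch of that trimming idea (simplified: the real function also separates system prompts, walks assistant/user pairs, and always keeps the latest question):

```ts
type Msg = { role: 'system' | 'user' | 'assistant'; content: string };

// Illustrative: keep the newest messages that still fit into maxContext.
const trimToContext = async (
  history: Msg[], // oldest -> newest
  maxContext: number,
  countTokens: (msgs: Msg[]) => Promise<number> // stand-in for countGptMessagesTokens
): Promise<Msg[]> => {
  if (history.length < 4) return history; // early return, as in the diff
  const kept: Msg[] = [];
  for (let i = history.length - 1; i >= 0; i--) {
    maxContext -= await countTokens([history[i]]);
    if (maxContext < 0) break; // budget exhausted: drop everything older
    kept.unshift(history[i]);
  }
  return kept;
};
```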
@@ -1,5 +1,5 @@
import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt';
import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../chat/utils';
import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../chat/utils';
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import {
countMessagesTokens,
@@ -175,9 +175,9 @@ ${description ? `- ${description}` : ''}
}
];
const adaptMessages = chats2GPTMessages({ messages, reserveId: false });
const filterMessages = await filterGPTMessageByMaxTokens({
const filterMessages = await filterGPTMessageByMaxContext({
messages: adaptMessages,
maxTokens: extractModel.maxContext
maxContext: extractModel.maxContext
});
const requestMessages = await loadRequestMessages({
messages: filterMessages,
@@ -1,5 +1,5 @@
import { createChatCompletion } from '../../../../ai/config';
import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../../chat/utils';
import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../../chat/utils';
import {
ChatCompletion,
StreamChatType,
@@ -172,10 +172,14 @@ export const runToolWithFunctionCall = async (
};
});

const max_tokens = computedMaxToken({
model: toolModel,
maxToken
});
const filterMessages = (
await filterGPTMessageByMaxTokens({
await filterGPTMessageByMaxContext({
messages,
maxTokens: toolModel.maxContext - 300 // filter token. not response maxToken
maxContext: toolModel.maxContext - (max_tokens || 0) // filter token. not response maxToken
})
).map((item) => {
if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant && item.function_call) {
@@ -190,16 +194,11 @@ export const runToolWithFunctionCall = async (
}
return item;
});
const [requestMessages, max_tokens] = await Promise.all([
const [requestMessages] = await Promise.all([
loadRequestMessages({
messages: filterMessages,
useVision: toolModel.vision && aiChatVision,
origin: requestOrigin
}),
computedMaxToken({
model: toolModel,
maxToken,
filterMessages
})
]);
const requestBody = llmCompletionsBodyFormat(
@@ -1,5 +1,5 @@
import { createChatCompletion } from '../../../../ai/config';
import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../../chat/utils';
import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../../chat/utils';
import {
ChatCompletion,
StreamChatType,
@@ -196,21 +196,20 @@ export const runToolWithPromptCall = async (
return Promise.reject('Prompt call invalid input');
}

const filterMessages = await filterGPTMessageByMaxTokens({
const max_tokens = computedMaxToken({
model: toolModel,
maxToken
});
const filterMessages = await filterGPTMessageByMaxContext({
messages,
maxTokens: toolModel.maxContext - 500 // filter token. not response maxToken
maxContext: toolModel.maxContext - (max_tokens || 0) // filter token. not response maxToken
});

const [requestMessages, max_tokens] = await Promise.all([
const [requestMessages] = await Promise.all([
loadRequestMessages({
messages: filterMessages,
useVision: toolModel.vision && aiChatVision,
origin: requestOrigin
}),
computedMaxToken({
model: toolModel,
maxToken,
filterMessages
})
]);
const requestBody = llmCompletionsBodyFormat(
@@ -1,5 +1,5 @@
import { createChatCompletion } from '../../../../ai/config';
import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../../chat/utils';
import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../../chat/utils';
import {
ChatCompletion,
ChatCompletionMessageToolCall,
@@ -228,11 +228,16 @@ export const runToolWithToolChoice = async (
};
});

const max_tokens = computedMaxToken({
model: toolModel,
maxToken
});

// Filter histories by maxToken
const filterMessages = (
await filterGPTMessageByMaxTokens({
await filterGPTMessageByMaxContext({
messages,
maxTokens: toolModel.maxContext - 300 // filter token. not response maxToken
maxContext: toolModel.maxContext - (max_tokens || 0) // filter token. not response maxToken
})
).map((item) => {
if (item.role === 'assistant' && item.tool_calls) {
@@ -248,16 +253,11 @@ export const runToolWithToolChoice = async (
return item;
});

const [requestMessages, max_tokens] = await Promise.all([
const [requestMessages] = await Promise.all([
loadRequestMessages({
messages: filterMessages,
useVision: toolModel.vision && aiChatVision,
origin: requestOrigin
}),
computedMaxToken({
model: toolModel,
maxToken,
filterMessages
})
]);
const requestBody = llmCompletionsBodyFormat(
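All three tool runners (`runToolWithFunctionCall`, `runToolWithPromptCall`, `runToolWithToolChoice`) switch to the same ordering: compute `max_tokens` up front from the requested `maxToken` alone, reserve it when filtering history (`maxContext - (max_tokens || 0)` instead of a fixed `- 300` / `- 500`), and only then resolve the request messages. An illustrative wrapper showing that sequence with the repo's function names (the wrapper itself and the `ai/utils` import path are assumptions, not repo code):

```ts
// Sketch of the new call order in the tool runners.
import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../../chat/utils';
import { computedMaxToken } from '../../../../ai/utils'; // path assumed for illustration

const prepareToolRequest = async ({
  toolModel,
  messages,
  maxToken,
  requestOrigin,
  aiChatVision
}: {
  toolModel: any; // LLMModelItemType in the repo
  messages: any[];
  maxToken?: number;
  requestOrigin?: string;
  aiChatVision?: boolean;
}) => {
  // 1) Reserve the completion budget first; it no longer depends on the filtered messages.
  const max_tokens = computedMaxToken({ model: toolModel, maxToken });

  // 2) Trim history so prompt + reserved completion fit into the model context.
  const filterMessages = await filterGPTMessageByMaxContext({
    messages,
    maxContext: toolModel.maxContext - (max_tokens || 0)
  });

  // 3) Resolve multimodal content for the final request.
  const requestMessages = await loadRequestMessages({
    messages: filterMessages,
    useVision: toolModel.vision && aiChatVision,
    origin: requestOrigin
  });

  return { requestMessages, max_tokens };
};
```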
@@ -1,5 +1,5 @@
import type { NextApiResponse } from 'next';
import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../chat/utils';
import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../chat/utils';
import type { ChatItemType, UserChatItemValueItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
@@ -58,6 +58,7 @@ export type ChatProps = ModuleDispatchProps<
>;
export type ChatResponse = DispatchNodeResultType<{
[NodeOutputKeyEnum.answerText]: string;
[NodeOutputKeyEnum.reasoningText]?: string;
[NodeOutputKeyEnum.history]: ChatItemType[];
}>;

@@ -87,22 +88,24 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
quoteTemplate,
quotePrompt,
aiChatVision,
aiChatReasoning,
fileUrlList: fileLinks, // node quote file links
stringQuoteText //abandon
}
} = props;
const { files: inputFiles } = chatValue2RuntimePrompt(query); // Chat box input files

stream = stream && isResponseAnswerText;

const chatHistories = getHistories(history, histories);
quoteQA = checkQuoteQAValue(quoteQA);

const modelConstantsData = getLLMModel(model);
if (!modelConstantsData) {
return Promise.reject('The chat model is undefined, you need to select a chat model.');
}

stream = stream && isResponseAnswerText;
aiChatReasoning = !!aiChatReasoning && !!modelConstantsData.reasoning;

const chatHistories = getHistories(history, histories);
quoteQA = checkQuoteQAValue(quoteQA);

const [{ datasetQuoteText }, { documentQuoteText, userFiles }] = await Promise.all([
filterDatasetQuote({
quoteQA,
@@ -124,9 +127,15 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
return Promise.reject(i18nT('chat:AI_input_is_empty'));
}

const max_tokens = computedMaxToken({
model: modelConstantsData,
maxToken
});

const [{ filterMessages }] = await Promise.all([
getChatMessages({
model: modelConstantsData,
maxTokens: max_tokens,
histories: chatHistories,
useDatasetQuote: quoteQA !== undefined,
datasetQuoteText,
@@ -137,8 +146,8 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
userFiles,
documentQuoteText
}),
// Censor = true and system key, will check content
(() => {
// censor model and system key
if (modelConstantsData.censor && !externalProvider.openaiAccount?.key) {
return postTextCensor({
text: `${systemPrompt}
@@ -149,18 +158,11 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
})()
]);

const [requestMessages, max_tokens] = await Promise.all([
loadRequestMessages({
messages: filterMessages,
useVision: modelConstantsData.vision && aiChatVision,
origin: requestOrigin
}),
computedMaxToken({
model: modelConstantsData,
maxToken,
filterMessages
})
]);
const requestMessages = await loadRequestMessages({
messages: filterMessages,
useVision: modelConstantsData.vision && aiChatVision,
origin: requestOrigin
});

const requestBody = llmCompletionsBodyFormat(
{
@@ -183,34 +185,42 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
}
});

const { answerText } = await (async () => {
const { answerText, reasoningText } = await (async () => {
if (res && isStreamResponse) {
// sse response
const { answer } = await streamResponse({
const { answer, reasoning } = await streamResponse({
res,
stream: response,
aiChatReasoning,
workflowStreamResponse
});

return {
answerText: answer
answerText: answer,
reasoningText: reasoning
};
} else {
const unStreamResponse = response as ChatCompletion;
const answer = unStreamResponse.choices?.[0]?.message?.content || '';

const reasoning = aiChatReasoning
? // @ts-ignore
unStreamResponse.choices?.[0]?.message?.reasoning_content || ''
: '';
if (stream) {
// Some models do not support streaming
workflowStreamResponse?.({
event: SseResponseEventEnum.fastAnswer,
data: textAdaptGptResponse({
text: answer
})
});
reasoning &&
workflowStreamResponse?.({
event: SseResponseEventEnum.fastAnswer,
data: textAdaptGptResponse({
text: answer,
reasoning_content: reasoning
})
});
}

return {
answerText: answer
answerText: answer,
reasoningText: reasoning
};
}
})();
@@ -241,6 +251,7 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp

return {
answerText,
reasoningText,
[DispatchNodeResponseKeyEnum.nodeResponse]: {
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
@@ -367,6 +378,7 @@ async function getMultiInput({

async function getChatMessages({
model,
maxTokens = 0,
aiChatQuoteRole,
datasetQuotePrompt = '',
datasetQuoteText,
@@ -378,6 +390,7 @@ async function getChatMessages({
documentQuoteText
}: {
model: LLMModelItemType;
maxTokens?: number;
// dataset quote
aiChatQuoteRole: AiChatQuoteRoleType; // user: replace user prompt; system: replace system prompt
datasetQuotePrompt?: string;
@@ -444,9 +457,9 @@ async function getChatMessages({

const adaptMessages = chats2GPTMessages({ messages, reserveId: false });

const filterMessages = await filterGPTMessageByMaxTokens({
const filterMessages = await filterGPTMessageByMaxContext({
messages: adaptMessages,
maxTokens: model.maxContext - 300 // filter token. not response maxToken
maxContext: model.maxContext - maxTokens // filter token. not response maxToken
});

return {
@@ -457,33 +470,43 @@ async function getChatMessages({
async function streamResponse({
res,
stream,
workflowStreamResponse
workflowStreamResponse,
aiChatReasoning
}: {
res: NextApiResponse;
stream: StreamChatType;
workflowStreamResponse?: WorkflowResponseType;
aiChatReasoning?: boolean;
}) {
const write = responseWriteController({
res,
readStream: stream
});
let answer = '';
let reasoning = '';
for await (const part of stream) {
if (res.closed) {
stream.controller?.abort();
break;
}

const content = part.choices?.[0]?.delta?.content || '';
answer += content;

const reasoningContent = aiChatReasoning
? part.choices?.[0]?.delta?.reasoning_content || ''
: '';
reasoning += reasoningContent;

workflowStreamResponse?.({
write,
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
text: content
text: content,
reasoning_content: reasoningContent
})
});
}

return { answer };
return { answer, reasoning };
}
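The chat node now also returns `reasoningText`. In streaming mode `streamResponse` accumulates `delta.reasoning_content` (a reasoner-style field that is not in the official OpenAI typings, hence the `@ts-ignore` in the non-stream branch) and forwards it via `textAdaptGptResponse`; in non-stream mode the field is read from `message.reasoning_content` and emitted as a `fastAnswer` event. A minimal self-contained sketch of the stream-side accumulation, assuming chunks shaped like OpenAI chat deltas:

```ts
// Sketch: collect answer text and reasoning from an async-iterable chat stream.
type Delta = { content?: string; reasoning_content?: string };
type Chunk = { choices?: { delta?: Delta }[] };

const collectStream = async (stream: AsyncIterable<Chunk>, aiChatReasoning: boolean) => {
  let answer = '';
  let reasoning = '';
  for await (const part of stream) {
    const delta = part.choices?.[0]?.delta;
    answer += delta?.content || '';
    // Only accumulate reasoning when the node has reasoning output enabled.
    if (aiChatReasoning) reasoning += delta?.reasoning_content || '';
  }
  return { answer, reasoning };
};
```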
@@ -204,6 +204,7 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
{ inputs = [] }: RuntimeNodeItemType,
{
answerText = '',
reasoningText,
responseData,
nodeDispatchUsages,
toolResponses,
@@ -213,6 +214,7 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
}: Omit<
DispatchNodeResultType<{
[NodeOutputKeyEnum.answerText]?: string;
[NodeOutputKeyEnum.reasoningText]?: string;
[DispatchNodeResponseKeyEnum.nodeResponse]?: ChatHistoryItemResType;
}>,
'nodeResponse'
@@ -251,6 +253,13 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
}
});
}
} else if (reasoningText) {
chatAssistantResponse.push({
type: ChatItemValueTypeEnum.reasoning,
reasoning: {
content: reasoningText
}
});
}

if (rewriteHistories) {