perf: max_token count; feat: support reasoner output; fix: member scroll (#3681)

* perf: supplement assistant empty response

* check array

* perf: max_token count

* feat: support reasoner output

* member scroll

* update provider order

* i18n
Archer authored 2025-02-01 18:04:44 +08:00, committed by archer
parent 9e0379382f
commit 54defd8a3c
46 changed files with 462 additions and 266 deletions

View File

@@ -1,5 +1,5 @@
import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt';
import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../chat/utils';
import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../chat/utils';
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import {
countMessagesTokens,
@@ -175,9 +175,9 @@ ${description ? `- ${description}` : ''}
}
];
const adaptMessages = chats2GPTMessages({ messages, reserveId: false });
const filterMessages = await filterGPTMessageByMaxTokens({
const filterMessages = await filterGPTMessageByMaxContext({
messages: adaptMessages,
maxTokens: extractModel.maxContext
maxContext: extractModel.maxContext
});
const requestMessages = await loadRequestMessages({
messages: filterMessages,

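The change above is a rename: filterGPTMessageByMaxTokens becomes filterGPTMessageByMaxContext, and its maxTokens argument becomes maxContext, making it explicit that the history is trimmed against the model's context window rather than the response token limit. A minimal sketch of that call pattern follows; the message type and the 4-characters-per-token estimate are assumptions, not the repo's implementation, which uses the project's own tokenizer.

// Minimal sketch, assuming a simplified message type and a rough token estimate.
type SketchMessage = { role: 'system' | 'user' | 'assistant'; content: string };

const estimateTokens = (messages: SketchMessage[]) =>
  messages.reduce((sum, m) => sum + Math.ceil(m.content.length / 4), 0);

// Drop the oldest non-system messages until the history fits the context budget.
export async function filterGPTMessageByMaxContext({
  messages,
  maxContext
}: {
  messages: SketchMessage[];
  maxContext: number;
}) {
  const system = messages.filter((m) => m.role === 'system');
  let rest = messages.filter((m) => m.role !== 'system');

  while (rest.length > 1 && estimateTokens([...system, ...rest]) > maxContext) {
    rest = rest.slice(1);
  }
  return [...system, ...rest];
}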
View File

@@ -1,5 +1,5 @@
import { createChatCompletion } from '../../../../ai/config';
import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../../chat/utils';
import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../../chat/utils';
import {
ChatCompletion,
StreamChatType,
@@ -172,10 +172,14 @@ export const runToolWithFunctionCall = async (
};
});
const max_tokens = computedMaxToken({
model: toolModel,
maxToken
});
const filterMessages = (
await filterGPTMessageByMaxTokens({
await filterGPTMessageByMaxContext({
messages,
maxTokens: toolModel.maxContext - 300 // filter token. not response maxToken
maxContext: toolModel.maxContext - (max_tokens || 0) // filter token. not response maxToken
})
).map((item) => {
if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant && item.function_call) {
@@ -190,16 +194,11 @@ export const runToolWithFunctionCall = async (
}
return item;
});
const [requestMessages, max_tokens] = await Promise.all([
const [requestMessages] = await Promise.all([
loadRequestMessages({
messages: filterMessages,
useVision: toolModel.vision && aiChatVision,
origin: requestOrigin
}),
computedMaxToken({
model: toolModel,
maxToken,
filterMessages
})
]);
const requestBody = llmCompletionsBodyFormat(

View File

@@ -1,5 +1,5 @@
import { createChatCompletion } from '../../../../ai/config';
import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../../chat/utils';
import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../../chat/utils';
import {
ChatCompletion,
StreamChatType,
@@ -196,21 +196,20 @@ export const runToolWithPromptCall = async (
return Promise.reject('Prompt call invalid input');
}
const filterMessages = await filterGPTMessageByMaxTokens({
const max_tokens = computedMaxToken({
model: toolModel,
maxToken
});
const filterMessages = await filterGPTMessageByMaxContext({
messages,
maxTokens: toolModel.maxContext - 500 // filter token. not response maxToken
maxContext: toolModel.maxContext - (max_tokens || 0) // filter token. not response maxToken
});
const [requestMessages, max_tokens] = await Promise.all([
const [requestMessages] = await Promise.all([
loadRequestMessages({
messages: filterMessages,
useVision: toolModel.vision && aiChatVision,
origin: requestOrigin
}),
computedMaxToken({
model: toolModel,
maxToken,
filterMessages
})
]);
const requestBody = llmCompletionsBodyFormat(

View File

@@ -1,5 +1,5 @@
import { createChatCompletion } from '../../../../ai/config';
import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../../chat/utils';
import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../../chat/utils';
import {
ChatCompletion,
ChatCompletionMessageToolCall,
@@ -228,11 +228,16 @@ export const runToolWithToolChoice = async (
};
});
const max_tokens = computedMaxToken({
model: toolModel,
maxToken
});
// Filter histories by maxToken
const filterMessages = (
await filterGPTMessageByMaxTokens({
await filterGPTMessageByMaxContext({
messages,
maxTokens: toolModel.maxContext - 300 // filter token. not response maxToken
maxContext: toolModel.maxContext - (max_tokens || 0) // filter token. not response maxToken
})
).map((item) => {
if (item.role === 'assistant' && item.tool_calls) {
@@ -248,16 +253,11 @@ export const runToolWithToolChoice = async (
return item;
});
const [requestMessages, max_tokens] = await Promise.all([
const [requestMessages] = await Promise.all([
loadRequestMessages({
messages: filterMessages,
useVision: toolModel.vision && aiChatVision,
origin: requestOrigin
}),
computedMaxToken({
model: toolModel,
maxToken,
filterMessages
})
]);
const requestBody = llmCompletionsBodyFormat(

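The same reordering appears in all three tool-call variants above (function call, prompt call, tool choice): max_tokens is now computed from the model and the configured maxToken before the history is filtered, so the context budget reserves exactly the response allowance instead of a hard-coded 300/500-token margin, and computedMaxToken no longer runs inside the Promise.all. A sketch of the arithmetic; the model shape and computedMaxToken signature are assumptions for illustration.

// Assumed model shape and computedMaxToken behaviour, for illustration only.
type SketchLLMModel = { maxContext: number; maxResponse: number };

const computedMaxToken = ({ model, maxToken }: { model: SketchLLMModel; maxToken?: number }) =>
  maxToken === undefined ? undefined : Math.min(maxToken, model.maxResponse);

// History budget = context window minus the tokens reserved for the response.
const historyBudget = (model: SketchLLMModel, maxToken?: number) => {
  const max_tokens = computedMaxToken({ model, maxToken });
  return model.maxContext - (max_tokens || 0); // previously maxContext - 300 / - 500
};

// A 16k-context model with a 2k response allowance keeps 14k tokens of history.
console.log(historyBudget({ maxContext: 16000, maxResponse: 4000 }, 2000)); // 14000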
View File

@@ -1,5 +1,5 @@
import type { NextApiResponse } from 'next';
import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../chat/utils';
import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../chat/utils';
import type { ChatItemType, UserChatItemValueItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
@@ -58,6 +58,7 @@ export type ChatProps = ModuleDispatchProps<
>;
export type ChatResponse = DispatchNodeResultType<{
[NodeOutputKeyEnum.answerText]: string;
[NodeOutputKeyEnum.reasoningText]?: string;
[NodeOutputKeyEnum.history]: ChatItemType[];
}>;
@@ -87,22 +88,24 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
quoteTemplate,
quotePrompt,
aiChatVision,
aiChatReasoning,
fileUrlList: fileLinks, // node quote file links
stringQuoteText //abandon
}
} = props;
const { files: inputFiles } = chatValue2RuntimePrompt(query); // Chat box input files
stream = stream && isResponseAnswerText;
const chatHistories = getHistories(history, histories);
quoteQA = checkQuoteQAValue(quoteQA);
const modelConstantsData = getLLMModel(model);
if (!modelConstantsData) {
return Promise.reject('The chat model is undefined, you need to select a chat model.');
}
stream = stream && isResponseAnswerText;
aiChatReasoning = !!aiChatReasoning && !!modelConstantsData.reasoning;
const chatHistories = getHistories(history, histories);
quoteQA = checkQuoteQAValue(quoteQA);
const [{ datasetQuoteText }, { documentQuoteText, userFiles }] = await Promise.all([
filterDatasetQuote({
quoteQA,
@@ -124,9 +127,15 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
return Promise.reject(i18nT('chat:AI_input_is_empty'));
}
const max_tokens = computedMaxToken({
model: modelConstantsData,
maxToken
});
const [{ filterMessages }] = await Promise.all([
getChatMessages({
model: modelConstantsData,
maxTokens: max_tokens,
histories: chatHistories,
useDatasetQuote: quoteQA !== undefined,
datasetQuoteText,
@@ -137,8 +146,8 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
userFiles,
documentQuoteText
}),
// Censor = true and system key, will check content
(() => {
// censor model and system key
if (modelConstantsData.censor && !externalProvider.openaiAccount?.key) {
return postTextCensor({
text: `${systemPrompt}
@@ -149,18 +158,11 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
})()
]);
const [requestMessages, max_tokens] = await Promise.all([
loadRequestMessages({
messages: filterMessages,
useVision: modelConstantsData.vision && aiChatVision,
origin: requestOrigin
}),
computedMaxToken({
model: modelConstantsData,
maxToken,
filterMessages
})
]);
const requestMessages = await loadRequestMessages({
messages: filterMessages,
useVision: modelConstantsData.vision && aiChatVision,
origin: requestOrigin
});
const requestBody = llmCompletionsBodyFormat(
{
@@ -183,34 +185,42 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
}
});
const { answerText } = await (async () => {
const { answerText, reasoningText } = await (async () => {
if (res && isStreamResponse) {
// sse response
const { answer } = await streamResponse({
const { answer, reasoning } = await streamResponse({
res,
stream: response,
aiChatReasoning,
workflowStreamResponse
});
return {
answerText: answer
answerText: answer,
reasoningText: reasoning
};
} else {
const unStreamResponse = response as ChatCompletion;
const answer = unStreamResponse.choices?.[0]?.message?.content || '';
const reasoning = aiChatReasoning
? // @ts-ignore
unStreamResponse.choices?.[0]?.message?.reasoning_content || ''
: '';
if (stream) {
// Some models do not support streaming
workflowStreamResponse?.({
event: SseResponseEventEnum.fastAnswer,
data: textAdaptGptResponse({
text: answer
})
});
reasoning &&
workflowStreamResponse?.({
event: SseResponseEventEnum.fastAnswer,
data: textAdaptGptResponse({
text: answer,
reasoning_content: reasoning
})
});
}
return {
answerText: answer
answerText: answer,
reasoningText: reasoning
};
}
})();
@@ -241,6 +251,7 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
return {
answerText,
reasoningText,
[DispatchNodeResponseKeyEnum.nodeResponse]: {
totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
model: modelName,
@@ -367,6 +378,7 @@ async function getMultiInput({
async function getChatMessages({
model,
maxTokens = 0,
aiChatQuoteRole,
datasetQuotePrompt = '',
datasetQuoteText,
@@ -378,6 +390,7 @@ async function getChatMessages({
documentQuoteText
}: {
model: LLMModelItemType;
maxTokens?: number;
// dataset quote
aiChatQuoteRole: AiChatQuoteRoleType; // user: replace user prompt; system: replace system prompt
datasetQuotePrompt?: string;
@@ -444,9 +457,9 @@ async function getChatMessages({
const adaptMessages = chats2GPTMessages({ messages, reserveId: false });
const filterMessages = await filterGPTMessageByMaxTokens({
const filterMessages = await filterGPTMessageByMaxContext({
messages: adaptMessages,
maxTokens: model.maxContext - 300 // filter token. not response maxToken
maxContext: model.maxContext - maxTokens // filter token. not response maxToken
});
return {
@@ -457,33 +470,43 @@ async function getChatMessages({
async function streamResponse({
res,
stream,
workflowStreamResponse
workflowStreamResponse,
aiChatReasoning
}: {
res: NextApiResponse;
stream: StreamChatType;
workflowStreamResponse?: WorkflowResponseType;
aiChatReasoning?: boolean;
}) {
const write = responseWriteController({
res,
readStream: stream
});
let answer = '';
let reasoning = '';
for await (const part of stream) {
if (res.closed) {
stream.controller?.abort();
break;
}
const content = part.choices?.[0]?.delta?.content || '';
answer += content;
const reasoningContent = aiChatReasoning
? part.choices?.[0]?.delta?.reasoning_content || ''
: '';
reasoning += reasoningContent;
workflowStreamResponse?.({
write,
event: SseResponseEventEnum.answer,
data: textAdaptGptResponse({
text: content
text: content,
reasoning_content: reasoningContent
})
});
}
return { answer };
return { answer, reasoning };
}

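For reasoner output, streamResponse now also collects the non-standard reasoning_content delta when aiChatReasoning is enabled and returns it alongside the answer, while the non-stream branch reads message.reasoning_content (hence the @ts-ignore, since the field is not in the OpenAI typings). Below is a sketch of the accumulation loop, with the chunk types reduced to the fields used here.

// Reduced chunk types; reasoning_content is the extension field reasoner-style
// APIs (e.g. DeepSeek-R1 compatible endpoints) attach to each delta.
type SketchDelta = { content?: string; reasoning_content?: string };
type SketchChunk = { choices?: { delta?: SketchDelta }[] };

async function collectStream(stream: AsyncIterable<SketchChunk>, aiChatReasoning: boolean) {
  let answer = '';
  let reasoning = '';

  for await (const part of stream) {
    const delta = part.choices?.[0]?.delta;
    answer += delta?.content || '';
    // Only accumulate reasoning when the node has reasoning output enabled.
    reasoning += aiChatReasoning ? delta?.reasoning_content || '' : '';
  }
  return { answer, reasoning };
}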
View File

@@ -204,6 +204,7 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
{ inputs = [] }: RuntimeNodeItemType,
{
answerText = '',
reasoningText,
responseData,
nodeDispatchUsages,
toolResponses,
@@ -213,6 +214,7 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
}: Omit<
DispatchNodeResultType<{
[NodeOutputKeyEnum.answerText]?: string;
[NodeOutputKeyEnum.reasoningText]?: string;
[DispatchNodeResponseKeyEnum.nodeResponse]?: ChatHistoryItemResType;
}>,
'nodeResponse'
@@ -251,6 +253,13 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
}
});
}
} else if (reasoningText) {
chatAssistantResponse.push({
type: ChatItemValueTypeEnum.reasoning,
reasoning: {
content: reasoningText
}
});
}
if (rewriteHistories) {
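Finally, dispatchWorkFlow accepts the new reasoningText node output and, when a node returns reasoning but no answer text, stores it as a ChatItemValueTypeEnum.reasoning item so it is kept in the chat record. A sketch of that branch; the enum and value types are reduced to what the diff shows, and the surrounding if (answerText) structure is an assumption.

// Reduced value types mirroring the fields visible in the diff.
enum ChatItemValueTypeEnum {
  text = 'text',
  reasoning = 'reasoning'
}

type AssistantValueItem =
  | { type: ChatItemValueTypeEnum.text; text: { content: string } }
  | { type: ChatItemValueTypeEnum.reasoning; reasoning: { content: string } };

function appendNodeOutput(
  chatAssistantResponse: AssistantValueItem[],
  { answerText, reasoningText }: { answerText?: string; reasoningText?: string }
) {
  if (answerText) {
    chatAssistantResponse.push({
      type: ChatItemValueTypeEnum.text,
      text: { content: answerText }
    });
  } else if (reasoningText) {
    // Keep the model's reasoning even when the node produced no visible answer.
    chatAssistantResponse.push({
      type: ChatItemValueTypeEnum.reasoning,
      reasoning: { content: reasoningText }
    });
  }
}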