perf: max_token count; feat: support reasoner output; fix: member scroll (#3681)
* perf: supplement assistant empty response
* check array
* perf: max_token count
* feat: support reasoner output
* member scroll
* update provider order
* i18n
@@ -1,5 +1,5 @@
 import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt';
-import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../chat/utils';
+import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../chat/utils';
 import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
 import {
   countMessagesTokens,
@@ -175,9 +175,9 @@ ${description ? `- ${description}` : ''}
     }
   ];
   const adaptMessages = chats2GPTMessages({ messages, reserveId: false });
-  const filterMessages = await filterGPTMessageByMaxTokens({
+  const filterMessages = await filterGPTMessageByMaxContext({
     messages: adaptMessages,
-    maxTokens: extractModel.maxContext
+    maxContext: extractModel.maxContext
   });
   const requestMessages = await loadRequestMessages({
     messages: filterMessages,
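The hunks above only touch the call sites: filterGPTMessageByMaxTokens becomes filterGPTMessageByMaxContext and the maxTokens argument becomes maxContext. The utility itself is not part of this diff; the sketch below only illustrates what a context-window filter of this kind typically does — the message shape, the countTokens stand-in and the function name are assumptions, not the repo's code.

```ts
// Illustrative only — not the FastGPT implementation.
type GPTMessage = { role: 'system' | 'user' | 'assistant'; content: string };

// Naive stand-in for a real tokenizer.
const countTokens = (text: string) => Math.ceil(text.length / 4);

function filterMessagesByMaxContextSketch({
  messages,
  maxContext
}: {
  messages: GPTMessage[];
  maxContext: number;
}): GPTMessage[] {
  // System prompts are always kept.
  const systemMessages = messages.filter((m) => m.role === 'system');
  const rest = messages.filter((m) => m.role !== 'system');

  let used = systemMessages.reduce((sum, m) => sum + countTokens(m.content), 0);
  const kept: GPTMessage[] = [];

  // Walk from the newest message backwards and stop once the budget is spent,
  // always keeping at least the latest message.
  for (let i = rest.length - 1; i >= 0; i--) {
    const tokens = countTokens(rest[i].content);
    if (kept.length > 0 && used + tokens > maxContext) break;
    used += tokens;
    kept.unshift(rest[i]);
  }

  return [...systemMessages, ...kept];
}
```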
@@ -1,5 +1,5 @@
 import { createChatCompletion } from '../../../../ai/config';
-import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../../chat/utils';
+import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../../chat/utils';
 import {
   ChatCompletion,
   StreamChatType,
@@ -172,10 +172,14 @@ export const runToolWithFunctionCall = async (
       };
     });

+  const max_tokens = computedMaxToken({
+    model: toolModel,
+    maxToken
+  });
   const filterMessages = (
-    await filterGPTMessageByMaxTokens({
+    await filterGPTMessageByMaxContext({
       messages,
-      maxTokens: toolModel.maxContext - 300 // filter token. not response maxToken
+      maxContext: toolModel.maxContext - (max_tokens || 0) // filter token. not response maxToken
     })
   ).map((item) => {
     if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant && item.function_call) {
@@ -190,16 +194,11 @@ export const runToolWithFunctionCall = async (
     }
     return item;
   });
-  const [requestMessages, max_tokens] = await Promise.all([
+  const [requestMessages] = await Promise.all([
     loadRequestMessages({
       messages: filterMessages,
       useVision: toolModel.vision && aiChatVision,
       origin: requestOrigin
-    }),
-    computedMaxToken({
-      model: toolModel,
-      maxToken,
-      filterMessages
    })
  ]);
  const requestBody = llmCompletionsBodyFormat(
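The main change here is ordering: computedMaxToken now runs before the history filter, so the history budget becomes maxContext minus the planned response size rather than a fixed 300-token slack. A self-contained sketch of that arithmetic — computedMaxTokenSketch and the model numbers are stand-ins, not the project's code:

```ts
type LLMModel = { maxContext: number; maxResponse: number };

// Stand-in for the project's computedMaxToken helper: clamp the requested
// response size to what the model allows (undefined when nothing was requested).
const computedMaxTokenSketch = ({ model, maxToken }: { model: LLMModel; maxToken?: number }) =>
  maxToken === undefined ? undefined : Math.min(maxToken, model.maxResponse);

const toolModel: LLMModel = { maxContext: 16000, maxResponse: 4000 }; // example numbers

// 1. Decide the response budget first.
const max_tokens = computedMaxTokenSketch({ model: toolModel, maxToken: 2000 }); // 2000

// 2. Whatever remains of the context window is what history may occupy.
//    Before this commit a fixed slack was used instead (maxContext - 300).
const historyBudget = toolModel.maxContext - (max_tokens || 0);
console.log(historyBudget); // 14000
```

The same reordering is applied below in runToolWithPromptCall and runToolWithToolChoice.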
@@ -1,5 +1,5 @@
 import { createChatCompletion } from '../../../../ai/config';
-import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../../chat/utils';
+import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../../chat/utils';
 import {
   ChatCompletion,
   StreamChatType,
@@ -196,21 +196,20 @@ export const runToolWithPromptCall = async (
     return Promise.reject('Prompt call invalid input');
   }

-  const filterMessages = await filterGPTMessageByMaxTokens({
+  const max_tokens = computedMaxToken({
+    model: toolModel,
+    maxToken
+  });
+  const filterMessages = await filterGPTMessageByMaxContext({
     messages,
-    maxTokens: toolModel.maxContext - 500 // filter token. not response maxToken
+    maxContext: toolModel.maxContext - (max_tokens || 0) // filter token. not response maxToken
   });

-  const [requestMessages, max_tokens] = await Promise.all([
+  const [requestMessages] = await Promise.all([
     loadRequestMessages({
       messages: filterMessages,
       useVision: toolModel.vision && aiChatVision,
       origin: requestOrigin
-    }),
-    computedMaxToken({
-      model: toolModel,
-      maxToken,
-      filterMessages
     })
   ]);
   const requestBody = llmCompletionsBodyFormat(
@@ -1,5 +1,5 @@
 import { createChatCompletion } from '../../../../ai/config';
-import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../../chat/utils';
+import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../../chat/utils';
 import {
   ChatCompletion,
   ChatCompletionMessageToolCall,
@@ -228,11 +228,16 @@ export const runToolWithToolChoice = async (
       };
     });

+  const max_tokens = computedMaxToken({
+    model: toolModel,
+    maxToken
+  });
+
   // Filter histories by maxToken
   const filterMessages = (
-    await filterGPTMessageByMaxTokens({
+    await filterGPTMessageByMaxContext({
       messages,
-      maxTokens: toolModel.maxContext - 300 // filter token. not response maxToken
+      maxContext: toolModel.maxContext - (max_tokens || 0) // filter token. not response maxToken
     })
   ).map((item) => {
     if (item.role === 'assistant' && item.tool_calls) {
@@ -248,16 +253,11 @@ export const runToolWithToolChoice = async (
     return item;
   });

-  const [requestMessages, max_tokens] = await Promise.all([
+  const [requestMessages] = await Promise.all([
     loadRequestMessages({
       messages: filterMessages,
       useVision: toolModel.vision && aiChatVision,
       origin: requestOrigin
-    }),
-    computedMaxToken({
-      model: toolModel,
-      maxToken,
-      filterMessages
     })
   ]);
   const requestBody = llmCompletionsBodyFormat(
@@ -1,5 +1,5 @@
 import type { NextApiResponse } from 'next';
-import { filterGPTMessageByMaxTokens, loadRequestMessages } from '../../../chat/utils';
+import { filterGPTMessageByMaxContext, loadRequestMessages } from '../../../chat/utils';
 import type { ChatItemType, UserChatItemValueItemType } from '@fastgpt/global/core/chat/type.d';
 import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
 import { SseResponseEventEnum } from '@fastgpt/global/core/workflow/runtime/constants';
@@ -58,6 +58,7 @@ export type ChatProps = ModuleDispatchProps<
 >;
 export type ChatResponse = DispatchNodeResultType<{
   [NodeOutputKeyEnum.answerText]: string;
+  [NodeOutputKeyEnum.reasoningText]?: string;
   [NodeOutputKeyEnum.history]: ChatItemType[];
 }>;

@@ -87,22 +88,24 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
       quoteTemplate,
       quotePrompt,
       aiChatVision,
+      aiChatReasoning,
       fileUrlList: fileLinks, // node quote file links
       stringQuoteText //abandon
     }
   } = props;
   const { files: inputFiles } = chatValue2RuntimePrompt(query); // Chat box input files

-  stream = stream && isResponseAnswerText;
-
-  const chatHistories = getHistories(history, histories);
-  quoteQA = checkQuoteQAValue(quoteQA);
-
   const modelConstantsData = getLLMModel(model);
   if (!modelConstantsData) {
     return Promise.reject('The chat model is undefined, you need to select a chat model.');
   }

+  stream = stream && isResponseAnswerText;
+  aiChatReasoning = !!aiChatReasoning && !!modelConstantsData.reasoning;
+
+  const chatHistories = getHistories(history, histories);
+  quoteQA = checkQuoteQAValue(quoteQA);
+
   const [{ datasetQuoteText }, { documentQuoteText, userFiles }] = await Promise.all([
     filterDatasetQuote({
       quoteQA,
@@ -124,9 +127,15 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
     return Promise.reject(i18nT('chat:AI_input_is_empty'));
   }

+  const max_tokens = computedMaxToken({
+    model: modelConstantsData,
+    maxToken
+  });
+
   const [{ filterMessages }] = await Promise.all([
     getChatMessages({
       model: modelConstantsData,
+      maxTokens: max_tokens,
       histories: chatHistories,
       useDatasetQuote: quoteQA !== undefined,
       datasetQuoteText,
@@ -137,8 +146,8 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
       userFiles,
       documentQuoteText
     }),
-    // Censor = true and system key, will check content
     (() => {
+      // censor model and system key
       if (modelConstantsData.censor && !externalProvider.openaiAccount?.key) {
         return postTextCensor({
           text: `${systemPrompt}
@@ -149,18 +158,11 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
     })()
   ]);

-  const [requestMessages, max_tokens] = await Promise.all([
-    loadRequestMessages({
-      messages: filterMessages,
-      useVision: modelConstantsData.vision && aiChatVision,
-      origin: requestOrigin
-    }),
-    computedMaxToken({
-      model: modelConstantsData,
-      maxToken,
-      filterMessages
-    })
-  ]);
+  const requestMessages = await loadRequestMessages({
+    messages: filterMessages,
+    useVision: modelConstantsData.vision && aiChatVision,
+    origin: requestOrigin
+  });

   const requestBody = llmCompletionsBodyFormat(
     {
@@ -183,34 +185,42 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
     }
   });

-  const { answerText } = await (async () => {
+  const { answerText, reasoningText } = await (async () => {
     if (res && isStreamResponse) {
       // sse response
-      const { answer } = await streamResponse({
+      const { answer, reasoning } = await streamResponse({
         res,
         stream: response,
+        aiChatReasoning,
         workflowStreamResponse
       });

       return {
-        answerText: answer
+        answerText: answer,
+        reasoningText: reasoning
       };
     } else {
       const unStreamResponse = response as ChatCompletion;
       const answer = unStreamResponse.choices?.[0]?.message?.content || '';
-
+      const reasoning = aiChatReasoning
+        ? // @ts-ignore
+          unStreamResponse.choices?.[0]?.message?.reasoning_content || ''
+        : '';
       if (stream) {
         // Some models do not support streaming
-        workflowStreamResponse?.({
-          event: SseResponseEventEnum.fastAnswer,
-          data: textAdaptGptResponse({
-            text: answer
-          })
-        });
+        reasoning &&
+          workflowStreamResponse?.({
+            event: SseResponseEventEnum.fastAnswer,
+            data: textAdaptGptResponse({
+              text: answer,
+              reasoning_content: reasoning
+            })
+          });
       }

       return {
-        answerText: answer
+        answerText: answer,
+        reasoningText: reasoning
       };
     }
   })();
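In the non-streaming branch, reasoning_content is read off the completion behind a @ts-ignore because the standard ChatCompletion typings do not declare that field. A small illustrative helper (the structural type and the function name are assumptions, not part of the diff) shows the same access kept in one place:

```ts
// Structural stand-in for the parts of a chat completion this needs.
type CompletionLike = {
  choices?: Array<{
    message?: { content?: string | null; reasoning_content?: string };
  }>;
};

// Returns both buffers; reasoning is '' for models that do not emit it.
function splitAnswerAndReasoning(completion: CompletionLike) {
  const message = completion.choices?.[0]?.message;
  return {
    answer: message?.content || '',
    reasoning: message?.reasoning_content || ''
  };
}
```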
@@ -241,6 +251,7 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp

   return {
     answerText,
+    reasoningText,
     [DispatchNodeResponseKeyEnum.nodeResponse]: {
       totalPoints: externalProvider.openaiAccount?.key ? 0 : totalPoints,
       model: modelName,
@@ -367,6 +378,7 @@ async function getMultiInput({

 async function getChatMessages({
   model,
+  maxTokens = 0,
   aiChatQuoteRole,
   datasetQuotePrompt = '',
   datasetQuoteText,
@@ -378,6 +390,7 @@ async function getChatMessages({
   documentQuoteText
 }: {
   model: LLMModelItemType;
+  maxTokens?: number;
   // dataset quote
   aiChatQuoteRole: AiChatQuoteRoleType; // user: replace user prompt; system: replace system prompt
   datasetQuotePrompt?: string;
@@ -444,9 +457,9 @@ async function getChatMessages({

   const adaptMessages = chats2GPTMessages({ messages, reserveId: false });

-  const filterMessages = await filterGPTMessageByMaxTokens({
+  const filterMessages = await filterGPTMessageByMaxContext({
     messages: adaptMessages,
-    maxTokens: model.maxContext - 300 // filter token. not response maxToken
+    maxContext: model.maxContext - maxTokens // filter token. not response maxToken
   });

   return {
@@ -457,33 +470,43 @@
 async function streamResponse({
   res,
   stream,
-  workflowStreamResponse
+  workflowStreamResponse,
+  aiChatReasoning
 }: {
   res: NextApiResponse;
   stream: StreamChatType;
   workflowStreamResponse?: WorkflowResponseType;
+  aiChatReasoning?: boolean;
 }) {
   const write = responseWriteController({
     res,
     readStream: stream
   });
   let answer = '';
+  let reasoning = '';
   for await (const part of stream) {
     if (res.closed) {
       stream.controller?.abort();
       break;
     }

     const content = part.choices?.[0]?.delta?.content || '';
     answer += content;
+
+    const reasoningContent = aiChatReasoning
+      ? part.choices?.[0]?.delta?.reasoning_content || ''
+      : '';
+    reasoning += reasoningContent;
+
     workflowStreamResponse?.({
       write,
       event: SseResponseEventEnum.answer,
       data: textAdaptGptResponse({
-        text: content
+        text: content,
+        reasoning_content: reasoningContent
       })
     });
   }

-  return { answer };
+  return { answer, reasoning };
 }
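The streaming side now accumulates two buffers per SSE delta: the normal content and, when aiChatReasoning is enabled, reasoning_content. A simplified, self-contained version of that loop (the chunk type is a stand-in for the SDK's stream part, not the repo's own type):

```ts
type StreamPart = {
  choices?: Array<{
    delta?: { content?: string | null; reasoning_content?: string };
  }>;
};

async function collectStream(stream: AsyncIterable<StreamPart>, aiChatReasoning: boolean) {
  let answer = '';
  let reasoning = '';

  for await (const part of stream) {
    const delta = part.choices?.[0]?.delta;
    answer += delta?.content || '';
    // Reasoning tokens are only tracked when the node has reasoning output enabled.
    reasoning += aiChatReasoning ? delta?.reasoning_content || '' : '';
  }

  return { answer, reasoning };
}
```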
@@ -204,6 +204,7 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
     { inputs = [] }: RuntimeNodeItemType,
     {
       answerText = '',
+      reasoningText,
       responseData,
       nodeDispatchUsages,
       toolResponses,
@@ -213,6 +214,7 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
     }: Omit<
       DispatchNodeResultType<{
         [NodeOutputKeyEnum.answerText]?: string;
+        [NodeOutputKeyEnum.reasoningText]?: string;
         [DispatchNodeResponseKeyEnum.nodeResponse]?: ChatHistoryItemResType;
       }>,
       'nodeResponse'
@@ -251,6 +253,13 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
         }
       });
     }
+  } else if (reasoningText) {
+    chatAssistantResponse.push({
+      type: ChatItemValueTypeEnum.reasoning,
+      reasoning: {
+        content: reasoningText
+      }
+    });
   }

   if (rewriteHistories) {
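The dispatcher change above introduces a reasoning-type assistant value: when a node reports reasoningText, an item of the shape below is pushed into chatAssistantResponse. A hand-written example of that shape (the literal content is made up and the enum is shown as its plain string for readability):

```ts
const reasoningItem = {
  type: 'reasoning', // ChatItemValueTypeEnum.reasoning
  reasoning: {
    content: 'Compare the two dates first, then pick the earlier one ...'
  }
};
```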