perf: token slice
This commit is contained in:
@@ -85,11 +85,24 @@ export const searchKb = async ({
|
||||
};
|
||||
const filterRate = filterRateMap[systemPrompts.length] || filterRateMap[0];
|
||||
|
||||
// count fixed system prompt
|
||||
const fixedSystemPrompt = `
|
||||
${model.chat.systemPrompt}
|
||||
${
|
||||
model.chat.searchMode === ModelVectorSearchModeEnum.hightSimilarity ? '不回答知识库外的内容.' : ''
|
||||
}
|
||||
知识库内容为:`;
|
||||
const fixedSystemTokens = modelToolMap[model.chat.chatModel].countTokens({
|
||||
messages: [{ obj: 'System', value: fixedSystemPrompt }]
|
||||
});
|
||||
|
||||
const maxTokens = modelConstantsData.systemMaxToken - fixedSystemTokens;
|
||||
|
||||
const filterSystemPrompt = filterRate
|
||||
.map((rate, i) =>
|
||||
modelToolMap[model.chat.chatModel].sliceText({
|
||||
text: systemPrompts[i],
|
||||
length: Math.floor(modelConstantsData.systemMaxToken * rate)
|
||||
length: Math.floor(maxTokens * rate)
|
||||
})
|
||||
)
|
||||
.join('\n');
|
||||
@@ -122,13 +135,7 @@ export const searchKb = async ({
|
||||
code: 200,
|
||||
searchPrompt: {
|
||||
obj: ChatRoleEnum.System,
|
||||
value: `
|
||||
${model.chat.systemPrompt}
|
||||
${
|
||||
model.chat.searchMode === ModelVectorSearchModeEnum.hightSimilarity ? '不回答知识库外的内容.' : ''
|
||||
}
|
||||
知识库内容为: '${filterSystemPrompt}'
|
||||
`
|
||||
value: `${fixedSystemPrompt}'${filterSystemPrompt}'`
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
@@ -45,18 +45,13 @@ export const lafClaudChat = async ({
|
||||
}
|
||||
);
|
||||
|
||||
let responseText = '';
|
||||
let totalTokens = 0;
|
||||
|
||||
if (!stream) {
|
||||
responseText = lafResponse.data?.text || '';
|
||||
}
|
||||
const responseText = stream ? '' : lafResponse.data?.text || '';
|
||||
|
||||
return {
|
||||
streamResponse: lafResponse,
|
||||
responseMessages: messages.concat({ obj: ChatRoleEnum.AI, value: responseText }),
|
||||
responseText,
|
||||
totalTokens
|
||||
totalTokens: 0
|
||||
};
|
||||
};
|
||||
|
||||
@@ -83,18 +78,15 @@ export const lafClaudStreamResponse = async ({
|
||||
} catch (error) {
|
||||
console.log('pipe error', error);
|
||||
}
|
||||
// count tokens
|
||||
|
||||
const finishMessages = prompts.concat({
|
||||
obj: ChatRoleEnum.AI,
|
||||
value: responseContent
|
||||
});
|
||||
const totalTokens = modelToolMap[ClaudeEnum.Claude].countTokens({
|
||||
messages: finishMessages
|
||||
});
|
||||
|
||||
return {
|
||||
responseContent,
|
||||
totalTokens,
|
||||
totalTokens: 0,
|
||||
finishMessages
|
||||
};
|
||||
} catch (error) {
|
||||
|
||||
@@ -96,14 +96,8 @@ export const chatResponse = async ({
|
||||
}
|
||||
);
|
||||
|
||||
let responseText = '';
|
||||
let totalTokens = 0;
|
||||
|
||||
// adapt data
|
||||
if (!stream) {
|
||||
responseText = response.data.choices[0].message?.content || '';
|
||||
totalTokens = response.data.usage?.total_tokens || 0;
|
||||
}
|
||||
const responseText = stream ? '' : response.data.choices[0].message?.content || '';
|
||||
const totalTokens = stream ? 0 : response.data.usage?.total_tokens || 0;
|
||||
|
||||
return {
|
||||
streamResponse: response,
|
||||
|
||||
Reference in New Issue
Block a user