perf: token split text
This commit is contained in:
@@ -106,7 +106,7 @@ A2:
|
||||
)
|
||||
.then((res) => {
|
||||
const rawContent = res?.data.choices[0].message?.content || ''; // chatgpt 原本的回复
|
||||
const result = splitText(res?.data.choices[0].message?.content || ''); // 格式化后的QA对
|
||||
const result = formatSplitText(res?.data.choices[0].message?.content || ''); // 格式化后的QA对
|
||||
console.log(`split result length: `, result.length);
|
||||
// 计费
|
||||
pushSplitDataBill({
|
||||
@@ -190,7 +190,7 @@ A2:
|
||||
/**
|
||||
* 检查文本是否按格式返回
|
||||
*/
|
||||
function splitText(text: string) {
|
||||
function formatSplitText(text: string) {
|
||||
const regex = /Q\d+:(\s*)(.*)(\s*)A\d+:(\s*)([\s\S]*?)(?=Q|$)/g; // 匹配Q和A的正则表达式
|
||||
const matches = text.matchAll(regex); // 获取所有匹配到的结果
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import crypto from 'crypto';
|
||||
import jwt from 'jsonwebtoken';
|
||||
import { ChatItemSimpleType } from '@/types/chat';
|
||||
import { countChatTokens } from '@/utils/tools';
|
||||
import { countChatTokens, sliceTextByToken } from '@/utils/tools';
|
||||
import { ChatCompletionRequestMessageRoleEnum, ChatCompletionRequestMessage } from 'openai';
|
||||
import { ChatModelEnum } from '@/constants/model';
|
||||
|
||||
@@ -111,18 +111,11 @@ export const systemPromptFilter = ({
|
||||
prompts: string[];
|
||||
maxTokens: number;
|
||||
}) => {
|
||||
let splitText = '';
|
||||
const systemPrompt = prompts.join('\n');
|
||||
|
||||
// 从前往前截取
|
||||
for (let i = 0; i < prompts.length; i++) {
|
||||
const prompt = simplifyStr(prompts[i]);
|
||||
|
||||
splitText += `${prompt}\n`;
|
||||
const tokens = countChatTokens({ model, messages: [{ role: 'system', content: splitText }] });
|
||||
if (tokens >= maxTokens) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return splitText.slice(0, splitText.length - 1);
|
||||
return sliceTextByToken({
|
||||
model,
|
||||
text: systemPrompt,
|
||||
length: maxTokens
|
||||
});
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user