perf: token split text

This commit is contained in:
archer
2023-04-30 22:35:47 +08:00
parent 39869bc4ea
commit 89a67ca9c0
8 changed files with 96 additions and 85 deletions

View File

@@ -106,7 +106,7 @@ A2:
)
.then((res) => {
const rawContent = res?.data.choices[0].message?.content || ''; // chatgpt 原本的回复
const result = splitText(res?.data.choices[0].message?.content || ''); // 格式化后的QA对
const result = formatSplitText(res?.data.choices[0].message?.content || ''); // 格式化后的QA对
console.log(`split result length: `, result.length);
// 计费
pushSplitDataBill({
@@ -190,7 +190,7 @@ A2:
/**
* 检查文本是否按格式返回
*/
function splitText(text: string) {
function formatSplitText(text: string) {
const regex = /Q\d+:(\s*)(.*)(\s*)A\d+:(\s*)([\s\S]*?)(?=Q|$)/g; // 匹配Q和A的正则表达式
const matches = text.matchAll(regex); // 获取所有匹配到的结果

View File

@@ -1,7 +1,7 @@
import crypto from 'crypto';
import jwt from 'jsonwebtoken';
import { ChatItemSimpleType } from '@/types/chat';
import { countChatTokens } from '@/utils/tools';
import { countChatTokens, sliceTextByToken } from '@/utils/tools';
import { ChatCompletionRequestMessageRoleEnum, ChatCompletionRequestMessage } from 'openai';
import { ChatModelEnum } from '@/constants/model';
@@ -111,18 +111,11 @@ export const systemPromptFilter = ({
prompts: string[];
maxTokens: number;
}) => {
let splitText = '';
const systemPrompt = prompts.join('\n');
// 从前往后截取
for (let i = 0; i < prompts.length; i++) {
const prompt = simplifyStr(prompts[i]);
splitText += `${prompt}\n`;
const tokens = countChatTokens({ model, messages: [{ role: 'system', content: splitText }] });
if (tokens >= maxTokens) {
break;
}
}
return splitText.slice(0, splitText.length - 1);
return sliceTextByToken({
model,
text: systemPrompt,
length: maxTokens
});
};