fix: cache tiktoken encoders on window/global to stop repeated encoder creation from growing memory

This commit is contained in:
archer
2023-06-02 13:10:34 +08:00
parent d69554575d
commit 8cafebe26c
5 changed files with 65 additions and 275 deletions

View File

@@ -37,6 +37,7 @@ export async function connectToDatabase(): Promise<void> {
});
}
// Initialize queue counters
global.qaQueueLen = 0;
global.vectorQueueLen = 0;

View File

@@ -1,6 +1,7 @@
import type { Mongoose } from 'mongoose';
import type { Agent } from 'http';
import type { Pool } from 'pg';
import type { Tiktoken } from '@dqbd/tiktoken';
declare global {
var mongodb: Mongoose | string | null;
@@ -11,6 +12,7 @@ declare global {
var QRCode: any;
var qaQueueLen: number;
var vectorQueueLen: number;
var OpenAiEncMap: Record<string, Tiktoken>;
interface Window {
['pdfjs-dist/build/pdf']: any;

View File

@@ -8,6 +8,66 @@ import Graphemer from 'graphemer';
const textDecoder = new TextDecoder();
const graphemer = new Graphemer();
/**
 * Return the per-model tiktoken encoder map, creating it at most once per
 * runtime. Creating a Tiktoken encoder is expensive, so the map is cached on
 * `window` (browser) or `global` (Node) and reused on subsequent calls.
 *
 * @returns Record mapping model name ('gpt-3.5-turbo' | 'gpt-4' | 'gpt-4-32k')
 *          to its Tiktoken encoder, extended with the ChatML special tokens.
 */
export const getOpenAiEncMap = () => {
  // Single source of truth for the encoder map; previously this literal was
  // copy-pasted into all three branches below, so the copies could diverge.
  const buildEncMap = () => {
    // ChatML delimiter tokens shared by every supported chat model.
    const chatSpecialTokens = {
      '<|im_start|>': 100264,
      '<|im_end|>': 100265,
      '<|im_sep|>': 100266
    };
    return {
      'gpt-3.5-turbo': encoding_for_model('gpt-3.5-turbo', chatSpecialTokens),
      'gpt-4': encoding_for_model('gpt-4', chatSpecialTokens),
      'gpt-4-32k': encoding_for_model('gpt-4-32k', chatSpecialTokens)
    };
  };

  // Browser: cache on window so repeated calls reuse the same encoders.
  if (typeof window !== 'undefined') {
    window.OpenAiEncMap = window.OpenAiEncMap || buildEncMap();
    return window.OpenAiEncMap;
  }
  // Node: cache on global for the same reason.
  if (typeof global !== 'undefined') {
    global.OpenAiEncMap = global.OpenAiEncMap || buildEncMap();
    return global.OpenAiEncMap;
  }
  // Neither global object available (unusual runtime): build uncached,
  // matching the original fallback behavior.
  return buildEncMap();
};
export const adaptChatItem_openAI = ({
messages
}: {
@@ -24,29 +84,6 @@ export const adaptChatItem_openAI = ({
}));
};
/* count openai chat token*/
// NOTE(review): this is the pre-fix implementation removed by this commit.
// It cached the encoder map in a module-level variable, which gives each
// bundle/module instance its own copy instead of one shared per runtime —
// presumably the source of the memory growth this commit addresses; the
// replacement caches on window/global instead.
let OpenAiEncMap: Record<string, Tiktoken>;
export const getOpenAiEncMap = () => {
  // Return the memoized map if this module instance already built it.
  if (OpenAiEncMap) return OpenAiEncMap;
  // Build encoders for each supported chat model, extended with the
  // ChatML special tokens (<|im_start|>, <|im_end|>, <|im_sep|>).
  OpenAiEncMap = {
    'gpt-3.5-turbo': encoding_for_model('gpt-3.5-turbo', {
      '<|im_start|>': 100264,
      '<|im_end|>': 100265,
      '<|im_sep|>': 100266
    }),
    'gpt-4': encoding_for_model('gpt-4', {
      '<|im_start|>': 100264,
      '<|im_end|>': 100265,
      '<|im_sep|>': 100266
    }),
    'gpt-4-32k': encoding_for_model('gpt-4-32k', {
      '<|im_start|>': 100264,
      '<|im_end|>': 100265,
      '<|im_sep|>': 100266
    })
  };
  return OpenAiEncMap;
};
export function countOpenAIToken({
messages,
model