feat: 数据集导出

This commit is contained in:
archer
2023-04-03 00:18:21 +08:00
parent 05b2e9e99c
commit 16a31de1c7
9 changed files with 35 additions and 17 deletions

View File

@@ -36,12 +36,20 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
const textList: string[] = [];
let splitText = '';
/* Group the chunks into text segments of roughly 3k-4k tokens each */
chunks.forEach((chunk) => {
splitText += chunk;
const tokens = encode(splitText).length;
if (tokens >= 980) {
const tokens = encode(splitText + chunk).length;
if (tokens >= 4000) {
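// 4000 tokens or more: leave this chunk out, store the text gathered so far, and start the next segment with this chunk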
textList.push(splitText);
splitText = chunk;
} else if (tokens >= 3000) {
// At least 3000 tokens (but under 4000): store the gathered text together with this chunk
textList.push(splitText + chunk);
splitText = '';
} else {
// Under 3000 tokens: keep accumulating
splitText += chunk;
}
});