External dataset (#1497)

* perf: read rawText and chunk code

* perf: read raw text

* perf: read rawtext

* perf: token count

* log
This commit is contained in:
Archer
2024-05-16 11:47:53 +08:00
committed by GitHub
parent d5073f98ab
commit c6d9b15897
36 changed files with 531 additions and 267 deletions

View File

@@ -1,6 +1,6 @@
/* Only the gpt-3.5-turbo tokenizer (cl100k_base) is used */
import { Tiktoken } from 'js-tiktoken/lite';
import encodingJson from './cl100k_base.json';
import { Tiktoken } from 'fastgpt-js-tiktoken/lite';
import cl100k_base from './cl100k_base.json';
import {
ChatCompletionMessageParam,
ChatCompletionContentPart,
@@ -10,7 +10,7 @@ import {
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
import { parentPort } from 'worker_threads';
const enc = new Tiktoken(encodingJson);
const enc = new Tiktoken(cl100k_base);
/* Count the tokens in messages */
parentPort?.on(