External dataset (#1497)

* perf: read rawText and chunk code

* perf: read raw text

* perf: read rawtext

* perf: token count

* log
This commit is contained in:
Archer
2024-05-16 11:47:53 +08:00
committed by GitHub
parent d5073f98ab
commit c6d9b15897
36 changed files with 531 additions and 267 deletions

View File

@@ -13,10 +13,10 @@
"decompress": "^4.2.1",
"domino-ext": "^2.1.4",
"encoding": "^0.1.13",
"fastgpt-js-tiktoken": "^1.0.12",
"file-type": "^19.0.0",
"iconv-lite": "^0.6.3",
"joplin-turndown-plugin-gfm": "^1.0.12",
"js-tiktoken": "^1.0.7",
"json5": "^2.2.3",
"jsonwebtoken": "^9.0.2",
"mammoth": "^1.6.0",