This commit is contained in:
Archer
2023-12-31 14:12:51 +08:00
committed by GitHub
parent ccca0468da
commit 9ccfda47b7
270 changed files with 8182 additions and 1295 deletions

View File

@@ -0,0 +1,13 @@
import { cut } from '@node-rs/jieba';
import { stopWords } from '@fastgpt/global/common/string/jieba';
export function jiebaSplit({ text }: { text: string }) {
const tokens = cut(text, true);
return (
tokens
.map((item) => item.replace(/[^\u4e00-\u9fa5a-zA-Z0-9\s]/g, '').trim())
.filter((item) => item && !stopWords.has(item))
.join(' ') || ''
);
}