website sync feature (#4429)

* perf: introduce BullMQ for website sync (#4403)

* perf: introduce BullMQ for website sync

* feat: new redis module

* fix: remove graceful shutdown

* perf: improve UI in dataset detail

- Updated the "change" icon SVG file.
- Modified i18n strings.
- Added new i18n string "immediate_sync".
- Improved UI in dataset detail page, including button icons and
background colors.

* refactor: Add chunkSettings to DatasetSchema

* perf: website sync ux

* env template

* fix: clean up website dataset when updating chunk settings (#4420)

* perf: check setting updated

* perf: worker concurrency

* feat: init script for website sync refactor (#4425)

* website feature doc

---------

Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>
This commit is contained in:
Archer
2025-04-02 13:51:58 +08:00
committed by archer
parent e54fe1eed6
commit d171b2d3d8
46 changed files with 1607 additions and 680 deletions

View File

@@ -515,7 +515,7 @@
"core.dataset.Query extension intro": "开启问题优化功能,可以提高提高连续对话时,知识库搜索的精度。开启该功能后,在进行知识库搜索时,会根据对话记录,利用 AI 补全问题缺失的信息。",
"core.dataset.Quote Length": "引用内容长度",
"core.dataset.Read Dataset": "查看知识库详情",
"core.dataset.Set Website Config": "开始配置网站信息",
"core.dataset.Set Website Config": "开始配置",
"core.dataset.Start export": "已开始导出",
"core.dataset.Table collection": "表格数据集",
"core.dataset.Text collection": "文本数据集",
@@ -531,7 +531,6 @@
"core.dataset.collection.Website Empty Tip": "还没有关联网站",
"core.dataset.collection.Website Link": "Web 站点地址",
"core.dataset.collection.id": "集合 ID",
"core.dataset.collection.metadata.Chunk Size": "分割大小",
"core.dataset.collection.metadata.Createtime": "创建时间",
"core.dataset.collection.metadata.Raw text length": "原文长度",
"core.dataset.collection.metadata.Updatetime": "更新时间",
@@ -633,6 +632,7 @@
"core.dataset.search.search mode": "搜索方式",
"core.dataset.status.active": "已就绪",
"core.dataset.status.syncing": "同步中",
"core.dataset.status.waiting": "排队中",
"core.dataset.test.Batch test": "批量测试",
"core.dataset.test.Batch test Placeholder": "选择一个 CSV 文件",
"core.dataset.test.Search Test": "搜索测试",
@@ -1291,4 +1291,4 @@
"yes": "是",
"yesterday": "昨天",
"yesterday_detail_time": "昨天 {{time}}"
}
}

View File

@@ -7,6 +7,7 @@
"auto_indexes_tips": "通过大模型进行额外索引生成,提高语义丰富度,提高检索的精度。",
"auto_training_queue": "增强索引排队",
"chunk_max_tokens": "分块上限",
"chunk_size": "分块大小",
"close_auto_sync": "确认关闭自动同步功能?",
"collection.Create update time": "创建/更新时间",
"collection.Training type": "训练模式",
@@ -70,6 +71,7 @@
"image_auto_parse": "图片自动索引",
"image_auto_parse_tips": "调用 VLM 自动标注文档里的图片,并生成额外的检索索引",
"image_training_queue": "图片处理排队",
"immediate_sync": "立即同步",
"import.Auto mode Estimated Price Tips": "需调用文本理解模型需要消耗较多AI 积分:{{price}} 积分/1K tokens",
"import.Embedding Estimated Price Tips": "仅使用索引模型,消耗少量 AI 积分:{{price}} 积分/1K tokens",
"import_confirm": "确认上传",
@@ -86,6 +88,7 @@
"keep_image": "保留图片",
"move.hint": "移动后,所选知识库/文件夹将继承新文件夹的权限设置,原先的权限设置失效。",
"open_auto_sync": "开启定时同步后,系统将会每天不定时尝试同步集合,集合同步期间,会出现无法搜索到该集合数据现象。",
"params_config": "配置",
"params_setting": "参数设置",
"pdf_enhance_parse": "PDF增强解析",
"pdf_enhance_parse_price": "{{price}}积分/页",
@@ -145,6 +148,7 @@
"vllm_model": "图片理解模型",
"website_dataset": "Web 站点同步",
"website_dataset_desc": "Web 站点同步允许你直接使用一个网页链接构建知识库",
"website_info": "网站信息",
"yuque_dataset": "语雀知识库",
"yuque_dataset_config": "配置语雀知识库",
"yuque_dataset_desc": "可通过配置语雀文档权限,使用语雀文档构建知识库,文档不会进行二次存储"