website sync feature (#4429)

* perf: introduce BullMQ for website sync (#4403)

* perf: introduce BullMQ for website sync

* feat: new redis module

* fix: remove graceful shutdown

* perf: improve UI in dataset detail

- Updated the "change" icon SVG file.
- Modified i18n strings.
- Added new i18n string "immediate_sync".
- Improved UI in dataset detail page, including button icons and
background colors.

* refactor: Add chunkSettings to DatasetSchema

* perf: website sync ux

* env template

* fix: clean up website dataset when updating chunk settings (#4420)

* perf: check setting updated

* perf: worker concurrency

* feat: init script for website sync refactor (#4425)

* website feature doc

---------

Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com>
This commit is contained in:
Archer
2025-04-02 13:51:58 +08:00
committed by archer
parent e54fe1eed6
commit d171b2d3d8
46 changed files with 1607 additions and 680 deletions

View File

@@ -511,7 +511,7 @@
"core.dataset.Query extension intro": "開啟問題最佳化功能,可以提高連續對話時知識庫搜尋的準確度。開啟此功能後,在進行知識庫搜尋時,系統會根據對話記錄,利用 AI 補充問題中缺少的資訊。",
"core.dataset.Quote Length": "引用內容長度",
"core.dataset.Read Dataset": "檢視知識庫詳細資料",
"core.dataset.Set Website Config": "開始設定網站資訊",
"core.dataset.Set Website Config": "開始設定",
"core.dataset.Start export": "已開始匯出",
"core.dataset.Table collection": "表格資料集",
"core.dataset.Text collection": "文字資料集",
@@ -527,7 +527,6 @@
"core.dataset.collection.Website Empty Tip": "還沒有關聯網站",
"core.dataset.collection.Website Link": "網站網址",
"core.dataset.collection.id": "集合 ID",
"core.dataset.collection.metadata.Chunk Size": "分割大小",
"core.dataset.collection.metadata.Createtime": "建立時間",
"core.dataset.collection.metadata.Raw text length": "原始文字長度",
"core.dataset.collection.metadata.Updatetime": "更新時間",
@@ -629,6 +628,7 @@
"core.dataset.search.search mode": "搜尋方式",
"core.dataset.status.active": "已就緒",
"core.dataset.status.syncing": "同步中",
"core.dataset.status.waiting": "排隊中",
"core.dataset.test.Batch test": "批次測試",
"core.dataset.test.Batch test Placeholder": "選擇一個 CSV 檔案",
"core.dataset.test.Search Test": "搜尋測試",

View File

@@ -7,6 +7,7 @@
"auto_indexes_tips": "通過大模型進行額外索引生成,提高語義豐富度,提高檢索的精度。",
"auto_training_queue": "增強索引排隊",
"chunk_max_tokens": "分塊上限",
"chunk_size": "分塊大小",
"close_auto_sync": "確認關閉自動同步功能?",
"collection.Create update time": "建立/更新時間",
"collection.Training type": "分段模式",
@@ -70,6 +71,7 @@
"image_auto_parse": "圖片自動索引",
"image_auto_parse_tips": "調用 VLM 自動標註文檔裡的圖片,並生成額外的檢索索引",
"image_training_queue": "圖片處理排隊",
"immediate_sync": "立即同步",
"import.Auto mode Estimated Price Tips": "需呼叫文字理解模型,將消耗較多 AI 點數:{{price}} 點數 / 1K tokens",
"import.Embedding Estimated Price Tips": "僅使用索引模型,消耗少量 AI 點數:{{price}} 點數 / 1K tokens",
"import_confirm": "確認上傳",
@@ -86,6 +88,7 @@
"keep_image": "保留圖片",
"move.hint": "移動後,所選資料集/資料夾將繼承新資料夾的權限設定,原先的權限設定將失效。",
"open_auto_sync": "開啟定時同步後,系統將每天不定時嘗試同步集合,集合同步期間,會出現無法搜尋到該集合資料現象。",
"params_config": "配置",
"params_setting": "參數設置",
"pdf_enhance_parse": "PDF增強解析",
"pdf_enhance_parse_price": "{{price}}積分/頁",
@@ -144,6 +147,7 @@
"vllm_model": "圖片理解模型",
"website_dataset": "網站同步",
"website_dataset_desc": "網站同步功能讓您可以直接使用網頁連結建立資料集",
"website_info": "網站資訊",
"yuque_dataset": "語雀知識庫",
"yuque_dataset_config": "配置語雀知識庫",
"yuque_dataset_desc": "可通過配置語雀文檔權限,使用語雀文檔構建知識庫,文檔不會進行二次存儲"