Add image index and pdf parse (#3956)

* feat: think tag parse

* feat: parse think tag test

* feat: pdf parse ux

* feat: doc2x parse

* perf: rewrite training mode setting

* feat: image parse queue

* perf: image index

* feat: image parse process

* feat: add init sh

* fix: ts
This commit is contained in:
Archer
2025-03-03 23:08:29 +08:00
committed by archer
parent 08b6f594df
commit adf5377ebe
106 changed files with 2337 additions and 1454 deletions

View File

@@ -561,10 +561,7 @@
"core.dataset.file": "檔案",
"core.dataset.folder": "目錄",
"core.dataset.import.Auto mode Estimated Price Tips": "需要呼叫檔案處理模型,將消耗較多 AI 點數:{{price}} 點數/1K tokens",
"core.dataset.import.Auto process": "自動",
"core.dataset.import.Auto process desc": "自動設定分割和預處理規則",
"core.dataset.import.Chunk Range": "範圍:{{min}}~{{max}}",
"core.dataset.import.Chunk Split": "直接分段",
"core.dataset.import.Chunk Split Tip": "將文字依照特定規則進行分段處理後,轉換成可進行語意搜尋的格式,適合大多數場景。不需要呼叫模型額外處理,成本較低。",
"core.dataset.import.Continue upload": "繼續上傳",
"core.dataset.import.Custom process": "自訂規則",
@@ -574,7 +571,6 @@
"core.dataset.import.Custom split char Tips": "允許您根據自訂的分隔符進行分割。通常用於已處理好的資料,使用特定的分隔符來精確分割。",
"core.dataset.import.Custom text": "自訂文字",
"core.dataset.import.Custom text desc": "手動輸入一段文字作為資料集",
"core.dataset.import.Data Preprocessing": "資料處理",
"core.dataset.import.Data process params": "資料處理參數",
"core.dataset.import.Down load csv template": "點選下載 CSV 範本",
"core.dataset.import.Embedding Estimated Price Tips": "僅使用索引模型,消耗少量 AI 點數:{{price}} 點數/1K tokens",
@@ -596,7 +592,6 @@
"core.dataset.import.Source name": "來源名稱",
"core.dataset.import.Sources list": "來源列表",
"core.dataset.import.Start upload": "開始上傳",
"core.dataset.import.Total files": "共 {{total}} 個檔案",
"core.dataset.import.Upload complete": "上傳完成",
"core.dataset.import.Upload data": "確認上傳",
"core.dataset.import.Upload file progress": "檔案上傳進度",
@@ -646,12 +641,12 @@
"core.dataset.test.test result placeholder": "測試結果將顯示在這裡",
"core.dataset.test.test result tip": "根據知識庫內容與測試文字的相似度進行排序。您可以根據測試結果調整相應的文字。\n注意測試記錄中的資料可能已經被修改。點選某筆測試資料後將顯示最新資料。",
"core.dataset.training.Agent queue": "問答訓練排隊中",
"core.dataset.training.Auto mode": "增強處理",
"core.dataset.training.Auto mode": "補充索引",
"core.dataset.training.Auto mode Tip": "透過子索引以及呼叫模型產生相關問題與摘要,來增加資料區塊的語意豐富度,更有利於檢索。需要消耗更多的儲存空間並增加 AI 呼叫次數。",
"core.dataset.training.Chunk mode": "直接分",
"core.dataset.training.Chunk mode": "直接分",
"core.dataset.training.Full": "預計超過 5 分鐘",
"core.dataset.training.Leisure": "閒置",
"core.dataset.training.QA mode": "問答拆分",
"core.dataset.training.QA mode": "問答對提取",
"core.dataset.training.Vector queue": "索引排隊中",
"core.dataset.training.Waiting": "預計 5 分鐘",
"core.dataset.training.Website Sync": "網站同步",
@@ -861,7 +856,6 @@
"dataset.collections.Select Collection": "選擇檔案",
"dataset.collections.Select One Collection To Store": "選擇一個檔案進行儲存",
"dataset.data.Can not edit": "無編輯權限",
"dataset.data.Custom Index Number": "自訂索引 {{number}}",
"dataset.data.Default Index": "預設索引",
"dataset.data.Delete Tip": "確認刪除此資料?",
"dataset.data.Index Placeholder": "輸入索引文字內容",
@@ -955,6 +949,7 @@
"new_create": "建立新項目",
"no": "否",
"no_laf_env": "系統未設定 LAF 環境",
"not_model_config": "未配置相關模型",
"not_yet_introduced": "暫無介紹",
"option": "選項",
"pay.amount": "金額",
@@ -1120,7 +1115,6 @@
"support.wallet.invoice_detail": "發票詳細資訊",
"support.wallet.invoice_info": "發票將在 3-7 個工作天內寄送至電子郵件信箱,請耐心等候",
"support.wallet.invoicing": "開立發票",
"support.wallet.moduleName.index": "產生索引",
"support.wallet.moduleName.qa": "問答拆分",
"support.wallet.noBill": "無帳單紀錄",
"support.wallet.no_invoice": "無發票紀錄",