Training status (#4424)

* dataset data training state (#4311) * dataset data training state * fix * fix ts * fix * fix api format * fix * fix * perf: count training * format * fix: dataset training state (#4417) * fix * add test * fix * fix * fix test * fix test * perf: training count * count * loading status --------- Co-authored-by: heheer <heheer@sealos.io>
2025-04-02 10:53:15 +08:00
parent 5839325f77
commit 27332743c7
33 changed files with 1383 additions and 19 deletions
--- a/packages/web/i18n/en/common.json
+++ b/packages/web/i18n/en/common.json
@@ -1,5 +1,6 @@
 {
  "App": "Application",
+  "Click_to_expand": "Click to expand",
  "Download": "Download",
  "Export": "Export",
  "FAQ.ai_point_a": "Each time you use the AI model, a certain amount of AI points will be deducted. For detailed calculation standards, please refer to the 'AI Points Calculation Standards' above.\nToken calculation uses the same formula as GPT-3.5, where 1 Token ≈ 0.7 Chinese characters ≈ 0.9 English words. Consecutive characters may be considered as 1 Token.",
@@ -538,6 +539,7 @@
  "core.dataset.collection.metadata.source name": "Source Name",
  "core.dataset.collection.metadata.source size": "Source Size",
  "core.dataset.collection.status.active": "Ready",
+  "core.dataset.collection.status.error": "Error",
  "core.dataset.collection.sync.result.sameRaw": "Content Unchanged, No Update Needed",
  "core.dataset.collection.sync.result.success": "Sync Started",
  "core.dataset.data.Data Content": "Related Data Content",
--- a/packages/web/i18n/en/dataset.json
+++ b/packages/web/i18n/en/dataset.json
@@ -28,9 +28,24 @@
  "custom_data_process_params_desc": "Customize data processing rules",
  "custom_split_sign_tip": "Allows you to chunk according to custom delimiters. \nUsually used for processed data, using specific separators for precise chunking. \nYou can use the | symbol to represent multiple splitters, such as: \".|.\" to represent a period in Chinese and English.\n\nTry to avoid using special symbols related to regular, such as: * () [] {}, etc.",
  "data_amount": "{{dataAmount}} Datas, {{indexAmount}} Indexes",
+  "data_error_amount": "{{errorAmount}} training errors",
  "data_index_num": "Index {{index}}",
  "data_process_params": "Params",
  "data_process_setting": "Processing config",
+  "dataset.Chunk_Number": "Chunk number",
+  "dataset.Completed": "Completed",
+  "dataset.Delete_Chunk": "Delete",
+  "dataset.Edit_Chunk": "Edit",
+  "dataset.Error_Message": "Error message",
+  "dataset.No_Error": "No errors yet",
+  "dataset.Operation": "Actions",
+  "dataset.ReTrain": "Retrain",
+  "dataset.Training Process": "Training status",
+  "dataset.Training_Count": "{{count}} in training",
+  "dataset.Training_Errors": "Errors ({{count}})",
+  "dataset.Training_QA": "{{count}} Q&A pairs in training",
+  "dataset.Training_Status": "Training status",
+  "dataset.Training_Waiting": "Waiting for {{count}} data items",
  "dataset.Unsupported operation": "dataset.Unsupported operation",
  "dataset.no_collections": "No datasets available",
  "dataset.no_tags": "No tags available",
@@ -82,6 +97,13 @@
  "preview_chunk_empty": "Unable to read the contents of the file",
  "preview_chunk_intro": "A total of {{total}} blocks, up to 10",
  "preview_chunk_not_selected": "Click on the file on the left to preview",
+  "process.Auto_Index": "Automatic index generation",
+  "process.Get QA": "Q&A extraction",
+  "process.Image_Index": "Image index generation",
+  "process.Is_Ready": "Ready",
+  "process.Parsing": "Parsing",
+  "process.Vectorizing": "Index vectorization",
+  "process.Waiting": "Queued",
  "rebuild_embedding_start_tip": "Index model switching task has started",
  "rebuilding_index_count": "Number of indexes being rebuilt: {{count}}",
  "request_headers": "Request headers, will automatically append 'Bearer '",
@@ -114,7 +136,10 @@
  "tag.total_tags": "Total {{total}} tags",
  "the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "The Dataset has indexes that are being trained or rebuilt",
  "total_num_files": "Total {{total}} files",
+  "training.Error": "{{count}} errors",
+  "training.Normal": "Normal",
  "training_mode": "Chunk mode",
+  "training_ready": "{{count}} groups",
  "vector_model_max_tokens_tip": "Each chunk of data has a maximum length of 3000 tokens",
  "vllm_model": "Image understanding model",
  "website_dataset": "Website Sync",
--- a/packages/web/i18n/zh-CN/common.json
+++ b/packages/web/i18n/zh-CN/common.json
@@ -1,5 +1,6 @@
 {
  "App": "应用",
+  "Click_to_expand": "点击查看详情",
  "Download": "下载",
  "Export": "导出",
  "FAQ.ai_point_a": "每次调用AI模型时，都会消耗一定的AI积分。具体的计算标准可参考上方的“AI 积分计算标准”。\nToken计算采用GPT3.5相同公式，1Token≈0.7中文字符≈0.9英文单词，连续出现的字符可能被认为是1个Tokens。",
@@ -541,6 +542,7 @@
  "core.dataset.collection.metadata.source name": "来源名",
  "core.dataset.collection.metadata.source size": "来源大小",
  "core.dataset.collection.status.active": "已就绪",
+  "core.dataset.collection.status.error": "训练异常",
  "core.dataset.collection.sync.result.sameRaw": "内容未变动，无需更新",
  "core.dataset.collection.sync.result.success": "开始同步",
  "core.dataset.data.Data Content": "相关数据内容",
--- a/packages/web/i18n/zh-CN/dataset.json
+++ b/packages/web/i18n/zh-CN/dataset.json
@@ -28,9 +28,24 @@
  "custom_data_process_params_desc": "自定义设置数据处理规则",
  "custom_split_sign_tip": "允许你根据自定义的分隔符进行分块。通常用于已处理好的数据，使用特定的分隔符来精确分块。可以使用 | 符号表示多个分割符，例如：“。|.” 表示中英文句号。\n尽量避免使用正则相关特殊符号，例如: * () [] {} 等。",
  "data_amount": "{{dataAmount}} 组数据, {{indexAmount}} 组索引",
+  "data_error_amount": "{{errorAmount}} 组训练异常",
  "data_index_num": "索引 {{index}}",
  "data_process_params": "处理参数",
  "data_process_setting": "数据处理配置",
+  "dataset.Chunk_Number": "分块号",
+  "dataset.Completed": "完成",
+  "dataset.Delete_Chunk": "删除",
+  "dataset.Edit_Chunk": "编辑",
+  "dataset.Error_Message": "报错信息",
+  "dataset.No_Error": "暂无异常信息",
+  "dataset.Operation": "操作",
+  "dataset.ReTrain": "重试",
+  "dataset.Training Process": "训练状态",
+  "dataset.Training_Count": "{{count}} 组训练中",
+  "dataset.Training_Errors": "异常 ({{count}})",
+  "dataset.Training_QA": "{{count}} 组问答对训练中",
+  "dataset.Training_Status": "训练状态",
+  "dataset.Training_Waiting": "需等待 {{count}} 组数据",
  "dataset.Unsupported operation": "操作不支持",
  "dataset.no_collections": "暂无数据集",
  "dataset.no_tags": "暂无标签",
@@ -82,6 +97,14 @@
  "preview_chunk_empty": "无法读取该文件内容",
  "preview_chunk_intro": "共 {{total}} 个分块，最多展示 10 个",
  "preview_chunk_not_selected": "点击左侧文件后进行预览",
+  "process.Auto_Index": "自动索引生成",
+  "process.Get QA": "问答对提取",
+  "process.Image_Index": "图片索引生成",
+  "process.Is_Ready": "已就绪",
+  "process.Is_Ready_Count": "{{count}} 组已就绪",
+  "process.Parsing": "内容解析中",
+  "process.Vectorizing": "索引向量化",
+  "process.Waiting": "排队中",
  "rebuild_embedding_start_tip": "切换索引模型任务已开始",
  "rebuilding_index_count": "重建中索引数量：{{count}}",
  "request_headers": "请求头参数，会自动补充 Bearer",
@@ -114,7 +137,10 @@
  "tag.total_tags": "共{{total}}个标签",
  "the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "知识库有训练中或正在重建的索引",
  "total_num_files": "共 {{total}} 个文件",
+  "training.Error": "{{count}} 组异常",
+  "training.Normal": "正常",
  "training_mode": "处理方式",
+  "training_ready": "{{count}} 组",
  "vector_model_max_tokens_tip": "每个分块数据，最大长度为 3000 tokens",
  "vllm_model": "图片理解模型",
  "website_dataset": "Web 站点同步",
--- a/packages/web/i18n/zh-Hant/common.json
+++ b/packages/web/i18n/zh-Hant/common.json
@@ -1,5 +1,6 @@
 {
  "App": "應用程式",
+  "Click_to_expand": "點擊查看詳情",
  "Download": "下載",
  "Export": "匯出",
  "FAQ.ai_point_a": "每次呼叫 AI 模型時，都會消耗一定數量的 AI 點數。詳細的計算標準請參考上方的「AI 點數計算標準」。\nToken 計算採用與 GPT3.5 相同的公式，1 Token ≈ 0.7 個中文字 ≈ 0.9 個英文單字，連續出現的字元可能會被視為 1 個 Token。",
@@ -537,6 +538,7 @@
  "core.dataset.collection.metadata.source name": "來源名稱",
  "core.dataset.collection.metadata.source size": "來源大小",
  "core.dataset.collection.status.active": "已就緒",
+  "core.dataset.collection.status.error": "訓練異常",
  "core.dataset.collection.sync.result.sameRaw": "內容未變更，無需更新",
  "core.dataset.collection.sync.result.success": "開始同步",
  "core.dataset.data.Data Content": "相關資料內容",
--- a/packages/web/i18n/zh-Hant/dataset.json
+++ b/packages/web/i18n/zh-Hant/dataset.json
@@ -28,9 +28,24 @@
  "custom_data_process_params_desc": "自訂資料處理規則",
  "custom_split_sign_tip": "允許你根據自定義的分隔符進行分塊。\n通常用於已處理好的數據，使用特定的分隔符來精確分塊。\n可以使用 | 符號表示多個分割符，例如：“。|.” 表示中英文句號。\n\n盡量避免使用正則相關特殊符號，例如: * () [] {} 等。",
  "data_amount": "{{dataAmount}} 組數據, {{indexAmount}} 組索引",
+  "data_error_amount": "{{errorAmount}} 組訓練異常",
  "data_index_num": "索引 {{index}}",
  "data_process_params": "處理參數",
  "data_process_setting": "資料處理設定",
+  "dataset.Chunk_Number": "分塊號",
+  "dataset.Completed": "完成",
+  "dataset.Delete_Chunk": "刪除",
+  "dataset.Edit_Chunk": "編輯",
+  "dataset.Error_Message": "錯誤訊息",
+  "dataset.No_Error": "暫無異常訊息",
+  "dataset.Operation": "操作",
+  "dataset.ReTrain": "重試",
+  "dataset.Training Process": "訓練狀態",
+  "dataset.Training_Count": "{{count}} 組訓練中",
+  "dataset.Training_Errors": "異常 ({{count}})",
+  "dataset.Training_QA": "{{count}} 組問答對訓練中",
+  "dataset.Training_Status": "訓練狀態",
+  "dataset.Training_Waiting": "需等待 {{count}} 組數據",
  "dataset.Unsupported operation": "操作不支持",
  "dataset.no_collections": "尚無資料集",
  "dataset.no_tags": "尚無標籤",
@@ -82,6 +97,13 @@
  "preview_chunk_empty": "無法讀取該文件內容",
  "preview_chunk_intro": "共 {{total}} 個分塊，最多展示 10 個",
  "preview_chunk_not_selected": "點擊左側文件後進行預覽",
+  "process.Auto_Index": "自動索引生成",
+  "process.Get QA": "問答對提取",
+  "process.Image_Index": "圖片索引生成",
+  "process.Is_Ready": "已就緒",
+  "process.Parsing": "內容解析中",
+  "process.Vectorizing": "索引向量化",
+  "process.Waiting": "排隊中",
  "rebuild_embedding_start_tip": "切換索引模型任務已開始",
  "rebuilding_index_count": "重建中索引數量：{{count}}",
  "request_headers": "請求頭",
@@ -114,7 +136,10 @@
  "tag.total_tags": "共 {{total}} 個標籤",
  "the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "資料集有索引正在訓練或重建中",
  "total_num_files": "共 {{total}} 個文件",
+  "training.Error": "{{count}} 組異常",
+  "training.Normal": "正常",
  "training_mode": "分段模式",
+  "training_ready": "{{count}} 組",
  "vector_model_max_tokens_tip": "每個分塊數據，最大長度為 3000 tokens",
  "vllm_model": "圖片理解模型",
  "website_dataset": "網站同步",