Training status (#4424)

* dataset data training state (#4311)

* dataset data training state

* fix

* fix ts

* fix

* fix api format

* fix

* fix

* perf: count training

* format

* fix: dataset training state (#4417)

* fix

* add test

* fix

* fix

* fix test

* fix test

* perf: training count

* count

* loading status

---------

Co-authored-by: heheer <heheer@sealos.io>
This commit is contained in:
Archer
2025-04-02 10:53:15 +08:00
committed by archer
parent 5839325f77
commit 27332743c7
33 changed files with 1383 additions and 19 deletions

View File

@@ -1,5 +1,6 @@
{
"App": "Application",
"Click_to_expand": "Click to expand",
"Download": "Download",
"Export": "Export",
"FAQ.ai_point_a": "Each time you use the AI model, a certain amount of AI points will be deducted. For detailed calculation standards, please refer to the 'AI Points Calculation Standards' above.\nToken calculation uses the same formula as GPT-3.5, where 1 Token ≈ 0.7 Chinese characters ≈ 0.9 English words. Consecutive characters may be considered as 1 Token.",
@@ -538,6 +539,7 @@
"core.dataset.collection.metadata.source name": "Source Name",
"core.dataset.collection.metadata.source size": "Source Size",
"core.dataset.collection.status.active": "Ready",
"core.dataset.collection.status.error": "Error",
"core.dataset.collection.sync.result.sameRaw": "Content Unchanged, No Update Needed",
"core.dataset.collection.sync.result.success": "Sync Started",
"core.dataset.data.Data Content": "Related Data Content",

View File

@@ -28,9 +28,24 @@
"custom_data_process_params_desc": "Customize data processing rules",
"custom_split_sign_tip": "Allows you to chunk according to custom delimiters. \nUsually used for processed data, using specific separators for precise chunking. \nYou can use the | symbol to represent multiple splitters, such as: \"。|.\" to represent a period in Chinese and English.\n\nTry to avoid using special symbols related to regular expressions, such as: * () [] {}, etc.",
"data_amount": "{{dataAmount}} data entries, {{indexAmount}} indexes",
"data_error_amount": "{{errorAmount}} groups with training errors",
"data_index_num": "Index {{index}}",
"data_process_params": "Params",
"data_process_setting": "Processing config",
"dataset.Chunk_Number": "Chunk number",
"dataset.Completed": "Completed",
"dataset.Delete_Chunk": "Delete",
"dataset.Edit_Chunk": "Edit",
"dataset.Error_Message": "Error message",
"dataset.No_Error": "No error information yet",
"dataset.Operation": "Operation",
"dataset.ReTrain": "Retrain",
"dataset.Training Process": "Training status",
"dataset.Training_Count": "{{count}} groups in training",
"dataset.Training_Errors": "Errors",
"dataset.Training_QA": "{{count}} groups of Q&A pairs in training",
"dataset.Training_Status": "Training status",
"dataset.Training_Waiting": "Waiting for {{count}} groups of data",
"dataset.Unsupported operation": "Unsupported operation",
"dataset.no_collections": "No datasets available",
"dataset.no_tags": "No tags available",
@@ -82,6 +97,13 @@
"preview_chunk_empty": "Unable to read the contents of the file",
"preview_chunk_intro": "A total of {{total}} chunks, showing up to 10",
"preview_chunk_not_selected": "Click on the file on the left to preview",
"process.Auto_Index": "Automatic index generation",
"process.Get QA": "Q&A extraction",
"process.Image_Index": "Image index generation",
"process.Is_Ready": "Ready",
"process.Parsing": "Parsing",
"process.Vectorizing": "Index vectorization",
"process.Waiting": "Queued",
"rebuild_embedding_start_tip": "Index model switching task has started",
"rebuilding_index_count": "Number of indexes being rebuilt: {{count}}",
"request_headers": "Request headers, will automatically append 'Bearer '",
@@ -114,7 +136,10 @@
"tag.total_tags": "Total {{total}} tags",
"the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "The Dataset has indexes that are being trained or rebuilt",
"total_num_files": "Total {{total}} files",
"training.Error": "{{count}} groups with errors",
"training.Normal": "Normal",
"training_mode": "Chunk mode",
"training_ready": "{{count}} groups",
"vector_model_max_tokens_tip": "Each chunk of data has a maximum length of 3000 tokens",
"vllm_model": "Image understanding model",
"website_dataset": "Website Sync",

View File

@@ -1,5 +1,6 @@
{
"App": "应用",
"Click_to_expand": "点击查看详情",
"Download": "下载",
"Export": "导出",
"FAQ.ai_point_a": "每次调用AI模型时,都会消耗一定的AI积分。具体的计算标准可参考上方的“AI 积分计算标准”。\nToken计算采用GPT3.5相同公式,1Token≈0.7中文字符≈0.9英文单词,连续出现的字符可能被认为是1个Tokens。",
@@ -541,6 +542,7 @@
"core.dataset.collection.metadata.source name": "来源名",
"core.dataset.collection.metadata.source size": "来源大小",
"core.dataset.collection.status.active": "已就绪",
"core.dataset.collection.status.error": "训练异常",
"core.dataset.collection.sync.result.sameRaw": "内容未变动,无需更新",
"core.dataset.collection.sync.result.success": "开始同步",
"core.dataset.data.Data Content": "相关数据内容",

View File

@@ -28,9 +28,24 @@
"custom_data_process_params_desc": "自定义设置数据处理规则",
"custom_split_sign_tip": "允许你根据自定义的分隔符进行分块。通常用于已处理好的数据,使用特定的分隔符来精确分块。可以使用 | 符号表示多个分割符,例如:“。|.” 表示中英文句号。\n尽量避免使用正则相关特殊符号,例如: * () [] {} 等。",
"data_amount": "{{dataAmount}} 组数据, {{indexAmount}} 组索引",
"data_error_amount": "{{errorAmount}} 组训练异常",
"data_index_num": "索引 {{index}}",
"data_process_params": "处理参数",
"data_process_setting": "数据处理配置",
"dataset.Chunk_Number": "分块号",
"dataset.Completed": "完成",
"dataset.Delete_Chunk": "删除",
"dataset.Edit_Chunk": "编辑",
"dataset.Error_Message": "报错信息",
"dataset.No_Error": "暂无异常信息",
"dataset.Operation": "操作",
"dataset.ReTrain": "重试",
"dataset.Training Process": "训练状态",
"dataset.Training_Count": "{{count}} 组训练中",
"dataset.Training_Errors": "异常 ({{count}})",
"dataset.Training_QA": "{{count}} 组问答对训练中",
"dataset.Training_Status": "训练状态",
"dataset.Training_Waiting": "需等待 {{count}} 组数据",
"dataset.Unsupported operation": "操作不支持",
"dataset.no_collections": "暂无数据集",
"dataset.no_tags": "暂无标签",
@@ -82,6 +97,14 @@
"preview_chunk_empty": "无法读取该文件内容",
"preview_chunk_intro": "共 {{total}} 个分块,最多展示 10 个",
"preview_chunk_not_selected": "点击左侧文件后进行预览",
"process.Auto_Index": "自动索引生成",
"process.Get QA": "问答对提取",
"process.Image_Index": "图片索引生成",
"process.Is_Ready": "已就绪",
"process.Is_Ready_Count": "{{count}} 组已就绪",
"process.Parsing": "内容解析中",
"process.Vectorizing": "索引向量化",
"process.Waiting": "排队中",
"rebuild_embedding_start_tip": "切换索引模型任务已开始",
"rebuilding_index_count": "重建中索引数量:{{count}}",
"request_headers": "请求头参数,会自动补充 Bearer",
@@ -114,7 +137,10 @@
"tag.total_tags": "共{{total}}个标签",
"the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "知识库有训练中或正在重建的索引",
"total_num_files": "共 {{total}} 个文件",
"training.Error": "{{count}} 组异常",
"training.Normal": "正常",
"training_mode": "处理方式",
"training_ready": "{{count}} 组",
"vector_model_max_tokens_tip": "每个分块数据,最大长度为 3000 tokens",
"vllm_model": "图片理解模型",
"website_dataset": "Web 站点同步",

View File

@@ -1,5 +1,6 @@
{
"App": "應用程式",
"Click_to_expand": "點擊查看詳情",
"Download": "下載",
"Export": "匯出",
"FAQ.ai_point_a": "每次呼叫 AI 模型時,都會消耗一定數量的 AI 點數。詳細的計算標準請參考上方的「AI 點數計算標準」。\nToken 計算採用與 GPT3.5 相同的公式,1 Token ≈ 0.7 個中文字 ≈ 0.9 個英文單字,連續出現的字元可能會被視為 1 個 Token。",
@@ -537,6 +538,7 @@
"core.dataset.collection.metadata.source name": "來源名稱",
"core.dataset.collection.metadata.source size": "來源大小",
"core.dataset.collection.status.active": "已就緒",
"core.dataset.collection.status.error": "訓練異常",
"core.dataset.collection.sync.result.sameRaw": "內容未變更,無需更新",
"core.dataset.collection.sync.result.success": "開始同步",
"core.dataset.data.Data Content": "相關資料內容",

View File

@@ -28,9 +28,24 @@
"custom_data_process_params_desc": "自訂資料處理規則",
"custom_split_sign_tip": "允許你根據自定義的分隔符進行分塊。\n通常用於已處理好的數據,使用特定的分隔符來精確分塊。\n可以使用 | 符號表示多個分割符,例如:“。|.” 表示中英文句號。\n\n盡量避免使用正則相關特殊符號,例如: * () [] {} 等。",
"data_amount": "{{dataAmount}} 組數據, {{indexAmount}} 組索引",
"data_error_amount": "{{errorAmount}} 組訓練異常",
"data_index_num": "索引 {{index}}",
"data_process_params": "處理參數",
"data_process_setting": "資料處理設定",
"dataset.Chunk_Number": "分塊號",
"dataset.Completed": "完成",
"dataset.Delete_Chunk": "刪除",
"dataset.Edit_Chunk": "編輯",
"dataset.Error_Message": "錯誤訊息",
"dataset.No_Error": "暫無異常訊息",
"dataset.Operation": "操作",
"dataset.ReTrain": "重試",
"dataset.Training Process": "訓練狀態",
"dataset.Training_Count": "{{count}} 組訓練中",
"dataset.Training_Errors": "異常",
"dataset.Training_QA": "{{count}} 組問答對訓練中",
"dataset.Training_Status": "訓練狀態",
"dataset.Training_Waiting": "需等待 {{count}} 組數據",
"dataset.Unsupported operation": "操作不支援",
"dataset.no_collections": "尚無資料集",
"dataset.no_tags": "尚無標籤",
@@ -82,6 +97,13 @@
"preview_chunk_empty": "無法讀取該文件內容",
"preview_chunk_intro": "共 {{total}} 個分塊,最多展示 10 個",
"preview_chunk_not_selected": "點擊左側文件後進行預覽",
"process.Auto_Index": "自動索引生成",
"process.Get QA": "問答對提取",
"process.Image_Index": "圖片索引生成",
"process.Is_Ready": "已就緒",
"process.Parsing": "內容解析中",
"process.Vectorizing": "索引向量化",
"process.Waiting": "排隊中",
"rebuild_embedding_start_tip": "切換索引模型任務已開始",
"rebuilding_index_count": "重建中索引數量:{{count}}",
"request_headers": "請求頭",
@@ -114,7 +136,10 @@
"tag.total_tags": "共 {{total}} 個標籤",
"the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "資料集有索引正在訓練或重建中",
"total_num_files": "共 {{total}} 個文件",
"training.Error": "{{count}} 組異常",
"training.Normal": "正常",
"training_mode": "分段模式",
"training_ready": "{{count}} 組",
"vector_model_max_tokens_tip": "每個分塊數據,最大長度為 3000 tokens",
"vllm_model": "圖片理解模型",
"website_dataset": "網站同步",