Add image index and pdf parse (#3956)

* feat: think tag parse

* feat: parse think tag test

* feat: pdf parse ux

* feat: doc2x parse

* perf: rewrite training mode setting

* feat: image parse queue

* perf: image index

* feat: image parse process

* feat: add init sh

* fix: ts
This commit is contained in:
Archer
2025-03-03 23:08:29 +08:00
committed by archer
parent 08b6f594df
commit adf5377ebe
106 changed files with 2337 additions and 1454 deletions

View File

@@ -562,10 +562,7 @@
"core.dataset.file": "File",
"core.dataset.folder": "Directory",
"core.dataset.import.Auto mode Estimated Price Tips": "Requires calling the file processing model, which consumes a lot of tokens: {{price}} points/1K tokens",
"core.dataset.import.Auto process": "Automatic",
"core.dataset.import.Auto process desc": "Automatically set segmentation and preprocessing rules",
"core.dataset.import.Chunk Range": "Range: {{min}}~{{max}}",
"core.dataset.import.Chunk Split": "Chunks",
"core.dataset.import.Chunk Split Tip": "Segment the text according to certain rules and convert it into a format that can be semantically searched. Suitable for most scenarios. No additional model processing is required, and the cost is low.",
"core.dataset.import.Continue upload": "Continue upload",
"core.dataset.import.Custom process": "Custom Rules",
@@ -575,7 +572,6 @@
"core.dataset.import.Custom split char Tips": "Allows you to segment based on custom separators. Usually used for pre-processed data, using specific separators for precise segmentation.",
"core.dataset.import.Custom text": "Custom Text",
"core.dataset.import.Custom text desc": "Manually enter a piece of text as a dataset",
"core.dataset.import.Data Preprocessing": "Data Processing",
"core.dataset.import.Data process params": "Data Processing Parameters",
"core.dataset.import.Down load csv template": "Click to Download CSV Template",
"core.dataset.import.Embedding Estimated Price Tips": "Only uses the index model, consuming a small amount of AI points: {{price}} points/1K tokens",
@@ -597,7 +593,6 @@
"core.dataset.import.Source name": "Source Name",
"core.dataset.import.Sources list": "Sources",
"core.dataset.import.Start upload": "Start Upload",
"core.dataset.import.Total files": "Total {{total}} Files",
"core.dataset.import.Upload complete": "Upload complete",
"core.dataset.import.Upload data": "Confirm Upload",
"core.dataset.import.Upload file progress": "File Upload Progress",
@@ -649,10 +644,10 @@
"core.dataset.training.Agent queue": "QA Training Queue",
"core.dataset.training.Auto mode": "Auto index",
"core.dataset.training.Auto mode Tip": "Increase the semantic richness of data blocks by generating related questions and summaries through sub-indexes and calling models, making it more conducive to retrieval. Requires more storage space and increases the number of AI calls.",
"core.dataset.training.Chunk mode": "Default",
"core.dataset.training.Chunk mode": "Chunk",
"core.dataset.training.Full": "Estimated Over 5 Minutes",
"core.dataset.training.Leisure": "Idle",
"core.dataset.training.QA mode": "QA Chunks",
"core.dataset.training.QA mode": "QA",
"core.dataset.training.Vector queue": "Index Queue",
"core.dataset.training.Waiting": "Estimated 5 Minutes",
"core.dataset.training.Website Sync": "Website Sync",
@@ -861,7 +856,6 @@
"dataset.collections.Select Collection": "Select File",
"dataset.collections.Select One Collection To Store": "Select a File to Store",
"dataset.data.Can not edit": "No Edit Permission",
"dataset.data.Custom Index Number": "Custom Index {{number}}",
"dataset.data.Default Index": "Default Index",
"dataset.data.Delete Tip": "Confirm to Delete This Data?",
"dataset.data.Index Placeholder": "Enter Index Text Content",
@@ -956,6 +950,7 @@
"new_create": "Create New",
"no": "No",
"no_laf_env": "System Not Configured with Laf Environment",
"not_model_config": "No related model configured",
"not_yet_introduced": "No Introduction Yet",
"option": "Option",
"pay.amount": "Amount",
@@ -1121,7 +1116,6 @@
"support.wallet.invoice_detail": "Invoice Details",
"support.wallet.invoice_info": "The invoice will be sent to the email within 3-7 working days. Please wait patiently.",
"support.wallet.invoicing": "Invoicing",
"support.wallet.moduleName.index": "Index Generation",
"support.wallet.moduleName.qa": "QA Split",
"support.wallet.noBill": "No Bill Records",
"support.wallet.no_invoice": "No Invoice Records",