修改插件:Doc2X插件适配文件上传功能 (#2284)

* Change to download first and check contentType

* Up to date

* Some bug fixes; still some bugs with images

* Update tool to read from file

* improve formatting in PDF

* Add tool for image file OCR, but it still has some bugs

* Bug fix for parameter passing error.

* Modification Introduction
This commit is contained in:
Menghuan1918
2024-08-08 18:56:05 +08:00
committed by GitHub
parent 067f3f4098
commit 4d570ecd4f
10 changed files with 1230 additions and 31 deletions

View File

@@ -40,9 +40,35 @@ const main = async ({ apikey, url, ocr }: Props): Response => {
real_api_key = data.data.token;
}
//Get the image binary from the URL
//Fetch the PDF and check its content type
let PDFResponse;
try {
PDFResponse = await fetch(url);
} catch (e) {
return {
result: `Failed to fetch PDF from URL: ${url} with error: ${e}`,
success: false
};
}
if (!PDFResponse.ok) {
return {
result: `Failed to fetch PDF from URL: ${url}`,
success: false
};
}
const contentType = PDFResponse.headers.get('content-type');
if (!contentType || !contentType.startsWith('application/pdf')) {
return {
result: `The provided URL does not point to a PDF: ${contentType}`,
success: false
};
}
const blob = await PDFResponse.blob();
const formData = new FormData();
formData.append('pdf_url', url);
const fileName = url.split('/').pop()?.split('?')[0] || 'pdf';
formData.append('file', blob, fileName);
formData.append('ocr', ocr ? '1' : '0');
let upload_url = 'https://api.doc2x.noedgeai.com/api/platform/async/pdf';