4.6.4-alpha (#582)

This commit is contained in:
Archer
2023-12-08 15:01:11 +08:00
committed by GitHub
parent 54d52d8d25
commit b58249fc3a
66 changed files with 962 additions and 527 deletions

View File

@@ -107,7 +107,7 @@ export const readPdfContent = (file: File) =>
/**
* Read a .docx file and convert its contents to Markdown
*/
export const readDocContent = (file: File) =>
export const readDocContent = (file: File, metadata: Record<string, any>) =>
new Promise<string>((resolve, reject) => {
try {
const reader = new FileReader();
@@ -120,7 +120,7 @@ export const readDocContent = (file: File) =>
arrayBuffer: target.result as ArrayBuffer
});
const rawText = await formatMarkdown(res?.value);
const rawText = await formatMarkdown(res?.value, metadata);
resolve(rawText);
} catch (error) {
@@ -173,24 +173,25 @@ export const readCsvContent = async (file: File) => {
* 1. upload base64
* 2. replace \
*/
export const formatMarkdown = async (rawText: string = '') => {
export const formatMarkdown = async (rawText: string = '', metadata: Record<string, any>) => {
// match base64, upload and replace it
const base64Regex = /data:image\/.*;base64,([^\)]+)/g;
const base64Arr = rawText.match(base64Regex) || [];
// upload base64 and replace it
await Promise.all(
base64Arr.map(async (base64) => {
base64Arr.map(async (base64Img) => {
try {
const str = await compressBase64ImgAndUpload({
base64,
base64Img,
maxW: 4329,
maxH: 4329,
maxSize: 1024 * 1024 * 5
maxSize: 1024 * 1024 * 5,
metadata
});
rawText = rawText.replace(base64, str);
rawText = rawText.replace(base64Img, str);
} catch (error) {
rawText = rawText.replace(base64, '');
rawText = rawText.replace(base64Img, '');
rawText = rawText.replace(/!\[.*\]\(\)/g, '');
}
})