new framwork
This commit is contained in:
264
client/src/utils/file.ts
Normal file
264
client/src/utils/file.ts
Normal file
@@ -0,0 +1,264 @@
|
||||
import mammoth from 'mammoth';
|
||||
import Papa from 'papaparse';
|
||||
import { getOpenAiEncMap } from './plugin/openai';
|
||||
import { getErrText } from './tools';
|
||||
|
||||
/**
|
||||
* 读取 txt 文件内容
|
||||
*/
|
||||
export const readTxtContent = (file: File) => {
|
||||
return new Promise((resolve: (_: string) => void, reject) => {
|
||||
try {
|
||||
const reader = new FileReader();
|
||||
reader.onload = () => {
|
||||
resolve(reader.result as string);
|
||||
};
|
||||
reader.onerror = (err) => {
|
||||
console.log('error txt read:', err);
|
||||
reject('读取 txt 文件失败');
|
||||
};
|
||||
reader.readAsText(file);
|
||||
} catch (error) {
|
||||
reject('浏览器不支持文件内容读取');
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
/**
|
||||
* 读取 pdf 内容
|
||||
*/
|
||||
export const readPdfContent = (file: File) =>
|
||||
new Promise<string>((resolve, reject) => {
|
||||
try {
|
||||
const pdfjsLib = window['pdfjs-dist/build/pdf'];
|
||||
pdfjsLib.workerSrc = '/js/pdf.worker.js';
|
||||
|
||||
const readPDFPage = async (doc: any, pageNo: number) => {
|
||||
const page = await doc.getPage(pageNo);
|
||||
const tokenizedText = await page.getTextContent();
|
||||
const pageText = tokenizedText.items.map((token: any) => token.str).join(' ');
|
||||
return pageText;
|
||||
};
|
||||
|
||||
let reader = new FileReader();
|
||||
reader.readAsArrayBuffer(file);
|
||||
reader.onload = async (event) => {
|
||||
if (!event?.target?.result) return reject('解析 PDF 失败');
|
||||
try {
|
||||
const doc = await pdfjsLib.getDocument(event.target.result).promise;
|
||||
const pageTextPromises = [];
|
||||
for (let pageNo = 1; pageNo <= doc.numPages; pageNo++) {
|
||||
pageTextPromises.push(readPDFPage(doc, pageNo));
|
||||
}
|
||||
const pageTexts = await Promise.all(pageTextPromises);
|
||||
resolve(pageTexts.join('\n'));
|
||||
} catch (err) {
|
||||
console.log(err, 'pdfjs error');
|
||||
reject('解析 PDF 失败');
|
||||
}
|
||||
};
|
||||
reader.onerror = (err) => {
|
||||
console.log(err, 'reader error');
|
||||
reject('解析 PDF 失败');
|
||||
};
|
||||
} catch (error) {
|
||||
reject('浏览器不支持文件内容读取');
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* 读取doc
|
||||
*/
|
||||
export const readDocContent = (file: File) =>
|
||||
new Promise<string>((resolve, reject) => {
|
||||
try {
|
||||
const reader = new FileReader();
|
||||
reader.readAsArrayBuffer(file);
|
||||
reader.onload = async ({ target }) => {
|
||||
if (!target?.result) return reject('读取 doc 文件失败');
|
||||
try {
|
||||
const res = await mammoth.extractRawText({
|
||||
arrayBuffer: target.result as ArrayBuffer
|
||||
});
|
||||
resolve(res?.value);
|
||||
} catch (error) {
|
||||
reject('读取 doc 文件失败, 请转换成 PDF');
|
||||
}
|
||||
};
|
||||
reader.onerror = (err) => {
|
||||
console.log('error doc read:', err);
|
||||
|
||||
reject('读取 doc 文件失败');
|
||||
};
|
||||
} catch (error) {
|
||||
reject('浏览器不支持文件内容读取');
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* 读取csv
|
||||
*/
|
||||
export const readCsvContent = async (file: File) => {
|
||||
try {
|
||||
const textArr = await readTxtContent(file);
|
||||
const json = Papa.parse(textArr).data as string[][];
|
||||
if (json.length === 0) {
|
||||
throw new Error('csv 解析失败');
|
||||
}
|
||||
return {
|
||||
header: json.shift()?.filter((item) => item) as string[],
|
||||
data: json.map((item) => item?.filter((item) => item))
|
||||
};
|
||||
} catch (error) {
|
||||
return Promise.reject('解析 csv 文件失败');
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* file download
|
||||
*/
|
||||
export const fileDownload = ({
|
||||
text,
|
||||
type,
|
||||
filename
|
||||
}: {
|
||||
text: string;
|
||||
type: string;
|
||||
filename: string;
|
||||
}) => {
|
||||
// 导出为文件
|
||||
const blob = new Blob([`\uFEFF${text}`], { type: `${type};charset=utf-8;` });
|
||||
|
||||
// 创建下载链接
|
||||
const downloadLink = document.createElement('a');
|
||||
downloadLink.href = window.URL.createObjectURL(blob);
|
||||
downloadLink.download = filename;
|
||||
|
||||
// 添加链接到页面并触发下载
|
||||
document.body.appendChild(downloadLink);
|
||||
downloadLink.click();
|
||||
document.body.removeChild(downloadLink);
|
||||
};
|
||||
|
||||
/**
|
||||
* text split into chunks
|
||||
* maxLen - one chunk len. max: 3500
|
||||
* slideLen - The size of the before and after Text
|
||||
* maxLen > slideLen
|
||||
*/
|
||||
export const splitText_token = ({
|
||||
text,
|
||||
maxLen,
|
||||
slideLen
|
||||
}: {
|
||||
text: string;
|
||||
maxLen: number;
|
||||
slideLen: number;
|
||||
}) => {
|
||||
try {
|
||||
const enc = getOpenAiEncMap()['gpt-3.5-turbo'];
|
||||
// filter empty text. encode sentence
|
||||
const encodeText = enc.encode(text);
|
||||
|
||||
const chunks: string[] = [];
|
||||
let tokens = 0;
|
||||
|
||||
let startIndex = 0;
|
||||
let endIndex = Math.min(startIndex + maxLen, encodeText.length);
|
||||
let chunkEncodeArr = encodeText.slice(startIndex, endIndex);
|
||||
|
||||
const decoder = new TextDecoder();
|
||||
|
||||
while (startIndex < encodeText.length) {
|
||||
tokens += chunkEncodeArr.length;
|
||||
chunks.push(decoder.decode(enc.decode(chunkEncodeArr)));
|
||||
|
||||
startIndex += maxLen - slideLen;
|
||||
endIndex = Math.min(startIndex + maxLen, encodeText.length);
|
||||
chunkEncodeArr = encodeText.slice(
|
||||
Math.min(encodeText.length - slideLen, startIndex),
|
||||
endIndex
|
||||
);
|
||||
}
|
||||
|
||||
return {
|
||||
chunks,
|
||||
tokens
|
||||
};
|
||||
} catch (err) {
|
||||
throw new Error(getErrText(err));
|
||||
}
|
||||
};
|
||||
|
||||
export const fileToBase64 = (file: File) => {
|
||||
return new Promise((resolve, reject) => {
|
||||
const reader = new FileReader();
|
||||
reader.readAsDataURL(file);
|
||||
reader.onload = () => resolve(reader.result);
|
||||
reader.onerror = (error) => reject(error);
|
||||
});
|
||||
};
|
||||
|
||||
/**
|
||||
* compress image. response base64
|
||||
* @param maxSize The max size of the compressed image
|
||||
*/
|
||||
export const compressImg = ({
|
||||
file,
|
||||
maxW = 200,
|
||||
maxH = 200,
|
||||
maxSize = 1024 * 100
|
||||
}: {
|
||||
file: File;
|
||||
maxW?: number;
|
||||
maxH?: number;
|
||||
maxSize?: number;
|
||||
}) =>
|
||||
new Promise<string>((resolve, reject) => {
|
||||
const reader = new FileReader();
|
||||
reader.readAsDataURL(file);
|
||||
reader.onload = () => {
|
||||
const img = new Image();
|
||||
// @ts-ignore
|
||||
img.src = reader.result;
|
||||
img.onload = () => {
|
||||
let width = img.width;
|
||||
let height = img.height;
|
||||
|
||||
if (width > height) {
|
||||
if (width > maxW) {
|
||||
height *= maxW / width;
|
||||
width = maxW;
|
||||
}
|
||||
} else {
|
||||
if (height > maxH) {
|
||||
width *= maxH / height;
|
||||
height = maxH;
|
||||
}
|
||||
}
|
||||
|
||||
const canvas = document.createElement('canvas');
|
||||
canvas.width = width;
|
||||
canvas.height = height;
|
||||
const ctx = canvas.getContext('2d');
|
||||
|
||||
if (!ctx) {
|
||||
return reject('压缩图片异常');
|
||||
}
|
||||
|
||||
ctx.drawImage(img, 0, 0, width, height);
|
||||
const compressedDataUrl = canvas.toDataURL(file.type, 1);
|
||||
// 移除 canvas 元素
|
||||
canvas.remove();
|
||||
|
||||
if (compressedDataUrl.length > maxSize) {
|
||||
return reject('图片太大了');
|
||||
}
|
||||
resolve(compressedDataUrl);
|
||||
};
|
||||
};
|
||||
reader.onerror = (err) => {
|
||||
console.log(err);
|
||||
reject('压缩图片异常');
|
||||
};
|
||||
});
|
||||
Reference in New Issue
Block a user