Feat: pptx and xlsx loader (#1118)

* perf: plan tip

* perf: upload size controller

* feat: add image ttl index

* feat: new upload file ux

* remove file

* feat: support read pptx

* feat: support xlsx

* fix: rerank docker flie
This commit is contained in:
Archer
2024-04-01 19:01:26 +08:00
committed by GitHub
parent f9d266a6af
commit 21288d1736
90 changed files with 2707 additions and 1678 deletions

View File

@@ -0,0 +1,35 @@
import mammoth from 'mammoth';
import { htmlToMarkdown } from '../../string/markdown';
import { ReadFileByBufferParams, ReadFileResponse } from './type';
import { initMarkdownText } from './utils';
/**
* read docx to markdown
*/
export const readWordFile = async ({
teamId,
buffer,
metadata = {}
}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
try {
const { value: html } = await mammoth.convertToHtml({
buffer
});
const md = await htmlToMarkdown(html);
const rawText = await initMarkdownText({
teamId,
md,
metadata
});
return {
rawText,
metadata: {}
};
} catch (error) {
console.log('error doc read:', error);
return Promise.reject('Can not read doc file, please convert to PDF');
}
};