Feat: pptx and xlsx loader (#1118)
* perf: plan tip * perf: upload size controller * feat: add image ttl index * feat: new upload file ux * remove file * feat: support read pptx * feat: support xlsx * fix: rerank docker flie
This commit is contained in:
35
packages/service/common/file/read/word.ts
Normal file
35
packages/service/common/file/read/word.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
import mammoth from 'mammoth';
|
||||
import { htmlToMarkdown } from '../../string/markdown';
|
||||
import { ReadFileByBufferParams, ReadFileResponse } from './type';
|
||||
import { initMarkdownText } from './utils';
|
||||
|
||||
/**
|
||||
* read docx to markdown
|
||||
*/
|
||||
export const readWordFile = async ({
|
||||
teamId,
|
||||
buffer,
|
||||
metadata = {}
|
||||
}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
|
||||
try {
|
||||
const { value: html } = await mammoth.convertToHtml({
|
||||
buffer
|
||||
});
|
||||
|
||||
const md = await htmlToMarkdown(html);
|
||||
|
||||
const rawText = await initMarkdownText({
|
||||
teamId,
|
||||
md,
|
||||
metadata
|
||||
});
|
||||
|
||||
return {
|
||||
rawText,
|
||||
metadata: {}
|
||||
};
|
||||
} catch (error) {
|
||||
console.log('error doc read:', error);
|
||||
return Promise.reject('Can not read doc file, please convert to PDF');
|
||||
}
|
||||
};
|
||||
Reference in New Issue
Block a user