Feat: pptx and xlsx loader (#1118)
* perf: plan tip * perf: upload size controller * feat: add image ttl index * feat: new upload file ux * remove file * feat: support read pptx * feat: support xlsx * fix: rerank docker flie
This commit is contained in:
23
packages/service/common/file/read/html.ts
Normal file
23
packages/service/common/file/read/html.ts
Normal file
@@ -0,0 +1,23 @@
|
||||
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
|
||||
import { initMarkdownText } from './utils';
|
||||
import { htmlToMarkdown } from '../../string/markdown';
|
||||
import { readFileRawText } from './rawText';
|
||||
|
||||
export const readHtmlRawText = async (
|
||||
params: ReadFileByBufferParams
|
||||
): Promise<ReadFileResponse> => {
|
||||
const { teamId, metadata } = params;
|
||||
const { rawText: html } = readFileRawText(params);
|
||||
|
||||
const md = await htmlToMarkdown(html);
|
||||
|
||||
const rawText = await initMarkdownText({
|
||||
teamId,
|
||||
md,
|
||||
metadata
|
||||
});
|
||||
|
||||
return {
|
||||
rawText
|
||||
};
|
||||
};
|
||||
Reference in New Issue
Block a user