fix: pdf null byte error

This commit is contained in:
duanfuxiang
2025-06-13 11:11:04 +08:00
parent 540226a792
commit f6728f1b82
4 changed files with 39 additions and 22 deletions

View File

@@ -8,7 +8,7 @@ export async function parsePdfContent(file: TFile, app: App): Promise<string> {
try {
// 使用 Obsidian 内置的 PDF.js
const pdfjsLib = await loadPdfJs()
// Read PDF file as binary buffer
const pdfBuffer = await app.vault.readBinary(file)
@@ -26,7 +26,9 @@ export async function parsePdfContent(file: TFile, app: App): Promise<string> {
fullText += pageText + '\n\n'
}
return fullText || '(Empty PDF content)'
// 清理null字节防止PostgreSQL UTF8编码错误
const cleanText = (fullText || '(Empty PDF content)').replace(/\0/g, '')
return cleanText
} catch (error: any) {
console.error('Error parsing PDF:', error)
return `(Error reading PDF file: ${error?.message || 'Unknown error'})`
@@ -36,27 +38,42 @@ export async function parsePdfContent(file: TFile, app: App): Promise<string> {
/**
 * Reads the text of a Markdown file through the vault cache.
 *
 * @param file - File to read; only files whose extension is `md` are read.
 * @param vault - Vault whose `cachedRead` supplies the file content.
 * @returns The file content with every NUL character removed, or a fixed
 *   placeholder string when the file is not Markdown.
 */
export async function readTFileContent(
	file: TFile,
	vault: Vault,
): Promise<string> {
	if (file.extension !== 'md') {
		return "(Binary file, unable to display content)"
	}
	const content = await vault.cachedRead(file)
	// Strip NUL bytes to prevent PostgreSQL UTF8 encoding errors.
	return content.replace(/\0/g, '')
}
/**
 * Reads a file as text, with support for PDF files in addition to Markdown.
 *
 * - `pdf`: parsed via `parsePdfContent` when `app` is supplied; otherwise a
 *   fixed placeholder string is returned.
 * - `md`: read through `vault.cachedRead`.
 * - anything else: a fixed placeholder string is returned.
 *
 * @param file - File to read; dispatch is based on `file.extension`.
 * @param vault - Vault whose `cachedRead` supplies Markdown content.
 * @param app - Optional app instance, forwarded to `parsePdfContent` for PDFs.
 * @returns The extracted text with every NUL character removed, or one of the
 *   placeholder strings described above.
 */
export async function readTFileContentPdf(
	file: TFile,
	vault: Vault,
	app?: App,
): Promise<string> {
	if (file.extension === 'pdf') {
		if (!app) {
			return "(PDF file, app context required for processing)"
		}
		const pdfText = await parsePdfContent(file, app)
		// Strip NUL bytes to prevent PostgreSQL UTF8 encoding errors.
		return pdfText.replace(/\0/g, '')
	}
	if (file.extension !== 'md') {
		return "(Binary file, unable to display content)"
	}
	const content = await vault.cachedRead(file)
	// Strip NUL bytes to prevent PostgreSQL UTF8 encoding errors.
	return content.replace(/\0/g, '')
}
/**
 * Reads several files concurrently via `readTFileContent`.
 *
 * @param files - Files to read.
 * @param vault - Vault passed to `readTFileContent` for every file.
 * @param _app - Unused. NOTE(review): kept optional so callers that still pass
 *   an app instance should continue to type-check — confirm whether any remain
 *   and drop the parameter if not.
 * @returns One string per input file, in the same order as `files`.
 */
export async function readMultipleTFiles(
	files: TFile[],
	vault: Vault,
	_app?: App,
): Promise<string[]> {
	// Start every read before awaiting so the files are read in parallel.
	return await Promise.all(files.map((file) => readTFileContent(file, vault)))
}

View File

@@ -87,7 +87,7 @@ async function getFileOrFolderContent(
if (path.extension != 'md') {
return "(Binary file, unable to display content)"
}
return addLineNumbers(await readTFileContent(path, vault, app))
return addLineNumbers(await readTFileContent(path, vault))
} else if (path instanceof TFolder) {
const entries = path.children
let folderContent = ""
@@ -111,7 +111,7 @@ async function getFileOrFolderContent(
if (entry.extension != 'md') {
return undefined
}
const content = addLineNumbers(await readTFileContent(entry, vault, app))
const content = addLineNumbers(await readTFileContent(entry, vault))
return `<file_content path="${entry.path}">\n${content}\n</file_content>`
} catch (error) {
return undefined
@@ -883,7 +883,7 @@ ${customInstruction}
private async getCurrentFileMessage(
currentFile: TFile,
): Promise<RequestMessage> {
const fileContent = await readTFileContent(currentFile, this.app.vault, this.app)
const fileContent = await readTFileContent(currentFile, this.app.vault)
return {
role: 'user',
content: `# Inputs
@@ -905,7 +905,7 @@ ${fileContent}
return null;
}
const fileContent = await readTFileContent(currentFile, this.app.vault, this.app);
const fileContent = await readTFileContent(currentFile, this.app.vault);
const lines = fileContent.split('\n');
// 计算上下文范围,并处理边界情况