update apply diff

2025-03-23 09:34:44 +08:00
parent 570e8d9564
commit 635db9babd
34 changed files with 3161 additions and 410 deletions
--- a/src/utils/extract-text.ts
+++ b/src/utils/extract-text.ts
@@ -0,0 +1,215 @@
+// import * as path from "path"
+// // @ts-ignore-next-line
+// import pdf from "pdf-parse/lib/pdf-parse"
+// import mammoth from "mammoth"
+// import fs from "fs/promises"
+// import { isBinaryFile } from "isbinaryfile"
+
+// export async function extractTextFromFile(filePath: string): Promise<string> {
+// 	try {
+// 		await fs.access(filePath)
+// 	} catch (error) {
+// 		throw new Error(`File not found: ${filePath}`)
+// 	}
+// 	const fileExtension = path.extname(filePath).toLowerCase()
+// 	switch (fileExtension) {
+// 		case ".pdf":
+// 			return extractTextFromPDF(filePath)
+// 		case ".docx":
+// 			return extractTextFromDOCX(filePath)
+// 		case ".ipynb":
+// 			return extractTextFromIPYNB(filePath)
+// 		default:
+// 			const isBinary = await isBinaryFile(filePath).catch(() => false)
+// 			if (!isBinary) {
+// 				return addLineNumbers(await fs.readFile(filePath, "utf8"))
+// 			} else {
+// 				throw new Error(`Cannot read text for file type: ${fileExtension}`)
+// 			}
+// 	}
+// }
+
+// async function extractTextFromPDF(filePath: string): Promise<string> {
+// 	const dataBuffer = await fs.readFile(filePath)
+// 	const data = await pdf(dataBuffer)
+// 	return addLineNumbers(data.text)
+// }
+
+// async function extractTextFromDOCX(filePath: string): Promise<string> {
+// 	const result = await mammoth.extractRawText({ path: filePath })
+// 	return addLineNumbers(result.value)
+// }
+
+// async function extractTextFromIPYNB(filePath: string): Promise<string> {
+// 	const data = await fs.readFile(filePath, "utf8")
+// 	const notebook = JSON.parse(data)
+// 	let extractedText = ""
+
+// 	for (const cell of notebook.cells) {
+// 		if ((cell.cell_type === "markdown" || cell.cell_type === "code") && cell.source) {
+// 			extractedText += cell.source.join("\n") + "\n"
+// 		}
+// 	}
+
+// 	return addLineNumbers(extractedText)
+// }
+
+/**
+ * Prefixes each "\n"-separated line of `content` with a right-aligned line number and " | ".
+ * Numbering begins at `startLine`; numbers are space-padded to the width of the largest one.
+ */
+export function addLineNumbers(content: string, startLine: number = 1): string {
+	const rows = content.split("\n")
+	const width = String(startLine + rows.length - 1).length
+	const numbered: string[] = []
+	rows.forEach((text, offset) => numbered.push(`${String(startLine + offset).padStart(width, " ")} | ${text}`))
+	return numbered.join("\n")
+}
+
+// Reports whether every line of `content` (split on LF or CRLF) begins with a line-number prefix,
+// e.g. "1 | text" or " 12 | text": optional whitespace, digits, whitespace, then a single "|" (not "||").
+// An empty line has no prefix, so empty content or content ending in a newline yields false.
+export function everyLineHasLineNumbers(content: string): boolean {
+	return !content.split(/\r?\n/).some((line) => !/^\s*\d+\s+\|(?!\|)/.test(line))
+}
+
+/**
+ * Removes a leading line-number prefix ("1 | ", " 12 | ", "123 | ") from each line of `content`.
+ * Lines without such a prefix, including ones that merely start with "|" or use "||", are kept unchanged.
+ * The result is joined with "\r\n" if `content` contains any CRLF, otherwise with "\n".
+ *
+ * @param content Text whose lines may carry line-number prefixes
+ * @returns The text with the prefixes removed
+ */
+export function stripLineNumbers(content: string): string {
+	// Only the prefix is matched (no trailing `.*$`): `.` cannot match "\r", U+2028 or U+2029, so anchoring
+	// on the rest of the line would leave the number in place on lines containing those characters.
+	const prefix = /^\s*\d+\s+\|(?!\|)\s?/
+	const stripped = content.split(/\r?\n/).map((line) => line.replace(prefix, ""))
+
+	// Re-join with the line ending detected in the input.
+	const lineEnding = content.includes("\r\n") ? "\r\n" : "\n"
+	return stripped.join(lineEnding)
+}
+
+// /**
+//  * Truncates multi-line output while preserving context from both the beginning and end.
+//  * When truncation is needed, it keeps 20% of the lines from the start and 80% from the end,
+//  * with a clear indicator of how many lines were omitted in between.
+//  *
+//  * @param content The multi-line string to truncate
+//  * @param lineLimit Optional maximum number of lines to keep. If not provided or 0, returns the original content
+//  * @returns The truncated string with an indicator of omitted lines, or the original content if no truncation needed
+//  *
+//  * @example
+//  * // With 10 line limit on 25 lines of content:
+//  * // - Keeps first 2 lines (20% of 10)
+//  * // - Keeps last 8 lines (80% of 10)
+//  * // - Adds "[...15 lines omitted...]" in between
+//  */
+// export function truncateOutput(content: string, lineLimit?: number): string {
+// 	if (!lineLimit) {
+// 		return content
+// 	}
+
+// 	// Count total lines
+// 	let totalLines = 0
+// 	let pos = -1
+// 	while ((pos = content.indexOf("\n", pos + 1)) !== -1) {
+// 		totalLines++
+// 	}
+// 	totalLines++ // Account for last line without newline
+
+// 	if (totalLines <= lineLimit) {
+// 		return content
+// 	}
+
+// 	const beforeLimit = Math.floor(lineLimit * 0.2) // 20% of lines before
+// 	const afterLimit = lineLimit - beforeLimit // remaining 80% after
+
+// 	// Find start section end position
+// 	let startEndPos = -1
+// 	let lineCount = 0
+// 	pos = 0
+// 	while (lineCount < beforeLimit && (pos = content.indexOf("\n", pos)) !== -1) {
+// 		startEndPos = pos
+// 		lineCount++
+// 		pos++
+// 	}
+
+// 	// Find end section start position
+// 	let endStartPos = content.length
+// 	lineCount = 0
+// 	pos = content.length
+// 	while (lineCount < afterLimit && (pos = content.lastIndexOf("\n", pos - 1)) !== -1) {
+// 		endStartPos = pos + 1 // Start after the newline
+// 		lineCount++
+// 	}
+
+// 	const omittedLines = totalLines - lineLimit
+// 	const startSection = content.slice(0, startEndPos + 1)
+// 	const endSection = content.slice(endStartPos)
+// 	return startSection + `\n[...${omittedLines} lines omitted...]\n\n` + endSection
+// }
+
+// /**
+//  * Applies run-length encoding to compress repeated lines in text.
+//  * Only compresses when the compression description is shorter than the repeated content.
+//  *
+//  * @param content The text content to compress
+//  * @returns The compressed text with run-length encoding applied
+//  */
+// export function applyRunLengthEncoding(content: string): string {
+// 	if (!content) {
+// 		return content
+// 	}
+
+// 	let result = ""
+// 	let pos = 0
+// 	let repeatCount = 0
+// 	let prevLine = null
+// 	let firstOccurrence = true
+
+// 	while (pos < content.length) {
+// 		const nextNewlineIdx = content.indexOf("\n", pos)
+// 		const currentLine = nextNewlineIdx === -1 ? content.slice(pos) : content.slice(pos, nextNewlineIdx + 1)
+
+// 		if (prevLine === null) {
+// 			prevLine = currentLine
+// 		} else if (currentLine === prevLine) {
+// 			repeatCount++
+// 		} else {
+// 			if (repeatCount > 0) {
+// 				const compressionDesc = `<previous line repeated ${repeatCount} additional times>\n`
+// 				if (compressionDesc.length < prevLine.length * (repeatCount + 1)) {
+// 					result += prevLine + compressionDesc
+// 				} else {
+// 					for (let i = 0; i <= repeatCount; i++) {
+// 						result += prevLine
+// 					}
+// 				}
+// 				repeatCount = 0
+// 			} else {
+// 				result += prevLine
+// 			}
+// 			prevLine = currentLine
+// 		}
+
+// 		pos = nextNewlineIdx === -1 ? content.length : nextNewlineIdx + 1
+// 	}
+
+// 	if (repeatCount > 0 && prevLine !== null) {
+// 		const compressionDesc = `<previous line repeated ${repeatCount} additional times>\n`
+// 		if (compressionDesc.length < prevLine.length * repeatCount) {
+// 			result += prevLine + compressionDesc
+// 		} else {
+// 			for (let i = 0; i <= repeatCount; i++) {
+// 				result += prevLine
+// 			}
+// 		}
+// 	} else if (prevLine !== null) {
+// 		result += prevLine
+// 	}
+
+// 	return result
+// }
--- a/src/utils/parse-infio-block.ts
+++ b/src/utils/parse-infio-block.ts
@@ -33,6 +33,7 @@ export type ParsedMsgBlock =
 	} | {
 		type: 'search_and_replace'
 		path: string
+		content: string
 		operations: {
 			search: string
 			replace: string
@@ -43,6 +44,11 @@ export type ParsedMsgBlock =
 			regex_flags?: string
 		}[]
 		finish: boolean
+	} | {
+		type: 'apply_diff'
+		path: string
+		diff: string
+		finish: boolean
 	} | {
 		type: 'ask_followup_question'
 		question: string,
@@ -224,7 +230,7 @@ export function parseMsgBlocks(
 				}
 				let path: string | undefined
 				let regex: string | undefined
-				
+
 				for (const childNode of node.childNodes) {
 					if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) {
 						path = childNode.childNodes[0].value
@@ -361,6 +367,7 @@ export function parseMsgBlocks(
 				}
 				let path: string | undefined
 				let operations = []
+				let content: string = ''

 				// 处理子标签
 				for (const childNode of node.childNodes) {
@@ -368,8 +375,8 @@ export function parseMsgBlocks(
 						path = childNode.childNodes[0].value
 					} else if (childNode.nodeName === 'operations' && childNode.childNodes.length > 0) {
 						try {
-							const operationsJson = childNode.childNodes[0].value
-							operations = JSON5.parse(operationsJson)
+							content = childNode.childNodes[0].value
+							operations = JSON5.parse(content)
 						} catch (error) {
 							console.error('Failed to parse operations JSON', error)
 						}
@@ -379,10 +386,41 @@ export function parseMsgBlocks(
 				parsedResult.push({
 					type: 'search_and_replace',
 					path,
+					content,
 					operations,
 					finish: node.sourceCodeLocation.endTag !== undefined
 				})
 				lastEndOffset = endOffset
+			} else if (node.nodeName === 'apply_diff') {
+				if (!node.sourceCodeLocation) {
+					throw new Error('sourceCodeLocation is undefined')
+				}
+				const startOffset = node.sourceCodeLocation.startOffset
+				const endOffset = node.sourceCodeLocation.endOffset
+				if (startOffset > lastEndOffset) {
+					parsedResult.push({
+						type: 'string',
+						content: input.slice(lastEndOffset, startOffset),
+					})
+				}
+				let path: string | undefined
+				let diff: string | undefined
+
+				for (const childNode of node.childNodes) {
+					if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) {
+						path = childNode.childNodes[0].value
+					} else if (childNode.nodeName === 'diff' && childNode.childNodes.length > 0) {
+						diff = childNode.childNodes[0].value
+					}
+				}
+
+				parsedResult.push({
+					type: 'apply_diff',
+					path,
+					diff,
+					finish: node.sourceCodeLocation.endTag !== undefined
+				})
+				lastEndOffset = endOffset
 			} else if (node.nodeName === 'attempt_completion') {
 				if (!node.sourceCodeLocation) {
 					throw new Error('sourceCodeLocation is undefined')
@@ -443,10 +481,10 @@ export function parseMsgBlocks(
 						content: input.slice(lastEndOffset, startOffset),
 					})
 				}
-				
+
 				let mode: string = ''
 				let reason: string = ''
-				
+
 				for (const childNode of node.childNodes) {
 					if (childNode.nodeName === 'mode_slug' && childNode.childNodes.length > 0) {
 						// @ts-ignore - 忽略 value 属性的类型错误
@@ -456,7 +494,7 @@ export function parseMsgBlocks(
 						reason = childNode.childNodes[0].value
 					}
 				}
-				
+
 				parsedResult.push({
 					type: 'switch_mode',
 					mode,
@@ -500,9 +538,9 @@ export function parseMsgBlocks(
 						content: input.slice(lastEndOffset, startOffset),
 					})
 				}
-				
+
 				let urls: string[] = []
-				
+
 				for (const childNode of node.childNodes) {
 					if (childNode.nodeName === 'urls' && childNode.childNodes.length > 0) {
 						try {
@@ -516,7 +554,7 @@ export function parseMsgBlocks(
 						}
 					}
 				}
-				
+
 				parsedResult.push({
 					type: 'fetch_urls_content',
 					urls,
--- a/src/utils/prompt-generator.ts
+++ b/src/utils/prompt-generator.ts
@@ -1,7 +1,8 @@
-import { App, MarkdownView, TAbstractFile, TFile, TFolder, Vault, htmlToMarkdown, requestUrl, getLanguage } from 'obsidian'
+import { App, MarkdownView, TAbstractFile, TFile, TFolder, Vault, getLanguage, htmlToMarkdown, requestUrl } from 'obsidian'

 import { editorStateToPlainText } from '../components/chat-view/chat-input/utils/editor-state-to-plain-text'
 import { QueryProgressState } from '../components/chat-view/QueryProgress'
+import { DiffStrategy } from '../core/diff/DiffStrategy'
 import { SYSTEM_PROMPT } from '../core/prompts/system'
 import { RAGEngine } from '../core/rag/rag-engine'
 import { SelectVector } from '../database/schema'
@@ -113,7 +114,7 @@ export class PromptGenerator {
 	private getRagEngine: () => Promise<RAGEngine>
 	private app: App
 	private settings: InfioSettings
-
+	private diffStrategy: DiffStrategy
 	private static readonly EMPTY_ASSISTANT_MESSAGE: RequestMessage = {
 		role: 'assistant',
 		content: '',
@@ -123,10 +124,12 @@ export class PromptGenerator {
 		getRagEngine: () => Promise<RAGEngine>,
 		app: App,
 		settings: InfioSettings,
+		diffStrategy?: DiffStrategy,
 	) {
 		this.getRagEngine = getRagEngine
 		this.app = app
 		this.settings = settings
+		this.diffStrategy = diffStrategy
 	}

 	public async generateRequestMessages({
@@ -165,7 +168,7 @@ export class PromptGenerator {
 				similaritySearchResults,
 			},
 		]
-		console.log('this.settings.mode', this.settings.mode)
+
 		let filesSearchMethod = this.settings.filesSearchMethod
 		if (filesSearchMethod === 'auto' && this.settings.embeddingModelId && this.settings.embeddingModelId !== '') {
 			filesSearchMethod = 'semantic'
@@ -173,10 +176,8 @@ export class PromptGenerator {
 			filesSearchMethod = 'regex'
 		}

-		console.log('filesSearchMethod: ', filesSearchMethod)
-
 		const userLanguage = getFullLanguageName(getLanguage())
-		console.log(' current user language: ', userLanguage)
+
 		const systemMessage = await this.getSystemMessageNew(this.settings.mode, filesSearchMethod, userLanguage)

 		const requestMessages: RequestMessage[] = [
@@ -466,7 +467,7 @@ export class PromptGenerator {
 	}

 	private async getSystemMessageNew(mode: Mode, filesSearchMethod: string, preferredLanguage: string): Promise<RequestMessage> {
-		const systemPrompt = await SYSTEM_PROMPT(this.app.vault.getRoot().path, false, mode, filesSearchMethod, preferredLanguage)
+		const systemPrompt = await SYSTEM_PROMPT(this.app.vault.getRoot().path, false, mode, filesSearchMethod, preferredLanguage, this.diffStrategy)

 		return {
 			role: 'system',