update apply diff

This commit is contained in:
duanfuxiang
2025-03-23 09:34:44 +08:00
parent 570e8d9564
commit 635db9babd
34 changed files with 3161 additions and 410 deletions

215
src/utils/extract-text.ts Normal file
View File

@@ -0,0 +1,215 @@
// import * as path from "path"
// // @ts-ignore-next-line
// import pdf from "pdf-parse/lib/pdf-parse"
// import mammoth from "mammoth"
// import fs from "fs/promises"
// import { isBinaryFile } from "isbinaryfile"
// export async function extractTextFromFile(filePath: string): Promise<string> {
// try {
// await fs.access(filePath)
// } catch (error) {
// throw new Error(`File not found: ${filePath}`)
// }
// const fileExtension = path.extname(filePath).toLowerCase()
// switch (fileExtension) {
// case ".pdf":
// return extractTextFromPDF(filePath)
// case ".docx":
// return extractTextFromDOCX(filePath)
// case ".ipynb":
// return extractTextFromIPYNB(filePath)
// default:
// const isBinary = await isBinaryFile(filePath).catch(() => false)
// if (!isBinary) {
// return addLineNumbers(await fs.readFile(filePath, "utf8"))
// } else {
// throw new Error(`Cannot read text for file type: ${fileExtension}`)
// }
// }
// }
// async function extractTextFromPDF(filePath: string): Promise<string> {
// const dataBuffer = await fs.readFile(filePath)
// const data = await pdf(dataBuffer)
// return addLineNumbers(data.text)
// }
// async function extractTextFromDOCX(filePath: string): Promise<string> {
// const result = await mammoth.extractRawText({ path: filePath })
// return addLineNumbers(result.value)
// }
// async function extractTextFromIPYNB(filePath: string): Promise<string> {
// const data = await fs.readFile(filePath, "utf8")
// const notebook = JSON.parse(data)
// let extractedText = ""
// for (const cell of notebook.cells) {
// if ((cell.cell_type === "markdown" || cell.cell_type === "code") && cell.source) {
// extractedText += cell.source.join("\n") + "\n"
// }
// }
// return addLineNumbers(extractedText)
// }
/**
 * Prefixes each line of `content` with a right-aligned line number followed by ` | `.
 *
 * Lines are separated on "\n" only. Every resulting segment is numbered, including an
 * empty string (which yields a single numbered empty line) and the empty segment that
 * follows a trailing newline.
 *
 * @param content The text whose lines should be numbered
 * @param startLine The number assigned to the first line (defaults to 1)
 * @returns The numbered text, with lines joined by "\n"
 */
export function addLineNumbers(content: string, startLine: number = 1): string {
	const rows = content.split("\n")
	// The last line carries the largest number; its digit count sets the padding width
	// so that all the " | " separators line up.
	const lastNumber = startLine + rows.length - 1
	const width = String(lastNumber).length

	const numbered: string[] = []
	let current = startLine
	for (const row of rows) {
		const label = String(current).padStart(width, " ")
		numbered.push(`${label} | ${row}`)
		current++
	}
	return numbered.join("\n")
}
/**
 * Reports whether every line of `content` begins with a line-number prefix such as
 * "1 | content" or " 123 | content".
 *
 * A valid prefix is optional leading whitespace, one or more digits, at least one
 * whitespace character, and then a single pipe that is not immediately followed by
 * another pipe. Lines are separated on LF or CRLF.
 *
 * @param content The text to inspect
 * @returns `true` only when all lines carry a line-number prefix
 */
export function everyLineHasLineNumbers(content: string): boolean {
	const numberedPrefix = /^\s*\d+\s+\|(?!\|)/
	// Splitting a string always yields at least one segment, so an empty input is
	// checked as a single empty line (which has no prefix and therefore fails).
	for (const row of content.split(/\r?\n/)) {
		if (!numberedPrefix.test(row)) {
			return false
		}
	}
	return true
}
/**
 * Removes line-number prefixes (e.g. "1 | ", " 12 | ", "123 | ") from each line of
 * `content`, leaving the text after the prefix intact.
 *
 * Only one optional whitespace character after the pipe is removed, so further
 * indentation survives. Lines without a prefix, and lines where the digits are followed
 * by a double pipe, are returned unchanged.
 *
 * @param content The possibly line-numbered text
 * @returns The text without prefixes; lines are joined with "\r\n" if the input
 *          contains any CRLF sequence, otherwise with "\n"
 */
export function stripLineNumbers(content: string): string {
	// Prefix pattern; the capture group holds everything after the pipe and one
	// optional separating whitespace character.
	const numberedLine = /^\s*\d+\s+\|(?!\|)\s?(.*)$/
	// Pick the output line ending from the input: any CRLF makes the whole result CRLF.
	const eol = content.includes("\r\n") ? "\r\n" : "\n"

	const stripped = content.split(/\r?\n/).map((row) => {
		const hit = numberedLine.exec(row)
		if (hit === null) {
			return row
		}
		return hit[1]
	})
	return stripped.join(eol)
}
// /**
// * Truncates multi-line output while preserving context from both the beginning and end.
// * When truncation is needed, it keeps 20% of the lines from the start and 80% from the end,
// * with a clear indicator of how many lines were omitted in between.
// *
// * @param content The multi-line string to truncate
// * @param lineLimit Optional maximum number of lines to keep. If not provided or 0, returns the original content
// * @returns The truncated string with an indicator of omitted lines, or the original content if no truncation needed
// *
// * @example
// * // With 10 line limit on 25 lines of content:
// * // - Keeps first 2 lines (20% of 10)
// * // - Keeps last 8 lines (80% of 10)
// * // - Adds "[...15 lines omitted...]" in between
// */
// export function truncateOutput(content: string, lineLimit?: number): string {
// if (!lineLimit) {
// return content
// }
// // Count total lines
// let totalLines = 0
// let pos = -1
// while ((pos = content.indexOf("\n", pos + 1)) !== -1) {
// totalLines++
// }
// totalLines++ // Account for last line without newline
// if (totalLines <= lineLimit) {
// return content
// }
// const beforeLimit = Math.floor(lineLimit * 0.2) // 20% of lines before
// const afterLimit = lineLimit - beforeLimit // remaining 80% after
// // Find start section end position
// let startEndPos = -1
// let lineCount = 0
// pos = 0
// while (lineCount < beforeLimit && (pos = content.indexOf("\n", pos)) !== -1) {
// startEndPos = pos
// lineCount++
// pos++
// }
// // Find end section start position
// let endStartPos = content.length
// lineCount = 0
// pos = content.length
// while (lineCount < afterLimit && (pos = content.lastIndexOf("\n", pos - 1)) !== -1) {
// endStartPos = pos + 1 // Start after the newline
// lineCount++
// }
// const omittedLines = totalLines - lineLimit
// const startSection = content.slice(0, startEndPos + 1)
// const endSection = content.slice(endStartPos)
// return startSection + `\n[...${omittedLines} lines omitted...]\n\n` + endSection
// }
// /**
// * Applies run-length encoding to compress repeated lines in text.
// * Only compresses when the compression description is shorter than the repeated content.
// *
// * @param content The text content to compress
// * @returns The compressed text with run-length encoding applied
// */
// export function applyRunLengthEncoding(content: string): string {
// if (!content) {
// return content
// }
// let result = ""
// let pos = 0
// let repeatCount = 0
// let prevLine = null
// let firstOccurrence = true
// while (pos < content.length) {
// const nextNewlineIdx = content.indexOf("\n", pos)
// const currentLine = nextNewlineIdx === -1 ? content.slice(pos) : content.slice(pos, nextNewlineIdx + 1)
// if (prevLine === null) {
// prevLine = currentLine
// } else if (currentLine === prevLine) {
// repeatCount++
// } else {
// if (repeatCount > 0) {
// const compressionDesc = `<previous line repeated ${repeatCount} additional times>\n`
// if (compressionDesc.length < prevLine.length * (repeatCount + 1)) {
// result += prevLine + compressionDesc
// } else {
// for (let i = 0; i <= repeatCount; i++) {
// result += prevLine
// }
// }
// repeatCount = 0
// } else {
// result += prevLine
// }
// prevLine = currentLine
// }
// pos = nextNewlineIdx === -1 ? content.length : nextNewlineIdx + 1
// }
// if (repeatCount > 0 && prevLine !== null) {
// const compressionDesc = `<previous line repeated ${repeatCount} additional times>\n`
// if (compressionDesc.length < prevLine.length * repeatCount) {
// result += prevLine + compressionDesc
// } else {
// for (let i = 0; i <= repeatCount; i++) {
// result += prevLine
// }
// }
// } else if (prevLine !== null) {
// result += prevLine
// }
// return result
// }

View File

@@ -33,6 +33,7 @@ export type ParsedMsgBlock =
} | {
type: 'search_and_replace'
path: string
content: string
operations: {
search: string
replace: string
@@ -43,6 +44,11 @@ export type ParsedMsgBlock =
regex_flags?: string
}[]
finish: boolean
} | {
type: 'apply_diff'
path: string
diff: string
finish: boolean
} | {
type: 'ask_followup_question'
question: string,
@@ -224,7 +230,7 @@ export function parseMsgBlocks(
}
let path: string | undefined
let regex: string | undefined
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) {
path = childNode.childNodes[0].value
@@ -361,6 +367,7 @@ export function parseMsgBlocks(
}
let path: string | undefined
let operations = []
let content: string = ''
// 处理子标签
for (const childNode of node.childNodes) {
@@ -368,8 +375,8 @@ export function parseMsgBlocks(
path = childNode.childNodes[0].value
} else if (childNode.nodeName === 'operations' && childNode.childNodes.length > 0) {
try {
const operationsJson = childNode.childNodes[0].value
operations = JSON5.parse(operationsJson)
content = childNode.childNodes[0].value
operations = JSON5.parse(content)
} catch (error) {
console.error('Failed to parse operations JSON', error)
}
@@ -379,10 +386,41 @@ export function parseMsgBlocks(
parsedResult.push({
type: 'search_and_replace',
path,
content,
operations,
finish: node.sourceCodeLocation.endTag !== undefined
})
lastEndOffset = endOffset
} else if (node.nodeName === 'apply_diff') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
let path: string | undefined
let diff: string | undefined
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) {
path = childNode.childNodes[0].value
} else if (childNode.nodeName === 'diff' && childNode.childNodes.length > 0) {
diff = childNode.childNodes[0].value
}
}
parsedResult.push({
type: 'apply_diff',
path,
diff,
finish: node.sourceCodeLocation.endTag !== undefined
})
lastEndOffset = endOffset
} else if (node.nodeName === 'attempt_completion') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
@@ -443,10 +481,10 @@ export function parseMsgBlocks(
content: input.slice(lastEndOffset, startOffset),
})
}
let mode: string = ''
let reason: string = ''
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'mode_slug' && childNode.childNodes.length > 0) {
// @ts-ignore - 忽略 value 属性的类型错误
@@ -456,7 +494,7 @@ export function parseMsgBlocks(
reason = childNode.childNodes[0].value
}
}
parsedResult.push({
type: 'switch_mode',
mode,
@@ -500,9 +538,9 @@ export function parseMsgBlocks(
content: input.slice(lastEndOffset, startOffset),
})
}
let urls: string[] = []
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'urls' && childNode.childNodes.length > 0) {
try {
@@ -516,7 +554,7 @@ export function parseMsgBlocks(
}
}
}
parsedResult.push({
type: 'fetch_urls_content',
urls,

View File

@@ -1,7 +1,8 @@
import { App, MarkdownView, TAbstractFile, TFile, TFolder, Vault, htmlToMarkdown, requestUrl, getLanguage } from 'obsidian'
import { App, MarkdownView, TAbstractFile, TFile, TFolder, Vault, getLanguage, htmlToMarkdown, requestUrl } from 'obsidian'
import { editorStateToPlainText } from '../components/chat-view/chat-input/utils/editor-state-to-plain-text'
import { QueryProgressState } from '../components/chat-view/QueryProgress'
import { DiffStrategy } from '../core/diff/DiffStrategy'
import { SYSTEM_PROMPT } from '../core/prompts/system'
import { RAGEngine } from '../core/rag/rag-engine'
import { SelectVector } from '../database/schema'
@@ -113,7 +114,7 @@ export class PromptGenerator {
private getRagEngine: () => Promise<RAGEngine>
private app: App
private settings: InfioSettings
private diffStrategy: DiffStrategy
private static readonly EMPTY_ASSISTANT_MESSAGE: RequestMessage = {
role: 'assistant',
content: '',
@@ -123,10 +124,12 @@ export class PromptGenerator {
getRagEngine: () => Promise<RAGEngine>,
app: App,
settings: InfioSettings,
diffStrategy?: DiffStrategy,
) {
this.getRagEngine = getRagEngine
this.app = app
this.settings = settings
this.diffStrategy = diffStrategy
}
public async generateRequestMessages({
@@ -165,7 +168,7 @@ export class PromptGenerator {
similaritySearchResults,
},
]
console.log('this.settings.mode', this.settings.mode)
let filesSearchMethod = this.settings.filesSearchMethod
if (filesSearchMethod === 'auto' && this.settings.embeddingModelId && this.settings.embeddingModelId !== '') {
filesSearchMethod = 'semantic'
@@ -173,10 +176,8 @@ export class PromptGenerator {
filesSearchMethod = 'regex'
}
console.log('filesSearchMethod: ', filesSearchMethod)
const userLanguage = getFullLanguageName(getLanguage())
console.log(' current user language: ', userLanguage)
const systemMessage = await this.getSystemMessageNew(this.settings.mode, filesSearchMethod, userLanguage)
const requestMessages: RequestMessage[] = [
@@ -466,7 +467,7 @@ export class PromptGenerator {
}
private async getSystemMessageNew(mode: Mode, filesSearchMethod: string, preferredLanguage: string): Promise<RequestMessage> {
const systemPrompt = await SYSTEM_PROMPT(this.app.vault.getRoot().path, false, mode, filesSearchMethod, preferredLanguage)
const systemPrompt = await SYSTEM_PROMPT(this.app.vault.getRoot().path, false, mode, filesSearchMethod, preferredLanguage, this.diffStrategy)
return {
role: 'system',