update apply diff
This commit is contained in:
215
src/utils/extract-text.ts
Normal file
215
src/utils/extract-text.ts
Normal file
@@ -0,0 +1,215 @@
|
||||
// import * as path from "path"
|
||||
// // @ts-ignore-next-line
|
||||
// import pdf from "pdf-parse/lib/pdf-parse"
|
||||
// import mammoth from "mammoth"
|
||||
// import fs from "fs/promises"
|
||||
// import { isBinaryFile } from "isbinaryfile"
|
||||
|
||||
// export async function extractTextFromFile(filePath: string): Promise<string> {
|
||||
// try {
|
||||
// await fs.access(filePath)
|
||||
// } catch (error) {
|
||||
// throw new Error(`File not found: ${filePath}`)
|
||||
// }
|
||||
// const fileExtension = path.extname(filePath).toLowerCase()
|
||||
// switch (fileExtension) {
|
||||
// case ".pdf":
|
||||
// return extractTextFromPDF(filePath)
|
||||
// case ".docx":
|
||||
// return extractTextFromDOCX(filePath)
|
||||
// case ".ipynb":
|
||||
// return extractTextFromIPYNB(filePath)
|
||||
// default:
|
||||
// const isBinary = await isBinaryFile(filePath).catch(() => false)
|
||||
// if (!isBinary) {
|
||||
// return addLineNumbers(await fs.readFile(filePath, "utf8"))
|
||||
// } else {
|
||||
// throw new Error(`Cannot read text for file type: ${fileExtension}`)
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// async function extractTextFromPDF(filePath: string): Promise<string> {
|
||||
// const dataBuffer = await fs.readFile(filePath)
|
||||
// const data = await pdf(dataBuffer)
|
||||
// return addLineNumbers(data.text)
|
||||
// }
|
||||
|
||||
// async function extractTextFromDOCX(filePath: string): Promise<string> {
|
||||
// const result = await mammoth.extractRawText({ path: filePath })
|
||||
// return addLineNumbers(result.value)
|
||||
// }
|
||||
|
||||
// async function extractTextFromIPYNB(filePath: string): Promise<string> {
|
||||
// const data = await fs.readFile(filePath, "utf8")
|
||||
// const notebook = JSON.parse(data)
|
||||
// let extractedText = ""
|
||||
|
||||
// for (const cell of notebook.cells) {
|
||||
// if ((cell.cell_type === "markdown" || cell.cell_type === "code") && cell.source) {
|
||||
// extractedText += cell.source.join("\n") + "\n"
|
||||
// }
|
||||
// }
|
||||
|
||||
// return addLineNumbers(extractedText)
|
||||
// }
|
||||
|
||||
/**
 * Prefixes every line of `content` with its line number followed by " | ".
 *
 * Line numbers are right-aligned: each one is left-padded with spaces to the
 * width of the largest number in the output, so the pipe separators line up.
 * The input is split on "\n" only and re-joined with "\n"; an empty string is
 * treated as a single empty line and yields "1 | " (for the default start).
 *
 * @param content Text to number.
 * @param startLine Number assigned to the first line. Defaults to 1.
 * @returns The numbered text.
 *
 * @example
 * addLineNumbers("a\nb", 9) // " 9 | a\n10 | b"
 */
export function addLineNumbers(content: string, startLine: number = 1): string {
	const lines = content.split("\n")
	// Width of the last (largest) line number; every prefix is padded to this length.
	const maxLineNumberWidth = String(startLine + lines.length - 1).length
	return lines
		.map((line, index) => {
			const lineNumber = String(startLine + index).padStart(maxLineNumberWidth, " ")
			return `${lineNumber} | ${line}`
		})
		.join("\n")
}
|
||||
|
||||
|
||||
/**
 * Checks whether every line of `content` starts with a line-number prefix
 * such as "1 | content" or " 123 | content".
 *
 * A prefix is: optional leading whitespace, one or more digits, at least one
 * whitespace character, then a single pipe. A double pipe ("1 || x") is
 * rejected so that content using "||" is not mistaken for a numbered line.
 * Lines are split on "\n" or "\r\n".
 *
 * @param content Text to inspect.
 * @returns `true` only if all lines carry a prefix. An empty string, or text
 *          with a trailing newline (which produces a final empty line),
 *          returns `false`.
 */
export function everyLineHasLineNumbers(content: string): boolean {
	const lineNumberPrefix = /^\s*\d+\s+\|(?!\|)/
	// split() always returns at least one element, so no emptiness guard is
	// needed: "" becomes [""] and that single empty line fails the pattern.
	return content.split(/\r?\n/).every((line) => lineNumberPrefix.test(line))
}
|
||||
|
||||
/**
 * Removes line-number prefixes from each line while keeping the line's content.
 *
 * Handles prefixes like "1 | content", " 12 | content" and "123 | content":
 * optional leading whitespace, digits, whitespace, a single pipe, and at most
 * one whitespace character after the pipe. Lines without such a prefix are
 * returned untouched, as are lines where the digits are followed by a double
 * pipe ("1 || x"), so content that naturally starts with pipes survives.
 *
 * Line endings: the input is split on "\n" or "\r\n". If the input contains
 * any "\r\n", every line is re-joined with "\r\n"; otherwise with "\n". Input
 * with mixed endings is therefore normalized to "\r\n".
 *
 * @param content Text whose lines may carry line-number prefixes.
 * @returns The text with prefixes removed.
 */
export function stripLineNumbers(content: string): string {
	// Only the prefix is matched and removed. Capturing the remainder with
	// `(.*)$` would fail on lines containing a character `.` does not match
	// (lone "\r", U+2028, U+2029) and leave those lines numbered.
	const lineNumberPrefix = /^\s*\d+\s+\|(?!\|)\s?/

	const processedLines = content.split(/\r?\n/).map((line) => line.replace(lineNumberPrefix, ""))

	const lineEnding = content.includes("\r\n") ? "\r\n" : "\n"
	return processedLines.join(lineEnding)
}
|
||||
|
||||
// /**
|
||||
// * Truncates multi-line output while preserving context from both the beginning and end.
|
||||
// * When truncation is needed, it keeps 20% of the lines from the start and 80% from the end,
|
||||
// * with a clear indicator of how many lines were omitted in between.
|
||||
// *
|
||||
// * @param content The multi-line string to truncate
|
||||
// * @param lineLimit Optional maximum number of lines to keep. If not provided or 0, returns the original content
|
||||
// * @returns The truncated string with an indicator of omitted lines, or the original content if no truncation needed
|
||||
// *
|
||||
// * @example
|
||||
// * // With 10 line limit on 25 lines of content:
|
||||
// * // - Keeps first 2 lines (20% of 10)
|
||||
// * // - Keeps last 8 lines (80% of 10)
|
||||
// * // - Adds "[...15 lines omitted...]" in between
|
||||
// */
|
||||
// export function truncateOutput(content: string, lineLimit?: number): string {
|
||||
// if (!lineLimit) {
|
||||
// return content
|
||||
// }
|
||||
|
||||
// // Count total lines
|
||||
// let totalLines = 0
|
||||
// let pos = -1
|
||||
// while ((pos = content.indexOf("\n", pos + 1)) !== -1) {
|
||||
// totalLines++
|
||||
// }
|
||||
// totalLines++ // Account for last line without newline
|
||||
|
||||
// if (totalLines <= lineLimit) {
|
||||
// return content
|
||||
// }
|
||||
|
||||
// const beforeLimit = Math.floor(lineLimit * 0.2) // 20% of lines before
|
||||
// const afterLimit = lineLimit - beforeLimit // remaining 80% after
|
||||
|
||||
// // Find start section end position
|
||||
// let startEndPos = -1
|
||||
// let lineCount = 0
|
||||
// pos = 0
|
||||
// while (lineCount < beforeLimit && (pos = content.indexOf("\n", pos)) !== -1) {
|
||||
// startEndPos = pos
|
||||
// lineCount++
|
||||
// pos++
|
||||
// }
|
||||
|
||||
// // Find end section start position
|
||||
// let endStartPos = content.length
|
||||
// lineCount = 0
|
||||
// pos = content.length
|
||||
// while (lineCount < afterLimit && (pos = content.lastIndexOf("\n", pos - 1)) !== -1) {
|
||||
// endStartPos = pos + 1 // Start after the newline
|
||||
// lineCount++
|
||||
// }
|
||||
|
||||
// const omittedLines = totalLines - lineLimit
|
||||
// const startSection = content.slice(0, startEndPos + 1)
|
||||
// const endSection = content.slice(endStartPos)
|
||||
// return startSection + `\n[...${omittedLines} lines omitted...]\n\n` + endSection
|
||||
// }
|
||||
|
||||
// /**
|
||||
// * Applies run-length encoding to compress repeated lines in text.
|
||||
// * Only compresses when the compression description is shorter than the repeated content.
|
||||
// *
|
||||
// * @param content The text content to compress
|
||||
// * @returns The compressed text with run-length encoding applied
|
||||
// */
|
||||
// export function applyRunLengthEncoding(content: string): string {
|
||||
// if (!content) {
|
||||
// return content
|
||||
// }
|
||||
|
||||
// let result = ""
|
||||
// let pos = 0
|
||||
// let repeatCount = 0
|
||||
// let prevLine = null
|
||||
// let firstOccurrence = true
|
||||
|
||||
// while (pos < content.length) {
|
||||
// const nextNewlineIdx = content.indexOf("\n", pos)
|
||||
// const currentLine = nextNewlineIdx === -1 ? content.slice(pos) : content.slice(pos, nextNewlineIdx + 1)
|
||||
|
||||
// if (prevLine === null) {
|
||||
// prevLine = currentLine
|
||||
// } else if (currentLine === prevLine) {
|
||||
// repeatCount++
|
||||
// } else {
|
||||
// if (repeatCount > 0) {
|
||||
// const compressionDesc = `<previous line repeated ${repeatCount} additional times>\n`
|
||||
// if (compressionDesc.length < prevLine.length * (repeatCount + 1)) {
|
||||
// result += prevLine + compressionDesc
|
||||
// } else {
|
||||
// for (let i = 0; i <= repeatCount; i++) {
|
||||
// result += prevLine
|
||||
// }
|
||||
// }
|
||||
// repeatCount = 0
|
||||
// } else {
|
||||
// result += prevLine
|
||||
// }
|
||||
// prevLine = currentLine
|
||||
// }
|
||||
|
||||
// pos = nextNewlineIdx === -1 ? content.length : nextNewlineIdx + 1
|
||||
// }
|
||||
|
||||
// if (repeatCount > 0 && prevLine !== null) {
|
||||
// const compressionDesc = `<previous line repeated ${repeatCount} additional times>\n`
|
||||
// if (compressionDesc.length < prevLine.length * repeatCount) {
|
||||
// result += prevLine + compressionDesc
|
||||
// } else {
|
||||
// for (let i = 0; i <= repeatCount; i++) {
|
||||
// result += prevLine
|
||||
// }
|
||||
// }
|
||||
// } else if (prevLine !== null) {
|
||||
// result += prevLine
|
||||
// }
|
||||
|
||||
// return result
|
||||
// }
|
||||
@@ -33,6 +33,7 @@ export type ParsedMsgBlock =
|
||||
} | {
|
||||
type: 'search_and_replace'
|
||||
path: string
|
||||
content: string
|
||||
operations: {
|
||||
search: string
|
||||
replace: string
|
||||
@@ -43,6 +44,11 @@ export type ParsedMsgBlock =
|
||||
regex_flags?: string
|
||||
}[]
|
||||
finish: boolean
|
||||
} | {
|
||||
type: 'apply_diff'
|
||||
path: string
|
||||
diff: string
|
||||
finish: boolean
|
||||
} | {
|
||||
type: 'ask_followup_question'
|
||||
question: string,
|
||||
@@ -224,7 +230,7 @@ export function parseMsgBlocks(
|
||||
}
|
||||
let path: string | undefined
|
||||
let regex: string | undefined
|
||||
|
||||
|
||||
for (const childNode of node.childNodes) {
|
||||
if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) {
|
||||
path = childNode.childNodes[0].value
|
||||
@@ -361,6 +367,7 @@ export function parseMsgBlocks(
|
||||
}
|
||||
let path: string | undefined
|
||||
let operations = []
|
||||
let content: string = ''
|
||||
|
||||
// 处理子标签
|
||||
for (const childNode of node.childNodes) {
|
||||
@@ -368,8 +375,8 @@ export function parseMsgBlocks(
|
||||
path = childNode.childNodes[0].value
|
||||
} else if (childNode.nodeName === 'operations' && childNode.childNodes.length > 0) {
|
||||
try {
|
||||
const operationsJson = childNode.childNodes[0].value
|
||||
operations = JSON5.parse(operationsJson)
|
||||
content = childNode.childNodes[0].value
|
||||
operations = JSON5.parse(content)
|
||||
} catch (error) {
|
||||
console.error('Failed to parse operations JSON', error)
|
||||
}
|
||||
@@ -379,10 +386,41 @@ export function parseMsgBlocks(
|
||||
parsedResult.push({
|
||||
type: 'search_and_replace',
|
||||
path,
|
||||
content,
|
||||
operations,
|
||||
finish: node.sourceCodeLocation.endTag !== undefined
|
||||
})
|
||||
lastEndOffset = endOffset
|
||||
} else if (node.nodeName === 'apply_diff') {
|
||||
if (!node.sourceCodeLocation) {
|
||||
throw new Error('sourceCodeLocation is undefined')
|
||||
}
|
||||
const startOffset = node.sourceCodeLocation.startOffset
|
||||
const endOffset = node.sourceCodeLocation.endOffset
|
||||
if (startOffset > lastEndOffset) {
|
||||
parsedResult.push({
|
||||
type: 'string',
|
||||
content: input.slice(lastEndOffset, startOffset),
|
||||
})
|
||||
}
|
||||
let path: string | undefined
|
||||
let diff: string | undefined
|
||||
|
||||
for (const childNode of node.childNodes) {
|
||||
if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) {
|
||||
path = childNode.childNodes[0].value
|
||||
} else if (childNode.nodeName === 'diff' && childNode.childNodes.length > 0) {
|
||||
diff = childNode.childNodes[0].value
|
||||
}
|
||||
}
|
||||
|
||||
parsedResult.push({
|
||||
type: 'apply_diff',
|
||||
path,
|
||||
diff,
|
||||
finish: node.sourceCodeLocation.endTag !== undefined
|
||||
})
|
||||
lastEndOffset = endOffset
|
||||
} else if (node.nodeName === 'attempt_completion') {
|
||||
if (!node.sourceCodeLocation) {
|
||||
throw new Error('sourceCodeLocation is undefined')
|
||||
@@ -443,10 +481,10 @@ export function parseMsgBlocks(
|
||||
content: input.slice(lastEndOffset, startOffset),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
let mode: string = ''
|
||||
let reason: string = ''
|
||||
|
||||
|
||||
for (const childNode of node.childNodes) {
|
||||
if (childNode.nodeName === 'mode_slug' && childNode.childNodes.length > 0) {
|
||||
// @ts-ignore - 忽略 value 属性的类型错误
|
||||
@@ -456,7 +494,7 @@ export function parseMsgBlocks(
|
||||
reason = childNode.childNodes[0].value
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
parsedResult.push({
|
||||
type: 'switch_mode',
|
||||
mode,
|
||||
@@ -500,9 +538,9 @@ export function parseMsgBlocks(
|
||||
content: input.slice(lastEndOffset, startOffset),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
let urls: string[] = []
|
||||
|
||||
|
||||
for (const childNode of node.childNodes) {
|
||||
if (childNode.nodeName === 'urls' && childNode.childNodes.length > 0) {
|
||||
try {
|
||||
@@ -516,7 +554,7 @@ export function parseMsgBlocks(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
parsedResult.push({
|
||||
type: 'fetch_urls_content',
|
||||
urls,
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import { App, MarkdownView, TAbstractFile, TFile, TFolder, Vault, htmlToMarkdown, requestUrl, getLanguage } from 'obsidian'
|
||||
import { App, MarkdownView, TAbstractFile, TFile, TFolder, Vault, getLanguage, htmlToMarkdown, requestUrl } from 'obsidian'
|
||||
|
||||
import { editorStateToPlainText } from '../components/chat-view/chat-input/utils/editor-state-to-plain-text'
|
||||
import { QueryProgressState } from '../components/chat-view/QueryProgress'
|
||||
import { DiffStrategy } from '../core/diff/DiffStrategy'
|
||||
import { SYSTEM_PROMPT } from '../core/prompts/system'
|
||||
import { RAGEngine } from '../core/rag/rag-engine'
|
||||
import { SelectVector } from '../database/schema'
|
||||
@@ -113,7 +114,7 @@ export class PromptGenerator {
|
||||
private getRagEngine: () => Promise<RAGEngine>
|
||||
private app: App
|
||||
private settings: InfioSettings
|
||||
|
||||
private diffStrategy: DiffStrategy
|
||||
private static readonly EMPTY_ASSISTANT_MESSAGE: RequestMessage = {
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
@@ -123,10 +124,12 @@ export class PromptGenerator {
|
||||
getRagEngine: () => Promise<RAGEngine>,
|
||||
app: App,
|
||||
settings: InfioSettings,
|
||||
diffStrategy?: DiffStrategy,
|
||||
) {
|
||||
this.getRagEngine = getRagEngine
|
||||
this.app = app
|
||||
this.settings = settings
|
||||
this.diffStrategy = diffStrategy
|
||||
}
|
||||
|
||||
public async generateRequestMessages({
|
||||
@@ -165,7 +168,7 @@ export class PromptGenerator {
|
||||
similaritySearchResults,
|
||||
},
|
||||
]
|
||||
console.log('this.settings.mode', this.settings.mode)
|
||||
|
||||
let filesSearchMethod = this.settings.filesSearchMethod
|
||||
if (filesSearchMethod === 'auto' && this.settings.embeddingModelId && this.settings.embeddingModelId !== '') {
|
||||
filesSearchMethod = 'semantic'
|
||||
@@ -173,10 +176,8 @@ export class PromptGenerator {
|
||||
filesSearchMethod = 'regex'
|
||||
}
|
||||
|
||||
console.log('filesSearchMethod: ', filesSearchMethod)
|
||||
|
||||
const userLanguage = getFullLanguageName(getLanguage())
|
||||
console.log(' current user language: ', userLanguage)
|
||||
|
||||
const systemMessage = await this.getSystemMessageNew(this.settings.mode, filesSearchMethod, userLanguage)
|
||||
|
||||
const requestMessages: RequestMessage[] = [
|
||||
@@ -466,7 +467,7 @@ export class PromptGenerator {
|
||||
}
|
||||
|
||||
private async getSystemMessageNew(mode: Mode, filesSearchMethod: string, preferredLanguage: string): Promise<RequestMessage> {
|
||||
const systemPrompt = await SYSTEM_PROMPT(this.app.vault.getRoot().path, false, mode, filesSearchMethod, preferredLanguage)
|
||||
const systemPrompt = await SYSTEM_PROMPT(this.app.vault.getRoot().path, false, mode, filesSearchMethod, preferredLanguage, this.diffStrategy)
|
||||
|
||||
return {
|
||||
role: 'system',
|
||||
|
||||
Reference in New Issue
Block a user