add tool use, update system prompt

This commit is contained in:
duanfuxiang
2025-03-12 21:39:29 +08:00
parent cabf2d5fa4
commit b0fbbb22d3
36 changed files with 7149 additions and 430 deletions

View File

@@ -0,0 +1,295 @@
import { applyContextMatching, applyDMP, applyGitFallback } from "../edit-strategies"
import { Hunk } from "../types"
// Shared fixture table for the edit-strategy suites below.
// Each entry describes one hunk, the file content (as lines) it is applied to, and the
// position at which the hunk is assumed to match (-1 means "no match found").
// - `strategies` lists which suites run the case; the suites skip cases that do not name them.
// - `expectedResult` is the expected output joined with "\n"; this is what the suites compare.
// - `expected.confidence` is used as a lower bound (toBeGreaterThanOrEqual), not an exact value.
// - `expected.result` is not asserted by the suites in this file; it documents the expected lines.
const testCases = [
{
name: "should return original content if no match is found",
hunk: {
changes: [
{ type: "context", content: "line1" },
{ type: "add", content: "line2" },
],
} as Hunk,
content: ["line1", "line3"],
matchPosition: -1,
expected: {
confidence: 0,
result: ["line1", "line3"],
},
expectedResult: "line1\nline3",
strategies: ["context", "dmp"],
},
{
name: "should apply a simple add change",
hunk: {
changes: [
{ type: "context", content: "line1" },
{ type: "add", content: "line2" },
],
} as Hunk,
content: ["line1", "line3"],
matchPosition: 0,
expected: {
confidence: 1,
result: ["line1", "line2", "line3"],
},
expectedResult: "line1\nline2\nline3",
strategies: ["context", "dmp"],
},
{
name: "should apply a simple remove change",
hunk: {
changes: [
{ type: "context", content: "line1" },
{ type: "remove", content: "line2" },
],
} as Hunk,
content: ["line1", "line2", "line3"],
matchPosition: 0,
expected: {
confidence: 1,
result: ["line1", "line3"],
},
expectedResult: "line1\nline3",
strategies: ["context", "dmp"],
},
{
name: "should apply a simple context change",
hunk: {
changes: [{ type: "context", content: "line1" }],
} as Hunk,
content: ["line1", "line2", "line3"],
matchPosition: 0,
expected: {
confidence: 1,
result: ["line1", "line2", "line3"],
},
expectedResult: "line1\nline2\nline3",
strategies: ["context", "dmp"],
},
// Multi-line cases: a single change entry carries several lines separated by "\n".
{
name: "should apply a multi-line add change",
hunk: {
changes: [
{ type: "context", content: "line1" },
{ type: "add", content: "line2\nline3" },
],
} as Hunk,
content: ["line1", "line4"],
matchPosition: 0,
expected: {
confidence: 1,
result: ["line1", "line2\nline3", "line4"],
},
expectedResult: "line1\nline2\nline3\nline4",
strategies: ["context", "dmp"],
},
{
name: "should apply a multi-line remove change",
hunk: {
changes: [
{ type: "context", content: "line1" },
{ type: "remove", content: "line2\nline3" },
],
} as Hunk,
content: ["line1", "line2", "line3", "line4"],
matchPosition: 0,
expected: {
confidence: 1,
result: ["line1", "line4"],
},
expectedResult: "line1\nline4",
strategies: ["context", "dmp"],
},
{
name: "should apply a multi-line context change",
hunk: {
changes: [
{ type: "context", content: "line1" },
{ type: "context", content: "line2\nline3" },
],
} as Hunk,
content: ["line1", "line2", "line3", "line4"],
matchPosition: 0,
expected: {
confidence: 1,
result: ["line1", "line2\nline3", "line4"],
},
expectedResult: "line1\nline2\nline3\nline4",
strategies: ["context", "dmp"],
},
// Indentation cases: leading whitespace in the fixtures must come through unchanged.
{
name: "should apply a change with indentation",
hunk: {
changes: [
{ type: "context", content: " line1" },
{ type: "add", content: " line2" },
],
} as Hunk,
content: [" line1", " line3"],
matchPosition: 0,
expected: {
confidence: 1,
result: [" line1", " line2", " line3"],
},
expectedResult: " line1\n line2\n line3",
strategies: ["context", "dmp"],
},
{
name: "should apply a change with mixed indentation",
hunk: {
changes: [
{ type: "context", content: "\tline1" },
{ type: "add", content: " line2" },
],
} as Hunk,
content: ["\tline1", " line3"],
matchPosition: 0,
expected: {
confidence: 1,
result: ["\tline1", " line2", " line3"],
},
expectedResult: "\tline1\n line2\n line3",
strategies: ["context", "dmp"],
},
{
name: "should apply a change with mixed indentation and multi-line",
hunk: {
changes: [
{ type: "context", content: " line1" },
{ type: "add", content: "\tline2\n line3" },
],
} as Hunk,
content: [" line1", " line4"],
matchPosition: 0,
expected: {
confidence: 1,
result: [" line1", "\tline2\n line3", " line4"],
},
expectedResult: " line1\n\tline2\n line3\n line4",
strategies: ["context", "dmp"],
},
// Combined cases: remove + add surrounded by context.
{
name: "should apply a complex change with mixed indentation and multi-line",
hunk: {
changes: [
{ type: "context", content: " line1" },
{ type: "remove", content: " line2" },
{ type: "add", content: "\tline3\n line4" },
{ type: "context", content: " line5" },
],
} as Hunk,
content: [" line1", " line2", " line5", " line6"],
matchPosition: 0,
expected: {
confidence: 1,
result: [" line1", "\tline3\n line4", " line5", " line6"],
},
expectedResult: " line1\n\tline3\n line4\n line5\n line6",
strategies: ["context", "dmp"],
},
{
name: "should apply a complex change with mixed indentation and multi-line and context",
hunk: {
changes: [
{ type: "context", content: " line1" },
{ type: "remove", content: " line2" },
{ type: "add", content: "\tline3\n line4" },
{ type: "context", content: " line5" },
{ type: "context", content: " line6" },
],
} as Hunk,
content: [" line1", " line2", " line5", " line6", " line7"],
matchPosition: 0,
expected: {
confidence: 1,
result: [" line1", "\tline3\n line4", " line5", " line6", " line7"],
},
expectedResult: " line1\n\tline3\n line4\n line5\n line6\n line7",
strategies: ["context", "dmp"],
},
// Same hunk as above, but the match starts at index 1 instead of 0.
{
name: "should apply a complex change with mixed indentation and multi-line and context and a different match position",
hunk: {
changes: [
{ type: "context", content: " line1" },
{ type: "remove", content: " line2" },
{ type: "add", content: "\tline3\n line4" },
{ type: "context", content: " line5" },
{ type: "context", content: " line6" },
],
} as Hunk,
content: [" line0", " line1", " line2", " line5", " line6", " line7"],
matchPosition: 1,
expected: {
confidence: 1,
result: [" line0", " line1", "\tline3\n line4", " line5", " line6", " line7"],
},
expectedResult: " line0\n line1\n\tline3\n line4\n line5\n line6\n line7",
strategies: ["context", "dmp"],
},
]
// Runs every shared fixture that names the "context" strategy through applyContextMatching.
describe("applyContextMatching", () => {
	for (const testCase of testCases) {
		// Fixtures that do not list this strategy are not registered at all
		if (!testCase.strategies?.includes("context")) {
			continue
		}

		it(testCase.name, () => {
			const outcome = applyContextMatching(testCase.hunk, testCase.content, testCase.matchPosition)

			expect(outcome.result.join("\n")).toEqual(testCase.expectedResult)
			// The fixture confidence is a lower bound, not an exact value
			expect(outcome.confidence).toBeGreaterThanOrEqual(testCase.expected.confidence)
			expect(outcome.strategy).toBe("context")
		})
	}
})
// Runs every shared fixture that names the "dmp" strategy through applyDMP.
describe("applyDMP", () => {
	const dmpCases = testCases.filter((testCase) => testCase.strategies?.includes("dmp"))

	dmpCases.forEach((testCase) => {
		it(testCase.name, () => {
			const { hunk, content, matchPosition, expected, expectedResult } = testCase
			const outcome = applyDMP(hunk, content, matchPosition)

			expect(outcome.result.join("\n")).toEqual(expectedResult)
			// The fixture confidence is a lower bound, not an exact value
			expect(outcome.confidence).toBeGreaterThanOrEqual(expected.confidence)
			expect(outcome.strategy).toBe("dmp")
		})
	})
})
// Exercises the git-based fallback with one hunk that applies and one that cannot.
describe("applyGitFallback", () => {
	it("should successfully apply changes using git operations", async () => {
		const fileLines = ["line1", "line2", "line3"]
		// Replace the middle line while keeping both neighbours as context
		const replaceMiddleLine = {
			changes: [
				{ type: "context", content: "line1", indent: "" },
				{ type: "remove", content: "line2", indent: "" },
				{ type: "add", content: "new line2", indent: "" },
				{ type: "context", content: "line3", indent: "" },
			],
		} as Hunk

		const outcome = await applyGitFallback(replaceMiddleLine, fileLines)

		expect(outcome.result.join("\n")).toEqual("line1\nnew line2\nline3")
		expect(outcome.confidence).toBe(1)
		expect(outcome.strategy).toBe("git-fallback")
	})

	it("should return original content with 0 confidence when changes cannot be applied", async () => {
		const fileLines = ["line1", "line2", "line3"]
		// The context line does not occur anywhere in the file
		const unmatchedHunk = {
			changes: [
				{ type: "context", content: "nonexistent", indent: "" },
				{ type: "add", content: "new line", indent: "" },
			],
		} as Hunk

		const outcome = await applyGitFallback(unmatchedHunk, fileLines)

		expect(outcome.result).toEqual(fileLines)
		expect(outcome.confidence).toBe(0)
		expect(outcome.strategy).toBe("git-fallback")
	})
})

View File

@@ -0,0 +1,262 @@
import { findAnchorMatch, findExactMatch, findSimilarityMatch, findLevenshteinMatch } from "../search-strategies"
/** Shape of the value a search strategy reports back. */
interface SearchStrategyResult {
	index: number
	confidence: number
	strategy: string
}

/**
 * Signature shared by the search strategies: look for `searchStr` in the
 * `content` lines, optionally starting from `startIndex`.
 */
type SearchStrategy = (searchStr: string, content: string[], startIndex?: number) => SearchStrategyResult
// Shared fixture table for the exact / similarity / levenshtein suites below.
// Each entry gives the text to search for, the file content as lines, an optional
// `startIndex` to begin searching from, and the expected match.
// - `strategies` lists which suites run the case; the suites skip cases that do not name them.
// - `expected.index` is compared exactly; -1 means "no match".
// - `expected.confidence` is used as a lower bound (toBeGreaterThanOrEqual).
// The multi-line "no match" cases list only "exact" and "similarity", so the
// levenshtein suite does not run them.
const testCases = [
{
name: "should return no match if the search string is not found",
searchStr: "not found",
content: ["line1", "line2", "line3"],
expected: { index: -1, confidence: 0 },
strategies: ["exact", "similarity", "levenshtein"],
},
{
name: "should return a match if the search string is found",
searchStr: "line2",
content: ["line1", "line2", "line3"],
expected: { index: 1, confidence: 1 },
strategies: ["exact", "similarity", "levenshtein"],
},
{
name: "should return a match with correct index when startIndex is provided",
searchStr: "line3",
content: ["line1", "line2", "line3", "line4", "line3"],
startIndex: 3,
expected: { index: 4, confidence: 1 },
strategies: ["exact", "similarity", "levenshtein"],
},
{
name: "should return a match even if there are more lines in content",
searchStr: "line2",
content: ["line1", "line2", "line3", "line4", "line5"],
expected: { index: 1, confidence: 1 },
strategies: ["exact", "similarity", "levenshtein"],
},
{
name: "should return a match even if the search string is at the beginning of the content",
searchStr: "line1",
content: ["line1", "line2", "line3"],
expected: { index: 0, confidence: 1 },
strategies: ["exact", "similarity", "levenshtein"],
},
{
name: "should return a match even if the search string is at the end of the content",
searchStr: "line3",
content: ["line1", "line2", "line3"],
expected: { index: 2, confidence: 1 },
strategies: ["exact", "similarity", "levenshtein"],
},
// Multi-line search strings: lines are separated by "\n" inside `searchStr`.
{
name: "should return a match for a multi-line search string",
searchStr: "line2\nline3",
content: ["line1", "line2", "line3", "line4"],
expected: { index: 1, confidence: 1 },
strategies: ["exact", "similarity", "levenshtein"],
},
{
name: "should return no match if a multi-line search string is not found",
searchStr: "line2\nline4",
content: ["line1", "line2", "line3", "line4"],
expected: { index: -1, confidence: 0 },
strategies: ["exact", "similarity"],
},
// Indentation cases.
{
name: "should return a match with indentation",
searchStr: " line2",
content: ["line1", " line2", "line3"],
expected: { index: 1, confidence: 1 },
strategies: ["exact", "similarity", "levenshtein"],
},
{
name: "should return a match with more complex indentation",
searchStr: " line3",
content: [" line1", " line2", " line3", " line4"],
expected: { index: 2, confidence: 1 },
strategies: ["exact", "similarity", "levenshtein"],
},
{
name: "should return a match with mixed indentation",
searchStr: "\tline2",
content: [" line1", "\tline2", " line3"],
expected: { index: 1, confidence: 1 },
strategies: ["exact", "similarity", "levenshtein"],
},
{
name: "should return a match with mixed indentation and multi-line",
searchStr: " line2\n\tline3",
content: ["line1", " line2", "\tline3", " line4"],
expected: { index: 1, confidence: 1 },
strategies: ["exact", "similarity", "levenshtein"],
},
{
name: "should return no match if mixed indentation and multi-line is not found",
searchStr: " line2\n line4",
content: ["line1", " line2", "\tline3", " line4"],
expected: { index: -1, confidence: 0 },
strategies: ["exact", "similarity"],
},
// Leading/trailing whitespace cases.
{
name: "should return a match with leading and trailing spaces",
searchStr: " line2 ",
content: ["line1", " line2 ", "line3"],
expected: { index: 1, confidence: 1 },
strategies: ["exact", "similarity", "levenshtein"],
},
{
name: "should return a match with leading and trailing tabs",
searchStr: "\tline2\t",
content: ["line1", "\tline2\t", "line3"],
expected: { index: 1, confidence: 1 },
strategies: ["exact", "similarity", "levenshtein"],
},
{
name: "should return a match with mixed leading and trailing spaces and tabs",
searchStr: " \tline2\t ",
content: ["line1", " \tline2\t ", "line3"],
expected: { index: 1, confidence: 1 },
strategies: ["exact", "similarity", "levenshtein"],
},
{
name: "should return a match with mixed leading and trailing spaces and tabs and multi-line",
searchStr: " \tline2\t \n line3 ",
content: ["line1", " \tline2\t ", " line3 ", "line4"],
expected: { index: 1, confidence: 1 },
strategies: ["exact", "similarity", "levenshtein"],
},
{
name: "should return no match if mixed leading and trailing spaces and tabs and multi-line is not found",
searchStr: " \tline2\t \n line4 ",
content: ["line1", " \tline2\t ", " line3 ", "line4"],
expected: { index: -1, confidence: 0 },
strategies: ["exact", "similarity"],
},
]
// Runs every shared fixture that names the "exact" strategy through findExactMatch.
describe("findExactMatch", () => {
	const exactCases = testCases.filter((testCase) => testCase.strategies?.includes("exact"))

	for (const { name, searchStr, content, startIndex, expected } of exactCases) {
		it(name, () => {
			const match = findExactMatch(searchStr, content, startIndex)

			expect(match.index).toBe(expected.index)
			// The fixture confidence is a lower bound, not an exact value
			expect(match.confidence).toBeGreaterThanOrEqual(expected.confidence)
			// Either strategy label is accepted here
			expect(match.strategy).toMatch(/exact(-overlapping)?/)
		})
	}
})
// Anchor matching has its own fixture table (it does not use the shared `testCases`).
describe("findAnchorMatch", () => {
	const anchorCases = [
		{
			name: "should return no match if no anchors are found",
			searchStr: " \n \n ",
			content: ["line1", "line2", "line3"],
			expected: { index: -1, confidence: 0 },
		},
		{
			name: "should return no match if anchor positions cannot be validated",
			searchStr: "unique line\ncontext line 1\ncontext line 2",
			content: [
				"different line 1",
				"different line 2",
				"different line 3",
				"another unique line",
				"context line 1",
				"context line 2",
			],
			expected: { index: -1, confidence: 0 },
		},
		{
			name: "should return a match if anchor positions can be validated",
			searchStr: "unique line\ncontext line 1\ncontext line 2",
			content: ["line1", "line2", "unique line", "context line 1", "context line 2", "line 6"],
			expected: { index: 2, confidence: 1 },
		},
		{
			name: "should return a match with correct index when startIndex is provided",
			searchStr: "unique line\ncontext line 1\ncontext line 2",
			content: ["line1", "line2", "line3", "unique line", "context line 1", "context line 2", "line 7"],
			startIndex: 3,
			expected: { index: 3, confidence: 1 },
		},
		{
			name: "should return a match even if there are more lines in content",
			searchStr: "unique line\ncontext line 1\ncontext line 2",
			content: [
				"line1",
				"line2",
				"unique line",
				"context line 1",
				"context line 2",
				"line 6",
				"extra line 1",
				"extra line 2",
			],
			expected: { index: 2, confidence: 1 },
		},
		{
			name: "should return a match even if the anchor is at the beginning of the content",
			searchStr: "unique line\ncontext line 1\ncontext line 2",
			content: ["unique line", "context line 1", "context line 2", "line 6"],
			expected: { index: 0, confidence: 1 },
		},
		{
			name: "should return a match even if the anchor is at the end of the content",
			searchStr: "unique line\ncontext line 1\ncontext line 2",
			content: ["line1", "line2", "unique line", "context line 1", "context line 2"],
			expected: { index: 2, confidence: 1 },
		},
		{
			name: "should return no match if no valid anchor is found",
			searchStr: "non-unique line\ncontext line 1\ncontext line 2",
			content: ["line1", "line2", "non-unique line", "context line 1", "context line 2", "non-unique line"],
			expected: { index: -1, confidence: 0 },
		},
	]

	for (const anchorCase of anchorCases) {
		it(anchorCase.name, () => {
			const match = findAnchorMatch(anchorCase.searchStr, anchorCase.content, anchorCase.startIndex)

			expect(match.index).toBe(anchorCase.expected.index)
			// The fixture confidence is a lower bound, not an exact value
			expect(match.confidence).toBeGreaterThanOrEqual(anchorCase.expected.confidence)
			expect(match.strategy).toBe("anchor")
		})
	}
})
// Runs every shared fixture that names the "similarity" strategy through findSimilarityMatch.
describe("findSimilarityMatch", () => {
	for (const testCase of testCases) {
		// Fixtures that do not list this strategy are not registered at all
		if (!testCase.strategies?.includes("similarity")) {
			continue
		}

		it(testCase.name, () => {
			const match = findSimilarityMatch(testCase.searchStr, testCase.content, testCase.startIndex)

			expect(match.index).toBe(testCase.expected.index)
			// The fixture confidence is a lower bound, not an exact value
			expect(match.confidence).toBeGreaterThanOrEqual(testCase.expected.confidence)
			expect(match.strategy).toBe("similarity")
		})
	}
})
// Runs every shared fixture that names the "levenshtein" strategy through findLevenshteinMatch.
describe("findLevenshteinMatch", () => {
	const levenshteinCases = testCases.filter((testCase) => testCase.strategies?.includes("levenshtein"))

	levenshteinCases.forEach((testCase) => {
		it(testCase.name, () => {
			const { searchStr, content, startIndex, expected } = testCase
			const match = findLevenshteinMatch(searchStr, content, startIndex)

			expect(match.index).toBe(expected.index)
			// The fixture confidence is a lower bound, not an exact value
			expect(match.confidence).toBeGreaterThanOrEqual(expected.confidence)
			expect(match.strategy).toBe("levenshtein")
		})
	})
})

View File

@@ -0,0 +1,297 @@
import { diff_match_patch } from "diff-match-patch"
import { EditResult, Hunk } from "./types"
import { getDMPSimilarity, validateEditResult } from "./search-strategies"
import * as path from "path"
import simpleGit, { SimpleGit } from "simple-git"
import * as tmp from "tmp"
import * as fs from "fs"
/**
 * Chooses the indentation for a line, in order of preference:
 *   1. the line's own leading whitespace, if it has any;
 *   2. the leading whitespace of the first context line, if it has any;
 *   3. `previousIndent` (empty string by default).
 *
 * @param line - The line whose indentation is wanted.
 * @param contextLines - Surrounding lines; only the first one is consulted.
 * @param previousIndent - Value returned when neither source has leading whitespace.
 * @returns The whitespace prefix to use.
 */
function inferIndentation(line: string, contextLines: string[], previousIndent: string = ""): string {
	const leadingWhitespace = /^(\s+)/

	const ownIndent = leadingWhitespace.exec(line)
	if (ownIndent !== null) {
		return ownIndent[1]
	}

	// An absent or empty first context line contributes nothing
	const [firstContextLine] = contextLines
	const inheritedIndent = firstContextLine ? leadingWhitespace.exec(firstContextLine) : null

	return inheritedIndent !== null ? inheritedIndent[1] : previousIndent
}
// Context matching edit strategy
/**
 * Applies a hunk by walking its changes in order, starting at `matchPosition`
 * in `content`:
 *   - context lines are copied from `content` (the hunk text is used only when
 *     the hunk runs past the end of the file);
 *   - added lines are inserted;
 *   - removed lines are skipped in `content`.
 *
 * A change whose `content` holds several lines separated by "\n" consumes or
 * produces that many lines. This also holds for context changes, so that the
 * changes that follow stay aligned with the source.
 *
 * @param hunk - The hunk to apply.
 * @param content - The file as an array of lines; it is not modified.
 * @param matchPosition - Index in `content` where the hunk starts; a negative
 *   value means no match was found.
 * @returns The edited lines, the confidence reported by `validateEditResult`
 *   for the edited region, and the strategy label "context". When there is no
 *   match, `content` is returned unchanged with confidence 0.
 */
export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number): EditResult {
	// No match: nothing to apply
	if (matchPosition < 0) {
		return { confidence: 0, result: content, strategy: "context" }
	}

	const editedLines = content.slice(0, matchPosition)
	let sourceIndex = matchPosition

	for (const change of hunk.changes) {
		if (change.type === "context") {
			const hunkText = change.indent ? change.indent + change.content : change.content
			// One source line is consumed per context line, including multi-line context
			for (const hunkLine of hunkText.split("\n")) {
				// Prefer the file's own line; fall back to the hunk text past the end of the file
				editedLines.push(sourceIndex < content.length ? content[sourceIndex] : hunkLine)
				sourceIndex++
			}
		} else if (change.type === "add") {
			const baseIndent = change.indent || ""
			for (const line of change.content.split("\n")) {
				// Lines that already start with whitespace are kept verbatim;
				// others get the change's indent prepended
				editedLines.push(/^\s/.test(line) ? line : baseIndent + line)
			}
		} else if (change.type === "remove") {
			// Skip one source line per removed line
			sourceIndex += change.content.split("\n").length
		}
	}

	// Text of the region the hunk produced, before the untouched tail is appended
	const afterText = editedLines.slice(matchPosition).join("\n")
	const confidence = validateEditResult(hunk, afterText)

	return {
		confidence,
		// concat avoids spreading a potentially very large tail into push()
		result: editedLines.concat(content.slice(sourceIndex)),
		strategy: "context",
	}
}
// DMP edit strategy
/**
 * Applies a hunk with diff-match-patch: a patch is built from the hunk's
 * BEFORE text (context + removals) to its AFTER text (context + additions)
 * and applied to the slice of `content` that starts at `matchPosition`.
 *
 * @param hunk - The hunk to apply.
 * @param content - The file as an array of lines; it is not modified.
 * @param matchPosition - Index in `content` where the hunk starts; a negative
 *   value means no match was found.
 * @returns The edited lines, the confidence reported by `validateEditResult`
 *   for the patched text, and the strategy label "dmp". `content` is returned
 *   unchanged with confidence 0 when there is no match or when any patch
 *   could not be applied.
 */
export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): EditResult {
	if (matchPosition < 0) {
		return { confidence: 0, result: content, strategy: "dmp" }
	}

	const dmp = new diff_match_patch()

	// Text of one change: the original diff line when recorded, else indent + content
	const toLine = (change: Hunk["changes"][number]): string => {
		if (change.originalLine) {
			return change.originalLine
		}
		return change.indent ? change.indent + change.content : change.content
	}

	// BEFORE block (context + removals) and AFTER block (context + additions)
	const beforeChanges = hunk.changes.filter((change) => change.type === "context" || change.type === "remove")
	const afterChanges = hunk.changes.filter((change) => change.type === "context" || change.type === "add")

	// Number of source lines the BEFORE block covers, accounting for multi-line content
	const beforeLineCount = beforeChanges.reduce((count, change) => count + change.content.split("\n").length, 0)

	const beforeText = beforeChanges.map(toLine).join("\n")
	const afterText = afterChanges.map(toLine).join("\n")

	// Create and apply patch
	const patch = dmp.patch_make(beforeText, afterText)
	const targetText = content.slice(matchPosition, matchPosition + beforeLineCount).join("\n")
	const [patchedText, applied] = dmp.patch_apply(patch, targetText)

	// patch_apply reports per-patch success; a partially applied edit must not be accepted
	if (applied.some((ok) => !ok)) {
		return { confidence: 0, result: content, strategy: "dmp" }
	}

	// A hunk with no AFTER lines deletes the region outright; "".split("\n") would
	// otherwise leave one stray empty line behind
	const patchedLines = afterChanges.length === 0 && patchedText === "" ? [] : patchedText.split("\n")

	// Construct final result
	const newResult = [
		...content.slice(0, matchPosition),
		...patchedLines,
		...content.slice(matchPosition + beforeLineCount),
	]
	const confidence = validateEditResult(hunk, patchedText)

	return {
		confidence,
		result: newResult,
		strategy: "dmp",
	}
}
// Git fallback strategy that works with full content
/**
 * Applies a hunk by replaying it as git commits in a throw-away repository
 * and cherry-picking the resulting change onto the original file content.
 *
 * Two commit orderings are tried in turn:
 *   1. commit original -> search -> replace, check out the original commit,
 *      cherry-pick the replace commit;
 *   2. commit search -> replace, check out the search commit, commit the
 *      original on top of it, cherry-pick the replace commit.
 *
 * @param hunk - The hunk to apply. Context + removed lines form the "search"
 *   text; context + added lines form the "replace" text.
 * @param content - The whole file as an array of lines; it is not modified.
 * @returns The file lines after a successful cherry-pick with confidence 1,
 *   otherwise `content` with confidence 0. The strategy label is always
 *   "git-fallback". Errors are logged rather than thrown, and the temporary
 *   directory is removed in every case.
 */
export async function applyGitFallback(hunk: Hunk, content: string[]): Promise<EditResult> {
	let tmpDir: tmp.DirResult | undefined
	try {
		tmpDir = tmp.dirSync({ unsafeCleanup: true })
		const git: SimpleGit = simpleGit(tmpDir.name)
		await git.init()
		await git.addConfig("user.name", "Temp")
		await git.addConfig("user.email", "temp@example.com")
		const filePath = path.join(tmpDir.name, "file.txt")

		// Text of one change. `indent` may be absent (the other strategies in this file
		// guard for that too), so it must not be concatenated as the string "undefined".
		const toLine = (change: Hunk["changes"][number]): string =>
			change.originalLine || (change.indent || "") + change.content

		const searchLines = hunk.changes
			.filter((change) => change.type === "context" || change.type === "remove")
			.map(toLine)
		const replaceLines = hunk.changes
			.filter((change) => change.type === "context" || change.type === "add")
			.map(toLine)
		const searchText = searchLines.join("\n")
		const replaceText = replaceLines.join("\n")
		const originalText = content.join("\n")

		// Strategy 1: original -> search -> replace, then cherry-pick replace onto original
		try {
			fs.writeFileSync(filePath, originalText)
			await git.add("file.txt")
			const originalCommit = await git.commit("original")
			console.log("Strategy 1 - Original commit:", originalCommit.commit)
			fs.writeFileSync(filePath, searchText)
			await git.add("file.txt")
			const searchCommit1 = await git.commit("search")
			console.log("Strategy 1 - Search commit:", searchCommit1.commit)
			fs.writeFileSync(filePath, replaceText)
			await git.add("file.txt")
			const replaceCommit = await git.commit("replace")
			console.log("Strategy 1 - Replace commit:", replaceCommit.commit)
			console.log("Strategy 1 - Attempting checkout of:", originalCommit.commit)
			await git.raw(["checkout", originalCommit.commit])
			try {
				console.log("Strategy 1 - Attempting cherry-pick of:", replaceCommit.commit)
				await git.raw(["cherry-pick", "--minimal", replaceCommit.commit])
				const newText = fs.readFileSync(filePath, "utf-8")
				const newLines = newText.split("\n")
				return {
					confidence: 1,
					result: newLines,
					strategy: "git-fallback",
				}
			} catch (cherryPickError) {
				console.error("Strategy 1 failed with merge conflict")
			}
		} catch (error) {
			console.error("Strategy 1 failed:", error)
		}

		// Strategy 2: search -> replace, then original on top of search, then cherry-pick replace
		try {
			await git.init()
			await git.addConfig("user.name", "Temp")
			await git.addConfig("user.email", "temp@example.com")
			fs.writeFileSync(filePath, searchText)
			await git.add("file.txt")
			const searchCommit = await git.commit("search")
			// Strip a leading "HEAD " from the reported commit id before using it as a ref
			const searchHash = searchCommit.commit.replace(/^HEAD /, "")
			console.log("Strategy 2 - Search commit:", searchHash)
			fs.writeFileSync(filePath, replaceText)
			await git.add("file.txt")
			const replaceCommit = await git.commit("replace")
			const replaceHash = replaceCommit.commit.replace(/^HEAD /, "")
			console.log("Strategy 2 - Replace commit:", replaceHash)
			console.log("Strategy 2 - Attempting checkout of:", searchHash)
			await git.raw(["checkout", searchHash])
			fs.writeFileSync(filePath, originalText)
			await git.add("file.txt")
			const originalCommit2 = await git.commit("original")
			console.log("Strategy 2 - Original commit:", originalCommit2.commit)
			try {
				console.log("Strategy 2 - Attempting cherry-pick of:", replaceHash)
				await git.raw(["cherry-pick", "--minimal", replaceHash])
				const newText = fs.readFileSync(filePath, "utf-8")
				const newLines = newText.split("\n")
				return {
					confidence: 1,
					result: newLines,
					strategy: "git-fallback",
				}
			} catch (cherryPickError) {
				console.error("Strategy 2 failed with merge conflict")
			}
		} catch (error) {
			console.error("Strategy 2 failed:", error)
		}

		console.error("Git fallback failed")
		return { confidence: 0, result: content, strategy: "git-fallback" }
	} catch (error) {
		console.error("Git fallback strategy failed:", error)
		return { confidence: 0, result: content, strategy: "git-fallback" }
	} finally {
		// Always remove the temporary repository
		if (tmpDir) {
			tmpDir.removeCallback()
		}
	}
}
// Main edit function that tries strategies sequentially
/**
 * Applies a hunk using the first strategy whose result reaches the threshold.
 *
 * When the search `confidence` is already below `confidenceThreshold`, only
 * the git fallback is attempted and its result is returned as-is. Otherwise
 * DMP, context matching and the git fallback are tried in that order.
 *
 * @param hunk - The hunk to apply.
 * @param content - The file as an array of lines.
 * @param matchPosition - Index in `content` where the hunk was matched.
 * @param confidence - Confidence reported by the search step.
 * @param confidenceThreshold - Minimum confidence an edit must reach (default 0.97).
 * @returns The first sufficiently confident edit, or `content` with
 *   confidence 0 and strategy "none" when every strategy falls short.
 */
export async function applyEdit(
	hunk: Hunk,
	content: string[],
	matchPosition: number,
	confidence: number,
	confidenceThreshold: number = 0.97,
): Promise<EditResult> {
	const searchIsTrusted = confidence >= confidenceThreshold
	if (!searchIsTrusted) {
		console.log(
			`Search confidence (${confidence}) below minimum threshold (${confidenceThreshold}), trying git fallback...`,
		)
		return applyGitFallback(hunk, content)
	}

	// Ordered from first to last resort; each is only invoked if the previous one fell short
	const attempts: Array<() => EditResult | Promise<EditResult>> = [
		() => applyDMP(hunk, content, matchPosition),
		() => applyContextMatching(hunk, content, matchPosition),
		() => applyGitFallback(hunk, content),
	]

	for (const attempt of attempts) {
		const outcome = await attempt()
		if (outcome.confidence >= confidenceThreshold) {
			return outcome
		}
	}

	return { confidence: 0, result: content, strategy: "none" }
}

View File

@@ -0,0 +1,350 @@
import { Diff, Hunk, Change } from "./types"
import { findBestMatch, prepareSearchString } from "./search-strategies"
import { applyEdit } from "./edit-strategies"
import { DiffResult, DiffStrategy } from "../../types"
export class NewUnifiedDiffStrategy implements DiffStrategy {
/** Minimum confidence a match or edit must reach to be accepted. */
private readonly confidenceThreshold: number

/**
 * @param confidenceThreshold - Requested threshold (default 1). Values below
 *   0.8 are raised to 0.8.
 */
constructor(confidenceThreshold: number = 1) {
	const lowestAllowed = 0.8
	this.confidenceThreshold = Math.max(lowestAllowed, confidenceThreshold)
}
/**
 * Parses unified-diff text into hunks of context / add / remove changes.
 *
 * Everything before the first "@@" line (file headers) is ignored. Each "@@"
 * line starts a new hunk. Within a hunk the first character of a line selects
 * its type (" " context, "+" add, "-" remove); the rest is split into leading
 * whitespace (`indent`) and text (`content`), with the untouched remainder
 * kept in `originalLine`.
 *
 * Hunks without any add/remove change are dropped. Every kept hunk, including
 * the last one, is trimmed to at most MAX_CONTEXT_LINES context lines before
 * its first edit and after its last edit.
 *
 * Lenient handling of malformed input:
 *   - a terminal newline does not produce a phantom blank context line;
 *   - the "\ No newline at end of file" marker is skipped;
 *   - a line that starts directly with text (no marker) is taken whole as a
 *     context line instead of losing its first character;
 *   - empty lines and lines starting with other whitespace are treated as
 *     context, with the first character taken as the marker.
 *
 * @param diff - The diff text.
 * @returns The parsed hunks.
 */
private parseUnifiedDiff(diff: string): Diff {
	const MAX_CONTEXT_LINES = 6 // Number of context lines to keep before/after changes
	const lines = diff.split("\n")
	// A diff that ends with "\n" yields one trailing empty element that is not a real line
	if (lines.length > 1 && lines[lines.length - 1] === "") {
		lines.pop()
	}

	const hunks: Hunk[] = []
	let currentHunk: Hunk | null = null

	const isEdit = (change: Change): boolean => change.type === "add" || change.type === "remove"

	// Trims surplus context and stores the hunk; hunks without edits are discarded
	const flushHunk = (hunk: Hunk | null): void => {
		if (!hunk) {
			return
		}
		const changes = hunk.changes
		const firstEdit = changes.findIndex(isEdit)
		if (firstEdit === -1) {
			return
		}
		let lastEdit = firstEdit
		for (let j = changes.length - 1; j > firstEdit; j--) {
			if (isEdit(changes[j])) {
				lastEdit = j
				break
			}
		}
		const startIdx = Math.max(0, firstEdit - MAX_CONTEXT_LINES)
		const endIdx = Math.min(changes.length - 1, lastEdit + MAX_CONTEXT_LINES)
		hunk.changes = changes.slice(startIdx, endIdx + 1)
		hunks.push(hunk)
	}

	// Skip file headers and anything else before the first hunk
	let i = 0
	while (i < lines.length && !lines[i].startsWith("@@")) {
		i++
	}

	for (; i < lines.length; i++) {
		const line = lines[i]

		if (line.startsWith("@@")) {
			flushHunk(currentHunk)
			currentHunk = { changes: [] }
			continue
		}
		if (!currentHunk) {
			continue
		}
		// Unified-diff marker for a missing final newline; not part of the file content
		if (line.startsWith("\\ ")) {
			continue
		}

		const hasMarker = line.startsWith(" ") || line.startsWith("+") || line.startsWith("-")
		// Text with no marker at all: there is no marker character to strip
		const isBareText = !hasMarker && line !== "" && !/^\s/.test(line)

		const content = isBareText ? line : line.slice(1)
		const indentMatch = content.match(/^(\s*)/)
		const indent = indentMatch ? indentMatch[0] : ""
		const trimmedContent = content.slice(indent.length)

		if (line.startsWith(" ")) {
			currentHunk.changes.push({
				type: "context",
				content: trimmedContent,
				indent,
				originalLine: content,
			})
		} else if (line.startsWith("+")) {
			currentHunk.changes.push({
				type: "add",
				content: trimmedContent,
				indent,
				originalLine: content,
			})
		} else if (line.startsWith("-")) {
			currentHunk.changes.push({
				type: "remove",
				content: trimmedContent,
				indent,
				originalLine: content,
			})
		} else if (isBareText) {
			currentHunk.changes.push({
				type: "context",
				content: trimmedContent,
				indent,
				originalLine: content,
			})
		} else {
			// Empty line, or a line whose first character is some other whitespace
			const finalContent = trimmedContent ? " " + trimmedContent : " "
			currentHunk.changes.push({
				type: "context",
				content: finalContent,
				indent,
				originalLine: content,
			})
		}
	}

	// The last hunk goes through the same filtering and trimming as the others
	flushHunk(currentHunk)

	return { hunks }
}
/**
 * Returns the prompt text that describes the `apply_diff` tool: how to write
 * the unified diff, the formatting requirements, a good and a bad example,
 * and the expected parameters and usage.
 *
 * @param args - `cwd` is interpolated into the description of the `path`
 *   parameter. `toolOptions` is accepted but not used by this implementation.
 * @returns The tool description as a single string.
 */
getToolDescription(args: { cwd: string; toolOptions?: { [key: string]: string } }): string {
return `# apply_diff Tool - Generate Precise Code Changes
Generate a unified diff that can be cleanly applied to modify code files.
## Step-by-Step Instructions:
1. Start with file headers:
- First line: "--- {original_file_path}"
- Second line: "+++ {new_file_path}"
2. For each change section:
- Begin with "@@ ... @@" separator line without line numbers
- Include 2-3 lines of context before and after changes
- Mark removed lines with "-"
- Mark added lines with "+"
- Preserve exact indentation
3. Group related changes:
- Keep related modifications in the same hunk
- Start new hunks for logically separate changes
- When modifying functions/methods, include the entire block
## Requirements:
1. MUST include exact indentation
2. MUST include sufficient context for unique matching
3. MUST group related changes together
4. MUST use proper unified diff format
5. MUST NOT include timestamps in file headers
6. MUST NOT include line numbers in the @@ header
## Examples:
✅ Good diff (follows all requirements):
\`\`\`diff
--- src/utils.ts
+++ src/utils.ts
@@ ... @@
def calculate_total(items):
- total = 0
- for item in items:
- total += item.price
+ return sum(item.price for item in items)
\`\`\`
❌ Bad diff (violates requirements #1 and #2):
\`\`\`diff
--- src/utils.ts
+++ src/utils.ts
@@ ... @@
-total = 0
-for item in items:
+return sum(item.price for item in items)
\`\`\`
Parameters:
- path: (required) File path relative to ${args.cwd}
- diff: (required) Unified diff content in unified format to apply to the file.
Usage:
<apply_diff>
<path>path/to/file.ext</path>
<diff>
Your diff here
</diff>
</apply_diff>`
}
// Helper function to split a hunk into smaller hunks based on contiguous changes
/**
 * Splits a hunk into sub-hunks, one per group of edits. Edits separated by at
 * most MAX_CONTEXT_LINES context lines stay in the same sub-hunk; a longer
 * run of context closes the current sub-hunk.
 *
 * Each sub-hunk carries at most MAX_CONTEXT_LINES context lines before its
 * first edit. A sub-hunk closed by a long context run also carries at most
 * MAX_CONTEXT_LINES after its last edit; the final sub-hunk keeps whatever
 * trailing context had accumulated. Context lines between two sub-hunks may
 * appear in both.
 *
 * @param hunk - The hunk to split; it is not modified.
 * @returns The sub-hunks in order. Empty when the hunk contains no edits.
 */
private splitHunk(hunk: Hunk): Hunk[] {
	const MAX_CONTEXT_LINES = 3 // Keep 3 lines of context before/after changes
	const result: Hunk[] = []
	let currentHunk: Hunk | null = null
	let contextBefore: Change[] = []
	let contextAfter: Change[] = []

	for (const change of hunk.changes) {
		if (change.type !== "context") {
			if (!currentHunk) {
				// First edit of a new sub-hunk: start it with the buffered leading context
				currentHunk = { changes: [...contextBefore] }
			} else if (contextAfter.length > 0) {
				// Short context run between two edits: keep it inside the sub-hunk
				currentHunk.changes.push(...contextAfter)
			}
			contextAfter = []
			currentHunk.changes.push(change)
			continue
		}

		if (!currentHunk) {
			// Sliding window over the context that precedes the next edit
			contextBefore.push(change)
			if (contextBefore.length > MAX_CONTEXT_LINES) {
				contextBefore.shift()
			}
			continue
		}

		contextAfter.push(change)
		if (contextAfter.length > MAX_CONTEXT_LINES) {
			// Enough context after the edits: close the sub-hunk with the first
			// MAX_CONTEXT_LINES lines only
			currentHunk.changes.push(...contextAfter.slice(0, MAX_CONTEXT_LINES))
			result.push(currentHunk)
			currentHunk = null
			// Seed the next sub-hunk with the most recent MAX_CONTEXT_LINES lines
			contextBefore = contextAfter.slice(-MAX_CONTEXT_LINES)
			contextAfter = []
		}
	}

	// Add any remaining changes
	if (currentHunk) {
		if (contextAfter.length > 0) {
			currentHunk.changes.push(...contextAfter)
		}
		result.push(currentHunk)
	}

	return result
}
/**
 * Applies a unified diff to `originalContent`, one hunk at a time.
 *
 * Each hunk is located with `findBestMatch` and applied with `applyEdit`. When a hunk
 * cannot be located with enough confidence it is split with `splitHunk` and the pieces
 * are applied one after another. The first hunk that cannot be applied aborts the whole
 * operation and its failure message is returned.
 *
 * @param originalContent - Text to patch; it is split on "\n" into lines.
 * @param diffContent - Diff text handed to `parseUnifiedDiff`.
 * @param startLine - Only echoed in the "could not locate" error message (together with
 *   `endLine`); the search itself always starts at index 0.
 * @param endLine - See `startLine`.
 * @returns `{ success: true, content }` with the patched lines joined by "\n", or
 *   `{ success: false, error }` describing the first failure.
 */
async applyDiff(
originalContent: string,
diffContent: string,
startLine?: number,
endLine?: number,
): Promise<DiffResult> {
const parsedDiff = this.parseUnifiedDiff(diffContent)
const originalLines = originalContent.split("\n")
// Working copy: every applied hunk replaces it, so later hunks are matched against already-patched lines.
let result = [...originalLines]
// A diff without hunks is reported as a failure instead of returning the input unchanged.
if (!parsedDiff.hunks.length) {
return {
success: false,
error: "No hunks found in diff. Please ensure your diff includes actual changes and follows the unified diff format.",
}
}
for (const hunk of parsedDiff.hunks) {
// Locate the hunk in the current lines; the search always starts at index 0.
const contextStr = prepareSearchString(hunk.changes)
const {
index: matchPosition,
confidence,
strategy,
} = findBestMatch(contextStr, result, 0, this.confidenceThreshold)
if (confidence < this.confidenceThreshold) {
console.log("Full hunk application failed, trying sub-hunks strategy")
// Try splitting the hunk into smaller hunks
const subHunks = this.splitHunk(hunk)
let subHunkSuccess = true
// Sub-hunks are applied to a scratch copy so a partially applied split never reaches `result`.
let subHunkResult = [...result]
for (const subHunk of subHunks) {
const subContextStr = prepareSearchString(subHunk.changes)
const subSearchResult = findBestMatch(subContextStr, subHunkResult, 0, this.confidenceThreshold)
if (subSearchResult.confidence >= this.confidenceThreshold) {
const subEditResult = await applyEdit(
subHunk,
subHunkResult,
subSearchResult.index,
subSearchResult.confidence,
this.confidenceThreshold,
)
if (subEditResult.confidence >= this.confidenceThreshold) {
subHunkResult = subEditResult.result
continue
}
}
// Reached when the sub-hunk was not located or its edit scored below the threshold: give up on the split.
subHunkSuccess = false
break
}
if (subHunkSuccess) {
// Every sub-hunk applied: commit the scratch copy and move on to the next hunk.
result = subHunkResult
continue
}
// If sub-hunks also failed, return the original error
const contextLines = hunk.changes.filter((c) => c.type === "context").length
const totalLines = hunk.changes.length
// Share of context lines in the hunk; it selects which hint is appended below.
const contextRatio = contextLines / totalLines
let errorMsg = `Failed to find a matching location in the file (${Math.floor(
confidence * 100,
)}% confidence, needs ${Math.floor(this.confidenceThreshold * 100)}%)\n\n`
errorMsg += "Debug Info:\n"
errorMsg += `- Search Strategy Used: ${strategy}\n`
errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor(
contextRatio * 100,
)}%)\n`
errorMsg += `- Attempted to split into ${subHunks.length} sub-hunks but still failed\n`
if (contextRatio < 0.2) {
errorMsg += "\nPossible Issues:\n"
errorMsg += "- Not enough context lines to uniquely identify the location\n"
errorMsg += "- Add a few more lines of unchanged code around your changes\n"
} else if (contextRatio > 0.5) {
errorMsg += "\nPossible Issues:\n"
errorMsg += "- Too many context lines may reduce search accuracy\n"
errorMsg += "- Try to keep only 2-3 lines of context before and after changes\n"
} else {
errorMsg += "\nPossible Issues:\n"
errorMsg += "- The diff may be targeting a different version of the file\n"
errorMsg +=
"- There may be too many changes in a single hunk, try splitting the changes into multiple hunks\n"
}
// The range is appended only when both values are truthy, so a value of 0 suppresses it.
if (startLine && endLine) {
errorMsg += `\nSearch Range: lines ${startLine}-${endLine}\n`
}
return { success: false, error: errorMsg }
}
// The hunk was located with enough confidence: apply it and check the edit's own confidence.
const editResult = await applyEdit(hunk, result, matchPosition, confidence, this.confidenceThreshold)
if (editResult.confidence >= this.confidenceThreshold) {
result = editResult.result
} else {
// Edit failure - likely due to content mismatch
let errorMsg = `Failed to apply the edit using ${editResult.strategy} strategy (${Math.floor(
editResult.confidence * 100,
)}% confidence)\n\n`
errorMsg += "Debug Info:\n"
errorMsg += "- The location was found but the content didn't match exactly\n"
errorMsg += "- This usually means the file has been modified since the diff was created\n"
errorMsg += "- Or the diff may be targeting a different version of the file\n"
errorMsg += "\nPossible Solutions:\n"
errorMsg += "1. Refresh your view of the file and create a new diff\n"
errorMsg += "2. Double-check that the removed lines (-) match the current file content\n"
errorMsg += "3. Ensure your diff targets the correct version of the file"
return { success: false, error: errorMsg }
}
}
return { success: true, content: result.join("\n") }
}
}

View File

@@ -0,0 +1,408 @@
import { compareTwoStrings } from "string-similarity"
import { closest } from "fastest-levenshtein"
import { diff_match_patch } from "diff-match-patch"
import { Change, Hunk } from "./types"
/**
 * Outcome of one search strategy looking for a block of lines inside file content.
 */
export type SearchResult = {
// Line index in the searched content where the match starts; -1 when nothing was found.
index: number
// Score that callers compare against their confidence threshold; 0 when nothing was found.
confidence: number
// Label of the strategy that produced the result, e.g. "exact", "anchor", "similarity", "levenshtein" or "none".
strategy: string
}
// Content with more lines than this is treated as a large file by getAdaptiveThreshold.
const LARGE_FILE_THRESHOLD = 1000 // lines
// Largest amount validateContextLines adds to a score; it is scaled by the uniqueness ratio of the search lines.
const UNIQUE_CONTENT_BOOST = 0.05
// Default value of the `overlapSize` parameter of createOverlappingWindows and combineOverlappingMatches.
const DEFAULT_OVERLAP_SIZE = 3 // lines of overlap between windows
// Cap applied to twice the search size when sizing a window; a longer search still gets a window of its own length.
const MAX_WINDOW_SIZE = 500 // maximum lines in a window
/**
 * Picks the confidence threshold to use for content of a given size.
 *
 * Content of up to LARGE_FILE_THRESHOLD lines keeps `baseThreshold`. Larger content gets a
 * threshold 0.07 lower, floored at 0.8, and never higher than `baseThreshold` itself.
 *
 * @param contentLength - Number of lines in the content being searched.
 * @param baseThreshold - Threshold requested by the caller.
 * @returns The threshold that similarity scores should be compared against.
 */
function getAdaptiveThreshold(contentLength: number, baseThreshold: number): number {
	if (contentLength <= LARGE_FILE_THRESHOLD) {
		return baseThreshold
	}
	// Relax the threshold for large files, but not below 80%.
	const relaxed = Math.max(baseThreshold - 0.07, 0.8)
	// The 0.8 floor must never make matching stricter than the caller asked for
	// (this only matters when baseThreshold is itself below 0.8).
	return Math.min(baseThreshold, relaxed)
}
/**
 * Measures how distinctive the lines of a search string are within some content.
 *
 * Every distinct search line is looked up as literal text in the content joined by "\n";
 * a line counts as distinctive when it occurs there once or twice.
 *
 * @param searchStr - Search text, one entry per "\n"-separated line.
 * @param content - Lines of the content to look in.
 * @returns Fraction of distinct search lines that are distinctive.
 */
function evaluateContentUniqueness(searchStr: string, content: string[]): number {
	const distinctLines = [...new Set(searchStr.split("\n"))]
	const haystack = content.join("\n")
	const distinctiveLines = distinctLines.filter((candidate) => {
		// Escape regex metacharacters so the line is matched literally
		const literalPattern = candidate.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
		const occurrences = haystack.match(new RegExp(literalPattern, "g"))
		// No occurrence at all (null) does not count as distinctive
		return occurrences !== null && occurrences.length <= 2
	})
	return distinctiveLines.length / distinctLines.length
}
/**
 * Builds the text to search for from a hunk's changes.
 *
 * Only "context" and "remove" changes contribute; "add" changes are left out.
 *
 * @param changes - Changes of one hunk, in order.
 * @returns The selected lines joined by "\n".
 */
export function prepareSearchString(changes: Change[]): string {
	return changes
		.filter((change) => change.type === "context" || change.type === "remove")
		.map((change) =>
			// `originalLine` is optional. Without a fallback, Array.prototype.join would turn a
			// missing value into an empty line; rebuild the line from `indent` + `content` instead.
			change.originalLine !== undefined ? change.originalLine : (change.indent || "") + change.content,
		)
		.join("\n")
}
/**
 * Scores how alike two texts are by delegating to `compareTwoStrings`.
 *
 * @param original - First text.
 * @param modified - Second text.
 * @returns The score reported by `compareTwoStrings` for the two texts.
 */
export function evaluateSimilarity(original: string, modified: string): number {
	const score = compareTwoStrings(original, modified)
	return score
}
/**
 * Scores two texts by round-tripping them through diff-match-patch.
 *
 * The diff from `original` to `modified` is cleaned up, turned into patches, and the
 * patches are applied back onto `original`; the patched text is then compared with
 * `modified` via `evaluateSimilarity`.
 *
 * NOTE(review): the patches are built from and applied to the same `original`, so the
 * patched text is presumably equal to `modified` in most cases, which would make this
 * score close to 1 regardless of the inputs. Confirm that this is intended.
 *
 * @param original - Text the diff starts from.
 * @param modified - Text the diff leads to.
 * @returns Similarity between the patched text and `modified`.
 */
export function getDMPSimilarity(original: string, modified: string): number {
	const engine = new diff_match_patch()
	const delta = engine.diff_main(original, modified)
	engine.diff_cleanupSemantic(delta)
	const patchSet = engine.patch_make(original, delta)
	// patch_apply returns a pair; only the patched text (first entry) is used
	const patchedText = engine.patch_apply(patchSet, original)[0]
	return evaluateSimilarity(patchedText, modified)
}
/**
 * Scores an edited text against what a hunk says the text should look like.
 *
 * The expected text is the hunk's "context" and "add" lines; the pre-edit text is its
 * "context" and "remove" lines. When the result still looks like the pre-edit text
 * (similarity above 0.97) and does not score exactly 1 against the expected text, the
 * score is scaled down by 0.8.
 *
 * @param hunk - Hunk that was applied.
 * @param result - Text produced by the edit.
 * @returns Similarity between `result` and the expected text, possibly scaled down.
 */
export function validateEditResult(hunk: Hunk, result: string): number {
	// Renders the context lines plus the lines of one additional change type
	const renderLines = (extraType: string): string => {
		const rendered: string[] = []
		for (const change of hunk.changes) {
			if (change.type === "context" || change.type === extraType) {
				rendered.push(change.indent ? change.indent + change.content : change.content)
			}
		}
		return rendered.join("\n")
	}

	const scoreVsExpected = getDMPSimilarity(renderLines("add"), result)
	const scoreVsOriginal = getDMPSimilarity(renderLines("remove"), result)

	// The result still resembling the pre-edit text suggests the edit did not take effect
	const looksUnchanged = scoreVsOriginal > 0.97 && scoreVsExpected !== 1
	if (looksUnchanged) {
		return 0.8 * scoreVsExpected
	}
	return scoreVsExpected
}
/**
 * Scores a candidate piece of content against the search string.
 *
 * The similarity between the filtered search lines and `content` is computed, then a
 * uniqueness boost of at most UNIQUE_CONTENT_BOOST is added. A similarity below the
 * adaptive threshold is first scaled by 0.3.
 *
 * NOTE(review): search lines starting with "-" are dropped before comparing. If the
 * search string carries plain file text rather than diff markers, this would also drop
 * genuine lines that begin with "-" (for example list items); verify against the caller.
 *
 * @param searchStr - Search text, "\n"-separated.
 * @param content - Candidate text, "\n"-separated.
 * @param confidenceThreshold - Base threshold handed to getAdaptiveThreshold.
 * @returns The adjusted score; the boost is added without capping the result.
 */
function validateContextLines(searchStr: string, content: string, confidenceThreshold: number): number {
	const keptSearchLines = searchStr.split("\n").filter((line) => !line.startsWith("-"))
	const similarity = evaluateSimilarity(keptSearchLines.join("\n"), content)

	const contentLines = content.split("\n")
	const threshold = getAdaptiveThreshold(contentLines.length, confidenceThreshold)
	const uniquenessBoost = evaluateContentUniqueness(searchStr, contentLines) * UNIQUE_CONTENT_BOOST

	if (similarity < threshold) {
		// Below the threshold: keep only 30% of the similarity
		return similarity * 0.3 + uniquenessBoost
	}
	return similarity + uniquenessBoost
}
/**
 * Cuts content into overlapping windows of lines for windowed searching.
 *
 * A window is twice the search size, capped at MAX_WINDOW_SIZE but never shorter than
 * the search size. Consecutive windows share at least `searchSize - 1` lines so that
 * every run of `searchSize` consecutive lines lies completely inside some window.
 * Trailing windows shorter than `searchSize` are dropped.
 *
 * @param content - Lines to cut into windows.
 * @param searchSize - Number of lines in the text being searched for.
 * @param overlapSize - Minimum number of lines shared by consecutive windows; it is raised
 *   to `searchSize - 1` when smaller and capped at one less than the window size.
 * @returns The windows together with the index of their first line in `content`.
 */
function createOverlappingWindows(
	content: string[],
	searchSize: number,
	overlapSize: number = DEFAULT_OVERLAP_SIZE,
): { window: string[]; startIndex: number }[] {
	const windows: { window: string[]; startIndex: number }[] = []
	// Ensure minimum window size is at least searchSize
	const effectiveWindowSize = Math.max(searchSize, Math.min(searchSize * 2, MAX_WINDOW_SIZE))
	// With a smaller overlap, an occurrence that straddles a window boundary would not fit
	// into either neighbouring window and could never be found.
	const requiredOverlap = Math.max(overlapSize, searchSize - 1)
	// Ensure overlap size doesn't exceed window size
	const effectiveOverlapSize = Math.min(requiredOverlap, effectiveWindowSize - 1)
	// Calculate step size, ensure it's at least 1
	const stepSize = Math.max(1, effectiveWindowSize - effectiveOverlapSize)
	for (let start = 0; start < content.length; start += stepSize) {
		const windowContent = content.slice(start, start + effectiveWindowSize)
		if (windowContent.length >= searchSize) {
			windows.push({ window: windowContent, startIndex: start })
		}
	}
	return windows
}
/**
 * Merges matches that were found by neighbouring windows and point at nearby positions.
 *
 * Matches are visited from highest to lowest confidence. A match is merged with every
 * not-yet-used match from a directly adjacent window whose index differs by at most
 * `overlapSize`; the merged entry keeps the visited match's index, uses the average
 * confidence plus a boost of 0.05 per merged neighbour (at most 0.1, result capped at 1)
 * and gets "-overlapping" appended to its strategy. Other matches are passed through.
 *
 * The input array is not modified.
 *
 * @param matches - Matches tagged with the index of the window that produced them.
 * @param overlapSize - Largest index difference for two matches to be merged.
 * @returns Combined matches, ordered by the confidence of the match they were built from.
 */
function combineOverlappingMatches(
	matches: (SearchResult & { windowIndex: number })[],
	overlapSize: number = DEFAULT_OVERLAP_SIZE,
): SearchResult[] {
	if (matches.length === 0) {
		return []
	}
	// Rank a copy: Array.prototype.sort works in place and would reorder the caller's array.
	const ranked = [...matches].sort((a, b) => b.confidence - a.confidence)
	const combinedMatches: SearchResult[] = []
	const usedIndices = new Set<number>()
	for (const match of ranked) {
		if (usedIndices.has(match.windowIndex)) {
			continue
		}
		// Find overlapping matches
		const overlapping = ranked.filter(
			(m) =>
				Math.abs(m.windowIndex - match.windowIndex) === 1 &&
				Math.abs(m.index - match.index) <= overlapSize &&
				!usedIndices.has(m.windowIndex),
		)
		if (overlapping.length > 0) {
			// Boost confidence if we find same match in overlapping windows
			const avgConfidence =
				(match.confidence + overlapping.reduce((sum, m) => sum + m.confidence, 0)) / (overlapping.length + 1)
			const boost = Math.min(0.05 * overlapping.length, 0.1) // Max 10% boost
			combinedMatches.push({
				index: match.index,
				confidence: Math.min(1, avgConfidence + boost),
				strategy: `${match.strategy}-overlapping`,
			})
			usedIndices.add(match.windowIndex)
			overlapping.forEach((m) => usedIndices.add(m.windowIndex))
		} else {
			combinedMatches.push({
				index: match.index,
				confidence: match.confidence,
				strategy: match.strategy,
			})
			usedIndices.add(match.windowIndex)
		}
	}
	return combinedMatches
}
/**
 * Exact strategy: looks for the search string verbatim inside windows of the content.
 *
 * For every window that contains `searchStr`, the lines covering the first occurrence
 * are scored with `getDMPSimilarity` and `validateContextLines`; the lower of the two
 * scores becomes the match confidence. Matches from all windows are then merged with
 * `combineOverlappingMatches` and the first merged entry is returned.
 *
 * @param searchStr - Text to find, "\n"-separated.
 * @param content - Lines of the file.
 * @param startIndex - Line index to start searching from.
 * @param confidenceThreshold - Threshold handed to `validateContextLines`.
 * @returns The best match, or index -1 with confidence 0 when the text is not found.
 */
export function findExactMatch(
	searchStr: string,
	content: string[],
	startIndex: number = 0,
	confidenceThreshold: number = 0.97,
): SearchResult {
	const searchLineCount = searchStr.split("\n").length
	const windows = createOverlappingWindows(content.slice(startIndex), searchLineCount)
	const found: (SearchResult & { windowIndex: number })[] = []

	for (let windowIndex = 0; windowIndex < windows.length; windowIndex++) {
		const { window: windowLines, startIndex: windowStart } = windows[windowIndex]
		const windowText = windowLines.join("\n")
		const charOffset = windowText.indexOf(searchStr)
		if (charOffset === -1) {
			continue
		}
		// Translate the character offset into a line offset within the window
		const lineOffset = windowText.slice(0, charOffset).split("\n").length - 1
		const matchedText = windowLines.slice(lineOffset, lineOffset + searchLineCount).join("\n")
		const similarity = getDMPSimilarity(searchStr, matchedText)
		const contextSimilarity = validateContextLines(searchStr, matchedText, confidenceThreshold)
		found.push({
			index: startIndex + windowStart + lineOffset,
			confidence: Math.min(similarity, contextSimilarity),
			strategy: "exact",
			windowIndex,
		})
	}

	const combined = combineOverlappingMatches(found)
	if (combined.length > 0) {
		return combined[0]
	}
	return { index: -1, confidence: 0, strategy: "exact" }
}
/**
 * Similarity strategy: slides a window of the search's line count over the content and
 * keeps the position with the best adjusted score.
 *
 * A position is only considered when its raw `compareTwoStrings` score beats the best
 * score so far and reaches `confidenceThreshold`. Its adjusted score is the raw score
 * multiplied by the lower of `getDMPSimilarity` and `validateContextLines`.
 *
 * @param searchStr - Text to find, "\n"-separated.
 * @param content - Lines of the file.
 * @param startIndex - First line index to try.
 * @param confidenceThreshold - Minimum raw score for a position to be considered.
 * @returns The best position, or index -1 with confidence 0 when none qualifies.
 */
export function findSimilarityMatch(
	searchStr: string,
	content: string[],
	startIndex: number = 0,
	confidenceThreshold: number = 0.97,
): SearchResult {
	const span = searchStr.split("\n").length
	const lastStart = content.length - span
	let topScore = 0
	let topIndex = -1

	for (let start = startIndex; start <= lastStart; start++) {
		const candidateText = content.slice(start, start + span).join("\n")
		const rawScore = compareTwoStrings(searchStr, candidateText)
		const worthChecking = rawScore > topScore && rawScore >= confidenceThreshold
		if (!worthChecking) {
			continue
		}
		const similarity = getDMPSimilarity(searchStr, candidateText)
		const contextSimilarity = validateContextLines(searchStr, candidateText, confidenceThreshold)
		const adjustedScore = Math.min(similarity, contextSimilarity) * rawScore
		if (adjustedScore > topScore) {
			topScore = adjustedScore
			topIndex = start
		}
	}

	return {
		index: topIndex,
		confidence: topIndex !== -1 ? topScore : 0,
		strategy: "similarity",
	}
}
/**
 * Levenshtein strategy: picks the window of content that `closest` considers nearest to
 * the search string.
 *
 * All windows with the search's line count, starting at `startIndex`, are candidates. The
 * chosen one is scored with `getDMPSimilarity` and `validateContextLines`; the lower of
 * the two scores is the confidence.
 *
 * @param searchStr - Text to find, "\n"-separated.
 * @param content - Lines of the file.
 * @param startIndex - First line index to try.
 * @param confidenceThreshold - Threshold handed to `validateContextLines`.
 * @returns The chosen position, or index -1 when there are no candidates or the confidence is 0.
 */
export function findLevenshteinMatch(
	searchStr: string,
	content: string[],
	startIndex: number = 0,
	confidenceThreshold: number = 0.97,
): SearchResult {
	const span = searchStr.split("\n").length
	const candidates: string[] = []
	for (let start = startIndex; start + span <= content.length; start++) {
		candidates.push(content.slice(start, start + span).join("\n"))
	}

	if (candidates.length === 0) {
		return { index: -1, confidence: 0, strategy: "levenshtein" }
	}

	const nearest = closest(searchStr, candidates)
	// Candidates were collected from startIndex onwards, so shift back to a content index
	const index = startIndex + candidates.indexOf(nearest)
	const similarity = getDMPSimilarity(searchStr, nearest)
	const contextSimilarity = validateContextLines(searchStr, nearest, confidenceThreshold)
	const confidence = Math.min(similarity, contextSimilarity)
	return {
		index: confidence === 0 ? -1 : index,
		confidence: index !== -1 ? confidence : 0,
		strategy: "levenshtein",
	}
}
/**
 * Picks the anchor lines of a search string: its first and its last line that contain
 * something other than whitespace.
 *
 * @param searchStr - Search text, "\n"-separated.
 * @returns The two anchor lines (unmodified), or null for both when every line is blank.
 */
function identifyAnchors(searchStr: string): { first: string | null; last: string | null } {
	const hasText = (line: string): boolean => line.trim().length > 0
	const lines = searchStr.split("\n")
	const firstHit = lines.find(hasText)
	// Scan a reversed copy so the last non-blank line is found first
	const lastHit = [...lines].reverse().find(hasText)
	return {
		first: firstHit === undefined ? null : firstHit,
		last: lastHit === undefined ? null : lastHit,
	}
}
/**
 * Anchor strategy: locates the search text by its first and last non-blank lines.
 *
 * The first anchor must occur exactly once in the whole content. The match starts at that
 * occurrence (searched from `startIndex`) and ends at the last occurrence of the last
 * anchor, which must come after it. The lines between the anchors in the content are
 * then compared with the lines between the anchors in the search text; the match is
 * accepted with confidence 1 when that similarity reaches the adaptive threshold.
 *
 * @param searchStr - Text to find, "\n"-separated.
 * @param content - Lines of the file.
 * @param startIndex - Line index to start looking for the anchors from.
 * @param confidenceThreshold - Base threshold handed to `getAdaptiveThreshold`.
 * @returns The position of the first anchor with confidence 1, or index -1 with confidence 0.
 */
export function findAnchorMatch(
	searchStr: string,
	content: string[],
	startIndex: number = 0,
	confidenceThreshold: number = 0.97,
): SearchResult {
	const noMatch: SearchResult = { index: -1, confidence: 0, strategy: "anchor" }
	const searchLines = searchStr.split("\n")
	const { first, last } = identifyAnchors(searchStr)
	if (!first || !last) {
		return noMatch
	}

	// The first anchor only identifies a location if it is unique in the content
	const firstOccurrences = content.filter((contentLine) => contentLine === first).length
	if (firstOccurrences !== 1) {
		return noMatch
	}

	// First anchor: first occurrence at or after startIndex
	const firstIndex = content.indexOf(first, Math.max(0, startIndex))
	// Last anchor: last occurrence in the content, provided it is not before startIndex
	const lastOccurrence = content.lastIndexOf(last)
	const lastIndex = lastOccurrence >= startIndex ? lastOccurrence : -1
	if (firstIndex === -1 || lastIndex === -1 || lastIndex <= firstIndex) {
		return noMatch
	}

	// Validate the context.
	// The last anchor is the LAST non-blank search line, so its position must be taken with
	// lastIndexOf: when its text also appears earlier in the search (e.g. a repeated "}"),
	// indexOf would cut the expected context short and compare the wrong lines.
	const expectedContext = searchLines
		.slice(searchLines.indexOf(first) + 1, searchLines.lastIndexOf(last))
		.join("\n")
	const actualContext = content.slice(firstIndex + 1, lastIndex).join("\n")
	const contextSimilarity = evaluateSimilarity(expectedContext, actualContext)
	if (contextSimilarity < getAdaptiveThreshold(content.length, confidenceThreshold)) {
		return noMatch
	}

	return {
		index: firstIndex,
		confidence: 1,
		strategy: "anchor",
	}
}
/**
 * Runs every search strategy and returns the result with the highest confidence.
 *
 * The strategies run in the order exact, anchor, similarity, Levenshtein. A later result
 * only replaces an earlier one when its confidence is strictly higher, so ties go to the
 * earlier strategy.
 *
 * @param searchStr - Text to find, "\n"-separated.
 * @param content - Lines of the file.
 * @param startIndex - Line index handed to every strategy.
 * @param confidenceThreshold - Threshold handed to every strategy.
 * @returns The winning result, or index -1 with strategy "none" when every strategy reports confidence 0.
 */
export function findBestMatch(
	searchStr: string,
	content: string[],
	startIndex: number = 0,
	confidenceThreshold: number = 0.97,
): SearchResult {
	const nothingFound: SearchResult = { index: -1, confidence: 0, strategy: "none" }
	const searchers = [findExactMatch, findAnchorMatch, findSimilarityMatch, findLevenshteinMatch]
	return searchers.reduce((leader, runSearch) => {
		const candidate = runSearch(searchStr, content, startIndex, confidenceThreshold)
		return candidate.confidence > leader.confidence ? candidate : leader
	}, nothingFound)
}

View File

@@ -0,0 +1,20 @@
/**
 * One line of a hunk.
 */
export type Change = {
// Role of the line: unchanged context, an added line, or a removed line.
type: "context" | "add" | "remove"
// Line text; rendered as `indent + content` when `indent` is non-empty
// (presumably the text without its leading whitespace — confirm against the parser).
content: string
// Prefix put in front of `content` when the line is rendered
// (presumably its leading whitespace — confirm against the parser).
indent: string
// Text used for "context" and "remove" lines when the search string is built.
originalLine?: string
}
/**
 * A group of line changes that is located in the file and applied as one unit.
 */
export type Hunk = {
// The lines of the hunk, in order.
changes: Change[]
}
/**
 * A diff represented as a list of hunks.
 */
export type Diff = {
// The hunks that make up the diff.
hunks: Hunk[]
}
/**
 * Outcome of applying one hunk to a set of lines.
 */
export type EditResult = {
// Score that callers compare against their confidence threshold to accept or reject the edit.
confidence: number
// The content lines after the edit.
result: string[]
// Label of the edit strategy that produced the result; it is quoted in failure messages.
strategy: string
}