mirror of
https://github.com/EthanMarti/infio-copilot.git
synced 2026-05-09 16:38:19 +00:00
add tool use, update system prompt
This commit is contained in:
@@ -0,0 +1,295 @@
|
||||
import { applyContextMatching, applyDMP, applyGitFallback } from "../edit-strategies"
|
||||
import { Hunk } from "../types"
|
||||
|
||||
const testCases = [
|
||||
{
|
||||
name: "should return original content if no match is found",
|
||||
hunk: {
|
||||
changes: [
|
||||
{ type: "context", content: "line1" },
|
||||
{ type: "add", content: "line2" },
|
||||
],
|
||||
} as Hunk,
|
||||
content: ["line1", "line3"],
|
||||
matchPosition: -1,
|
||||
expected: {
|
||||
confidence: 0,
|
||||
result: ["line1", "line3"],
|
||||
},
|
||||
expectedResult: "line1\nline3",
|
||||
strategies: ["context", "dmp"],
|
||||
},
|
||||
{
|
||||
name: "should apply a simple add change",
|
||||
hunk: {
|
||||
changes: [
|
||||
{ type: "context", content: "line1" },
|
||||
{ type: "add", content: "line2" },
|
||||
],
|
||||
} as Hunk,
|
||||
content: ["line1", "line3"],
|
||||
matchPosition: 0,
|
||||
expected: {
|
||||
confidence: 1,
|
||||
result: ["line1", "line2", "line3"],
|
||||
},
|
||||
expectedResult: "line1\nline2\nline3",
|
||||
strategies: ["context", "dmp"],
|
||||
},
|
||||
{
|
||||
name: "should apply a simple remove change",
|
||||
hunk: {
|
||||
changes: [
|
||||
{ type: "context", content: "line1" },
|
||||
{ type: "remove", content: "line2" },
|
||||
],
|
||||
} as Hunk,
|
||||
content: ["line1", "line2", "line3"],
|
||||
matchPosition: 0,
|
||||
expected: {
|
||||
confidence: 1,
|
||||
result: ["line1", "line3"],
|
||||
},
|
||||
expectedResult: "line1\nline3",
|
||||
strategies: ["context", "dmp"],
|
||||
},
|
||||
{
|
||||
name: "should apply a simple context change",
|
||||
hunk: {
|
||||
changes: [{ type: "context", content: "line1" }],
|
||||
} as Hunk,
|
||||
content: ["line1", "line2", "line3"],
|
||||
matchPosition: 0,
|
||||
expected: {
|
||||
confidence: 1,
|
||||
result: ["line1", "line2", "line3"],
|
||||
},
|
||||
expectedResult: "line1\nline2\nline3",
|
||||
strategies: ["context", "dmp"],
|
||||
},
|
||||
{
|
||||
name: "should apply a multi-line add change",
|
||||
hunk: {
|
||||
changes: [
|
||||
{ type: "context", content: "line1" },
|
||||
{ type: "add", content: "line2\nline3" },
|
||||
],
|
||||
} as Hunk,
|
||||
content: ["line1", "line4"],
|
||||
matchPosition: 0,
|
||||
expected: {
|
||||
confidence: 1,
|
||||
result: ["line1", "line2\nline3", "line4"],
|
||||
},
|
||||
expectedResult: "line1\nline2\nline3\nline4",
|
||||
strategies: ["context", "dmp"],
|
||||
},
|
||||
{
|
||||
name: "should apply a multi-line remove change",
|
||||
hunk: {
|
||||
changes: [
|
||||
{ type: "context", content: "line1" },
|
||||
{ type: "remove", content: "line2\nline3" },
|
||||
],
|
||||
} as Hunk,
|
||||
content: ["line1", "line2", "line3", "line4"],
|
||||
matchPosition: 0,
|
||||
expected: {
|
||||
confidence: 1,
|
||||
result: ["line1", "line4"],
|
||||
},
|
||||
expectedResult: "line1\nline4",
|
||||
strategies: ["context", "dmp"],
|
||||
},
|
||||
{
|
||||
name: "should apply a multi-line context change",
|
||||
hunk: {
|
||||
changes: [
|
||||
{ type: "context", content: "line1" },
|
||||
{ type: "context", content: "line2\nline3" },
|
||||
],
|
||||
} as Hunk,
|
||||
content: ["line1", "line2", "line3", "line4"],
|
||||
matchPosition: 0,
|
||||
expected: {
|
||||
confidence: 1,
|
||||
result: ["line1", "line2\nline3", "line4"],
|
||||
},
|
||||
expectedResult: "line1\nline2\nline3\nline4",
|
||||
strategies: ["context", "dmp"],
|
||||
},
|
||||
{
|
||||
name: "should apply a change with indentation",
|
||||
hunk: {
|
||||
changes: [
|
||||
{ type: "context", content: " line1" },
|
||||
{ type: "add", content: " line2" },
|
||||
],
|
||||
} as Hunk,
|
||||
content: [" line1", " line3"],
|
||||
matchPosition: 0,
|
||||
expected: {
|
||||
confidence: 1,
|
||||
result: [" line1", " line2", " line3"],
|
||||
},
|
||||
expectedResult: " line1\n line2\n line3",
|
||||
strategies: ["context", "dmp"],
|
||||
},
|
||||
{
|
||||
name: "should apply a change with mixed indentation",
|
||||
hunk: {
|
||||
changes: [
|
||||
{ type: "context", content: "\tline1" },
|
||||
{ type: "add", content: " line2" },
|
||||
],
|
||||
} as Hunk,
|
||||
content: ["\tline1", " line3"],
|
||||
matchPosition: 0,
|
||||
expected: {
|
||||
confidence: 1,
|
||||
result: ["\tline1", " line2", " line3"],
|
||||
},
|
||||
expectedResult: "\tline1\n line2\n line3",
|
||||
strategies: ["context", "dmp"],
|
||||
},
|
||||
{
|
||||
name: "should apply a change with mixed indentation and multi-line",
|
||||
hunk: {
|
||||
changes: [
|
||||
{ type: "context", content: " line1" },
|
||||
{ type: "add", content: "\tline2\n line3" },
|
||||
],
|
||||
} as Hunk,
|
||||
content: [" line1", " line4"],
|
||||
matchPosition: 0,
|
||||
expected: {
|
||||
confidence: 1,
|
||||
result: [" line1", "\tline2\n line3", " line4"],
|
||||
},
|
||||
expectedResult: " line1\n\tline2\n line3\n line4",
|
||||
strategies: ["context", "dmp"],
|
||||
},
|
||||
{
|
||||
name: "should apply a complex change with mixed indentation and multi-line",
|
||||
hunk: {
|
||||
changes: [
|
||||
{ type: "context", content: " line1" },
|
||||
{ type: "remove", content: " line2" },
|
||||
{ type: "add", content: "\tline3\n line4" },
|
||||
{ type: "context", content: " line5" },
|
||||
],
|
||||
} as Hunk,
|
||||
content: [" line1", " line2", " line5", " line6"],
|
||||
matchPosition: 0,
|
||||
expected: {
|
||||
confidence: 1,
|
||||
result: [" line1", "\tline3\n line4", " line5", " line6"],
|
||||
},
|
||||
expectedResult: " line1\n\tline3\n line4\n line5\n line6",
|
||||
strategies: ["context", "dmp"],
|
||||
},
|
||||
{
|
||||
name: "should apply a complex change with mixed indentation and multi-line and context",
|
||||
hunk: {
|
||||
changes: [
|
||||
{ type: "context", content: " line1" },
|
||||
{ type: "remove", content: " line2" },
|
||||
{ type: "add", content: "\tline3\n line4" },
|
||||
{ type: "context", content: " line5" },
|
||||
{ type: "context", content: " line6" },
|
||||
],
|
||||
} as Hunk,
|
||||
content: [" line1", " line2", " line5", " line6", " line7"],
|
||||
matchPosition: 0,
|
||||
expected: {
|
||||
confidence: 1,
|
||||
result: [" line1", "\tline3\n line4", " line5", " line6", " line7"],
|
||||
},
|
||||
expectedResult: " line1\n\tline3\n line4\n line5\n line6\n line7",
|
||||
strategies: ["context", "dmp"],
|
||||
},
|
||||
{
|
||||
name: "should apply a complex change with mixed indentation and multi-line and context and a different match position",
|
||||
hunk: {
|
||||
changes: [
|
||||
{ type: "context", content: " line1" },
|
||||
{ type: "remove", content: " line2" },
|
||||
{ type: "add", content: "\tline3\n line4" },
|
||||
{ type: "context", content: " line5" },
|
||||
{ type: "context", content: " line6" },
|
||||
],
|
||||
} as Hunk,
|
||||
content: [" line0", " line1", " line2", " line5", " line6", " line7"],
|
||||
matchPosition: 1,
|
||||
expected: {
|
||||
confidence: 1,
|
||||
result: [" line0", " line1", "\tline3\n line4", " line5", " line6", " line7"],
|
||||
},
|
||||
expectedResult: " line0\n line1\n\tline3\n line4\n line5\n line6\n line7",
|
||||
strategies: ["context", "dmp"],
|
||||
},
|
||||
]
|
||||
|
||||
describe("applyContextMatching", () => {
|
||||
testCases.forEach(({ name, hunk, content, matchPosition, expected, strategies, expectedResult }) => {
|
||||
if (!strategies?.includes("context")) {
|
||||
return
|
||||
}
|
||||
it(name, () => {
|
||||
const result = applyContextMatching(hunk, content, matchPosition)
|
||||
expect(result.result.join("\n")).toEqual(expectedResult)
|
||||
expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence)
|
||||
expect(result.strategy).toBe("context")
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe("applyDMP", () => {
|
||||
testCases.forEach(({ name, hunk, content, matchPosition, expected, strategies, expectedResult }) => {
|
||||
if (!strategies?.includes("dmp")) {
|
||||
return
|
||||
}
|
||||
it(name, () => {
|
||||
const result = applyDMP(hunk, content, matchPosition)
|
||||
expect(result.result.join("\n")).toEqual(expectedResult)
|
||||
expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence)
|
||||
expect(result.strategy).toBe("dmp")
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe("applyGitFallback", () => {
|
||||
it("should successfully apply changes using git operations", async () => {
|
||||
const hunk = {
|
||||
changes: [
|
||||
{ type: "context", content: "line1", indent: "" },
|
||||
{ type: "remove", content: "line2", indent: "" },
|
||||
{ type: "add", content: "new line2", indent: "" },
|
||||
{ type: "context", content: "line3", indent: "" },
|
||||
],
|
||||
} as Hunk
|
||||
|
||||
const content = ["line1", "line2", "line3"]
|
||||
const result = await applyGitFallback(hunk, content)
|
||||
|
||||
expect(result.result.join("\n")).toEqual("line1\nnew line2\nline3")
|
||||
expect(result.confidence).toBe(1)
|
||||
expect(result.strategy).toBe("git-fallback")
|
||||
})
|
||||
|
||||
it("should return original content with 0 confidence when changes cannot be applied", async () => {
|
||||
const hunk = {
|
||||
changes: [
|
||||
{ type: "context", content: "nonexistent", indent: "" },
|
||||
{ type: "add", content: "new line", indent: "" },
|
||||
],
|
||||
} as Hunk
|
||||
|
||||
const content = ["line1", "line2", "line3"]
|
||||
const result = await applyGitFallback(hunk, content)
|
||||
|
||||
expect(result.result).toEqual(content)
|
||||
expect(result.confidence).toBe(0)
|
||||
expect(result.strategy).toBe("git-fallback")
|
||||
})
|
||||
})
|
||||
@@ -0,0 +1,262 @@
|
||||
import { findAnchorMatch, findExactMatch, findSimilarityMatch, findLevenshteinMatch } from "../search-strategies"
|
||||
|
||||
type SearchStrategy = (
|
||||
searchStr: string,
|
||||
content: string[],
|
||||
startIndex?: number,
|
||||
) => {
|
||||
index: number
|
||||
confidence: number
|
||||
strategy: string
|
||||
}
|
||||
|
||||
const testCases = [
|
||||
{
|
||||
name: "should return no match if the search string is not found",
|
||||
searchStr: "not found",
|
||||
content: ["line1", "line2", "line3"],
|
||||
expected: { index: -1, confidence: 0 },
|
||||
strategies: ["exact", "similarity", "levenshtein"],
|
||||
},
|
||||
{
|
||||
name: "should return a match if the search string is found",
|
||||
searchStr: "line2",
|
||||
content: ["line1", "line2", "line3"],
|
||||
expected: { index: 1, confidence: 1 },
|
||||
strategies: ["exact", "similarity", "levenshtein"],
|
||||
},
|
||||
{
|
||||
name: "should return a match with correct index when startIndex is provided",
|
||||
searchStr: "line3",
|
||||
content: ["line1", "line2", "line3", "line4", "line3"],
|
||||
startIndex: 3,
|
||||
expected: { index: 4, confidence: 1 },
|
||||
strategies: ["exact", "similarity", "levenshtein"],
|
||||
},
|
||||
{
|
||||
name: "should return a match even if there are more lines in content",
|
||||
searchStr: "line2",
|
||||
content: ["line1", "line2", "line3", "line4", "line5"],
|
||||
expected: { index: 1, confidence: 1 },
|
||||
strategies: ["exact", "similarity", "levenshtein"],
|
||||
},
|
||||
{
|
||||
name: "should return a match even if the search string is at the beginning of the content",
|
||||
searchStr: "line1",
|
||||
content: ["line1", "line2", "line3"],
|
||||
expected: { index: 0, confidence: 1 },
|
||||
strategies: ["exact", "similarity", "levenshtein"],
|
||||
},
|
||||
{
|
||||
name: "should return a match even if the search string is at the end of the content",
|
||||
searchStr: "line3",
|
||||
content: ["line1", "line2", "line3"],
|
||||
expected: { index: 2, confidence: 1 },
|
||||
strategies: ["exact", "similarity", "levenshtein"],
|
||||
},
|
||||
{
|
||||
name: "should return a match for a multi-line search string",
|
||||
searchStr: "line2\nline3",
|
||||
content: ["line1", "line2", "line3", "line4"],
|
||||
expected: { index: 1, confidence: 1 },
|
||||
strategies: ["exact", "similarity", "levenshtein"],
|
||||
},
|
||||
{
|
||||
name: "should return no match if a multi-line search string is not found",
|
||||
searchStr: "line2\nline4",
|
||||
content: ["line1", "line2", "line3", "line4"],
|
||||
expected: { index: -1, confidence: 0 },
|
||||
strategies: ["exact", "similarity"],
|
||||
},
|
||||
{
|
||||
name: "should return a match with indentation",
|
||||
searchStr: " line2",
|
||||
content: ["line1", " line2", "line3"],
|
||||
expected: { index: 1, confidence: 1 },
|
||||
strategies: ["exact", "similarity", "levenshtein"],
|
||||
},
|
||||
{
|
||||
name: "should return a match with more complex indentation",
|
||||
searchStr: " line3",
|
||||
content: [" line1", " line2", " line3", " line4"],
|
||||
expected: { index: 2, confidence: 1 },
|
||||
strategies: ["exact", "similarity", "levenshtein"],
|
||||
},
|
||||
{
|
||||
name: "should return a match with mixed indentation",
|
||||
searchStr: "\tline2",
|
||||
content: [" line1", "\tline2", " line3"],
|
||||
expected: { index: 1, confidence: 1 },
|
||||
strategies: ["exact", "similarity", "levenshtein"],
|
||||
},
|
||||
{
|
||||
name: "should return a match with mixed indentation and multi-line",
|
||||
searchStr: " line2\n\tline3",
|
||||
content: ["line1", " line2", "\tline3", " line4"],
|
||||
expected: { index: 1, confidence: 1 },
|
||||
strategies: ["exact", "similarity", "levenshtein"],
|
||||
},
|
||||
{
|
||||
name: "should return no match if mixed indentation and multi-line is not found",
|
||||
searchStr: " line2\n line4",
|
||||
content: ["line1", " line2", "\tline3", " line4"],
|
||||
expected: { index: -1, confidence: 0 },
|
||||
strategies: ["exact", "similarity"],
|
||||
},
|
||||
{
|
||||
name: "should return a match with leading and trailing spaces",
|
||||
searchStr: " line2 ",
|
||||
content: ["line1", " line2 ", "line3"],
|
||||
expected: { index: 1, confidence: 1 },
|
||||
strategies: ["exact", "similarity", "levenshtein"],
|
||||
},
|
||||
{
|
||||
name: "should return a match with leading and trailing tabs",
|
||||
searchStr: "\tline2\t",
|
||||
content: ["line1", "\tline2\t", "line3"],
|
||||
expected: { index: 1, confidence: 1 },
|
||||
strategies: ["exact", "similarity", "levenshtein"],
|
||||
},
|
||||
{
|
||||
name: "should return a match with mixed leading and trailing spaces and tabs",
|
||||
searchStr: " \tline2\t ",
|
||||
content: ["line1", " \tline2\t ", "line3"],
|
||||
expected: { index: 1, confidence: 1 },
|
||||
strategies: ["exact", "similarity", "levenshtein"],
|
||||
},
|
||||
{
|
||||
name: "should return a match with mixed leading and trailing spaces and tabs and multi-line",
|
||||
searchStr: " \tline2\t \n line3 ",
|
||||
content: ["line1", " \tline2\t ", " line3 ", "line4"],
|
||||
expected: { index: 1, confidence: 1 },
|
||||
strategies: ["exact", "similarity", "levenshtein"],
|
||||
},
|
||||
{
|
||||
name: "should return no match if mixed leading and trailing spaces and tabs and multi-line is not found",
|
||||
searchStr: " \tline2\t \n line4 ",
|
||||
content: ["line1", " \tline2\t ", " line3 ", "line4"],
|
||||
expected: { index: -1, confidence: 0 },
|
||||
strategies: ["exact", "similarity"],
|
||||
},
|
||||
]
|
||||
|
||||
describe("findExactMatch", () => {
|
||||
testCases.forEach(({ name, searchStr, content, startIndex, expected, strategies }) => {
|
||||
if (!strategies?.includes("exact")) {
|
||||
return
|
||||
}
|
||||
it(name, () => {
|
||||
const result = findExactMatch(searchStr, content, startIndex)
|
||||
expect(result.index).toBe(expected.index)
|
||||
expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence)
|
||||
expect(result.strategy).toMatch(/exact(-overlapping)?/)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe("findAnchorMatch", () => {
|
||||
const anchorTestCases = [
|
||||
{
|
||||
name: "should return no match if no anchors are found",
|
||||
searchStr: " \n \n ",
|
||||
content: ["line1", "line2", "line3"],
|
||||
expected: { index: -1, confidence: 0 },
|
||||
},
|
||||
{
|
||||
name: "should return no match if anchor positions cannot be validated",
|
||||
searchStr: "unique line\ncontext line 1\ncontext line 2",
|
||||
content: [
|
||||
"different line 1",
|
||||
"different line 2",
|
||||
"different line 3",
|
||||
"another unique line",
|
||||
"context line 1",
|
||||
"context line 2",
|
||||
],
|
||||
expected: { index: -1, confidence: 0 },
|
||||
},
|
||||
{
|
||||
name: "should return a match if anchor positions can be validated",
|
||||
searchStr: "unique line\ncontext line 1\ncontext line 2",
|
||||
content: ["line1", "line2", "unique line", "context line 1", "context line 2", "line 6"],
|
||||
expected: { index: 2, confidence: 1 },
|
||||
},
|
||||
{
|
||||
name: "should return a match with correct index when startIndex is provided",
|
||||
searchStr: "unique line\ncontext line 1\ncontext line 2",
|
||||
content: ["line1", "line2", "line3", "unique line", "context line 1", "context line 2", "line 7"],
|
||||
startIndex: 3,
|
||||
expected: { index: 3, confidence: 1 },
|
||||
},
|
||||
{
|
||||
name: "should return a match even if there are more lines in content",
|
||||
searchStr: "unique line\ncontext line 1\ncontext line 2",
|
||||
content: [
|
||||
"line1",
|
||||
"line2",
|
||||
"unique line",
|
||||
"context line 1",
|
||||
"context line 2",
|
||||
"line 6",
|
||||
"extra line 1",
|
||||
"extra line 2",
|
||||
],
|
||||
expected: { index: 2, confidence: 1 },
|
||||
},
|
||||
{
|
||||
name: "should return a match even if the anchor is at the beginning of the content",
|
||||
searchStr: "unique line\ncontext line 1\ncontext line 2",
|
||||
content: ["unique line", "context line 1", "context line 2", "line 6"],
|
||||
expected: { index: 0, confidence: 1 },
|
||||
},
|
||||
{
|
||||
name: "should return a match even if the anchor is at the end of the content",
|
||||
searchStr: "unique line\ncontext line 1\ncontext line 2",
|
||||
content: ["line1", "line2", "unique line", "context line 1", "context line 2"],
|
||||
expected: { index: 2, confidence: 1 },
|
||||
},
|
||||
{
|
||||
name: "should return no match if no valid anchor is found",
|
||||
searchStr: "non-unique line\ncontext line 1\ncontext line 2",
|
||||
content: ["line1", "line2", "non-unique line", "context line 1", "context line 2", "non-unique line"],
|
||||
expected: { index: -1, confidence: 0 },
|
||||
},
|
||||
]
|
||||
|
||||
anchorTestCases.forEach(({ name, searchStr, content, startIndex, expected }) => {
|
||||
it(name, () => {
|
||||
const result = findAnchorMatch(searchStr, content, startIndex)
|
||||
expect(result.index).toBe(expected.index)
|
||||
expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence)
|
||||
expect(result.strategy).toBe("anchor")
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe("findSimilarityMatch", () => {
|
||||
testCases.forEach(({ name, searchStr, content, startIndex, expected, strategies }) => {
|
||||
if (!strategies?.includes("similarity")) {
|
||||
return
|
||||
}
|
||||
it(name, () => {
|
||||
const result = findSimilarityMatch(searchStr, content, startIndex)
|
||||
expect(result.index).toBe(expected.index)
|
||||
expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence)
|
||||
expect(result.strategy).toBe("similarity")
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe("findLevenshteinMatch", () => {
|
||||
testCases.forEach(({ name, searchStr, content, startIndex, expected, strategies }) => {
|
||||
if (!strategies?.includes("levenshtein")) {
|
||||
return
|
||||
}
|
||||
it(name, () => {
|
||||
const result = findLevenshteinMatch(searchStr, content, startIndex)
|
||||
expect(result.index).toBe(expected.index)
|
||||
expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence)
|
||||
expect(result.strategy).toBe("levenshtein")
|
||||
})
|
||||
})
|
||||
})
|
||||
297
src/core/diff/strategies/new-unified/edit-strategies.ts
Normal file
297
src/core/diff/strategies/new-unified/edit-strategies.ts
Normal file
@@ -0,0 +1,297 @@
|
||||
import { diff_match_patch } from "diff-match-patch"
|
||||
import { EditResult, Hunk } from "./types"
|
||||
import { getDMPSimilarity, validateEditResult } from "./search-strategies"
|
||||
import * as path from "path"
|
||||
import simpleGit, { SimpleGit } from "simple-git"
|
||||
import * as tmp from "tmp"
|
||||
import * as fs from "fs"
|
||||
|
||||
// Helper function to infer indentation - simplified version
|
||||
function inferIndentation(line: string, contextLines: string[], previousIndent: string = ""): string {
|
||||
// If the line has explicit indentation in the change, use it exactly
|
||||
const lineMatch = line.match(/^(\s+)/)
|
||||
if (lineMatch) {
|
||||
return lineMatch[1]
|
||||
}
|
||||
|
||||
// If we have context lines, use the indentation from the first context line
|
||||
const contextLine = contextLines[0]
|
||||
if (contextLine) {
|
||||
const contextMatch = contextLine.match(/^(\s+)/)
|
||||
if (contextMatch) {
|
||||
return contextMatch[1]
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to previous indent
|
||||
return previousIndent
|
||||
}
|
||||
|
||||
// Context matching edit strategy
|
||||
export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number): EditResult {
|
||||
if (matchPosition === -1) {
|
||||
return { confidence: 0, result: content, strategy: "context" }
|
||||
}
|
||||
|
||||
const newResult = [...content.slice(0, matchPosition)]
|
||||
let sourceIndex = matchPosition
|
||||
|
||||
for (const change of hunk.changes) {
|
||||
if (change.type === "context") {
|
||||
// Use the original line from content if available
|
||||
if (sourceIndex < content.length) {
|
||||
newResult.push(content[sourceIndex])
|
||||
} else {
|
||||
const line = change.indent ? change.indent + change.content : change.content
|
||||
newResult.push(line)
|
||||
}
|
||||
sourceIndex++
|
||||
} else if (change.type === "add") {
|
||||
// Use exactly the indentation from the change
|
||||
const baseIndent = change.indent || ""
|
||||
|
||||
// Handle multi-line additions
|
||||
const lines = change.content.split("\n").map((line) => {
|
||||
// If the line already has indentation, preserve it relative to the base indent
|
||||
const lineIndentMatch = line.match(/^(\s*)(.*)/)
|
||||
if (lineIndentMatch) {
|
||||
const [, lineIndent, content] = lineIndentMatch
|
||||
// Only add base indent if the line doesn't already have it
|
||||
return lineIndent ? line : baseIndent + content
|
||||
}
|
||||
return baseIndent + line
|
||||
})
|
||||
|
||||
newResult.push(...lines)
|
||||
} else if (change.type === "remove") {
|
||||
// Handle multi-line removes by incrementing sourceIndex for each line
|
||||
const removedLines = change.content.split("\n").length
|
||||
sourceIndex += removedLines
|
||||
}
|
||||
}
|
||||
|
||||
// Append remaining content
|
||||
newResult.push(...content.slice(sourceIndex))
|
||||
|
||||
// Calculate confidence based on the actual changes
|
||||
const afterText = newResult.slice(matchPosition, newResult.length - (content.length - sourceIndex)).join("\n")
|
||||
|
||||
const confidence = validateEditResult(hunk, afterText)
|
||||
|
||||
return {
|
||||
confidence,
|
||||
result: newResult,
|
||||
strategy: "context",
|
||||
}
|
||||
}
|
||||
|
||||
// DMP edit strategy
|
||||
export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): EditResult {
|
||||
if (matchPosition === -1) {
|
||||
return { confidence: 0, result: content, strategy: "dmp" }
|
||||
}
|
||||
|
||||
const dmp = new diff_match_patch()
|
||||
|
||||
// Calculate total lines in before block accounting for multi-line content
|
||||
const beforeLineCount = hunk.changes
|
||||
.filter((change) => change.type === "context" || change.type === "remove")
|
||||
.reduce((count, change) => count + change.content.split("\n").length, 0)
|
||||
|
||||
// Build BEFORE block (context + removals)
|
||||
const beforeLines = hunk.changes
|
||||
.filter((change) => change.type === "context" || change.type === "remove")
|
||||
.map((change) => {
|
||||
if (change.originalLine) {
|
||||
return change.originalLine
|
||||
}
|
||||
return change.indent ? change.indent + change.content : change.content
|
||||
})
|
||||
|
||||
// Build AFTER block (context + additions)
|
||||
const afterLines = hunk.changes
|
||||
.filter((change) => change.type === "context" || change.type === "add")
|
||||
.map((change) => {
|
||||
if (change.originalLine) {
|
||||
return change.originalLine
|
||||
}
|
||||
return change.indent ? change.indent + change.content : change.content
|
||||
})
|
||||
|
||||
// Convert to text with proper line endings
|
||||
const beforeText = beforeLines.join("\n")
|
||||
const afterText = afterLines.join("\n")
|
||||
|
||||
// Create and apply patch
|
||||
const patch = dmp.patch_make(beforeText, afterText)
|
||||
const targetText = content.slice(matchPosition, matchPosition + beforeLineCount).join("\n")
|
||||
const [patchedText] = dmp.patch_apply(patch, targetText)
|
||||
|
||||
// Split result and preserve line endings
|
||||
const patchedLines = patchedText.split("\n")
|
||||
|
||||
// Construct final result
|
||||
const newResult = [
|
||||
...content.slice(0, matchPosition),
|
||||
...patchedLines,
|
||||
...content.slice(matchPosition + beforeLineCount),
|
||||
]
|
||||
|
||||
const confidence = validateEditResult(hunk, patchedText)
|
||||
|
||||
return {
|
||||
confidence,
|
||||
result: newResult,
|
||||
strategy: "dmp",
|
||||
}
|
||||
}
|
||||
|
||||
// Git fallback strategy that works with full content
|
||||
// Git fallback strategy that works with full content.
//
// Creates a throwaway git repo in a temp dir and lets git's merge machinery
// apply the hunk via cherry-pick, trying two commit orderings:
//   Strategy 1: commit original -> search -> replace, checkout original,
//               cherry-pick the replace commit onto it.
//   Strategy 2: commit search -> replace, checkout search, commit original
//               on top, cherry-pick the replace commit onto that.
// Either strategy succeeding returns the picked file content with
// confidence 1; a merge conflict or any git error falls through. If both
// fail, returns the original content with confidence 0. The temp dir is
// always cleaned up in the finally block.
export async function applyGitFallback(hunk: Hunk, content: string[]): Promise<EditResult> {
	let tmpDir: tmp.DirResult | undefined

	try {
		// unsafeCleanup lets removeCallback delete a non-empty directory.
		tmpDir = tmp.dirSync({ unsafeCleanup: true })
		const git: SimpleGit = simpleGit(tmpDir.name)

		await git.init()
		// Identity is required for commits; values are throwaway.
		await git.addConfig("user.name", "Temp")
		await git.addConfig("user.email", "temp@example.com")

		const filePath = path.join(tmpDir.name, "file.txt")

		// Before-block: the text the hunk expects to find (context + removals).
		const searchLines = hunk.changes
			.filter((change) => change.type === "context" || change.type === "remove")
			.map((change) => change.originalLine || change.indent + change.content)

		// After-block: the text the hunk produces (context + additions).
		const replaceLines = hunk.changes
			.filter((change) => change.type === "context" || change.type === "add")
			.map((change) => change.originalLine || change.indent + change.content)

		const searchText = searchLines.join("\n")
		const replaceText = replaceLines.join("\n")
		const originalText = content.join("\n")

		// Strategy 1: original -> search -> replace, then cherry-pick replace
		// onto original.
		try {
			fs.writeFileSync(filePath, originalText)
			await git.add("file.txt")
			const originalCommit = await git.commit("original")
			console.log("Strategy 1 - Original commit:", originalCommit.commit)

			fs.writeFileSync(filePath, searchText)
			await git.add("file.txt")
			const searchCommit1 = await git.commit("search")
			console.log("Strategy 1 - Search commit:", searchCommit1.commit)

			fs.writeFileSync(filePath, replaceText)
			await git.add("file.txt")
			const replaceCommit = await git.commit("replace")
			console.log("Strategy 1 - Replace commit:", replaceCommit.commit)

			console.log("Strategy 1 - Attempting checkout of:", originalCommit.commit)
			await git.raw(["checkout", originalCommit.commit])
			try {
				console.log("Strategy 1 - Attempting cherry-pick of:", replaceCommit.commit)
				// --minimal asks git to spend extra time producing the
				// smallest possible diff for the pick.
				await git.raw(["cherry-pick", "--minimal", replaceCommit.commit])

				// The cherry-pick landed cleanly; read back the merged file.
				const newText = fs.readFileSync(filePath, "utf-8")
				const newLines = newText.split("\n")
				return {
					confidence: 1,
					result: newLines,
					strategy: "git-fallback",
				}
			} catch (cherryPickError) {
				// Conflict: fall through to strategy 2.
				console.error("Strategy 1 failed with merge conflict")
			}
		} catch (error) {
			console.error("Strategy 1 failed:", error)
		}

		// Strategy 2: re-init, then search -> replace, checkout search, commit
		// original on top, and cherry-pick replace onto it.
		try {
			await git.init()
			await git.addConfig("user.name", "Temp")
			await git.addConfig("user.email", "temp@example.com")

			fs.writeFileSync(filePath, searchText)
			await git.add("file.txt")
			const searchCommit = await git.commit("search")
			// NOTE(review): commit hashes are stripped of a possible "HEAD "
			// prefix before use — presumably a quirk of simple-git's commit
			// summary on a detached HEAD; confirm against simple-git docs.
			const searchHash = searchCommit.commit.replace(/^HEAD /, "")
			console.log("Strategy 2 - Search commit:", searchHash)

			fs.writeFileSync(filePath, replaceText)
			await git.add("file.txt")
			const replaceCommit = await git.commit("replace")
			const replaceHash = replaceCommit.commit.replace(/^HEAD /, "")
			console.log("Strategy 2 - Replace commit:", replaceHash)

			console.log("Strategy 2 - Attempting checkout of:", searchHash)
			await git.raw(["checkout", searchHash])
			fs.writeFileSync(filePath, originalText)
			await git.add("file.txt")
			const originalCommit2 = await git.commit("original")
			console.log("Strategy 2 - Original commit:", originalCommit2.commit)

			try {
				console.log("Strategy 2 - Attempting cherry-pick of:", replaceHash)
				await git.raw(["cherry-pick", "--minimal", replaceHash])

				const newText = fs.readFileSync(filePath, "utf-8")
				const newLines = newText.split("\n")
				return {
					confidence: 1,
					result: newLines,
					strategy: "git-fallback",
				}
			} catch (cherryPickError) {
				console.error("Strategy 2 failed with merge conflict")
			}
		} catch (error) {
			console.error("Strategy 2 failed:", error)
		}

		// Both strategies failed: hand back the untouched content.
		console.error("Git fallback failed")
		return { confidence: 0, result: content, strategy: "git-fallback" }
	} catch (error) {
		// Setup itself failed (e.g. temp dir or git init).
		console.error("Git fallback strategy failed:", error)
		return { confidence: 0, result: content, strategy: "git-fallback" }
	} finally {
		if (tmpDir) {
			tmpDir.removeCallback()
		}
	}
}
|
||||
|
||||
// Main edit function that tries strategies sequentially
|
||||
export async function applyEdit(
|
||||
hunk: Hunk,
|
||||
content: string[],
|
||||
matchPosition: number,
|
||||
confidence: number,
|
||||
confidenceThreshold: number = 0.97,
|
||||
): Promise<EditResult> {
|
||||
// Don't attempt regular edits if confidence is too low
|
||||
if (confidence < confidenceThreshold) {
|
||||
console.log(
|
||||
`Search confidence (${confidence}) below minimum threshold (${confidenceThreshold}), trying git fallback...`,
|
||||
)
|
||||
return applyGitFallback(hunk, content)
|
||||
}
|
||||
|
||||
// Try each strategy in sequence until one succeeds
|
||||
const strategies = [
|
||||
{ name: "dmp", apply: () => applyDMP(hunk, content, matchPosition) },
|
||||
{ name: "context", apply: () => applyContextMatching(hunk, content, matchPosition) },
|
||||
{ name: "git-fallback", apply: () => applyGitFallback(hunk, content) },
|
||||
]
|
||||
|
||||
// Try strategies sequentially until one succeeds
|
||||
for (const strategy of strategies) {
|
||||
const result = await strategy.apply()
|
||||
if (result.confidence >= confidenceThreshold) {
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
return { confidence: 0, result: content, strategy: "none" }
|
||||
}
|
||||
350
src/core/diff/strategies/new-unified/index.ts
Normal file
350
src/core/diff/strategies/new-unified/index.ts
Normal file
@@ -0,0 +1,350 @@
|
||||
import { Diff, Hunk, Change } from "./types"
|
||||
import { findBestMatch, prepareSearchString } from "./search-strategies"
|
||||
import { applyEdit } from "./edit-strategies"
|
||||
import { DiffResult, DiffStrategy } from "../../types"
|
||||
|
||||
export class NewUnifiedDiffStrategy implements DiffStrategy {
|
||||
private readonly confidenceThreshold: number
|
||||
|
||||
	/**
	 * @param confidenceThreshold Minimum match confidence required when
	 * applying hunks. Values below 0.8 are clamped up to 0.8, so the
	 * effective threshold is always at least 0.8; the default of 1 demands
	 * exact matches.
	 */
	constructor(confidenceThreshold: number = 1) {
		this.confidenceThreshold = Math.max(confidenceThreshold, 0.8)
	}
|
||||
|
||||
private parseUnifiedDiff(diff: string): Diff {
|
||||
const MAX_CONTEXT_LINES = 6 // Number of context lines to keep before/after changes
|
||||
const lines = diff.split("\n")
|
||||
const hunks: Hunk[] = []
|
||||
let currentHunk: Hunk | null = null
|
||||
|
||||
let i = 0
|
||||
while (i < lines.length && !lines[i].startsWith("@@")) {
|
||||
i++
|
||||
}
|
||||
|
||||
for (; i < lines.length; i++) {
|
||||
const line = lines[i]
|
||||
|
||||
if (line.startsWith("@@")) {
|
||||
if (
|
||||
currentHunk &&
|
||||
currentHunk.changes.length > 0 &&
|
||||
currentHunk.changes.some((change) => change.type === "add" || change.type === "remove")
|
||||
) {
|
||||
const changes = currentHunk.changes
|
||||
let startIdx = 0
|
||||
let endIdx = changes.length - 1
|
||||
|
||||
for (let j = 0; j < changes.length; j++) {
|
||||
if (changes[j].type !== "context") {
|
||||
startIdx = Math.max(0, j - MAX_CONTEXT_LINES)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
for (let j = changes.length - 1; j >= 0; j--) {
|
||||
if (changes[j].type !== "context") {
|
||||
endIdx = Math.min(changes.length - 1, j + MAX_CONTEXT_LINES)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
currentHunk.changes = changes.slice(startIdx, endIdx + 1)
|
||||
hunks.push(currentHunk)
|
||||
}
|
||||
currentHunk = { changes: [] }
|
||||
continue
|
||||
}
|
||||
|
||||
if (!currentHunk) {
|
||||
continue
|
||||
}
|
||||
|
||||
const content = line.slice(1)
|
||||
const indentMatch = content.match(/^(\s*)/)
|
||||
const indent = indentMatch ? indentMatch[0] : ""
|
||||
const trimmedContent = content.slice(indent.length)
|
||||
|
||||
if (line.startsWith(" ")) {
|
||||
currentHunk.changes.push({
|
||||
type: "context",
|
||||
content: trimmedContent,
|
||||
indent,
|
||||
originalLine: content,
|
||||
})
|
||||
} else if (line.startsWith("+")) {
|
||||
currentHunk.changes.push({
|
||||
type: "add",
|
||||
content: trimmedContent,
|
||||
indent,
|
||||
originalLine: content,
|
||||
})
|
||||
} else if (line.startsWith("-")) {
|
||||
currentHunk.changes.push({
|
||||
type: "remove",
|
||||
content: trimmedContent,
|
||||
indent,
|
||||
originalLine: content,
|
||||
})
|
||||
} else {
|
||||
const finalContent = trimmedContent ? " " + trimmedContent : " "
|
||||
currentHunk.changes.push({
|
||||
type: "context",
|
||||
content: finalContent,
|
||||
indent,
|
||||
originalLine: content,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
currentHunk &&
|
||||
currentHunk.changes.length > 0 &&
|
||||
currentHunk.changes.some((change) => change.type === "add" || change.type === "remove")
|
||||
) {
|
||||
hunks.push(currentHunk)
|
||||
}
|
||||
|
||||
return { hunks }
|
||||
}
|
||||
|
||||
getToolDescription(args: { cwd: string; toolOptions?: { [key: string]: string } }): string {
|
||||
return `# apply_diff Tool - Generate Precise Code Changes
|
||||
|
||||
Generate a unified diff that can be cleanly applied to modify code files.
|
||||
|
||||
## Step-by-Step Instructions:
|
||||
|
||||
1. Start with file headers:
|
||||
- First line: "--- {original_file_path}"
|
||||
- Second line: "+++ {new_file_path}"
|
||||
|
||||
2. For each change section:
|
||||
- Begin with "@@ ... @@" separator line without line numbers
|
||||
- Include 2-3 lines of context before and after changes
|
||||
- Mark removed lines with "-"
|
||||
- Mark added lines with "+"
|
||||
- Preserve exact indentation
|
||||
|
||||
3. Group related changes:
|
||||
- Keep related modifications in the same hunk
|
||||
- Start new hunks for logically separate changes
|
||||
- When modifying functions/methods, include the entire block
|
||||
|
||||
## Requirements:
|
||||
|
||||
1. MUST include exact indentation
|
||||
2. MUST include sufficient context for unique matching
|
||||
3. MUST group related changes together
|
||||
4. MUST use proper unified diff format
|
||||
5. MUST NOT include timestamps in file headers
|
||||
6. MUST NOT include line numbers in the @@ header
|
||||
|
||||
## Examples:
|
||||
|
||||
✅ Good diff (follows all requirements):
|
||||
\`\`\`diff
|
||||
--- src/utils.ts
|
||||
+++ src/utils.ts
|
||||
@@ ... @@
|
||||
def calculate_total(items):
|
||||
- total = 0
|
||||
- for item in items:
|
||||
- total += item.price
|
||||
+ return sum(item.price for item in items)
|
||||
\`\`\`
|
||||
|
||||
❌ Bad diff (violates requirements #1 and #2):
|
||||
\`\`\`diff
|
||||
--- src/utils.ts
|
||||
+++ src/utils.ts
|
||||
@@ ... @@
|
||||
-total = 0
|
||||
-for item in items:
|
||||
+return sum(item.price for item in items)
|
||||
\`\`\`
|
||||
|
||||
Parameters:
|
||||
- path: (required) File path relative to ${args.cwd}
|
||||
- diff: (required) Unified diff content in unified format to apply to the file.
|
||||
|
||||
Usage:
|
||||
<apply_diff>
|
||||
<path>path/to/file.ext</path>
|
||||
<diff>
|
||||
Your diff here
|
||||
</diff>
|
||||
</apply_diff>`
|
||||
}
|
||||
|
||||
// Helper function to split a hunk into smaller hunks based on contiguous changes
|
||||
private splitHunk(hunk: Hunk): Hunk[] {
|
||||
const result: Hunk[] = []
|
||||
let currentHunk: Hunk | null = null
|
||||
let contextBefore: Change[] = []
|
||||
let contextAfter: Change[] = []
|
||||
const MAX_CONTEXT_LINES = 3 // Keep 3 lines of context before/after changes
|
||||
|
||||
for (let i = 0; i < hunk.changes.length; i++) {
|
||||
const change = hunk.changes[i]
|
||||
|
||||
if (change.type === "context") {
|
||||
if (!currentHunk) {
|
||||
contextBefore.push(change)
|
||||
if (contextBefore.length > MAX_CONTEXT_LINES) {
|
||||
contextBefore.shift()
|
||||
}
|
||||
} else {
|
||||
contextAfter.push(change)
|
||||
if (contextAfter.length > MAX_CONTEXT_LINES) {
|
||||
// We've collected enough context after changes, create a new hunk
|
||||
currentHunk.changes.push(...contextAfter)
|
||||
result.push(currentHunk)
|
||||
currentHunk = null
|
||||
// Keep the last few context lines for the next hunk
|
||||
contextBefore = contextAfter
|
||||
contextAfter = []
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (!currentHunk) {
|
||||
currentHunk = { changes: [...contextBefore] }
|
||||
contextAfter = []
|
||||
} else if (contextAfter.length > 0) {
|
||||
// Add accumulated context to current hunk
|
||||
currentHunk.changes.push(...contextAfter)
|
||||
contextAfter = []
|
||||
}
|
||||
currentHunk.changes.push(change)
|
||||
}
|
||||
}
|
||||
|
||||
// Add any remaining changes
|
||||
if (currentHunk) {
|
||||
if (contextAfter.length > 0) {
|
||||
currentHunk.changes.push(...contextAfter)
|
||||
}
|
||||
result.push(currentHunk)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
async applyDiff(
|
||||
originalContent: string,
|
||||
diffContent: string,
|
||||
startLine?: number,
|
||||
endLine?: number,
|
||||
): Promise<DiffResult> {
|
||||
const parsedDiff = this.parseUnifiedDiff(diffContent)
|
||||
const originalLines = originalContent.split("\n")
|
||||
let result = [...originalLines]
|
||||
|
||||
if (!parsedDiff.hunks.length) {
|
||||
return {
|
||||
success: false,
|
||||
error: "No hunks found in diff. Please ensure your diff includes actual changes and follows the unified diff format.",
|
||||
}
|
||||
}
|
||||
|
||||
for (const hunk of parsedDiff.hunks) {
|
||||
const contextStr = prepareSearchString(hunk.changes)
|
||||
const {
|
||||
index: matchPosition,
|
||||
confidence,
|
||||
strategy,
|
||||
} = findBestMatch(contextStr, result, 0, this.confidenceThreshold)
|
||||
|
||||
if (confidence < this.confidenceThreshold) {
|
||||
console.log("Full hunk application failed, trying sub-hunks strategy")
|
||||
// Try splitting the hunk into smaller hunks
|
||||
const subHunks = this.splitHunk(hunk)
|
||||
let subHunkSuccess = true
|
||||
let subHunkResult = [...result]
|
||||
|
||||
for (const subHunk of subHunks) {
|
||||
const subContextStr = prepareSearchString(subHunk.changes)
|
||||
const subSearchResult = findBestMatch(subContextStr, subHunkResult, 0, this.confidenceThreshold)
|
||||
|
||||
if (subSearchResult.confidence >= this.confidenceThreshold) {
|
||||
const subEditResult = await applyEdit(
|
||||
subHunk,
|
||||
subHunkResult,
|
||||
subSearchResult.index,
|
||||
subSearchResult.confidence,
|
||||
this.confidenceThreshold,
|
||||
)
|
||||
if (subEditResult.confidence >= this.confidenceThreshold) {
|
||||
subHunkResult = subEditResult.result
|
||||
continue
|
||||
}
|
||||
}
|
||||
subHunkSuccess = false
|
||||
break
|
||||
}
|
||||
|
||||
if (subHunkSuccess) {
|
||||
result = subHunkResult
|
||||
continue
|
||||
}
|
||||
|
||||
// If sub-hunks also failed, return the original error
|
||||
const contextLines = hunk.changes.filter((c) => c.type === "context").length
|
||||
const totalLines = hunk.changes.length
|
||||
const contextRatio = contextLines / totalLines
|
||||
|
||||
let errorMsg = `Failed to find a matching location in the file (${Math.floor(
|
||||
confidence * 100,
|
||||
)}% confidence, needs ${Math.floor(this.confidenceThreshold * 100)}%)\n\n`
|
||||
errorMsg += "Debug Info:\n"
|
||||
errorMsg += `- Search Strategy Used: ${strategy}\n`
|
||||
errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor(
|
||||
contextRatio * 100,
|
||||
)}%)\n`
|
||||
errorMsg += `- Attempted to split into ${subHunks.length} sub-hunks but still failed\n`
|
||||
|
||||
if (contextRatio < 0.2) {
|
||||
errorMsg += "\nPossible Issues:\n"
|
||||
errorMsg += "- Not enough context lines to uniquely identify the location\n"
|
||||
errorMsg += "- Add a few more lines of unchanged code around your changes\n"
|
||||
} else if (contextRatio > 0.5) {
|
||||
errorMsg += "\nPossible Issues:\n"
|
||||
errorMsg += "- Too many context lines may reduce search accuracy\n"
|
||||
errorMsg += "- Try to keep only 2-3 lines of context before and after changes\n"
|
||||
} else {
|
||||
errorMsg += "\nPossible Issues:\n"
|
||||
errorMsg += "- The diff may be targeting a different version of the file\n"
|
||||
errorMsg +=
|
||||
"- There may be too many changes in a single hunk, try splitting the changes into multiple hunks\n"
|
||||
}
|
||||
|
||||
if (startLine && endLine) {
|
||||
errorMsg += `\nSearch Range: lines ${startLine}-${endLine}\n`
|
||||
}
|
||||
|
||||
return { success: false, error: errorMsg }
|
||||
}
|
||||
|
||||
const editResult = await applyEdit(hunk, result, matchPosition, confidence, this.confidenceThreshold)
|
||||
if (editResult.confidence >= this.confidenceThreshold) {
|
||||
result = editResult.result
|
||||
} else {
|
||||
// Edit failure - likely due to content mismatch
|
||||
let errorMsg = `Failed to apply the edit using ${editResult.strategy} strategy (${Math.floor(
|
||||
editResult.confidence * 100,
|
||||
)}% confidence)\n\n`
|
||||
errorMsg += "Debug Info:\n"
|
||||
errorMsg += "- The location was found but the content didn't match exactly\n"
|
||||
errorMsg += "- This usually means the file has been modified since the diff was created\n"
|
||||
errorMsg += "- Or the diff may be targeting a different version of the file\n"
|
||||
errorMsg += "\nPossible Solutions:\n"
|
||||
errorMsg += "1. Refresh your view of the file and create a new diff\n"
|
||||
errorMsg += "2. Double-check that the removed lines (-) match the current file content\n"
|
||||
errorMsg += "3. Ensure your diff targets the correct version of the file"
|
||||
|
||||
return { success: false, error: errorMsg }
|
||||
}
|
||||
}
|
||||
|
||||
return { success: true, content: result.join("\n") }
|
||||
}
|
||||
}
|
||||
408
src/core/diff/strategies/new-unified/search-strategies.ts
Normal file
408
src/core/diff/strategies/new-unified/search-strategies.ts
Normal file
@@ -0,0 +1,408 @@
|
||||
import { compareTwoStrings } from "string-similarity"
|
||||
import { closest } from "fastest-levenshtein"
|
||||
import { diff_match_patch } from "diff-match-patch"
|
||||
import { Change, Hunk } from "./types"
|
||||
|
||||
/** Result of a single search-strategy attempt. */
export type SearchResult = {
	// Line index in the content where the match starts, or -1 when not found.
	index: number
	// Confidence score; 0 means no usable match.
	confidence: number
	// Name of the strategy that produced this result (e.g. "exact", "anchor").
	strategy: string
}

// Files longer than this get a relaxed matching threshold (see getAdaptiveThreshold).
const LARGE_FILE_THRESHOLD = 1000 // lines
// Confidence boost factor applied when the search content is rare in the file.
const UNIQUE_CONTENT_BOOST = 0.05
const DEFAULT_OVERLAP_SIZE = 3 // lines of overlap between windows
const MAX_WINDOW_SIZE = 500 // maximum lines in a window
|
||||
|
||||
// Helper function to calculate adaptive confidence threshold based on file size
|
||||
function getAdaptiveThreshold(contentLength: number, baseThreshold: number): number {
|
||||
if (contentLength <= LARGE_FILE_THRESHOLD) {
|
||||
return baseThreshold
|
||||
}
|
||||
return Math.max(baseThreshold - 0.07, 0.8) // Reduce threshold for large files but keep minimum at 80%
|
||||
}
|
||||
|
||||
// Helper function to evaluate content uniqueness
|
||||
function evaluateContentUniqueness(searchStr: string, content: string[]): number {
|
||||
const searchLines = searchStr.split("\n")
|
||||
const uniqueLines = new Set(searchLines)
|
||||
const contentStr = content.join("\n")
|
||||
|
||||
// Calculate how many search lines are relatively unique in the content
|
||||
let uniqueCount = 0
|
||||
for (const line of uniqueLines) {
|
||||
const regex = new RegExp(line.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "g")
|
||||
const matches = contentStr.match(regex)
|
||||
if (matches && matches.length <= 2) {
|
||||
// Line appears at most twice
|
||||
uniqueCount++
|
||||
}
|
||||
}
|
||||
|
||||
return uniqueCount / uniqueLines.size
|
||||
}
|
||||
|
||||
// Helper function to prepare search string from context
|
||||
export function prepareSearchString(changes: Change[]): string {
|
||||
const lines = changes.filter((c) => c.type === "context" || c.type === "remove").map((c) => c.originalLine)
|
||||
return lines.join("\n")
|
||||
}
|
||||
|
||||
// Helper function to evaluate similarity between two texts.
// Thin wrapper over string-similarity's compareTwoStrings; a higher score
// means more similar (presumably in [0, 1] — per the library's docs).
export function evaluateSimilarity(original: string, modified: string): number {
	return compareTwoStrings(original, modified)
}
|
||||
|
||||
// Helper function to validate using diff-match-patch.
// Builds a patch from the diff between `original` and `modified`, applies it
// back to `original`, and scores how similar the patched text is to
// `modified`. A score of 1 means the patch reproduces `modified` exactly.
export function getDMPSimilarity(original: string, modified: string): number {
	const dmp = new diff_match_patch()
	const diffs = dmp.diff_main(original, modified)
	// Clean up the diff so the patch reflects semantic edits, not char noise.
	dmp.diff_cleanupSemantic(diffs)
	const patches = dmp.patch_make(original, diffs)
	const [expectedText] = dmp.patch_apply(patches, original)

	// Compare the round-tripped text against the target.
	const similarity = evaluateSimilarity(expectedText, modified)
	return similarity
}
|
||||
|
||||
// Helper function to validate edit results using hunk information.
// Scores how closely `result` matches the post-edit text the hunk describes
// (context + added lines). If `result` still resembles the PRE-edit text
// (context + removed lines) while not matching the expected text perfectly,
// the score is damped to 0.8x — "right location, but edit not applied".
export function validateEditResult(hunk: Hunk, result: string): number {
	// Build the expected text from the hunk
	const expectedText = hunk.changes
		.filter((change) => change.type === "context" || change.type === "add")
		.map((change) => (change.indent ? change.indent + change.content : change.content))
		.join("\n")

	// Calculate similarity between the result and expected text
	const similarity = getDMPSimilarity(expectedText, result)

	// If the result is unchanged from original, return low confidence
	const originalText = hunk.changes
		.filter((change) => change.type === "context" || change.type === "remove")
		.map((change) => (change.indent ? change.indent + change.content : change.content))
		.join("\n")

	const originalSimilarity = getDMPSimilarity(originalText, result)
	if (originalSimilarity > 0.97 && similarity !== 1) {
		return 0.8 * similarity // Some confidence since we found the right location
	}

	// For partial matches, scale the confidence but keep it high if we're close
	return similarity
}
|
||||
|
||||
// Helper function to validate context lines against original content.
// Scores `content` against the context portion of the search string. Below
// the (file-size-adaptive) threshold the similarity is sharply penalized
// (x0.3); a small boost is added when the search lines are relatively
// unique within the content.
function validateContextLines(searchStr: string, content: string, confidenceThreshold: number): number {
	// Extract just the context lines from the search string
	// NOTE(review): strings built by prepareSearchString use originalLine,
	// whose diff marker was already stripped, so this "-" filter only matters
	// for callers passing raw diff text — confirm.
	const contextLines = searchStr.split("\n").filter((line) => !line.startsWith("-")) // Exclude removed lines

	// Compare context lines with content
	const similarity = evaluateSimilarity(contextLines.join("\n"), content)

	// Get adaptive threshold based on content size
	const threshold = getAdaptiveThreshold(content.split("\n").length, confidenceThreshold)

	// Calculate uniqueness boost
	const uniquenessScore = evaluateContentUniqueness(searchStr, content.split("\n"))
	const uniquenessBoost = uniquenessScore * UNIQUE_CONTENT_BOOST

	// Adjust confidence based on threshold and uniqueness
	return similarity < threshold ? similarity * 0.3 + uniquenessBoost : similarity + uniquenessBoost
}
|
||||
|
||||
// Helper function to create overlapping windows
|
||||
function createOverlappingWindows(
|
||||
content: string[],
|
||||
searchSize: number,
|
||||
overlapSize: number = DEFAULT_OVERLAP_SIZE,
|
||||
): { window: string[]; startIndex: number }[] {
|
||||
const windows: { window: string[]; startIndex: number }[] = []
|
||||
|
||||
// Ensure minimum window size is at least searchSize
|
||||
const effectiveWindowSize = Math.max(searchSize, Math.min(searchSize * 2, MAX_WINDOW_SIZE))
|
||||
|
||||
// Ensure overlap size doesn't exceed window size
|
||||
const effectiveOverlapSize = Math.min(overlapSize, effectiveWindowSize - 1)
|
||||
|
||||
// Calculate step size, ensure it's at least 1
|
||||
const stepSize = Math.max(1, effectiveWindowSize - effectiveOverlapSize)
|
||||
|
||||
for (let i = 0; i < content.length; i += stepSize) {
|
||||
const windowContent = content.slice(i, i + effectiveWindowSize)
|
||||
if (windowContent.length >= searchSize) {
|
||||
windows.push({ window: windowContent, startIndex: i })
|
||||
}
|
||||
}
|
||||
|
||||
return windows
|
||||
}
|
||||
|
||||
// Helper function to combine overlapping matches.
// Merges matches found in adjacent, overlapping windows: when the same
// location appears in neighbouring windows, their confidences are averaged
// and boosted (up to +10%). Each window contributes to at most one combined
// match. The result is roughly best-first because matches are sorted by
// confidence before merging.
function combineOverlappingMatches(
	matches: (SearchResult & { windowIndex: number })[],
	overlapSize: number = DEFAULT_OVERLAP_SIZE,
): SearchResult[] {
	if (matches.length === 0) {
		return []
	}

	// Sort matches by confidence
	// NOTE(review): sorts the caller's array in place.
	matches.sort((a, b) => b.confidence - a.confidence)

	const combinedMatches: SearchResult[] = []
	const usedIndices = new Set<number>()

	for (const match of matches) {
		if (usedIndices.has(match.windowIndex)) {
			continue
		}

		// Find overlapping matches: same location reported by a neighbouring window.
		const overlapping = matches.filter(
			(m) =>
				Math.abs(m.windowIndex - match.windowIndex) === 1 &&
				Math.abs(m.index - match.index) <= overlapSize &&
				!usedIndices.has(m.windowIndex),
		)

		if (overlapping.length > 0) {
			// Boost confidence if we find same match in overlapping windows
			const avgConfidence =
				(match.confidence + overlapping.reduce((sum, m) => sum + m.confidence, 0)) / (overlapping.length + 1)
			const boost = Math.min(0.05 * overlapping.length, 0.1) // Max 10% boost

			combinedMatches.push({
				index: match.index,
				confidence: Math.min(1, avgConfidence + boost),
				strategy: `${match.strategy}-overlapping`,
			})

			usedIndices.add(match.windowIndex)
			overlapping.forEach((m) => usedIndices.add(m.windowIndex))
		} else {
			// No corroborating neighbour: keep the match as-is.
			combinedMatches.push({
				index: match.index,
				confidence: match.confidence,
				strategy: match.strategy,
			})
			usedIndices.add(match.windowIndex)
		}
	}

	return combinedMatches
}
|
||||
|
||||
// Exact-substring strategy: scans overlapping windows of the content for a
// verbatim occurrence of the search string. Confidence for each hit is the
// lower of the DMP similarity and the context-line similarity of the matched
// region. Returns index -1 / confidence 0 when no window contains the string.
// NOTE(review): only the first occurrence within each window is considered.
export function findExactMatch(
	searchStr: string,
	content: string[],
	startIndex: number = 0,
	confidenceThreshold: number = 0.97,
): SearchResult {
	const searchLines = searchStr.split("\n")
	const windows = createOverlappingWindows(content.slice(startIndex), searchLines.length)
	const matches: (SearchResult & { windowIndex: number })[] = []

	windows.forEach((windowData, windowIndex) => {
		const windowStr = windowData.window.join("\n")
		// Character offset of the match within the joined window text.
		const exactMatch = windowStr.indexOf(searchStr)

		if (exactMatch !== -1) {
			// Convert the character offset into a line offset by counting the
			// newlines before it, then pull out the matched lines.
			const matchedContent = windowData.window
				.slice(
					windowStr.slice(0, exactMatch).split("\n").length - 1,
					windowStr.slice(0, exactMatch).split("\n").length - 1 + searchLines.length,
				)
				.join("\n")

			const similarity = getDMPSimilarity(searchStr, matchedContent)
			const contextSimilarity = validateContextLines(searchStr, matchedContent, confidenceThreshold)
			// Take the more pessimistic of the two scores.
			const confidence = Math.min(similarity, contextSimilarity)

			matches.push({
				// Absolute line index: search offset + window start + line offset in window.
				index: startIndex + windowData.startIndex + windowStr.slice(0, exactMatch).split("\n").length - 1,
				confidence,
				strategy: "exact",
				windowIndex,
			})
		}
	})

	// Merge hits seen in neighbouring windows; best combined match wins.
	const combinedMatches = combineOverlappingMatches(matches)
	return combinedMatches.length > 0 ? combinedMatches[0] : { index: -1, confidence: 0, strategy: "exact" }
}
|
||||
|
||||
// String similarity strategy.
// Slides a window of the search string's height over the content and keeps
// the best-scoring position whose raw similarity meets the threshold.
// NOTE(review): `bestScore` ends up holding the ADJUSTED score (similarity-
// weighted), while the `score > bestScore` pre-filter compares the RAW score
// against it — the two are not on the same scale; confirm this is intended.
export function findSimilarityMatch(
	searchStr: string,
	content: string[],
	startIndex: number = 0,
	confidenceThreshold: number = 0.97,
): SearchResult {
	const searchLines = searchStr.split("\n")
	let bestScore = 0
	let bestIndex = -1

	for (let i = startIndex; i < content.length - searchLines.length + 1; i++) {
		const windowStr = content.slice(i, i + searchLines.length).join("\n")
		const score = compareTwoStrings(searchStr, windowStr)
		if (score > bestScore && score >= confidenceThreshold) {
			// Refine the raw score with DMP + context similarity.
			const similarity = getDMPSimilarity(searchStr, windowStr)
			const contextSimilarity = validateContextLines(searchStr, windowStr, confidenceThreshold)
			const adjustedScore = Math.min(similarity, contextSimilarity) * score

			if (adjustedScore > bestScore) {
				bestScore = adjustedScore
				bestIndex = i
			}
		}
	}

	return {
		index: bestIndex,
		confidence: bestIndex !== -1 ? bestScore : 0,
		strategy: "similarity",
	}
}
|
||||
|
||||
// Levenshtein strategy.
// Builds every window of the search string's height, picks the closest one
// by edit distance (fastest-levenshtein's `closest`), then scores that
// single candidate with DMP + context similarity.
export function findLevenshteinMatch(
	searchStr: string,
	content: string[],
	startIndex: number = 0,
	confidenceThreshold: number = 0.97,
): SearchResult {
	const searchLines = searchStr.split("\n")
	const candidates = []

	// Every possible aligned window becomes a candidate string.
	for (let i = startIndex; i < content.length - searchLines.length + 1; i++) {
		candidates.push(content.slice(i, i + searchLines.length).join("\n"))
	}

	if (candidates.length > 0) {
		const closestMatch = closest(searchStr, candidates)
		const index = startIndex + candidates.indexOf(closestMatch)
		const similarity = getDMPSimilarity(searchStr, closestMatch)
		const contextSimilarity = validateContextLines(searchStr, closestMatch, confidenceThreshold)
		const confidence = Math.min(similarity, contextSimilarity)
		return {
			// A zero-confidence closest match is reported as "not found".
			index: confidence === 0 ? -1 : index,
			// NOTE(review): `index` is always >= startIndex here (indexOf on a
			// non-empty candidates list), so this guard can never yield 0 — confirm.
			confidence: index !== -1 ? confidence : 0,
			strategy: "levenshtein",
		}
	}

	// Search string is taller than the content: nothing to compare.
	return { index: -1, confidence: 0, strategy: "levenshtein" }
}
|
||||
|
||||
// Helper function to identify anchor lines
|
||||
function identifyAnchors(searchStr: string): { first: string | null; last: string | null } {
|
||||
const searchLines = searchStr.split("\n")
|
||||
let first: string | null = null
|
||||
let last: string | null = null
|
||||
|
||||
// Find the first non-empty line
|
||||
for (const line of searchLines) {
|
||||
if (line.trim()) {
|
||||
first = line
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Find the last non-empty line
|
||||
for (let i = searchLines.length - 1; i >= 0; i--) {
|
||||
if (searchLines[i].trim()) {
|
||||
last = searchLines[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return { first, last }
|
||||
}
|
||||
|
||||
// Anchor-based search strategy.
// Uses the first and last non-empty lines of the search string as anchors.
// Requires the first anchor to occur exactly once in the content; locates
// the last anchor scanning backwards; then checks that the text between the
// anchors is similar enough to the expected context. A successful anchor
// match is reported with full confidence (1).
export function findAnchorMatch(
	searchStr: string,
	content: string[],
	startIndex: number = 0,
	confidenceThreshold: number = 0.97,
): SearchResult {
	const searchLines = searchStr.split("\n")
	const { first, last } = identifyAnchors(searchStr)

	// No usable anchors (blank search string).
	if (!first || !last) {
		return { index: -1, confidence: 0, strategy: "anchor" }
	}

	let firstIndex = -1
	let lastIndex = -1

	// Check if the first anchor is unique
	let firstOccurrences = 0
	for (const contentLine of content) {
		if (contentLine === first) {
			firstOccurrences++
		}
	}

	// An ambiguous first anchor makes the strategy unreliable; bail out.
	if (firstOccurrences !== 1) {
		return { index: -1, confidence: 0, strategy: "anchor" }
	}

	// Find the first anchor
	for (let i = startIndex; i < content.length; i++) {
		if (content[i] === first) {
			firstIndex = i
			break
		}
	}

	// Find the last anchor (scanning backwards to take the final occurrence)
	for (let i = content.length - 1; i >= startIndex; i--) {
		if (content[i] === last) {
			lastIndex = i
			break
		}
	}

	// Both anchors must exist and be ordered correctly.
	if (firstIndex === -1 || lastIndex === -1 || lastIndex <= firstIndex) {
		return { index: -1, confidence: 0, strategy: "anchor" }
	}

	// Validate the context between the anchors against the search string.
	const expectedContext = searchLines.slice(searchLines.indexOf(first) + 1, searchLines.indexOf(last)).join("\n")
	const actualContext = content.slice(firstIndex + 1, lastIndex).join("\n")
	const contextSimilarity = evaluateSimilarity(expectedContext, actualContext)

	if (contextSimilarity < getAdaptiveThreshold(content.length, confidenceThreshold)) {
		return { index: -1, confidence: 0, strategy: "anchor" }
	}

	// A unique, context-validated anchor pair is treated as a certain match.
	const confidence = 1

	return {
		index: firstIndex,
		confidence: confidence,
		strategy: "anchor",
	}
}
|
||||
|
||||
// Main search function that tries all strategies
|
||||
export function findBestMatch(
|
||||
searchStr: string,
|
||||
content: string[],
|
||||
startIndex: number = 0,
|
||||
confidenceThreshold: number = 0.97,
|
||||
): SearchResult {
|
||||
const strategies = [findExactMatch, findAnchorMatch, findSimilarityMatch, findLevenshteinMatch]
|
||||
|
||||
let bestResult: SearchResult = { index: -1, confidence: 0, strategy: "none" }
|
||||
|
||||
for (const strategy of strategies) {
|
||||
const result = strategy(searchStr, content, startIndex, confidenceThreshold)
|
||||
if (result.confidence > bestResult.confidence) {
|
||||
bestResult = result
|
||||
}
|
||||
}
|
||||
|
||||
return bestResult
|
||||
}
|
||||
20
src/core/diff/strategies/new-unified/types.ts
Normal file
20
src/core/diff/strategies/new-unified/types.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
/** One line of a parsed diff hunk. */
export type Change = {
	// "context" = unchanged line, "add" = inserted line, "remove" = deleted line.
	type: "context" | "add" | "remove"
	// Line content with the leading indentation stripped off.
	content: string
	// The leading whitespace that was stripped from `content`.
	indent: string
	// The raw line (indent + content) as it appeared in the diff, marker removed.
	originalLine?: string
}

/** A contiguous group of changes from one "@@" section of a diff. */
export type Hunk = {
	changes: Change[]
}

/** A fully parsed diff: an ordered list of hunks. */
export type Diff = {
	hunks: Hunk[]
}

/** Outcome of applying a hunk with an edit strategy. */
export type EditResult = {
	// Confidence that the edit was applied correctly; 0 = failed.
	confidence: number
	// The (possibly modified) file content, split into lines.
	result: string[]
	// Name of the edit strategy that produced the result (e.g. "none").
	strategy: string
}
|
||||
Reference in New Issue
Block a user