Skip to content

Commit df97e07

Browse files
authored
Merge pull request sergi#108 from pakohan/master
fix DiffCleanupSemantic
2 parents a87b244 + e013302 commit df97e07

File tree

2 files changed

+46
-9
lines changed

2 files changed

+46
-9
lines changed

diffmatchpatch/diff.go

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -670,16 +670,16 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
670670
// An insertion or deletion.
671671

672672
if diffs[pointer].Type == DiffInsert {
673-
lengthInsertions2 += len(diffs[pointer].Text)
673+
lengthInsertions2 += utf8.RuneCountInString(diffs[pointer].Text)
674674
} else {
675-
lengthDeletions2 += len(diffs[pointer].Text)
675+
lengthDeletions2 += utf8.RuneCountInString(diffs[pointer].Text)
676676
}
677677
// Eliminate an equality that is smaller or equal to the edits on both sides of it.
678678
difference1 := int(math.Max(float64(lengthInsertions1), float64(lengthDeletions1)))
679679
difference2 := int(math.Max(float64(lengthInsertions2), float64(lengthDeletions2)))
680-
if len(lastequality) > 0 &&
681-
(len(lastequality) <= difference1) &&
682-
(len(lastequality) <= difference2) {
680+
if utf8.RuneCountInString(lastequality) > 0 &&
681+
(utf8.RuneCountInString(lastequality) <= difference1) &&
682+
(utf8.RuneCountInString(lastequality) <= difference2) {
683683
// Duplicate record.
684684
insPoint := equalities[len(equalities)-1]
685685
diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality})
@@ -728,8 +728,8 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
728728
overlapLength1 := dmp.DiffCommonOverlap(deletion, insertion)
729729
overlapLength2 := dmp.DiffCommonOverlap(insertion, deletion)
730730
if overlapLength1 >= overlapLength2 {
731-
if float64(overlapLength1) >= float64(len(deletion))/2 ||
732-
float64(overlapLength1) >= float64(len(insertion))/2 {
731+
if float64(overlapLength1) >= float64(utf8.RuneCountInString(deletion))/2 ||
732+
float64(overlapLength1) >= float64(utf8.RuneCountInString(insertion))/2 {
733733

734734
// Overlap found. Insert an equality and trim the surrounding edits.
735735
diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]})
@@ -739,8 +739,8 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
739739
pointer++
740740
}
741741
} else {
742-
if float64(overlapLength2) >= float64(len(deletion))/2 ||
743-
float64(overlapLength2) >= float64(len(insertion))/2 {
742+
if float64(overlapLength2) >= float64(utf8.RuneCountInString(deletion))/2 ||
743+
float64(overlapLength2) >= float64(utf8.RuneCountInString(insertion))/2 {
744744
// Reverse overlap found. Insert an equality and swap and trim the surrounding edits.
745745
overlap := Diff{DiffEqual, deletion[:overlapLength2]}
746746
diffs = splice(diffs, pointer, 0, overlap)

diffmatchpatch/diff_test.go

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -821,6 +821,43 @@ func TestDiffCleanupSemantic(t *testing.T) {
821821
{DiffDelete, " deal"},
822822
},
823823
},
824+
{
825+
"Taken from python / CPP library",
826+
[]Diff{
827+
{DiffInsert, "星球大戰:新的希望 "},
828+
{DiffEqual, "star wars: "},
829+
{DiffDelete, "episodio iv - un"},
830+
{DiffEqual, "a n"},
831+
{DiffDelete, "u"},
832+
{DiffEqual, "e"},
833+
{DiffDelete, "va"},
834+
{DiffInsert, "w"},
835+
{DiffEqual, " "},
836+
{DiffDelete, "es"},
837+
{DiffInsert, "ho"},
838+
{DiffEqual, "pe"},
839+
{DiffDelete, "ranza"},
840+
},
841+
[]Diff{
842+
{DiffInsert, "星球大戰:新的希望 "},
843+
{DiffEqual, "star wars: "},
844+
{DiffDelete, "episodio iv - una nueva esperanza"},
845+
{DiffInsert, "a new hope"},
846+
},
847+
},
848+
{
849+
"panic",
850+
[]Diff{
851+
{DiffInsert, "킬러 인 "},
852+
{DiffEqual, "리커버리"},
853+
{DiffDelete, " 보이즈"},
854+
},
855+
[]Diff{
856+
{DiffInsert, "킬러 인 "},
857+
{DiffEqual, "리커버리"},
858+
{DiffDelete, " 보이즈"},
859+
},
860+
},
824861
} {
825862
actual := dmp.DiffCleanupSemantic(tc.Diffs)
826863
assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %s", i, tc.Name))

0 commit comments

Comments
 (0)