-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathJLDiff.py
More file actions
267 lines (218 loc) · 8.58 KB
/
JLDiff.py
File metadata and controls
267 lines (218 loc) · 8.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
#!/usr/bin/python
#Copyright (c) 2016, Joshua Lansford @ LaserLinc
#All rights reserved.
#Redistribution and use in source and binary forms, with or without
#modification, are permitted provided that the following conditions are met:
#1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
#ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from __future__ import with_statement
from collections import defaultdict
import sys
import cgi
import codecs
if hasattr( cgi, "escape" ):
cgi_escape = cgi.escape
else:
import html
cgi_escape = html.escape
STATE_PASSING_1ST = 0
STATE_PASSING_2ND = 1
STATE_MATCH = 2
class lineCompIndex(object):
__slots__ = ['errorCount', 'previous', 'state', 'content' ]
def __init__( self ):
self.errorCount = 0
self.previous = None
self.state = STATE_PASSING_1ST
self.content = ""
def __str__( self ):
result = ""
if self.state == STATE_PASSING_1ST:
result = "1st"
elif self.state == STATE_PASSING_2ND:
result = "2nd"
elif self.state == STATE_MATCH:
result = "M"
result += " e" + str( self.errorCount )
result += " " + self.content
return result
def __repr__(self) -> str:
return str(self)
u_intern_dict = {}
def u_intern( value ):
if value in u_intern_dict:
return u_intern_dict[value]
u_intern_dict[value]=value
return value
def compute_diff( file1, file2, talk=True, axis_penalty=False ):
lastLine = []
thisLine = []
#init the root root
thisIndex = lineCompIndex()
thisIndex.state = STATE_MATCH
thisLine.append( thisIndex )
#init the root top case
columnIndex = 1
for char2 in file2:
thisIndex = lineCompIndex()
thisIndex.previous = thisLine[ columnIndex-1 ]
thisIndex.errorCount = thisIndex.previous.errorCount+1
thisIndex.content = u_intern(char2)
thisIndex.state = STATE_PASSING_2ND
thisLine.append( thisIndex )
columnIndex += 1
rowIndex = 1
for char1 in file1:
lastLine = thisLine
thisLine = []
if talk:
try:
sys.stdout.write( char1 )
except Exception:
pass
#init the root left case
thisIndex = lineCompIndex()
thisIndex.previous = lastLine[ 0 ]
thisIndex.errorCount = thisIndex.previous.errorCount+1
thisIndex.content = u_intern(char1)
thisIndex.state = STATE_PASSING_1ST
thisLine.append( thisIndex )
columnIndex = 1
for char2 in file2:
thisIndex = lineCompIndex()
#just assume it will in this direction.
thisIndex.previous = thisLine[ columnIndex-1 ]
thisIndex.content = u_intern(char2)
thisIndex.state = STATE_PASSING_2ND
thisIndex.errorCount = thisIndex.previous.errorCount+1
#but if the other direction is better overwrite.
if lastLine[ columnIndex ].errorCount+1 < thisIndex.errorCount:
thisIndex.previous = lastLine[ columnIndex ]
thisIndex.content = u_intern(char1)
thisIndex.state = STATE_PASSING_1ST
thisIndex.errorCount = thisIndex.previous.errorCount+1
#if we have a match we have a third option
if( char2 == char1 ):
axis_penalty_amount = abs(columnIndex - rowIndex)*1e-5 if axis_penalty else 0
if lastLine[ columnIndex-1 ].errorCount+axis_penalty_amount < thisIndex.errorCount:
thisIndex.previous = lastLine[ columnIndex-1 ]
thisIndex.errorCount = thisIndex.previous.errorCount+axis_penalty_amount
thisIndex.state = STATE_MATCH
thisIndex.content = u_intern(char2)
thisLine.append( thisIndex )
columnIndex += 1
rowIndex += 1
backwardsList = []
currentNode = thisLine[ len( thisLine )-1 ]
while not currentNode is None:
backwardsList.append( currentNode )
currentNode = currentNode.previous
backwardsList.reverse()
return backwardsList
def printDiffs( nodesToPrint, outputFile ):
isblack = True
isred = False
isgreen = False
def escape( inputStr ):
answer = ""
if inputStr == " ":
answer = " "
elif inputStr == "\t":
answer = " "
else:
answer = cgi_escape( inputStr )
return answer
for nodeToPrint in nodesToPrint:
if nodeToPrint.content == "\n":
outputFile.write( "<br>\n" )
else:
if(nodeToPrint.state == STATE_MATCH):
if not isblack:
outputFile.write( "</span>" )
isblack = True
isred = False
isgreen = False
elif(nodeToPrint.state == STATE_PASSING_2ND ):
if not isred:
if not isblack:
outputFile.write( "</span>" )
outputFile.write( "<span class='new'>" )
isblack = False
isred = True
isgreen = False
else:
if not isgreen:
if not isblack:
outputFile.write( "</span>" )
outputFile.write( "<span class='old'>" )
isblack = False
isred = False
isgreen = True
outputFile.write( escape( nodeToPrint.content ) )
if not isblack:
outputFile.write( "</span>" )
isblack = True
isred = False
isgreen = False
def main( argv ):
filename1 = None
filename2 = None
output = None
same_size = False
for arg in argv:
if arg.startswith( "-" ):
if arg == "--same_size":
same_size = True
else:
raise Exception( "Unknown arg " + arg )
else:
if not filename1:
filename1 = arg
elif not filename2:
filename2 = arg
elif not output:
output = arg
else:
raise Exception( "Extra argument " + arg )
if not filename1 or not filename2 or not output:
print( "Usage: JLDiff file1 file2 resultFile [--same_size]" )
exit(1)
with codecs.open( filename1, 'r', 'utf-8', errors='ignore' ) as fileHandle1:
with codecs.open( filename2, 'r', 'utf-8', errors='ignore' ) as fileHandle2:
file1 = fileHandle1.read()
file2 = fileHandle2.read()
result = compute_diff( file1, file2 )
with codecs.open( output, 'w', 'utf-8', errors='ignore' ) as outFile:
outFile.write( "<!DOCTYPE html>\n" )
outFile.write( "<html>\n" )
outFile.write( "<head>\n" )
outFile.write( "<meta charset='utf-8'>\n" )
outFile.write( "<title>diff of " + cgi_escape( filename1 ) + " and " + cgi_escape( filename2 ) + "</title>\n" )
outFile.write( "<style>\n" )
if same_size:
outFile.write( ".new{color:darkgreen}\n" )
outFile.write( ".old{color:red}\n" )
else:
outFile.write( ".new{color:darkgreen;font-size: 25px;}\n" )
outFile.write( ".old{color:red;font-size: 25px;}\n" )
outFile.write( "</style>\n" )
outFile.write( "</head>\n" )
outFile.write( "<body>\n" )
printDiffs( result, outFile )
outFile.write( "</body>\n" )
outFile.write( "</html>\n" )
if __name__ == "__main__":
main(sys.argv[1:])