comparison rhodecode/lib/diffs.py @ 1753:1d1ccb873d00 beta

moved soon-to-be-deleted code from vcs to rhodecode - diff lib - annotate highlighter
author Marcin Kuzminski <marcin@python-works.com>
date Sun, 04 Dec 2011 23:39:32 +0200
parents
children 5610fd9b6803
comparison
equal deleted inserted replaced
1752:f28dc032adf0 1753:1d1ccb873d00
1 # -*- coding: utf-8 -*-
2 """
3 rhodecode.lib.diffs
4 ~~~~~~~~~~~~~~~~~~~
5
6 Set of diffing helpers, previously part of vcs
7
8
9 :created_on: Dec 4, 2011
10 :author: marcink
11 :copyright: (C) 2009-2011 Marcin Kuzminski <marcin@python-works.com>
12 :original copyright: 2007-2008 by Armin Ronacher
13 :license: GPLv3, see COPYING for more details.
14 """
15 # This program is free software: you can redistribute it and/or modify
16 # it under the terms of the GNU General Public License as published by
17 # the Free Software Foundation, either version 3 of the License, or
18 # (at your option) any later version.
19 #
20 # This program is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 # GNU General Public License for more details.
24 #
25 # You should have received a copy of the GNU General Public License
26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
27
28 import re
29 import difflib
30
31 from itertools import tee, imap
32
33 from mercurial.match import match
34
35 from vcs.exceptions import VCSError
36 from vcs.nodes import FileNode
37
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True):
    """
    Build a git style diff between two file nodes.

    The diff is produced by the repository that owns ``filenode_new`` and
    is restricted to the path of ``filenode_new``.

    :param filenode_old: ``FileNode`` holding the old version of the file
    :param filenode_new: ``FileNode`` holding the new version of the file
    :param ignore_whitespace: ignore whitespaces in diff
    :raises VCSError: if either argument is not a ``FileNode`` instance
    """
    for candidate in (filenode_old, filenode_new):
        if isinstance(candidate, FileNode):
            continue
        raise VCSError("Given object should be FileNode object, not %s"
                       % candidate.__class__)

    # A changeset without a ``raw_id`` attribute is represented by the
    # all-zero 40 character id.
    null_id = '0' * 40
    old_raw_id = getattr(filenode_old.changeset, 'raw_id', null_id)
    new_raw_id = getattr(filenode_new.changeset, 'raw_id', null_id)

    return filenode_new.changeset.repository._get_diff(
        old_raw_id, new_raw_id, filenode_new.path, ignore_whitespace
    )
58
59
class DiffProcessor(object):
    """
    Give it a unified diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.
    """
    # Matches a hunk header: ``@@ -old_start[,old_len] +new_start[,new_len] @@``
    # followed by the optional trailing section text.
    _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')

    def __init__(self, diff, differ='diff', format='udiff'):
        """
        :param diff: a text in diff format or generator
        :param differ: ``'difflib'`` selects word based inline highlighting,
            any other value selects the prefix/suffix based one
        :param format: format of diff passed, `udiff` or `gitdiff`
        """
        if isinstance(diff, basestring):
            if format == 'gitdiff':
                # _parse_gitdiff expects the text as one or two big pieces
                # and splits it into lines itself
                diff = [diff]
            else:
                # the udiff parser works line by line; feeding it the whole
                # text as a single item would make it find no files at all
                diff = diff.splitlines(1)

        self.__udiff = diff
        self.__format = format
        self.adds = 0
        self.removes = 0

        if self.__format == 'gitdiff':
            udiff_copy = self.copy_iterator()
            self.lines = imap(self.escaper, self._parse_gitdiff(udiff_copy))
        else:
            udiff_copy = self.copy_iterator()
            self.lines = imap(self.escaper, udiff_copy)

        # Select a differ.
        if differ == 'difflib':
            self.differ = self._highlight_line_difflib
        else:
            self.differ = self._highlight_line_udiff

    def escaper(self, string):
        """
        Escape ``&``, ``<`` and ``>`` so the text can be embedded in HTML.

        ``&`` has to be replaced first, otherwise the ampersands of the
        entities produced for ``<`` and ``>`` would be escaped again.
        """
        return (string.replace('&', '&amp;')
                      .replace('<', '&lt;')
                      .replace('>', '&gt;'))

    def copy_iterator(self):
        """
        make a fresh copy of generator, we should not iterate thru
        an original as it's needed for repeating operations on
        this instance of DiffProcessor
        """
        self.__udiff, iterator_copy = tee(self.__udiff)
        return iterator_copy

    def _extract_rev(self, line1, line2):
        """
        Extract the filename and revision hint from a line.

        :param line1: the ``--- `` header line of a file
        :param line2: the ``+++ `` header line of a file
        :returns: tuple ``(filename, new_rev, old_rev)``, or
            ``(None, None, None)`` if the lines are not such a header pair
        """
        if not (line1.startswith('--- ') and line2.startswith('+++ ')):
            return None, None, None

        def split_header(header, prefix, default_rev):
            parts = header[4:].split(None, 1)
            name = parts[0] if parts else None
            # remove the prefix as a whole; str.lstrip() would treat it as
            # a set of characters and also eat the start of names such as
            # ``a/aardvark.py``
            if name is not None and name.startswith(prefix):
                name = name[len(prefix):]
            rev = parts[1] if len(parts) == 2 else default_rev
            return name, rev

        old_filename, old_rev = split_header(line1, 'a/', 'old')
        new_filename, new_rev = split_header(line2, 'b/', 'new')

        # an added file has the null device as its old name, use the new
        # name in that case
        if old_filename in ('/dev/null', 'dev/null'):
            filename = new_filename
        else:
            filename = old_filename

        return filename, new_rev, old_rev

    def _parse_gitdiff(self, diffiterator):
        """
        Split the pieces of a git diff into decoded lines.

        While doing so the added and removed lines are counted in
        ``self.adds`` and ``self.removes``.

        :param diffiterator: iterable yielding at most two pieces of text
        :returns: list of lines decoded from utf8
        :raises Exception: if the iterable yields more than two pieces
        """
        def line_decoder(l):
            # file header lines (+++ / ---) are not changes
            if l.startswith('+') and not l.startswith('+++'):
                self.adds += 1
            elif l.startswith('-') and not l.startswith('---'):
                self.removes += 1
            return l.decode('utf8', 'replace')

        output = list(diffiterator)
        size = len(output)

        if size == 2:
            # the first piece is kept as a single line, only the second
            # one is split
            pieces = [output[0]]
            pieces.extend(output[1].splitlines(1))
            return [line_decoder(l) for l in pieces]
        elif size == 1:
            return [line_decoder(l) for l in output[0].splitlines(1)]
        elif size == 0:
            return []

        raise Exception('wrong size of diff %s' % size)

    def _highlight_line_difflib(self, line, next):
        """
        Highlight inline changes in both lines.

        Both lines are split into words, compared with difflib and every
        differing fragment is wrapped in ``<del>`` (old line) or ``<ins>``
        (new line). The ``'line'`` values of both dicts are updated in place.
        """

        if line['action'] == 'del':
            old, new = line, next
        else:
            old, new = next, line

        # the capturing group keeps the separators, so joining the words
        # gives back the original text
        oldwords = re.split(r'(\W)', old['line'])
        newwords = re.split(r'(\W)', new['line'])

        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

        oldfragments, newfragments = [], []
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
            oldfrag = ''.join(oldwords[i1:i2])
            newfrag = ''.join(newwords[j1:j2])
            if tag != 'equal':
                if oldfrag:
                    oldfrag = '<del>%s</del>' % oldfrag
                if newfrag:
                    newfrag = '<ins>%s</ins>' % newfrag
            oldfragments.append(oldfrag)
            newfragments.append(newfrag)

        old['line'] = "".join(oldfragments)
        new['line'] = "".join(newfragments)

    def _highlight_line_udiff(self, line, next):
        """
        Highlight inline changes in both lines.

        The common prefix and common suffix of both lines are left alone,
        the part in between is wrapped in ``<ins>`` for an added line and in
        ``<del>`` otherwise. The ``'line'`` values are updated in place.
        """
        # length of the common prefix
        start = 0
        limit = min(len(line['line']), len(next['line']))
        while start < limit and line['line'][start] == next['line'][start]:
            start += 1
        # negative offset of the common suffix, it may not overlap the prefix
        end = -1
        limit -= start
        while -end <= limit and line['line'][end] == next['line'][end]:
            end -= 1
        end += 1
        # lines that share neither a prefix nor a suffix are left untouched
        if start or end:
            def do(l):
                last = end + len(l['line'])
                if l['action'] == 'add':
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next)

    def _parse_udiff(self):
        """
        Parse the diff an return data for the template.

        :returns: list with one dict per file, having the keys ``filename``,
            ``old_revision``, ``new_revision`` and ``chunks``. A chunk is a
            list of dicts with the keys ``old_lineno``, ``new_lineno``,
            ``action`` (``add``, ``del``, ``unmod`` or ``context``) and
            ``line``.
        """
        lineiter = self.lines
        files = []
        try:
            line = next(lineiter)
            # skip first context
            skipfirst = True
            while 1:
                # continue until we found the old file
                if not line.startswith('--- '):
                    line = next(lineiter)
                    continue

                chunks = []
                # _extract_rev returns the new revision before the old one
                filename, new_rev, old_rev = \
                    self._extract_rev(line, next(lineiter))
                files.append({
                    'filename': filename,
                    'old_revision': old_rev,
                    'new_revision': new_rev,
                    'chunks': chunks
                })

                line = next(lineiter)
                while line:
                    chunk_match = self._chunk_re.match(line)
                    if not chunk_match:
                        break

                    lines = []
                    chunks.append(lines)

                    # a missing length in the hunk header means one line
                    old_line, old_end, new_line, new_end = \
                        [int(x or 1) for x in chunk_match.groups()[:-1]]
                    old_line -= 1
                    new_line -= 1
                    context = len(chunk_match.groups()) == 5
                    old_end += old_line
                    new_end += new_line

                    if context:
                        if not skipfirst:
                            lines.append({
                                'old_lineno': '...',
                                'new_lineno': '...',
                                'action': 'context',
                                'line': line,
                            })
                        else:
                            skipfirst = False

                    line = next(lineiter)
                    while old_line < old_end or new_line < new_end:
                        if line:
                            command, line = line[0], line[1:]
                        else:
                            command = ' '
                        affects_old = affects_new = False

                        # ignore those if we don't expect them
                        if command in '#@':
                            continue
                        elif command == '+':
                            affects_new = True
                            action = 'add'
                        elif command == '-':
                            affects_old = True
                            action = 'del'
                        else:
                            affects_old = affects_new = True
                            action = 'unmod'

                        old_line += affects_old
                        new_line += affects_new
                        lines.append({
                            'old_lineno': affects_old and old_line or '',
                            'new_lineno': affects_new and new_line or '',
                            'action': action,
                            'line': line
                        })
                        line = next(lineiter)

        except StopIteration:
            # running out of lines is the regular way to leave the parser
            pass

        # highlight inline changes
        unchanged = ('unmod', 'context')
        for diff_file in files:
            # every file has to be processed with its own chunks
            for chunk in diff_file['chunks']:
                lineiter = iter(chunk)
                try:
                    while 1:
                        line = next(lineiter)
                        # hunk headers and unmodified lines never take part
                        # in a del/add pair
                        if line['action'] not in unchanged:
                            nextline = next(lineiter)
                            if nextline['action'] in unchanged or \
                                    nextline['action'] == line['action']:
                                continue
                            self.differ(line, nextline)
                except StopIteration:
                    pass

        return files

    def prepare(self):
        """
        Prepare the passed udiff for HTML rendering. It'l return a list
        of dicts
        """
        return self._parse_udiff()

    def _safe_id(self, idstring):
        """Make a string safe for including in an id attribute.

        The HTML spec says that id attributes 'must begin with
        a letter ([A-Za-z]) and may be followed by any number
        of letters, digits ([0-9]), hyphens ("-"), underscores
        ("_"), colons (":"), and periods (".")'. These regexps
        are slightly over-zealous, in that they remove colons
        and periods unnecessarily.

        Whitespace is transformed into underscores, and then
        anything which is not a hyphen or a character that
        matches \\w (alphanumerics and underscore) is removed.

        """
        # Transform all whitespace to underscore
        idstring = re.sub(r'\s', "_", '%s' % idstring)
        # Remove everything that is not a hyphen or a member of \w
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
        return idstring

    def raw_diff(self):
        """
        Returns raw string as udiff
        """
        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            udiff_copy = self._parse_gitdiff(udiff_copy)
        return u''.join(udiff_copy)

    def as_html(self, table_class='code-difftable', line_class='line',
                new_lineno_class='lineno old', old_lineno_class='lineno new',
                code_class='code'):
        """
        Return udiff as html table with customized css classes

        :returns: the html of the table, or ``None`` if the diff has no
            chunks
        """
        # NOTE(review): the defaults of ``new_lineno_class`` and
        # ``old_lineno_class`` look swapped; they are kept as they are since
        # existing stylesheets may depend on them - confirm before changing.
        def _link_to_if(condition, label, url):
            """
            Generates a link if condition is meet or just the label if not.
            """

            if condition:
                return '''<a href="%(url)s">%(label)s</a>''' % {'url': url,
                                                                'label': label}
            else:
                return label
        diff_lines = self.prepare()
        _html_empty = True
        _html = []
        _html.append('''<table class="%(table_class)s">\n''' \
                                            % {'table_class': table_class})
        for diff in diff_lines:
            for line in diff['chunks']:
                _html_empty = False
                for change in line:
                    _html.append('''<tr class="%(line_class)s %(action)s">\n''' \
                        % {'line_class': line_class,
                           'action': change['action']})
                    anchor_old_id = ''
                    anchor_new_id = ''
                    anchor_old = "%(filename)s_o%(oldline_no)s" % \
                                {'filename': self._safe_id(diff['filename']),
                                 'oldline_no': change['old_lineno']}
                    anchor_new = "%(filename)s_n%(oldline_no)s" % \
                                {'filename': self._safe_id(diff['filename']),
                                 'oldline_no': change['new_lineno']}
                    # only real line numbers get an anchor and a link, the
                    # '...' of a context row and the empty string do not
                    cond_old = change['old_lineno'] != '...' and \
                                                        change['old_lineno']
                    cond_new = change['new_lineno'] != '...' and \
                                                        change['new_lineno']
                    if cond_old:
                        anchor_old_id = 'id="%s"' % anchor_old
                    if cond_new:
                        anchor_new_id = 'id="%s"' % anchor_new
                    ###########################################################
                    # OLD LINE NUMBER
                    ###########################################################
                    _html.append('''\t<td %(a_id)s class="%(old_lineno_cls)s">''' \
                                    % {'a_id': anchor_old_id,
                                       'old_lineno_cls': old_lineno_class})

                    _html.append('''<pre>%(link)s</pre>''' \
                        % {'link':
                        _link_to_if(cond_old, change['old_lineno'], '#%s' \
                                                                % anchor_old)})
                    _html.append('''</td>\n''')
                    ###########################################################
                    # NEW LINE NUMBER
                    ###########################################################

                    _html.append('''\t<td %(a_id)s class="%(new_lineno_cls)s">''' \
                                    % {'a_id': anchor_new_id,
                                       'new_lineno_cls': new_lineno_class})

                    _html.append('''<pre>%(link)s</pre>''' \
                        % {'link':
                        _link_to_if(cond_new, change['new_lineno'], '#%s' \
                                                                % anchor_new)})
                    _html.append('''</td>\n''')
                    ###########################################################
                    # CODE
                    ###########################################################
                    _html.append('''\t<td class="%(code_class)s">''' \
                                                % {'code_class': code_class})
                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' \
                                                % {'code': change['line']})
                    _html.append('''\t</td>''')
                    _html.append('''\n</tr>\n''')
        _html.append('''</table>''')
        if _html_empty:
            return None
        return ''.join(_html)

    def stat(self):
        """
        Returns tuple of added, and removed lines for this instance
        """
        return self.adds, self.removes