kallithea: rhodecode/lib/diffs.py comparison

comparison rhodecode/lib/diffs.py @ 1753:1d1ccb873d00 beta

moved soon-to-be-deleted code from vcs to rhodecode - diff lib - annotate highlighter

author	Marcin Kuzminski <marcin@python-works.com>
date	Sun, 04 Dec 2011 23:39:32 +0200
parents
children	5610fd9b6803

comparison

equal deleted inserted replaced

-:f28dc032adf0
+:1d1ccb873d00
+# -*- coding: utf-8 -*-
+"""
+rhodecode.lib.diffs
+~~~~~~~~~~~~~~~~~~~
+Set of diffing helpers, previously part of vcs
+:created_on: Dec 4, 2011
+:author: marcink
+:copyright: (C) 2009-2011 Marcin Kuzminski <marcin@python-works.com>
+:original copyright: 2007-2008 by Armin Ronacher
+:license: GPLv3, see COPYING for more details.
+"""
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+import re
+import difflib
+from itertools import tee, imap
+from mercurial.match import match
+from vcs.exceptions import VCSError
+from vcs.nodes import FileNode
+def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True):
+"""
+Returns git style diff between given ``filenode_old`` and ``filenode_new``.
+:param ignore_whitespace: ignore whitespaces in diff
+"""
+for filenode in (filenode_old, filenode_new):
+if not isinstance(filenode, FileNode):
+raise VCSError("Given object should be FileNode object, not %s"
+% filenode.__class__)
+old_raw_id = getattr(filenode_old.changeset, 'raw_id', '0' * 40)
+new_raw_id = getattr(filenode_new.changeset, 'raw_id', '0' * 40)
+repo = filenode_new.changeset.repository
+vcs_gitdiff = repo._get_diff(old_raw_id, new_raw_id, filenode_new.path,
+ignore_whitespace)
+return vcs_gitdiff
+class DiffProcessor(object):
+"""
+Give it a unified diff and it returns a list of the files that were
+mentioned in the diff together with a dict of meta information that
+can be used to render it in a HTML template.
+"""
+_chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
+def __init__(self, diff, differ='diff', format='udiff'):
+"""
+:param diff:   a text in diff format or generator
+:param format: format of diff passed, `udiff` or `gitdiff`
+"""
+if isinstance(diff, basestring):
+diff = [diff]
+self.__udiff = diff
+self.__format = format
+self.adds = 0
+self.removes = 0
+if isinstance(self.__udiff, basestring):
+self.lines = iter(self.__udiff.splitlines(1))
+elif self.__format == 'gitdiff':
+udiff_copy = self.copy_iterator()
+self.lines = imap(self.escaper, self._parse_gitdiff(udiff_copy))
+else:
+udiff_copy = self.copy_iterator()
+self.lines = imap(self.escaper, udiff_copy)
+# Select a differ.
+if differ == 'difflib':
+self.differ = self._highlight_line_difflib
+else:
+self.differ = self._highlight_line_udiff
+def escaper(self, string):
+return string.replace('<', '&lt;').replace('>', '&gt;')
+def copy_iterator(self):
+"""
+make a fresh copy of generator, we should not iterate thru
+an original as it's needed for repeating operations on
+this instance of DiffProcessor
+"""
+self.__udiff, iterator_copy = tee(self.__udiff)
+return iterator_copy
+def _extract_rev(self, line1, line2):
+"""
+Extract the filename and revision hint from a line.
+"""
+try:
+if line1.startswith('--- ') and line2.startswith('+++ '):
+l1 = line1[4:].split(None, 1)
+old_filename = l1[0].lstrip('a/') if len(l1) >= 1 else None
+old_rev = l1[1] if len(l1) == 2 else 'old'
+l2 = line2[4:].split(None, 1)
+new_filename = l2[0].lstrip('b/') if len(l1) >= 1 else None
+new_rev = l2[1] if len(l2) == 2 else 'new'
+filename = old_filename if (old_filename !=
+'dev/null') else new_filename
+return filename, new_rev, old_rev
+except (ValueError, IndexError):
+pass
+return None, None, None
+def _parse_gitdiff(self, diffiterator):
+def line_decoder(l):
+if l.startswith('+') and not l.startswith('+++'):
+self.adds += 1
+elif l.startswith('-') and not l.startswith('---'):
+self.removes += 1
+return l.decode('utf8', 'replace')
+output = list(diffiterator)
+size = len(output)
+if size == 2:
+l = []
+l.extend([output[0]])
+l.extend(output[1].splitlines(1))
+return map(line_decoder, l)
+elif size == 1:
+return  map(line_decoder, output[0].splitlines(1))
+elif size == 0:
+return []
+raise Exception('wrong size of diff %s' % size)
+def _highlight_line_difflib(self, line, next):
+"""
+Highlight inline changes in both lines.
+"""
+if line['action'] == 'del':
+old, new = line, next
+else:
+old, new = next, line
+oldwords = re.split(r'(\W)', old['line'])
+newwords = re.split(r'(\W)', new['line'])
+sequence = difflib.SequenceMatcher(None, oldwords, newwords)
+oldfragments, newfragments = [], []
+for tag, i1, i2, j1, j2 in sequence.get_opcodes():
+oldfrag = ''.join(oldwords[i1:i2])
+newfrag = ''.join(newwords[j1:j2])
+if tag != 'equal':
+if oldfrag:
+oldfrag = '<del>%s</del>' % oldfrag
+if newfrag:
+newfrag = '<ins>%s</ins>' % newfrag
+oldfragments.append(oldfrag)
+newfragments.append(newfrag)
+old['line'] = "".join(oldfragments)
+new['line'] = "".join(newfragments)
+def _highlight_line_udiff(self, line, next):
+"""
+Highlight inline changes in both lines.
+"""
+start = 0
+limit = min(len(line['line']), len(next['line']))
+while start < limit and line['line'][start] == next['line'][start]:
+start += 1
+end = -1
+limit -= start
+while -end <= limit and line['line'][end] == next['line'][end]:
+end -= 1
+end += 1
+if start or end:
+def do(l):
+last = end + len(l['line'])
+if l['action'] == 'add':
+tag = 'ins'
+else:
+tag = 'del'
+l['line'] = '%s<%s>%s</%s>%s' % (
+l['line'][:start],
+tag,
+l['line'][start:last],
+tag,
+l['line'][last:]
+)
+do(line)
+do(next)
+def _parse_udiff(self):
+"""
+Parse the diff an return data for the template.
+"""
+lineiter = self.lines
+files = []
+try:
+line = lineiter.next()
+# skip first context
+skipfirst = True
+while 1:
+# continue until we found the old file
+if not line.startswith('--- '):
+line = lineiter.next()
+continue
+chunks = []
+filename, old_rev, new_rev = \
+self._extract_rev(line, lineiter.next())
+files.append({
+'filename':         filename,
+'old_revision':     old_rev,
+'new_revision':     new_rev,
+'chunks':           chunks
+})
+line = lineiter.next()
+while line:
+match = self._chunk_re.match(line)
+if not match:
+break
+lines = []
+chunks.append(lines)
+old_line, old_end, new_line, new_end = \
+[int(x or 1) for x in match.groups()[:-1]]
+old_line -= 1
+new_line -= 1
+context = len(match.groups()) == 5
+old_end += old_line
+new_end += new_line
+if context:
+if not skipfirst:
+lines.append({
+'old_lineno': '...',
+'new_lineno': '...',
+'action': 'context',
+'line': line,
+})
+else:
+skipfirst = False
+line = lineiter.next()
+while old_line < old_end or new_line < new_end:
+if line:
+command, line = line[0], line[1:]
+else:
+command = ' '
+affects_old = affects_new = False
+# ignore those if we don't expect them
+if command in '#@':
+continue
+elif command == '+':
+affects_new = True
+action = 'add'
+elif command == '-':
+affects_old = True
+action = 'del'
+else:
+affects_old = affects_new = True
+action = 'unmod'
+old_line += affects_old
+new_line += affects_new
+lines.append({
+'old_lineno':   affects_old and old_line or '',
+'new_lineno':   affects_new and new_line or '',
+'action':       action,
+'line':         line
+})
+line = lineiter.next()
+except StopIteration:
+pass
+# highlight inline changes
+for file in files:
+for chunk in chunks:
+lineiter = iter(chunk)
+#first = True
+try:
+while 1:
+line = lineiter.next()
+if line['action'] != 'unmod':
+nextline = lineiter.next()
+if nextline['action'] == 'unmod' or \
+nextline['action'] == line['action']:
+continue
+self.differ(line, nextline)
+except StopIteration:
+pass
+return files
+def prepare(self):
+"""
+Prepare the passed udiff for HTML rendering. It'l return a list
+of dicts
+"""
+return self._parse_udiff()
+def _safe_id(self, idstring):
+"""Make a string safe for including in an id attribute.
+The HTML spec says that id attributes 'must begin with
+a letter ([A-Za-z]) and may be followed by any number
+of letters, digits ([0-9]), hyphens ("-"), underscores
+("_"), colons (":"), and periods (".")'. These regexps
+are slightly over-zealous, in that they remove colons
+and periods unnecessarily.
+Whitespace is transformed into underscores, and then
+anything which is not a hyphen or a character that
+matches \w (alphanumerics and underscore) is removed.
+"""
+# Transform all whitespace to underscore
+idstring = re.sub(r'\s', "_", '%s' % idstring)
+# Remove everything that is not a hyphen or a member of \w
+idstring = re.sub(r'(?!-)\W', "", idstring).lower()
+return idstring
+def raw_diff(self):
+"""
+Returns raw string as udiff
+"""
+udiff_copy = self.copy_iterator()
+if self.__format == 'gitdiff':
+udiff_copy = self._parse_gitdiff(udiff_copy)
+return u''.join(udiff_copy)
+def as_html(self, table_class='code-difftable', line_class='line',
+new_lineno_class='lineno old', old_lineno_class='lineno new',
+code_class='code'):
+"""
+Return udiff as html table with customized css classes
+"""
+def _link_to_if(condition, label, url):
+"""
+Generates a link if condition is meet or just the label if not.
+"""
+if condition:
+return '''<a href="%(url)s">%(label)s</a>''' % {'url': url,
+'label': label}
+else:
+return label
+diff_lines = self.prepare()
+_html_empty = True
+_html = []
+_html.append('''<table class="%(table_class)s">\n''' \
+% {'table_class': table_class})
+for diff in diff_lines:
+for line in diff['chunks']:
+_html_empty = False
+for change in line:
+_html.append('''<tr class="%(line_class)s %(action)s">\n''' \
+% {'line_class': line_class,
+'action': change['action']})
+anchor_old_id = ''
+anchor_new_id = ''
+anchor_old = "%(filename)s_o%(oldline_no)s" % \
+{'filename': self._safe_id(diff['filename']),
+'oldline_no': change['old_lineno']}
+anchor_new = "%(filename)s_n%(oldline_no)s" % \
+{'filename': self._safe_id(diff['filename']),
+'oldline_no': change['new_lineno']}
+cond_old = change['old_lineno'] != '...' and \
+change['old_lineno']
+cond_new = change['new_lineno'] != '...' and \
+change['new_lineno']
+if cond_old:
+anchor_old_id = 'id="%s"' % anchor_old
+if cond_new:
+anchor_new_id = 'id="%s"' % anchor_new
+###########################################################
+# OLD LINE NUMBER
+###########################################################
+_html.append('''\t<td %(a_id)s class="%(old_lineno_cls)s">''' \
+% {'a_id': anchor_old_id,
+'old_lineno_cls': old_lineno_class})
+_html.append('''<pre>%(link)s</pre>''' \
+% {'link':
+_link_to_if(cond_old, change['old_lineno'], '#%s' \
+% anchor_old)})
+_html.append('''</td>\n''')
+###########################################################
+# NEW LINE NUMBER
+###########################################################
+_html.append('''\t<td %(a_id)s class="%(new_lineno_cls)s">''' \
+% {'a_id': anchor_new_id,
+'new_lineno_cls': new_lineno_class})
+_html.append('''<pre>%(link)s</pre>''' \
+% {'link':
+_link_to_if(cond_new, change['new_lineno'], '#%s' \
+% anchor_new)})
+_html.append('''</td>\n''')
+###########################################################
+# CODE
+###########################################################
+_html.append('''\t<td class="%(code_class)s">''' \
+% {'code_class': code_class})
+_html.append('''\n\t\t<pre>%(code)s</pre>\n''' \
+% {'code': change['line']})
+_html.append('''\t</td>''')
+_html.append('''\n</tr>\n''')
+_html.append('''</table>''')
+if _html_empty:
+return None
+return ''.join(_html)
+def stat(self):
+"""
+Returns tuple of added, and removed lines for this instance
+"""
+return self.adds, self.removes

Mercurial > kallithea

comparison rhodecode/lib/diffs.py @ 1753:1d1ccb873d00 beta