changeset 6907:71a29042ab8a

diffs: very minor refactorings, reformatting, documentation, and removal of unused code
author Mads Kiilerich <mads@kiilerich.com>
date Tue, 03 Oct 2017 00:14:40 +0200
parents b343a4599178
children 791430c43bca
files kallithea/lib/diffs.py kallithea/lib/vcs/utils/diffs.py
diffstat 2 files changed, 38 insertions(+), 516 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/lib/diffs.py	Tue Oct 03 00:14:40 2017 +0200
+++ b/kallithea/lib/diffs.py	Tue Oct 03 00:14:40 2017 +0200
@@ -29,7 +29,7 @@
 import difflib
 import logging
 
-from itertools import tee, imap
+from itertools import imap
 
 from tg.i18n import ugettext as _
 
@@ -42,23 +42,25 @@
 log = logging.getLogger(__name__)
 
 
-def wrap_to_table(str_):
-    return '''<table class="code-difftable">
+def wrap_to_table(html):
+    """Given a string with html, return it wrapped in a table, similar to what
+    DiffProcessor returns."""
+    return '''\
+              <table class="code-difftable">
                 <tr class="line no-comment">
                 <td class="lineno new"></td>
                 <td class="code no-comment"><pre>%s</pre></td>
                 </tr>
-              </table>''' % str_
+              </table>''' % html
 
 
 def wrapped_diff(filenode_old, filenode_new, diff_limit=None,
                 ignore_whitespace=True, line_context=3,
                 enable_comments=False):
     """
-    returns a wrapped diff into a table, checks for diff_limit and presents
-    proper message
+    Returns a file diff wrapped into a table.
+    Checks for diff_limit and presents a message if the diff is too big.
     """
-
     if filenode_old is None:
         filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())
 
@@ -107,8 +109,6 @@
 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
     """
     Returns git style diff between given ``filenode_old`` and ``filenode_new``.
-
-    :param ignore_whitespace: ignore whitespaces in diff
     """
     # make sure we pass in default context
     context = context or 3
@@ -237,22 +237,10 @@
             self.differ = self._highlight_line_udiff
             self._parser = self._parse_udiff
 
-    def _copy_iterator(self):
-        """
-        make a fresh copy of generator, we should not iterate thru
-        an original as it's needed for repeating operations on
-        this instance of DiffProcessor
-        """
-        self.__udiff, iterator_copy = tee(self.__udiff)
-        return iterator_copy
-
     def _escaper(self, string):
         """
-        Escaper for diff escapes special chars and checks the diff limit
-
-        :param string:
+        Do HTML escaping/markup and check the diff limit
         """
-
         self.cur_diff_size += len(string)
 
         # escaper gets iterated on each .next() call and it checks if each
@@ -278,23 +266,11 @@
 
         return self._escape_re.sub(substitute, safe_unicode(string))
 
-    def _line_counter(self, l):
-        """
-        Checks each line and bumps total adds/removes for this diff
-
-        :param l:
-        """
-        if l.startswith('+') and not l.startswith('+++'):
-            self.adds += 1
-        elif l.startswith('-') and not l.startswith('---'):
-            self.removes += 1
-        return safe_unicode(l)
-
     def _highlight_line_difflib(self, old, new):
         """
-        Highlight inline changes in both lines.
+        Highlight simple add/remove in two lines given as info dicts. They are
+        modified in place and given markup with <del>/<ins>.
         """
-
         assert old['action'] == 'del'
         assert new['action'] == 'add'
 
@@ -349,16 +325,16 @@
 
     def _get_header(self, diff_chunk):
         """
-        parses the diff header, and returns parts, and leftover diff
-        parts consists of 14 elements::
+        Parses a Git diff for a single file (header and chunks) and returns a tuple with:
+
+        1. A dict with meta info:
 
             a_path, b_path, similarity_index, rename_from, rename_to,
             old_mode, new_mode, new_file_mode, deleted_file_mode,
             a_blob_id, b_blob_id, b_mode, a_file, b_file
 
-        :param diff_chunk:
+        2. An iterator yielding lines with simple HTML markup.
         """
-
         match = None
         if self.vcs == 'git':
             match = self._git_header_re.match(diff_chunk)
@@ -366,21 +342,25 @@
             match = self._hg_header_re.match(diff_chunk)
         if match is None:
             raise Exception('diff not recognized as valid %s diff' % self.vcs)
-        groups = match.groupdict()
+        meta_info = match.groupdict()
         rest = diff_chunk[match.end():]
         if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '):
             raise Exception('cannot parse %s diff header: %r followed by %r' % (self.vcs, diff_chunk[:match.end()], rest[:1000]))
         difflines = imap(self._escaper, re.findall(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
-        return groups, difflines
+        return meta_info, difflines
 
     def _clean_line(self, line, command):
+        """Given a diff line, strip the leading character if it is a plus/minus/context line."""
         if command in ['+', '-', ' ']:
-            # only modify the line if it's actually a diff thing
             line = line[1:]
         return line
 
     def _parse_gitdiff(self, inline_diff=True):
-        _files = []
+        """Parse self._diff and return a list of dicts with meta info and chunks for each file.
+        If diff is truncated, wrap it in LimitedDiffContainer.
+        Optionally, do an extra pass and do extra markup of one-liner changes.
+        """
+        _files = [] # list of dicts with meta info and chunks
         diff_container = lambda arg: arg
 
         # split the diff in chunks of separate --git a/file b/file chunks
@@ -445,10 +425,10 @@
             # a real non-binary diff
             if head['a_file'] or head['b_file']:
                 try:
-                    chunks, _stats = self._parse_lines(diff)
+                    chunks, added, deleted = self._parse_lines(diff)
                     stats['binary'] = False
-                    stats['added'] = _stats[0]
-                    stats['deleted'] = _stats[1]
+                    stats['added'] = added
+                    stats['deleted'] = deleted
                     # explicit mark that it's a modified file
                     if op == 'M':
                         stats['ops'][MOD_FILENODE] = 'modified file'
@@ -528,11 +508,11 @@
 
     def _parse_lines(self, diff):
         """
-        Parse the diff and return data for the template.
+        Given an iterator of diff body lines, parse them and return a dict per
+        line and added/removed totals.
         """
-
-        stats = [0, 0]
-        (old_line, old_end, new_line, new_end) = (None, None, None, None)
+        added = deleted = 0
+        old_line = old_end = new_line = new_end = None
 
         try:
             chunks = []
@@ -579,11 +559,11 @@
                     if command == '+':
                         affects_new = True
                         action = 'add'
-                        stats[0] += 1
+                        added += 1
                     elif command == '-':
                         affects_old = True
                         action = 'del'
-                        stats[1] += 1
+                        deleted += 1
                     elif command == ' ':
                         affects_old = affects_new = True
                         action = 'unmod'
@@ -613,15 +593,15 @@
                         })
                         line = diff.next()
                 if old_line > old_end:
-                        raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
+                    raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
                 if new_line > new_end:
-                        raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
+                    raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
         except StopIteration:
             pass
         if old_line != old_end or new_line != new_end:
             raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))
 
-        return chunks, stats
+        return chunks, added, deleted
 
     def _safe_id(self, idstring):
         """Make a string safe for including in an id attribute.
@@ -654,12 +634,11 @@
         self.parsed_diff = parsed
         return parsed
 
-    def as_raw(self, diff_lines=None):
+    def as_raw(self):
         """
-        Returns raw string diff
+        Returns raw string diff, exactly as it was passed in the first place.
         """
         return self._diff
-        #return u''.join(imap(self._line_counter, self._diff.splitlines(1)))
 
     def as_html(self, table_class='code-difftable', line_class='line',
                 old_lineno_class='lineno old', new_lineno_class='lineno new',
--- a/kallithea/lib/vcs/utils/diffs.py	Tue Oct 03 00:14:40 2017 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,457 +0,0 @@
-# -*- coding: utf-8 -*-
-# original copyright: 2007-2008 by Armin Ronacher
-# licensed under the BSD license.
-
-import re
-import difflib
-import logging
-import itertools
-
-from difflib import unified_diff
-
-from kallithea.lib.vcs.exceptions import VCSError
-from kallithea.lib.vcs.nodes import FileNode, NodeError
-from kallithea.lib.vcs.utils import safe_unicode
-
-
-def get_udiff(filenode_old, filenode_new, show_whitespace=True):
-    """
-    Returns unified diff between given ``filenode_old`` and ``filenode_new``.
-    """
-    try:
-        filenode_old_date = filenode_old.changeset.date
-    except NodeError:
-        filenode_old_date = None
-
-    try:
-        filenode_new_date = filenode_new.changeset.date
-    except NodeError:
-        filenode_new_date = None
-
-    for filenode in (filenode_old, filenode_new):
-        if not isinstance(filenode, FileNode):
-            raise VCSError("Given object should be FileNode object, not %s"
-                % filenode.__class__)
-
-    if filenode_old_date and filenode_new_date:
-        if not filenode_old_date < filenode_new_date:
-            logging.debug("Generating udiff for filenodes with not increasing "
-                "dates")
-
-    vcs_udiff = unified_diff(filenode_old.content.splitlines(True),
-                               filenode_new.content.splitlines(True),
-                               filenode_old.name,
-                               filenode_new.name,
-                               filenode_old_date,
-                               filenode_old_date)
-    return vcs_udiff
-
-
-def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True):
-    """
-    Returns git style diff between given ``filenode_old`` and ``filenode_new``.
-
-    :param ignore_whitespace: ignore whitespaces in diff
-    """
-
-    for filenode in (filenode_old, filenode_new):
-        if not isinstance(filenode, FileNode):
-            raise VCSError("Given object should be FileNode object, not %s"
-                % filenode.__class__)
-
-    old_raw_id = getattr(filenode_old.changeset, 'raw_id', '0' * 40)
-    new_raw_id = getattr(filenode_new.changeset, 'raw_id', '0' * 40)
-
-    repo = filenode_new.changeset.repository
-    vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
-                                 ignore_whitespace)
-
-    return vcs_gitdiff
-
-
-class DiffProcessor(object):
-    """
-    Give it a unified diff and it returns a list of the files that were
-    mentioned in the diff together with a dict of meta information that
-    can be used to render it in a HTML template.
-    """
-    _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
-
-    def __init__(self, diff, differ='diff', format='udiff'):
-        """
-        :param diff:   a text in diff format or generator
-        :param format: format of diff passed, `udiff` or `gitdiff`
-        """
-        if isinstance(diff, basestring):
-            diff = [diff]
-
-        self.__udiff = diff
-        self.__format = format
-        self.adds = 0
-        self.removes = 0
-
-        if isinstance(self.__udiff, basestring):
-            self.lines = iter(self.__udiff.splitlines(1))
-
-        elif self.__format == 'gitdiff':
-            udiff_copy = self.copy_iterator()
-            self.lines = itertools.imap(self.escaper,
-                                        self._parse_gitdiff(udiff_copy))
-        else:
-            udiff_copy = self.copy_iterator()
-            self.lines = itertools.imap(self.escaper, udiff_copy)
-
-        # Select a differ.
-        if differ == 'difflib':
-            self.differ = self._highlight_line_difflib
-        else:
-            self.differ = self._highlight_line_udiff
-
-    def escaper(self, string):
-        return string.replace('<', '&lt;').replace('>', '&gt;')
-
-    def copy_iterator(self):
-        """
-        make a fresh copy of generator, we should not iterate thru
-        an original as it's needed for repeating operations on
-        this instance of DiffProcessor
-        """
-        self.__udiff, iterator_copy = itertools.tee(self.__udiff)
-        return iterator_copy
-
-    def _extract_rev(self, line1, line2):
-        """
-        Extract the filename and revision hint from a line.
-        """
-
-        try:
-            if line1.startswith('--- ') and line2.startswith('+++ '):
-                l1 = line1[4:].split(None, 1)
-                old_filename = l1[0].lstrip('a/') if len(l1) >= 1 else None
-                old_rev = l1[1] if len(l1) == 2 else 'old'
-
-                l2 = line2[4:].split(None, 1)
-                new_filename = l2[0].lstrip('b/') if len(l1) >= 1 else None
-                new_rev = l2[1] if len(l2) == 2 else 'new'
-
-                filename = old_filename if (old_filename !=
-                                            'dev/null') else new_filename
-
-                return filename, new_rev, old_rev
-        except (ValueError, IndexError):
-            pass
-
-        return None, None, None
-
-    def _parse_gitdiff(self, diffiterator):
-        def line_decoder(l):
-            if l.startswith('+') and not l.startswith('+++'):
-                self.adds += 1
-            elif l.startswith('-') and not l.startswith('---'):
-                self.removes += 1
-            return safe_unicode(l)
-
-        output = list(diffiterator)
-        size = len(output)
-
-        if size == 2:
-            l = []
-            l.extend([output[0]])
-            l.extend(output[1].splitlines(1))
-            return map(line_decoder, l)
-        elif size == 1:
-            return map(line_decoder, output[0].splitlines(1))
-        elif size == 0:
-            return []
-
-        raise Exception('wrong size of diff %s' % size)
-
-    def _highlight_line_difflib(self, line, next):
-        """
-        Highlight inline changes in both lines.
-        """
-
-        if line['action'] == 'del':
-            old, new = line, next
-        else:
-            old, new = next, line
-
-        oldwords = re.split(r'(\W)', old['line'])
-        newwords = re.split(r'(\W)', new['line'])
-
-        sequence = difflib.SequenceMatcher(None, oldwords, newwords)
-
-        oldfragments, newfragments = [], []
-        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
-            oldfrag = ''.join(oldwords[i1:i2])
-            newfrag = ''.join(newwords[j1:j2])
-            if tag != 'equal':
-                if oldfrag:
-                    oldfrag = '<del>%s</del>' % oldfrag
-                if newfrag:
-                    newfrag = '<ins>%s</ins>' % newfrag
-            oldfragments.append(oldfrag)
-            newfragments.append(newfrag)
-
-        old['line'] = "".join(oldfragments)
-        new['line'] = "".join(newfragments)
-
-    def _highlight_line_udiff(self, line, next):
-        """
-        Highlight inline changes in both lines.
-        """
-        start = 0
-        limit = min(len(line['line']), len(next['line']))
-        while start < limit and line['line'][start] == next['line'][start]:
-            start += 1
-        end = -1
-        limit -= start
-        while -end <= limit and line['line'][end] == next['line'][end]:
-            end -= 1
-        end += 1
-        if start or end:
-            def do(l):
-                last = end + len(l['line'])
-                if l['action'] == 'add':
-                    tag = 'ins'
-                else:
-                    tag = 'del'
-                l['line'] = '%s<%s>%s</%s>%s' % (
-                    l['line'][:start],
-                    tag,
-                    l['line'][start:last],
-                    tag,
-                    l['line'][last:]
-                )
-            do(line)
-            do(next)
-
-    def _parse_udiff(self):
-        """
-        Parse the diff an return data for the template.
-        """
-        lineiter = self.lines
-        files = []
-        try:
-            line = lineiter.next()
-            # skip first context
-            skipfirst = True
-            while 1:
-                # continue until we found the old file
-                if not line.startswith('--- '):
-                    line = lineiter.next()
-                    continue
-
-                chunks = []
-                filename, old_rev, new_rev = \
-                    self._extract_rev(line, lineiter.next())
-                files.append({
-                    'filename':         filename,
-                    'old_revision':     old_rev,
-                    'new_revision':     new_rev,
-                    'chunks':           chunks
-                })
-
-                line = lineiter.next()
-                while line:
-                    match = self._chunk_re.match(line)
-                    if not match:
-                        break
-
-                    lines = []
-                    chunks.append(lines)
-
-                    old_line, old_end, new_line, new_end = \
-                        [int(x or 1) for x in match.groups()[:-1]]
-                    old_line -= 1
-                    new_line -= 1
-                    context = len(match.groups()) == 5
-                    old_end += old_line
-                    new_end += new_line
-
-                    if context:
-                        if not skipfirst:
-                            lines.append({
-                                'old_lineno': '...',
-                                'new_lineno': '...',
-                                'action': 'context',
-                                'line': line,
-                            })
-                        else:
-                            skipfirst = False
-
-                    line = lineiter.next()
-                    while old_line < old_end or new_line < new_end:
-                        if line:
-                            command, line = line[0], line[1:]
-                        else:
-                            command = ' '
-                        affects_old = affects_new = False
-
-                        # ignore those if we don't expect them
-                        if command in '#@':
-                            continue
-                        elif command == '+':
-                            affects_new = True
-                            action = 'add'
-                        elif command == '-':
-                            affects_old = True
-                            action = 'del'
-                        else:
-                            affects_old = affects_new = True
-                            action = 'unmod'
-
-                        old_line += affects_old
-                        new_line += affects_new
-                        lines.append({
-                            'old_lineno':   affects_old and old_line or '',
-                            'new_lineno':   affects_new and new_line or '',
-                            'action':       action,
-                            'line':         line
-                        })
-                        line = lineiter.next()
-
-        except StopIteration:
-            pass
-
-        # highlight inline changes
-        for file in files:
-            for chunk in chunks:
-                lineiter = iter(chunk)
-                #first = True
-                try:
-                    while 1:
-                        line = lineiter.next()
-                        if line['action'] != 'unmod':
-                            nextline = lineiter.next()
-                            if nextline['action'] == 'unmod' or \
-                               nextline['action'] == line['action']:
-                                continue
-                            self.differ(line, nextline)
-                except StopIteration:
-                    pass
-
-        return files
-
-    def prepare(self):
-        """
-        Prepare the passed udiff for HTML rendering. It'll return a list
-        of dicts
-        """
-        return self._parse_udiff()
-
-    def _safe_id(self, idstring):
-        """Make a string safe for including in an id attribute.
-
-        The HTML spec says that id attributes 'must begin with
-        a letter ([A-Za-z]) and may be followed by any number
-        of letters, digits ([0-9]), hyphens ("-"), underscores
-        ("_"), colons (":"), and periods (".")'. These regexps
-        are slightly over-zealous, in that they remove colons
-        and periods unnecessarily.
-
-        Whitespace is transformed into underscores, and then
-        anything which is not a hyphen or a character that
-        matches \w (alphanumerics and underscore) is removed.
-
-        """
-        # Transform all whitespace to underscore
-        idstring = re.sub(r'\s', "_", idstring)
-        # Remove everything that is not a hyphen or a member of \w
-        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
-        return idstring
-
-    def raw_diff(self):
-        """
-        Returns raw string as udiff
-        """
-        udiff_copy = self.copy_iterator()
-        if self.__format == 'gitdiff':
-            udiff_copy = self._parse_gitdiff(udiff_copy)
-        return u''.join(udiff_copy)
-
-    def as_html(self, table_class='code-difftable', line_class='line',
-                new_lineno_class='lineno old', old_lineno_class='lineno new',
-                code_class='code'):
-        """
-        Return udiff as html table with customized css classes
-        """
-        def _link_to_if(condition, label, url):
-            """
-            Generates a link if condition is meet or just the label if not.
-            """
-
-            if condition:
-                return '''<a href="%(url)s">%(label)s</a>''' % {'url': url,
-                                                                'label': label}
-            else:
-                return label
-        diff_lines = self.prepare()
-        _html_empty = True
-        _html = []
-        _html.append('''<table class="%(table_class)s">\n'''
-                     % {'table_class': table_class})
-        for diff in diff_lines:
-            for line in diff['chunks']:
-                _html_empty = False
-                for change in line:
-                    _html.append('''<tr class="%(line_class)s %(action)s">\n'''
-                                 % {'line_class': line_class,
-                                    'action': change['action']})
-                    anchor_old_id = ''
-                    anchor_new_id = ''
-                    anchor_old = "%(filename)s_o%(oldline_no)s" % \
-                                {'filename': self._safe_id(diff['filename']),
-                                 'oldline_no': change['old_lineno']}
-                    anchor_new = "%(filename)s_n%(oldline_no)s" % \
-                                {'filename': self._safe_id(diff['filename']),
-                                 'oldline_no': change['new_lineno']}
-                    cond_old = change['old_lineno'] != '...' and \
-                                                        change['old_lineno']
-                    cond_new = change['new_lineno'] != '...' and \
-                                                        change['new_lineno']
-                    if cond_old:
-                        anchor_old_id = 'id="%s"' % anchor_old
-                    if cond_new:
-                        anchor_new_id = 'id="%s"' % anchor_new
-                    ###########################################################
-                    # OLD LINE NUMBER
-                    ###########################################################
-                    _html.append('''\t<td %(a_id)s class="%(old_lineno_cls)s">'''
-                                  % {'a_id': anchor_old_id,
-                                     'old_lineno_cls': old_lineno_class})
-
-                    _html.append('''<pre>%(link)s</pre>'''
-                                 % {'link':
-                                    _link_to_if(cond_old, change['old_lineno'], '#%s' % anchor_old)})
-                    _html.append('''</td>\n''')
-                    ###########################################################
-                    # NEW LINE NUMBER
-                    ###########################################################
-
-                    _html.append('''\t<td %(a_id)s class="%(new_lineno_cls)s">'''
-                                  % {'a_id': anchor_new_id,
-                                     'new_lineno_cls': new_lineno_class})
-
-                    _html.append('''<pre>%(link)s</pre>'''
-                                 % {'link': _link_to_if(cond_new, change['new_lineno'], '#%s' % anchor_new)})
-                    _html.append('''</td>\n''')
-                    ###########################################################
-                    # CODE
-                    ###########################################################
-                    _html.append('''\t<td class="%(code_class)s">'''
-                                 % {'code_class': code_class})
-                    _html.append('''\n\t\t<pre>%(code)s</pre>\n'''
-                                 % {'code': change['line']})
-                    _html.append('''\t</td>''')
-                    _html.append('''\n</tr>\n''')
-        _html.append('''</table>''')
-        if _html_empty:
-            return None
-        return ''.join(_html)
-
-    def stat(self):
-        """
-        Returns tuple of adde,and removed lines for this instance
-        """
-        return self.adds, self.removes