# HG changeset patch # User Mads Kiilerich # Date 1506982480 -7200 # Node ID 71a29042ab8afed4d8e27d150f2594a4bddd68f0 # Parent b343a4599178188ea2b2041e9797e6cd631a4333 diffs: very minor refactorings, reformatting, documentation, and removal of unused code diff -r b343a4599178 -r 71a29042ab8a kallithea/lib/diffs.py --- a/kallithea/lib/diffs.py Tue Oct 03 00:14:40 2017 +0200 +++ b/kallithea/lib/diffs.py Tue Oct 03 00:14:40 2017 +0200 @@ -29,7 +29,7 @@ import difflib import logging -from itertools import tee, imap +from itertools import imap from tg.i18n import ugettext as _ @@ -42,23 +42,25 @@ log = logging.getLogger(__name__) -def wrap_to_table(str_): - return ''' +def wrap_to_table(html): + """Given a string with html, return it wrapped in a table, similar to what + DiffProcessor returns.""" + return '''\ +
-
%s
''' % str_ + ''' % html def wrapped_diff(filenode_old, filenode_new, diff_limit=None, ignore_whitespace=True, line_context=3, enable_comments=False): """ - returns a wrapped diff into a table, checks for diff_limit and presents - proper message + Returns a file diff wrapped into a table. + Checks for diff_limit and presents a message if the diff is too big. """ - if filenode_old is None: filenode_old = FileNode(filenode_new.path, '', EmptyChangeset()) @@ -107,8 +109,6 @@ def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3): """ Returns git style diff between given ``filenode_old`` and ``filenode_new``. - - :param ignore_whitespace: ignore whitespaces in diff """ # make sure we pass in default context context = context or 3 @@ -237,22 +237,10 @@ self.differ = self._highlight_line_udiff self._parser = self._parse_udiff - def _copy_iterator(self): - """ - make a fresh copy of generator, we should not iterate thru - an original as it's needed for repeating operations on - this instance of DiffProcessor - """ - self.__udiff, iterator_copy = tee(self.__udiff) - return iterator_copy - def _escaper(self, string): """ - Escaper for diff escapes special chars and checks the diff limit - - :param string: + Do HTML escaping/markup and check the diff limit """ - self.cur_diff_size += len(string) # escaper gets iterated on each .next() call and it checks if each @@ -278,23 +266,11 @@ return self._escape_re.sub(substitute, safe_unicode(string)) - def _line_counter(self, l): - """ - Checks each line and bumps total adds/removes for this diff - - :param l: - """ - if l.startswith('+') and not l.startswith('+++'): - self.adds += 1 - elif l.startswith('-') and not l.startswith('---'): - self.removes += 1 - return safe_unicode(l) - def _highlight_line_difflib(self, old, new): """ - Highlight inline changes in both lines. + Highlight simple add/remove in two lines given as info dicts. They are + modified in place and given markup with /. """ - assert old['action'] == 'del' assert new['action'] == 'add' @@ -349,16 +325,16 @@ def _get_header(self, diff_chunk): """ - parses the diff header, and returns parts, and leftover diff - parts consists of 14 elements:: + Parses a Git diff for a single file (header and chunks) and returns a tuple with: + + 1. A dict with meta info: a_path, b_path, similarity_index, rename_from, rename_to, old_mode, new_mode, new_file_mode, deleted_file_mode, a_blob_id, b_blob_id, b_mode, a_file, b_file - :param diff_chunk: + 2. An iterator yielding lines with simple HTML markup. """ - match = None if self.vcs == 'git': match = self._git_header_re.match(diff_chunk) @@ -366,21 +342,25 @@ match = self._hg_header_re.match(diff_chunk) if match is None: raise Exception('diff not recognized as valid %s diff' % self.vcs) - groups = match.groupdict() + meta_info = match.groupdict() rest = diff_chunk[match.end():] if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '): raise Exception('cannot parse %s diff header: %r followed by %r' % (self.vcs, diff_chunk[:match.end()], rest[:1000])) difflines = imap(self._escaper, re.findall(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do - return groups, difflines + return meta_info, difflines def _clean_line(self, line, command): + """Given a diff line, strip the leading character if it is a plus/minus/context line.""" if command in ['+', '-', ' ']: - # only modify the line if it's actually a diff thing line = line[1:] return line def _parse_gitdiff(self, inline_diff=True): - _files = [] + """Parse self._diff and return a list of dicts with meta info and chunks for each file. + If diff is truncated, wrap it in LimitedDiffContainer. + Optionally, do an extra pass and to extra markup of one-liner changes. + """ + _files = [] # list of dicts with meta info and chunks diff_container = lambda arg: arg # split the diff in chunks of separate --git a/file b/file chunks @@ -445,10 +425,10 @@ # a real non-binary diff if head['a_file'] or head['b_file']: try: - chunks, _stats = self._parse_lines(diff) + chunks, added, deleted = self._parse_lines(diff) stats['binary'] = False - stats['added'] = _stats[0] - stats['deleted'] = _stats[1] + stats['added'] = added + stats['deleted'] = deleted # explicit mark that it's a modified file if op == 'M': stats['ops'][MOD_FILENODE] = 'modified file' @@ -528,11 +508,11 @@ def _parse_lines(self, diff): """ - Parse the diff and return data for the template. + Given an iterator of diff body lines, parse them and return a dict per + line and added/removed totals. """ - - stats = [0, 0] - (old_line, old_end, new_line, new_end) = (None, None, None, None) + added = deleted = 0 + old_line = old_end = new_line = new_end = None try: chunks = [] @@ -579,11 +559,11 @@ if command == '+': affects_new = True action = 'add' - stats[0] += 1 + added += 1 elif command == '-': affects_old = True action = 'del' - stats[1] += 1 + deleted += 1 elif command == ' ': affects_old = affects_new = True action = 'unmod' @@ -613,15 +593,15 @@ }) line = diff.next() if old_line > old_end: - raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line)) + raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line)) if new_line > new_end: - raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line)) + raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line)) except StopIteration: pass if old_line != old_end or new_line != new_end: raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line)) - return chunks, stats + return chunks, added, deleted def _safe_id(self, idstring): """Make a string safe for including in an id attribute. @@ -654,12 +634,11 @@ self.parsed_diff = parsed return parsed - def as_raw(self, diff_lines=None): + def as_raw(self): """ - Returns raw string diff + Returns raw string diff, exactly as it was passed in the first place. """ return self._diff - #return u''.join(imap(self._line_counter, self._diff.splitlines(1))) def as_html(self, table_class='code-difftable', line_class='line', old_lineno_class='lineno old', new_lineno_class='lineno new', diff -r b343a4599178 -r 71a29042ab8a kallithea/lib/vcs/utils/diffs.py --- a/kallithea/lib/vcs/utils/diffs.py Tue Oct 03 00:14:40 2017 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,457 +0,0 @@ -# -*- coding: utf-8 -*- -# original copyright: 2007-2008 by Armin Ronacher -# licensed under the BSD license. - -import re -import difflib -import logging -import itertools - -from difflib import unified_diff - -from kallithea.lib.vcs.exceptions import VCSError -from kallithea.lib.vcs.nodes import FileNode, NodeError -from kallithea.lib.vcs.utils import safe_unicode - - -def get_udiff(filenode_old, filenode_new, show_whitespace=True): - """ - Returns unified diff between given ``filenode_old`` and ``filenode_new``. - """ - try: - filenode_old_date = filenode_old.changeset.date - except NodeError: - filenode_old_date = None - - try: - filenode_new_date = filenode_new.changeset.date - except NodeError: - filenode_new_date = None - - for filenode in (filenode_old, filenode_new): - if not isinstance(filenode, FileNode): - raise VCSError("Given object should be FileNode object, not %s" - % filenode.__class__) - - if filenode_old_date and filenode_new_date: - if not filenode_old_date < filenode_new_date: - logging.debug("Generating udiff for filenodes with not increasing " - "dates") - - vcs_udiff = unified_diff(filenode_old.content.splitlines(True), - filenode_new.content.splitlines(True), - filenode_old.name, - filenode_new.name, - filenode_old_date, - filenode_old_date) - return vcs_udiff - - -def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True): - """ - Returns git style diff between given ``filenode_old`` and ``filenode_new``. - - :param ignore_whitespace: ignore whitespaces in diff - """ - - for filenode in (filenode_old, filenode_new): - if not isinstance(filenode, FileNode): - raise VCSError("Given object should be FileNode object, not %s" - % filenode.__class__) - - old_raw_id = getattr(filenode_old.changeset, 'raw_id', '0' * 40) - new_raw_id = getattr(filenode_new.changeset, 'raw_id', '0' * 40) - - repo = filenode_new.changeset.repository - vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path, - ignore_whitespace) - - return vcs_gitdiff - - -class DiffProcessor(object): - """ - Give it a unified diff and it returns a list of the files that were - mentioned in the diff together with a dict of meta information that - can be used to render it in a HTML template. - """ - _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)') - - def __init__(self, diff, differ='diff', format='udiff'): - """ - :param diff: a text in diff format or generator - :param format: format of diff passed, `udiff` or `gitdiff` - """ - if isinstance(diff, basestring): - diff = [diff] - - self.__udiff = diff - self.__format = format - self.adds = 0 - self.removes = 0 - - if isinstance(self.__udiff, basestring): - self.lines = iter(self.__udiff.splitlines(1)) - - elif self.__format == 'gitdiff': - udiff_copy = self.copy_iterator() - self.lines = itertools.imap(self.escaper, - self._parse_gitdiff(udiff_copy)) - else: - udiff_copy = self.copy_iterator() - self.lines = itertools.imap(self.escaper, udiff_copy) - - # Select a differ. - if differ == 'difflib': - self.differ = self._highlight_line_difflib - else: - self.differ = self._highlight_line_udiff - - def escaper(self, string): - return string.replace('<', '<').replace('>', '>') - - def copy_iterator(self): - """ - make a fresh copy of generator, we should not iterate thru - an original as it's needed for repeating operations on - this instance of DiffProcessor - """ - self.__udiff, iterator_copy = itertools.tee(self.__udiff) - return iterator_copy - - def _extract_rev(self, line1, line2): - """ - Extract the filename and revision hint from a line. - """ - - try: - if line1.startswith('--- ') and line2.startswith('+++ '): - l1 = line1[4:].split(None, 1) - old_filename = l1[0].lstrip('a/') if len(l1) >= 1 else None - old_rev = l1[1] if len(l1) == 2 else 'old' - - l2 = line2[4:].split(None, 1) - new_filename = l2[0].lstrip('b/') if len(l1) >= 1 else None - new_rev = l2[1] if len(l2) == 2 else 'new' - - filename = old_filename if (old_filename != - 'dev/null') else new_filename - - return filename, new_rev, old_rev - except (ValueError, IndexError): - pass - - return None, None, None - - def _parse_gitdiff(self, diffiterator): - def line_decoder(l): - if l.startswith('+') and not l.startswith('+++'): - self.adds += 1 - elif l.startswith('-') and not l.startswith('---'): - self.removes += 1 - return safe_unicode(l) - - output = list(diffiterator) - size = len(output) - - if size == 2: - l = [] - l.extend([output[0]]) - l.extend(output[1].splitlines(1)) - return map(line_decoder, l) - elif size == 1: - return map(line_decoder, output[0].splitlines(1)) - elif size == 0: - return [] - - raise Exception('wrong size of diff %s' % size) - - def _highlight_line_difflib(self, line, next): - """ - Highlight inline changes in both lines. - """ - - if line['action'] == 'del': - old, new = line, next - else: - old, new = next, line - - oldwords = re.split(r'(\W)', old['line']) - newwords = re.split(r'(\W)', new['line']) - - sequence = difflib.SequenceMatcher(None, oldwords, newwords) - - oldfragments, newfragments = [], [] - for tag, i1, i2, j1, j2 in sequence.get_opcodes(): - oldfrag = ''.join(oldwords[i1:i2]) - newfrag = ''.join(newwords[j1:j2]) - if tag != 'equal': - if oldfrag: - oldfrag = '%s' % oldfrag - if newfrag: - newfrag = '%s' % newfrag - oldfragments.append(oldfrag) - newfragments.append(newfrag) - - old['line'] = "".join(oldfragments) - new['line'] = "".join(newfragments) - - def _highlight_line_udiff(self, line, next): - """ - Highlight inline changes in both lines. - """ - start = 0 - limit = min(len(line['line']), len(next['line'])) - while start < limit and line['line'][start] == next['line'][start]: - start += 1 - end = -1 - limit -= start - while -end <= limit and line['line'][end] == next['line'][end]: - end -= 1 - end += 1 - if start or end: - def do(l): - last = end + len(l['line']) - if l['action'] == 'add': - tag = 'ins' - else: - tag = 'del' - l['line'] = '%s<%s>%s%s' % ( - l['line'][:start], - tag, - l['line'][start:last], - tag, - l['line'][last:] - ) - do(line) - do(next) - - def _parse_udiff(self): - """ - Parse the diff an return data for the template. - """ - lineiter = self.lines - files = [] - try: - line = lineiter.next() - # skip first context - skipfirst = True - while 1: - # continue until we found the old file - if not line.startswith('--- '): - line = lineiter.next() - continue - - chunks = [] - filename, old_rev, new_rev = \ - self._extract_rev(line, lineiter.next()) - files.append({ - 'filename': filename, - 'old_revision': old_rev, - 'new_revision': new_rev, - 'chunks': chunks - }) - - line = lineiter.next() - while line: - match = self._chunk_re.match(line) - if not match: - break - - lines = [] - chunks.append(lines) - - old_line, old_end, new_line, new_end = \ - [int(x or 1) for x in match.groups()[:-1]] - old_line -= 1 - new_line -= 1 - context = len(match.groups()) == 5 - old_end += old_line - new_end += new_line - - if context: - if not skipfirst: - lines.append({ - 'old_lineno': '...', - 'new_lineno': '...', - 'action': 'context', - 'line': line, - }) - else: - skipfirst = False - - line = lineiter.next() - while old_line < old_end or new_line < new_end: - if line: - command, line = line[0], line[1:] - else: - command = ' ' - affects_old = affects_new = False - - # ignore those if we don't expect them - if command in '#@': - continue - elif command == '+': - affects_new = True - action = 'add' - elif command == '-': - affects_old = True - action = 'del' - else: - affects_old = affects_new = True - action = 'unmod' - - old_line += affects_old - new_line += affects_new - lines.append({ - 'old_lineno': affects_old and old_line or '', - 'new_lineno': affects_new and new_line or '', - 'action': action, - 'line': line - }) - line = lineiter.next() - - except StopIteration: - pass - - # highlight inline changes - for file in files: - for chunk in chunks: - lineiter = iter(chunk) - #first = True - try: - while 1: - line = lineiter.next() - if line['action'] != 'unmod': - nextline = lineiter.next() - if nextline['action'] == 'unmod' or \ - nextline['action'] == line['action']: - continue - self.differ(line, nextline) - except StopIteration: - pass - - return files - - def prepare(self): - """ - Prepare the passed udiff for HTML rendering. It'll return a list - of dicts - """ - return self._parse_udiff() - - def _safe_id(self, idstring): - """Make a string safe for including in an id attribute. - - The HTML spec says that id attributes 'must begin with - a letter ([A-Za-z]) and may be followed by any number - of letters, digits ([0-9]), hyphens ("-"), underscores - ("_"), colons (":"), and periods (".")'. These regexps - are slightly over-zealous, in that they remove colons - and periods unnecessarily. - - Whitespace is transformed into underscores, and then - anything which is not a hyphen or a character that - matches \w (alphanumerics and underscore) is removed. - - """ - # Transform all whitespace to underscore - idstring = re.sub(r'\s', "_", idstring) - # Remove everything that is not a hyphen or a member of \w - idstring = re.sub(r'(?!-)\W', "", idstring).lower() - return idstring - - def raw_diff(self): - """ - Returns raw string as udiff - """ - udiff_copy = self.copy_iterator() - if self.__format == 'gitdiff': - udiff_copy = self._parse_gitdiff(udiff_copy) - return u''.join(udiff_copy) - - def as_html(self, table_class='code-difftable', line_class='line', - new_lineno_class='lineno old', old_lineno_class='lineno new', - code_class='code'): - """ - Return udiff as html table with customized css classes - """ - def _link_to_if(condition, label, url): - """ - Generates a link if condition is meet or just the label if not. - """ - - if condition: - return '''%(label)s''' % {'url': url, - 'label': label} - else: - return label - diff_lines = self.prepare() - _html_empty = True - _html = [] - _html.append('''\n''' - % {'table_class': table_class}) - for diff in diff_lines: - for line in diff['chunks']: - _html_empty = False - for change in line: - _html.append('''\n''' - % {'line_class': line_class, - 'action': change['action']}) - anchor_old_id = '' - anchor_new_id = '' - anchor_old = "%(filename)s_o%(oldline_no)s" % \ - {'filename': self._safe_id(diff['filename']), - 'oldline_no': change['old_lineno']} - anchor_new = "%(filename)s_n%(oldline_no)s" % \ - {'filename': self._safe_id(diff['filename']), - 'oldline_no': change['new_lineno']} - cond_old = change['old_lineno'] != '...' and \ - change['old_lineno'] - cond_new = change['new_lineno'] != '...' and \ - change['new_lineno'] - if cond_old: - anchor_old_id = 'id="%s"' % anchor_old - if cond_new: - anchor_new_id = 'id="%s"' % anchor_new - ########################################################### - # OLD LINE NUMBER - ########################################################### - _html.append('''\t\n''') - ########################################################### - # NEW LINE NUMBER - ########################################################### - - _html.append('''\t\n''') - ########################################################### - # CODE - ########################################################### - _html.append('''\t''') - _html.append('''\n\n''') - _html.append('''
''' - % {'a_id': anchor_old_id, - 'old_lineno_cls': old_lineno_class}) - - _html.append('''
%(link)s
''' - % {'link': - _link_to_if(cond_old, change['old_lineno'], '#%s' % anchor_old)}) - _html.append('''
''' - % {'a_id': anchor_new_id, - 'new_lineno_cls': new_lineno_class}) - - _html.append('''
%(link)s
''' - % {'link': _link_to_if(cond_new, change['new_lineno'], '#%s' % anchor_new)}) - _html.append('''
''' - % {'code_class': code_class}) - _html.append('''\n\t\t
%(code)s
\n''' - % {'code': change['line']}) - _html.append('''\t
''') - if _html_empty: - return None - return ''.join(_html) - - def stat(self): - """ - Returns tuple of adde,and removed lines for this instance - """ - return self.adds, self.removes