view rhodecode/lib/diffs.py @ 2801:69420c48a0e6 beta

fixes #550: comparison of mercurial repositories failed when the origin repo had additional, non-common changesets; added a regression test for this
author Marcin Kuzminski <marcin@python-works.com>
date Tue, 04 Sep 2012 22:46:05 +0200
parents 2b6939a77052
children ab75def5c15d
line wrap: on
line source

# -*- coding: utf-8 -*-
"""
    rhodecode.lib.diffs
    ~~~~~~~~~~~~~~~~~~~

    Set of diffing helpers, previously part of vcs


    :created_on: Dec 4, 2011
    :author: marcink
    :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
    :original copyright: 2007-2008 by Armin Ronacher
    :license: GPLv3, see COPYING for more details.
"""
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import re
import difflib
import markupsafe

from itertools import tee, imap

from mercurial import patch
from mercurial.mdiff import diffopts
from mercurial.bundlerepo import bundlerepository

from pylons.i18n.translation import _

from rhodecode.lib.compat import BytesIO
from rhodecode.lib.vcs.utils.hgcompat import localrepo
from rhodecode.lib.vcs.exceptions import VCSError
from rhodecode.lib.vcs.nodes import FileNode, SubModuleNode
from rhodecode.lib.vcs.backends.base import EmptyChangeset
from rhodecode.lib.helpers import escape
from rhodecode.lib.utils import make_ui


def wrap_to_table(str_):
    """
    Wrap ``str_`` into a one-row ``code-difftable`` HTML table.

    :param str_: content put inside the ``<pre>`` element; it is
        interpolated as given, without any escaping
    :returns: the HTML markup of the table
    """
    table_template = '''<table class="code-difftable">
                <tr class="line no-comment">
                <td class="lineno new"></td>
                <td class="code no-comment"><pre>%s</pre></td>
                </tr>
              </table>'''
    return table_template % str_


def wrapped_diff(filenode_old, filenode_new, cut_off_limit=None,
                ignore_whitespace=True, line_context=3,
                enable_comments=False):
    """
    Render the diff between two file nodes as an HTML table.

    Binary files and files hitting ``cut_off_limit`` are not diffed, a table
    holding an explanatory message is returned for them instead.

    :param filenode_old: node with the old version of the file, ``None``
        stands for an empty file at the path of ``filenode_new``
    :param filenode_new: node with the new version of the file
    :param cut_off_limit: size both nodes have to stay below in order to be
        diffed, ``None`` turns the check off and ``-1`` always cuts off
    :param ignore_whitespace: handed over to ``get_gitdiff``
    :param line_context: number of context lines, handed over to
        ``get_gitdiff``
    :param enable_comments: handed over to ``DiffProcessor.as_html``
    :returns: tuple of ``(size, cs1, cs2, diff, stats)`` where ``size`` is
        the length of the rendered diff, ``cs1``/``cs2`` are the raw ids of
        the old/new changeset, ``diff`` is the HTML and ``stats`` is the
        ``(added, removed)`` pair; ``size`` and ``stats`` are zeroed when
        only a message is rendered
    """
    if filenode_old is None:
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())

    # values reported by the branches that only render a message
    stats, size = (0, 0), 0

    if filenode_old.is_binary or filenode_new.is_binary:
        diff = wrap_to_table(_('binary file'))

    elif cut_off_limit == -1 or not (
            cut_off_limit is None or
            (filenode_old.size < cut_off_limit and
             filenode_new.size < cut_off_limit)):
        diff = wrap_to_table(_('Changeset was too big and was cut off, use '
                               'diff menu to display this diff'))

    else:
        raw_gitdiff = get_gitdiff(filenode_old, filenode_new,
                                  ignore_whitespace=ignore_whitespace,
                                  context=line_context)
        processor = DiffProcessor(raw_gitdiff, format='gitdiff')

        diff = processor.as_html(enable_comments=enable_comments)
        stats = processor.stat()
        size = len(diff or '')

    if not diff:
        # nothing got rendered: either a submodule is involved or the two
        # versions do not differ
        involved_submodules = [node for node in (filenode_new, filenode_old)
                               if isinstance(node, SubModuleNode)]
        if involved_submodules:
            diff = wrap_to_table(
                escape('Submodule %r' % involved_submodules[0])
            )
        else:
            diff = wrap_to_table(_('No changes detected'))

    cs1 = filenode_old.changeset.raw_id
    cs2 = filenode_new.changeset.raw_id

    return size, cs1, cs2, diff, stats


def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
    """
    Returns git style diff between given ``filenode_old`` and ``filenode_new``.

    An empty string is returned when either node is a ``SubModuleNode``.

    :param filenode_old: ``FileNode`` with the old version of the file
    :param filenode_new: ``FileNode`` with the new version of the file, its
        repository and path are the ones the diff is requested for
    :param ignore_whitespace: ignore whitespaces in diff
    :param context: number of context lines, a falsy value means 3
    :raises VCSError: if one of the nodes is not a ``FileNode``
    """
    # make sure we pass in default context
    if not context:
        context = 3

    if any(isinstance(node, SubModuleNode)
           for node in (filenode_new, filenode_old)):
        return ''

    nodes = (filenode_old, filenode_new)
    for node in nodes:
        if not isinstance(node, FileNode):
            raise VCSError("Given object should be FileNode object, not %s"
                % node.__class__)

    repository = filenode_new.changeset.repository
    # changesets without a ``raw_id`` are treated as the empty changeset
    old_raw_id, new_raw_id = [
        getattr(node.changeset, 'raw_id', repository.EMPTY_CHANGESET)
        for node in nodes
    ]

    return repository.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                               ignore_whitespace, context)


class DiffProcessor(object):
    """
    Give it a unified diff and it returns a list of the files that were
    mentioned in the diff together with a dict of meta information that
    can be used to render it in a HTML template.
    """
    # hunk header such as ``@@ -1,4 +1,6 @@ def foo():``; both line counts are
    # optional and the last group holds the text following the second ``@@``
    _chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
    _newline_marker = '\\ No newline at end of file\n'

    def __init__(self, diff, differ='diff', format='gitdiff'):
        """
        :param diff:   a text in diff format or generator
        :param differ: inline highlighter, ``difflib`` selects the word based
            one, any other value the common prefix/suffix based one
        :param format: format of diff passed, `udiff` or `gitdiff`
        """
        if isinstance(diff, basestring):
            if format == 'gitdiff':
                # _parse_gitdiff splits the text into lines on its own
                diff = [diff]
            else:
                # the plain udiff path consumes its input line by line, so
                # a text has to be split up front
                diff = diff.splitlines(True)

        self.__udiff = diff
        self.__format = format
        # only the gitdiff parser fills those in, see _parse_gitdiff
        self.adds = 0
        self.removes = 0

        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            udiff_copy = self._parse_gitdiff(udiff_copy)
        # every line is HTML escaped before it reaches the parser
        self.lines = imap(self.escaper, udiff_copy)

        # Select a differ.
        if differ == 'difflib':
            self.differ = self._highlight_line_difflib
        else:
            self.differ = self._highlight_line_udiff

    def escaper(self, string):
        """
        Return ``string`` escaped with ``markupsafe.escape``.
        """
        return markupsafe.escape(string)

    def copy_iterator(self):
        """
        make a fresh copy of generator, we should not iterate thru
        an original as it's needed for repeating operations on
        this instance of DiffProcessor
        """
        self.__udiff, iterator_copy = tee(self.__udiff)
        return iterator_copy

    def _extract_rev(self, line1, line2):
        """
        Extract the operation (A/M/D), filename and revision hint from the
        ``---``/``+++`` header pair of a file.

        :param line1: the ``--- `` line
        :param line2: the ``+++ `` line
        :returns: tuple of ``(operation, filename, new_rev, old_rev)``, mind
            that the new revision comes first; four times ``None`` if the
            lines are not such a header pair
        """
        if not (line1.startswith('--- ') and line2.startswith('+++ ')):
            return None, None, None, None

        def strip_prefix(name, prefix):
            # drop ``a/`` or ``b/`` only when it really is a prefix, a plain
            # replace would also mangle names like ``data/x``
            if name.startswith(prefix):
                return name[len(prefix):]
            return name

        l1 = line1[4:].split(None, 1)
        old_filename = strip_prefix(l1[0], 'a/') if l1 else None
        old_rev = l1[1] if len(l1) == 2 else 'old'

        l2 = line2[4:].split(None, 1)
        new_filename = strip_prefix(l2[0], 'b/') if l2 else None
        new_rev = l2[1] if len(l2) == 2 else 'new'

        if old_filename != '/dev/null':
            filename = old_filename
        else:
            filename = new_filename

        if new_filename == '/dev/null':
            operation = 'D'
        elif old_filename != '/dev/null':
            operation = 'M'
        else:
            operation = 'A'

        return operation, filename, new_rev, old_rev

    def _parse_gitdiff(self, diffiterator):
        """
        Turn the pieces of a git style diff into a list of decoded lines.

        ``self.adds`` and ``self.removes`` are set to the number of added
        and removed lines found; they are recomputed from scratch on every
        call, so parsing the same diff again does not inflate ``stat()``.

        :param diffiterator: iterable giving at most two pieces; a single
            piece is split into lines, of two pieces the first one is kept
            as it is and the second one is split into lines
        :returns: list of lines decoded as utf8 (invalid bytes replaced)
        :raises Exception: if more than two pieces are given
        """
        output = list(diffiterator)
        size = len(output)

        if size == 2:
            raw_lines = [output[0]] + output[1].splitlines(True)
        elif size == 1:
            raw_lines = output[0].splitlines(True)
        elif size == 0:
            raw_lines = []
        else:
            raise Exception('wrong size of diff %s' % size)

        adds = removes = 0
        decoded = []
        for l in raw_lines:
            # '+++'/'---' are the file header lines, not changes
            if l.startswith('+') and not l.startswith('+++'):
                adds += 1
            elif l.startswith('-') and not l.startswith('---'):
                removes += 1
            decoded.append(l.decode('utf8', 'replace'))

        self.adds = adds
        self.removes = removes
        return decoded

    def _highlight_line_difflib(self, line, next_):
        """
        Highlight inline changes in both lines, word by word.

        The ``line`` value of both dicts is rewritten in place, changed
        fragments get wrapped into ``<del>``/``<ins>``.

        :param line: line dict of one side of the change
        :param next_: line dict of the other side of the change
        """

        if line['action'] == 'del':
            old, new = line, next_
        else:
            old, new = next_, line

        # the capturing group keeps the separators, so joining the pieces
        # gives back the original text
        oldwords = re.split(r'(\W)', old['line'])
        newwords = re.split(r'(\W)', new['line'])

        sequence = difflib.SequenceMatcher(None, oldwords, newwords)

        oldfragments, newfragments = [], []
        for tag, i1, i2, j1, j2 in sequence.get_opcodes():
            oldfrag = ''.join(oldwords[i1:i2])
            newfrag = ''.join(newwords[j1:j2])
            if tag != 'equal':
                if oldfrag:
                    oldfrag = '<del>%s</del>' % oldfrag
                if newfrag:
                    newfrag = '<ins>%s</ins>' % newfrag
            oldfragments.append(oldfrag)
            newfragments.append(newfrag)

        old['line'] = "".join(oldfragments)
        new['line'] = "".join(newfragments)

    def _highlight_line_udiff(self, line, next_):
        """
        Highlight inline changes in both lines.

        Everything between the common prefix and the common suffix of the
        two lines is wrapped into ``<ins>`` (action ``add``) or ``<del>``
        (any other action); the ``line`` value of both dicts is rewritten
        in place. Nothing is changed if the lines share neither a prefix
        nor a suffix.

        NOTE(review): the lines parsed from ``self.lines`` are already HTML
        escaped, so the prefix/suffix border may end up inside an entity -
        confirm whether that matters for the templates.

        :param line: line dict of one side of the change
        :param next_: line dict of the other side of the change
        """
        start = 0
        limit = min(len(line['line']), len(next_['line']))
        while start < limit and line['line'][start] == next_['line'][start]:
            start += 1
        end = -1
        # the suffix must not overlap the prefix found above
        limit -= start
        while -end <= limit and line['line'][end] == next_['line'][end]:
            end -= 1
        end += 1
        if start or end:
            def do(l):
                last = end + len(l['line'])
                if l['action'] == 'add':
                    tag = 'ins'
                else:
                    tag = 'del'
                l['line'] = '%s<%s>%s</%s>%s' % (
                    l['line'][:start],
                    tag,
                    l['line'][start:last],
                    tag,
                    l['line'][last:]
                )
            do(line)
            do(next_)

    def _highlight_inline(self, files):
        """
        Run ``self.differ`` over the changed line pairs of every chunk.

        The lines of a chunk are walked in order; a changed line is paired
        with the line right after it and both are consumed. The pair is
        highlighted only if the second line is a change of the other kind.

        :param files: list of file dicts as built by ``_parse_udiff``
        """
        plain = ('unmod', 'context')
        for diff_data in files:
            for chunk in diff_data['chunks']:
                idx = 0
                total = len(chunk)
                while idx < total:
                    line = chunk[idx]
                    idx += 1
                    if line['action'] in plain:
                        continue
                    if idx >= total:
                        break
                    nextline = chunk[idx]
                    idx += 1
                    if nextline['action'] in plain or \
                       nextline['action'] == line['action']:
                        continue
                    self.differ(line, nextline)

    def _parse_udiff(self, inline_diff=True):
        """
        Parse the diff an return data for the template.

        :param inline_diff: unless it is ``False`` the changed line pairs
            get inline highlighted
        :returns: list with one dict per file carrying the keys
            ``filename``, ``old_revision``, ``new_revision``, ``chunks``,
            ``operation`` and ``stats`` (``[added, removed]``), sorted by
            operation: added, modified, deleted. Every chunk is a list of
            line dicts with the keys ``old_lineno``, ``new_lineno``,
            ``action`` and ``line``
        """
        lineiter = self.lines
        files = []
        try:
            line = lineiter.next()
            while 1:
                # continue until we found the old file
                if not line.startswith('--- '):
                    line = lineiter.next()
                    continue

                chunks = []
                stats = [0, 0]
                # _extract_rev hands back the new revision before the old one
                operation, filename, new_rev, old_rev = \
                    self._extract_rev(line, lineiter.next())
                files.append({
                    'filename':         filename,
                    'old_revision':     old_rev,
                    'new_revision':     new_rev,
                    'chunks':           chunks,
                    'operation':        operation,
                    'stats':            stats,
                })

                line = lineiter.next()
                while line:
                    match = self._chunk_re.match(line)
                    if not match:
                        break

                    lines = []
                    chunks.append(lines)

                    gr = match.groups()
                    # a count left out of the hunk header means one line
                    old_line, old_end, new_line, new_end = \
                        [int(x or 1) for x in gr[:-1]]
                    old_line -= 1
                    new_line -= 1
                    old_end += old_line
                    new_end += new_line

                    # show the hunk header as a context row, unless the
                    # hunk starts at the very first line
                    if int(gr[0]) > 1:
                        lines.append({
                            'old_lineno': '...',
                            'new_lineno': '...',
                            'action':     'context',
                            'line':       line,
                        })

                    line = lineiter.next()

                    while old_line < old_end or new_line < new_end:
                        if line:
                            command = line[0]
                            if command in ['+', '-', ' ']:
                                # only modify the line if it's actually
                                # a diff thing
                                line = line[1:]
                        else:
                            command = ' '

                        # lines starting with '#' or '@' are not expected
                        # within a hunk: they are ignored, but we still have
                        # to move on to the next line or we would spin on
                        # the very same line forever
                        if command not in '#@':
                            affects_old = affects_new = False

                            if command == '+':
                                affects_new = True
                                action = 'add'
                                stats[0] += 1
                            elif command == '-':
                                affects_old = True
                                action = 'del'
                                stats[1] += 1
                            else:
                                affects_old = affects_new = True
                                action = 'unmod'

                            if line != self._newline_marker:
                                old_line += affects_old
                                new_line += affects_new
                                lines.append({
                                    'old_lineno':   affects_old and old_line or '',
                                    'new_lineno':   affects_new and new_line or '',
                                    'action':       action,
                                    'line':         line
                                })

                        line = lineiter.next()
                        if line == self._newline_marker:
                            # we need to append to lines, since this is not
                            # counted in the line specs of diff
                            lines.append({
                                'old_lineno':   '...',
                                'new_lineno':   '...',
                                'action':       'context',
                                'line':         line
                            })

        except StopIteration:
            # running out of lines is the regular way to finish parsing
            pass

        sorter = lambda info: {'A': 0, 'M': 1, 'D': 2}.get(info['operation'])

        if inline_diff is not False:
            # highlight inline changes
            self._highlight_inline(files)

        return sorted(files, key=sorter)

    def prepare(self, inline_diff=True):
        """
        Prepare the passed udiff for HTML rendering. It'l return a list
        of dicts

        :param inline_diff: handed over to ``_parse_udiff``
        """
        return self._parse_udiff(inline_diff=inline_diff)

    def _safe_id(self, idstring):
        r"""Make a string safe for including in an id attribute.

        The HTML spec says that id attributes 'must begin with
        a letter ([A-Za-z]) and may be followed by any number
        of letters, digits ([0-9]), hyphens ("-"), underscores
        ("_"), colons (":"), and periods (".")'. These regexps
        are slightly over-zealous, in that they remove colons
        and periods unnecessarily.

        Whitespace is transformed into underscores, and then
        anything which is not a hyphen or a character that
        matches \w (alphanumerics and underscore) is removed.
        The result is lower cased.

        """
        # Transform all whitespace to underscore
        idstring = re.sub(r'\s', "_", '%s' % idstring)
        # Remove everything that is not a hyphen or a member of \w
        idstring = re.sub(r'(?!-)\W', "", idstring).lower()
        return idstring

    def raw_diff(self):
        """
        Returns raw string as udiff
        """
        udiff_copy = self.copy_iterator()
        if self.__format == 'gitdiff':
            udiff_copy = self._parse_gitdiff(udiff_copy)
        return u''.join(udiff_copy)

    def as_html(self, table_class='code-difftable', line_class='line',
                new_lineno_class='lineno old', old_lineno_class='lineno new',
                code_class='code', enable_comments=False, diff_lines=None):
        """
        Return given diff as html table with customized css classes

        NOTE(review): the defaults of ``new_lineno_class`` and
        ``old_lineno_class`` look swapped; they are left alone since the
        stylesheets may rely on them - confirm.

        :param table_class: css class of the ``<table>``
        :param line_class: css class of every ``<tr>``, the action of the
            line is added as a second class
        :param new_lineno_class: css class of the new line number cell
        :param old_lineno_class: css class of the old line number cell
        :param code_class: css class of the code cell
        :param enable_comments: if false the code cell additionally gets
            the ``no-comment`` class
        :param diff_lines: parsed diff as returned by ``prepare``, which
            is called if nothing is given
        :returns: the HTML of the table, ``None`` if there is no chunk
        """
        def _link_to_if(condition, label, url):
            """
            Generates a link if condition is meet or just the label if not.
            """

            if condition:
                return '''<a href="%(url)s">%(label)s</a>''' % {
                    'url': url,
                    'label': label
                }
            else:
                return label
        if diff_lines is None:
            diff_lines = self.prepare()
        _html_empty = True
        _html = []
        _html.append('''<table class="%(table_class)s">\n''' % {
            'table_class': table_class
        })
        for diff in diff_lines:
            for line in diff['chunks']:
                _html_empty = False
                for change in line:
                    _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
                        'lc': line_class,
                        'action': change['action']
                    })
                    anchor_old_id = ''
                    anchor_new_id = ''
                    anchor_old = "%(filename)s_o%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['old_lineno']
                    }
                    anchor_new = "%(filename)s_n%(oldline_no)s" % {
                        'filename': self._safe_id(diff['filename']),
                        'oldline_no': change['new_lineno']
                    }
                    # only cells showing a real line number get an id,
                    # '...' and empty cells do not
                    cond_old = (change['old_lineno'] != '...' and
                                change['old_lineno'])
                    cond_new = (change['new_lineno'] != '...' and
                                change['new_lineno'])
                    if cond_old:
                        anchor_old_id = 'id="%s"' % anchor_old
                    if cond_new:
                        anchor_new_id = 'id="%s"' % anchor_new
                    ###########################################################
                    # OLD LINE NUMBER
                    ###########################################################
                    _html.append('''\t<td %(a_id)s class="%(olc)s">''' % {
                        'a_id': anchor_old_id,
                        'olc': old_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, change['old_lineno'],
                                            '#%s' % anchor_old)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # NEW LINE NUMBER
                    ###########################################################

                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
                        'a_id': anchor_new_id,
                        'nlc': new_lineno_class
                    })

                    _html.append('''%(link)s''' % {
                        'link': _link_to_if(True, change['new_lineno'],
                                            '#%s' % anchor_new)
                    })
                    _html.append('''</td>\n''')
                    ###########################################################
                    # CODE
                    ###########################################################
                    comments = '' if enable_comments else 'no-comment'
                    _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
                        'cc': code_class,
                        'inc': comments
                    })
                    _html.append('''\n\t\t<pre>%(code)s</pre>\n''' % {
                        'code': change['line']
                    })
                    _html.append('''\t</td>''')
                    _html.append('''\n</tr>\n''')
        _html.append('''</table>''')
        if _html_empty:
            return None
        return ''.join(_html)

    def stat(self):
        """
        Returns tuple of added, and removed lines for this instance

        The counters are filled in by ``_parse_gitdiff`` only, for any
        other format they stay at zero.
        """
        return self.adds, self.removes


class InMemoryBundleRepo(bundlerepository):
    """
    Bundle repository that gets its bundle as an already opened stream.

    ``bundlerepository.__init__`` is not called here; the constructor runs
    ``localrepo.localrepository.__init__`` and sets the bundle related
    attributes on its own.
    """
    def __init__(self, ui, path, bundlestream):
        """
        :param ui: ui object handed over to ``localrepository.__init__``
        :param path: path handed over to ``localrepository.__init__``
        :param bundlestream: object stored as ``self.bundle``
        """
        self._tempparent = None
        localrepo.localrepository.__init__(self, ui, path)
        # NOTE(review): presumably keeps the changesets of the bundle from
        # being treated as public - confirm against mercurial phases docs
        self.ui.setconfig('phases', 'publish', False)

        self.bundle = bundlestream

        # dict with the mapping 'filename' -> position in the bundle
        self.bundlefilespos = {}


def differ(org_repo, org_ref, other_repo, other_ref, discovery_data=None):
    """
    General differ between branches, bookmarks or separate but related
    repositories

    :param org_repo: object giving the origin repository as
        ``scm_instance._repo``
    :param org_ref: sequence whose second item is the origin revision
    :param other_repo: object giving the other repository as
        ``scm_instance._repo``
    :param other_ref: sequence whose second item is the other revision
    :param discovery_data: ``(common, incoming, rheads)`` triple, it is
        unpacked whenever the two repositories are not the same one
    :returns: output of ``patch.diff`` joined into a single string
    """
    org_repo = org_repo.scm_instance._repo
    other_repo = other_repo.scm_instance._repo
    # git style diff, whitespace changes are kept, 3 lines of context
    opts = diffopts(git=True, ignorews=False, context=3)
    org_ref = org_ref[1]
    other_ref = other_ref[1]

    if not (org_repo != other_repo):
        # both refs live in one repository, diff them right away
        return ''.join(patch.diff(org_repo, node1=org_ref, node2=other_ref,
                                  opts=opts))

    bundlerepo = None
    common, incoming, rheads = discovery_data
    other_repo_peer = localrepo.locallegacypeer(other_repo.local())
    # create a bundle (uncompressed if other repo is not local)
    if other_repo_peer.capable('getbundle') and incoming:
        # disable repo hooks here since it's just bundle !
        # patch and reset hooks section of UI config to not run any
        # hooks on fetching archives with subrepos
        for hook_name, _hook in other_repo.ui.configitems('hooks'):
            other_repo.ui.setconfig('hooks', hook_name, None)

        unbundle = other_repo.getbundle('incoming', common=common,
                                        heads=None)

        # slurp the whole bundle into memory, 4kB at a time
        buf = BytesIO()
        chunk = unbundle._stream.read(1024 * 4)
        while chunk:
            buf.write(chunk)
            chunk = unbundle._stream.read(1024 * 4)

        buf.seek(0)
        # replace chunked _stream with data that can do tell() and seek()
        unbundle._stream = buf

        ui = make_ui('db')
        bundlerepo = InMemoryBundleRepo(ui, path=org_repo.root,
                                        bundlestream=unbundle)

    # without a bundle the origin repository is diffed directly
    diff_repo = bundlerepo or org_repo
    node1 = org_repo[org_ref].node()
    node2 = other_repo[other_ref].node()
    return ''.join(patch.diff(diff_repo, node1=node1, node2=node2,
                              opts=opts))