# HG changeset patch # User Mads Kiilerich # Date 1506982480 -7200 # Node ID ef6991dee3b1e64407228d0a9d7848efa4ae7b54 # Parent e708b26819cd5ffb7c7a28a986a47dacac810cc4 diffs: extract _get_header as a pure function diff -r e708b26819cd -r ef6991dee3b1 kallithea/lib/diffs.py --- a/kallithea/lib/diffs.py Tue Oct 03 00:14:40 2017 +0200 +++ b/kallithea/lib/diffs.py Tue Oct 03 00:14:40 2017 +0200 @@ -270,40 +270,6 @@ _diff_git_re = re.compile('^diff --git', re.MULTILINE) _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)') _newline_marker = re.compile(r'^\\ No newline at end of file') - _git_header_re = re.compile(r""" - ^diff[ ]--git - [ ]a/(?P.+?)[ ]b/(?P.+?)\n - (?:^old[ ]mode[ ](?P\d+)\n - ^new[ ]mode[ ](?P\d+)(?:\n|$))? - (?:^similarity[ ]index[ ](?P\d+)%\n - ^rename[ ]from[ ](?P.+)\n - ^rename[ ]to[ ](?P.+)(?:\n|$))? - (?:^new[ ]file[ ]mode[ ](?P.+)(?:\n|$))? - (?:^deleted[ ]file[ ]mode[ ](?P.+)(?:\n|$))? - (?:^index[ ](?P[0-9A-Fa-f]+) - \.\.(?P[0-9A-Fa-f]+)[ ]?(?P.+)?(?:\n|$))? - (?:^(?PGIT[ ]binary[ ]patch)(?:\n|$))? - (?:^---[ ](a/(?P.+?)|/dev/null)\t?(?:\n|$))? - (?:^\+\+\+[ ](b/(?P.+?)|/dev/null)\t?(?:\n|$))? - """, re.VERBOSE | re.MULTILINE) - _hg_header_re = re.compile(r""" - ^diff[ ]--git - [ ]a/(?P.+?)[ ]b/(?P.+?)\n - (?:^old[ ]mode[ ](?P\d+)\n - ^new[ ]mode[ ](?P\d+)(?:\n|$))? - (?:^similarity[ ]index[ ](?P\d+)%(?:\n|$))? - (?:^rename[ ]from[ ](?P.+)\n - ^rename[ ]to[ ](?P.+)(?:\n|$))? - (?:^copy[ ]from[ ](?P.+)\n - ^copy[ ]to[ ](?P.+)(?:\n|$))? - (?:^new[ ]file[ ]mode[ ](?P.+)(?:\n|$))? - (?:^deleted[ ]file[ ]mode[ ](?P.+)(?:\n|$))? - (?:^index[ ](?P[0-9A-Fa-f]+) - \.\.(?P[0-9A-Fa-f]+)[ ]?(?P.+)?(?:\n|$))? - (?:^(?PGIT[ ]binary[ ]patch)(?:\n|$))? - (?:^---[ ](a/(?P.+?)|/dev/null)\t?(?:\n|$))? - (?:^\+\+\+[ ](b/(?P.+?)|/dev/null)\t?(?:\n|$))? - """, re.VERBOSE | re.MULTILINE) def __init__(self, diff, vcs='hg', diff_limit=None, inline_diff=True): """ @@ -324,32 +290,6 @@ self.vcs = vcs self.parsed = self._parse_gitdiff(inline_diff=inline_diff) - def _get_header(self, diff_chunk): - """ - Parses a Git diff for a single file (header and chunks) and returns a tuple with: - - 1. A dict with meta info: - - a_path, b_path, similarity_index, rename_from, rename_to, - old_mode, new_mode, new_file_mode, deleted_file_mode, - a_blob_id, b_blob_id, b_mode, a_file, b_file - - 2. An iterator yielding lines with simple HTML markup. - """ - match = None - if self.vcs == 'git': - match = self._git_header_re.match(diff_chunk) - elif self.vcs == 'hg': - match = self._hg_header_re.match(diff_chunk) - if match is None: - raise Exception('diff not recognized as valid %s diff' % self.vcs) - meta_info = match.groupdict() - rest = diff_chunk[match.end():] - if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '): - raise Exception('cannot parse %s diff header: %r followed by %r' % (self.vcs, diff_chunk[:match.end()], rest[:1000])) - diff_lines = (_escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do - return meta_info, diff_lines - def _parse_gitdiff(self, inline_diff): """Parse self._diff and return a list of dicts with meta info and chunks for each file. Might set limited_diff. @@ -365,7 +305,7 @@ self.limited_diff = True continue - head, diff_lines = self._get_header(buffer(self._diff, start, end - start)) + head, diff_lines = _get_header(self.vcs, buffer(self._diff, start, end - start)) op = None stats = { @@ -627,6 +567,69 @@ return _escape_re.sub(substitute, safe_unicode(string)) +_git_header_re = re.compile(r""" + ^diff[ ]--git[ ]a/(?P.+?)[ ]b/(?P.+?)\n + (?:^old[ ]mode[ ](?P\d+)\n + ^new[ ]mode[ ](?P\d+)(?:\n|$))? + (?:^similarity[ ]index[ ](?P\d+)%\n + ^rename[ ]from[ ](?P.+)\n + ^rename[ ]to[ ](?P.+)(?:\n|$))? + (?:^new[ ]file[ ]mode[ ](?P.+)(?:\n|$))? + (?:^deleted[ ]file[ ]mode[ ](?P.+)(?:\n|$))? + (?:^index[ ](?P[0-9A-Fa-f]+) + \.\.(?P[0-9A-Fa-f]+)[ ]?(?P.+)?(?:\n|$))? + (?:^(?PGIT[ ]binary[ ]patch)(?:\n|$))? + (?:^---[ ](a/(?P.+?)|/dev/null)\t?(?:\n|$))? + (?:^\+\+\+[ ](b/(?P.+?)|/dev/null)\t?(?:\n|$))? +""", re.VERBOSE | re.MULTILINE) + + +_hg_header_re = re.compile(r""" + ^diff[ ]--git[ ]a/(?P.+?)[ ]b/(?P.+?)\n + (?:^old[ ]mode[ ](?P\d+)\n + ^new[ ]mode[ ](?P\d+)(?:\n|$))? + (?:^similarity[ ]index[ ](?P\d+)%(?:\n|$))? + (?:^rename[ ]from[ ](?P.+)\n + ^rename[ ]to[ ](?P.+)(?:\n|$))? + (?:^copy[ ]from[ ](?P.+)\n + ^copy[ ]to[ ](?P.+)(?:\n|$))? + (?:^new[ ]file[ ]mode[ ](?P.+)(?:\n|$))? + (?:^deleted[ ]file[ ]mode[ ](?P.+)(?:\n|$))? + (?:^index[ ](?P[0-9A-Fa-f]+) + \.\.(?P[0-9A-Fa-f]+)[ ]?(?P.+)?(?:\n|$))? + (?:^(?PGIT[ ]binary[ ]patch)(?:\n|$))? + (?:^---[ ](a/(?P.+?)|/dev/null)\t?(?:\n|$))? + (?:^\+\+\+[ ](b/(?P.+?)|/dev/null)\t?(?:\n|$))? +""", re.VERBOSE | re.MULTILINE) + + +def _get_header(vcs, diff_chunk): + """ + Parses a Git diff for a single file (header and chunks) and returns a tuple with: + + 1. A dict with meta info: + + a_path, b_path, similarity_index, rename_from, rename_to, + old_mode, new_mode, new_file_mode, deleted_file_mode, + a_blob_id, b_blob_id, b_mode, a_file, b_file + + 2. An iterator yielding lines with simple HTML markup. + """ + match = None + if vcs == 'git': + match = _git_header_re.match(diff_chunk) + elif vcs == 'hg': + match = _hg_header_re.match(diff_chunk) + if match is None: + raise Exception('diff not recognized as valid %s diff' % vcs) + meta_info = match.groupdict() + rest = diff_chunk[match.end():] + if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '): + raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, diff_chunk[:match.end()], rest[:1000])) + diff_lines = (_escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do + return meta_info, diff_lines + + # Used for inline highlighter word split, must match the substitutions in _escaper _token_re = re.compile(r'()(&|<|>|\t|| |\W+?)')