Mercurial > kallithea
changeset 6911:7dbe020e93fe
diffs: avoid extra copy of diff when splitting into files
Instead of allocating memory for an extra copy of the whole raw diff, just use
buffer() to give read-only string views of the relevant sections of the big
diff string given as input.
author | Mads Kiilerich <mads@kiilerich.com> |
---|---|
date | Tue, 03 Oct 2017 00:14:40 +0200 |
parents | 0c19e4661b71 |
children | e025863fae9c |
files | kallithea/lib/diffs.py |
diffstat | 1 files changed, 8 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/kallithea/lib/diffs.py Tue Oct 03 00:14:40 2017 +0200
+++ b/kallithea/lib/diffs.py Tue Oct 03 00:14:40 2017 +0200
@@ -162,11 +162,11 @@
     mentioned in the diff together with a dict of meta information that
     can be used to render it in a HTML template.
     """
+    _diff_git_re = re.compile('^diff --git', re.MULTILINE)
     _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
     _newline_marker = re.compile(r'^\\ No newline at end of file')
     _git_header_re = re.compile(r"""
-        # has already been split on this:
-        # ^diff[ ]--git
+        ^diff[ ]--git
             [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
         (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
            ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
@@ -182,8 +182,7 @@
         (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
     """, re.VERBOSE | re.MULTILINE)
     _hg_header_re = re.compile(r"""
-        # has already been split on this:
-        # ^diff[ ]--git
+        ^diff[ ]--git
             [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
         (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
            ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
@@ -318,9 +317,11 @@
         _files = [] # list of dicts with meta info and chunks
         diff_container = lambda arg: arg
 
-        # split the diff in chunks of separate --git a/file b/file chunks
-        for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
-            head, diff = self._get_header(raw_diff)
+        starts = [m.start() for m in self._diff_git_re.finditer(self._diff)]
+        starts.append(len(self._diff))
+
+        for start, end in zip(starts, starts[1:]):
+            head, diff = self._get_header(buffer(self._diff, start, end - start))
 
             op = None
             stats = {
```