# HG changeset patch
# User Mads Kiilerich
# Date 1506982480 -7200
# Node ID 7dbe020e93fe58ce34dd087494b98a4fb1096ed6
# Parent  0c19e4661b71aab2cdc19a19f9c6842c63f52d2c
diffs: avoid extra copy of diff when splitting into files

Instead of allocating memory for an extra copy of the whole raw diff, just use
buffer() to give read-only string views of the relevant sections of the big
diff string given as input.

diff -r 0c19e4661b71 -r 7dbe020e93fe kallithea/lib/diffs.py
--- a/kallithea/lib/diffs.py	Tue Oct 03 00:14:40 2017 +0200
+++ b/kallithea/lib/diffs.py	Tue Oct 03 00:14:40 2017 +0200
@@ -162,11 +162,11 @@
     mentioned in the diff together with a dict of meta information that
     can be used to render it in a HTML template.
     """
+    _diff_git_re = re.compile('^diff --git', re.MULTILINE)
     _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
     _newline_marker = re.compile(r'^\\ No newline at end of file')
     _git_header_re = re.compile(r"""
-        # has already been split on this:
-        # ^diff[ ]--git
+        ^diff[ ]--git
             [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
         (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
            ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
@@ -182,8 +182,7 @@
         (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
     """, re.VERBOSE | re.MULTILINE)
     _hg_header_re = re.compile(r"""
-        # has already been split on this:
-        # ^diff[ ]--git
+        ^diff[ ]--git
             [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
         (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
            ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
@@ -318,9 +317,11 @@
         _files = [] # list of dicts with meta info and chunks
         diff_container = lambda arg: arg
 
-        # split the diff in chunks of separate --git a/file b/file chunks
-        for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
-            head, diff = self._get_header(raw_diff)
+        starts = [m.start() for m in self._diff_git_re.finditer(self._diff)]
+        starts.append(len(self._diff))
+
+        for start, end in zip(starts, starts[1:]):
+            head, diff = self._get_header(buffer(self._diff, start, end - start))
 
             op = None
             stats = {