changeset 6911:7dbe020e93fe

diffs: avoid extra copy of diff when splitting into files. Instead of allocating memory for an extra copy of the whole raw diff, just use buffer() to give read-only string views of the relevant sections of the big diff string given as input.
author Mads Kiilerich <mads@kiilerich.com>
date Tue, 03 Oct 2017 00:14:40 +0200
parents 0c19e4661b71
children e025863fae9c
files kallithea/lib/diffs.py
diffstat 1 files changed, 8 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/lib/diffs.py	Tue Oct 03 00:14:40 2017 +0200
+++ b/kallithea/lib/diffs.py	Tue Oct 03 00:14:40 2017 +0200
@@ -162,11 +162,11 @@
     mentioned in the diff together with a dict of meta information that
     can be used to render it in a HTML template.
     """
+    _diff_git_re = re.compile('^diff --git', re.MULTILINE)
     _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
     _newline_marker = re.compile(r'^\\ No newline at end of file')
     _git_header_re = re.compile(r"""
-        # has already been split on this:
-        # ^diff[ ]--git
+        ^diff[ ]--git
             [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
         (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
            ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
@@ -182,8 +182,7 @@
         (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
     """, re.VERBOSE | re.MULTILINE)
     _hg_header_re = re.compile(r"""
-        # has already been split on this:
-        # ^diff[ ]--git
+        ^diff[ ]--git
             [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
         (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
            ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
@@ -318,9 +317,11 @@
         _files = [] # list of dicts with meta info and chunks
         diff_container = lambda arg: arg
 
-        # split the diff in chunks of separate --git a/file b/file chunks
-        for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
-            head, diff = self._get_header(raw_diff)
+        starts = [m.start() for m in self._diff_git_re.finditer(self._diff)]
+        starts.append(len(self._diff))
+
+        for start, end in zip(starts, starts[1:]):
+            head, diff = self._get_header(buffer(self._diff, start, end - start))
 
             op = None
             stats = {