changeset 6912:e025863fae9c

diffs: avoid extra copy of diff when trying to create a lazy diff line iterator. Make the generator actually be lazy and avoid creating an extra full copy of all the lines in a file diff at once, and consistently call it diff_lines.
author Mads Kiilerich <mads@kiilerich.com>
date Tue, 03 Oct 2017 00:14:40 +0200
parents 7dbe020e93fe
children 24a9bec8138c
files kallithea/lib/diffs.py
diffstat 1 files changed, 9 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/lib/diffs.py	Tue Oct 03 00:14:40 2017 +0200
+++ b/kallithea/lib/diffs.py	Tue Oct 03 00:14:40 2017 +0200
@@ -29,8 +29,6 @@
 import difflib
 import logging
 
-from itertools import imap
-
 from tg.i18n import ugettext as _
 
 from kallithea.lib.vcs.exceptions import VCSError
@@ -306,8 +304,8 @@
         rest = diff_chunk[match.end():]
         if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '):
             raise Exception('cannot parse %s diff header: %r followed by %r' % (self.vcs, diff_chunk[:match.end()], rest[:1000]))
-        difflines = imap(self._escaper, re.findall(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
-        return meta_info, difflines
+        diff_lines = (self._escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
+        return meta_info, diff_lines
 
     def _parse_gitdiff(self, inline_diff=True):
         """Parse self._diff and return a list of dicts with meta info and chunks for each file.
@@ -321,7 +319,7 @@
         starts.append(len(self._diff))
 
         for start, end in zip(starts, starts[1:]):
-            head, diff = self._get_header(buffer(self._diff, start, end - start))
+            head, diff_lines = self._get_header(buffer(self._diff, start, end - start))
 
             op = None
             stats = {
@@ -381,7 +379,7 @@
             # a real non-binary diff
             if head['a_file'] or head['b_file']:
                 try:
-                    chunks, added, deleted = self._parse_lines(diff)
+                    chunks, added, deleted = self._parse_lines(diff_lines)
                     stats['binary'] = False
                     stats['added'] = added
                     stats['deleted'] = deleted
@@ -459,7 +457,7 @@
 
         return diff_container(_files)
 
-    def _parse_lines(self, diff):
+    def _parse_lines(self, diff_lines):
         """
         Given an iterator of diff body lines, parse them and return a dict per
         line and added/removed totals.
@@ -469,7 +467,7 @@
 
         try:
             chunks = []
-            line = diff.next()
+            line = diff_lines.next()
 
             while True:
                 lines = []
@@ -500,7 +498,7 @@
                             'line':       line,
                         })
 
-                line = diff.next()
+                line = diff_lines.next()
 
                 while old_line < old_end or new_line < new_end:
                     if not line:
@@ -533,7 +531,7 @@
                             'line':         line[1:],
                         })
 
-                    line = diff.next()
+                    line = diff_lines.next()
 
                     if self._newline_marker.match(line):
                         # we need to append to lines, since this is not
@@ -544,7 +542,7 @@
                             'action':       'context',
                             'line':         line,
                         })
-                        line = diff.next()
+                        line = diff_lines.next()
                 if old_line > old_end:
                     raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
                 if new_line > new_end: