changeset 8707:161cf7e3566f

diff: refactor to avoid complexity of passing unnecessary trailing newline around. _escaper tried to handle these optional newlines ... but did not always preserve them. Keep it simple. In normal operations, all diff lines end with \n. We only saw un-terminated lines in a couple of old test cases.
author Mads Kiilerich <mads@kiilerich.com>
date Wed, 28 Oct 2020 14:08:28 +0100
parents c6964daffe57
children 3fb80ff77bda
files kallithea/lib/diffs.py kallithea/tests/models/test_diff_parsers.py
diffstat 2 files changed, 22 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/lib/diffs.py	Thu Oct 22 11:29:32 2020 +0200
+++ b/kallithea/lib/diffs.py	Wed Oct 28 14:08:28 2020 +0100
@@ -445,7 +445,7 @@
         return self.adds, self.removes
 
 
-_escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)|(\t\n|\t$)')
+_escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( $)|(\t$)')
 
 
 def _escaper(diff_line):
@@ -571,9 +571,13 @@
         raise Exception('diff not recognized as valid %s diff' % vcs)
     meta_info = {k: None if v is None else safe_str(v) for k, v in match.groupdict().items()}
     rest = diff_chunk[match.end():]
-    if rest and _header_next_check.match(rest):
-        raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, safe_str(bytes(diff_chunk[:match.end()])), safe_str(bytes(rest[:1000]))))
-    diff_lines = (_escaper(safe_str(m.group(0))) for m in re.finditer(br'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
+    if rest:
+        if _header_next_check.match(rest):
+            raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, safe_str(bytes(diff_chunk[:match.end()])), safe_str(bytes(rest[:1000]))))
+        if rest[-1:] != b'\n':
+            # The diff will generally already have trailing \n (and be a memoryview). It might also be huge so we don't want to allocate it twice. But in this very rare case, we don't care.
+            rest = bytes(rest) + b'\n'
+    diff_lines = (_escaper(safe_str(m.group(1))) for m in re.finditer(br'(.*)\n', rest))
     return meta_info, diff_lines
 
 
--- a/kallithea/tests/models/test_diff_parsers.py	Thu Oct 22 11:29:32 2020 +0200
+++ b/kallithea/tests/models/test_diff_parsers.py	Wed Oct 28 14:08:28 2020 +0100
@@ -295,20 +295,20 @@
             l.append('%(action)-7s %(new_lineno)3s %(old_lineno)3s %(line)r\n' % d)
         s = ''.join(l)
         assert s == r'''
-context         '@@ -51,6 +51,13 @@\n'
-unmod    51  51 '<u>\t</u>begin();\n'
-unmod    52  52 '<u>\t</u>\n'
-add      53     '<u>\t</u>int foo;<u class="cr"></u>\n'
-add      54     '<u>\t</u>int bar; <u class="cr"></u>\n'
-add      55     '<u>\t</u>int baz;<u>\t</u><u class="cr"></u>\n'
+context         '@@ -51,6 +51,13 @@'
+unmod    51  51 '<u>\t</u>begin();'
+unmod    52  52 '<u>\t</u>'
+add      53     '<u>\t</u>int foo;<u class="cr"></u>'
+add      54     '<u>\t</u>int bar; <u class="cr"></u>'
+add      55     '<u>\t</u>int baz;<u>\t</u><u class="cr"></u>'
 add      56     '<u>\t</u>int space; <i></i>'
-add      57     '<u>\t</u>int tab;<u>\t</u>\n'
-add      58     '<u>\t</u>\n'
+add      57     '<u>\t</u>int tab;<u>\t</u>'
+add      58     '<u>\t</u>'
 unmod    59  53 ' <i></i>'
-del          54 '<u>\t</u>#define MAX_STEPS (48)\n'
-add      60     '<u>\t</u><u class="cr"></u>\n'
-add      61     '<u>\t</u>#define MAX_STEPS (64)<u class="cr"></u>\n'
-unmod    62  55 '\n'
-del          56 '<u>\t</u>#define MIN_STEPS (<del>48</del>)\n'
-add      63     '<u>\t</u>#define MIN_STEPS (<ins>42</ins>)\n'
+del          54 '<u>\t</u>#define MAX_STEPS (48)'
+add      60     '<u>\t</u><u class="cr"></u>'
+add      61     '<u>\t</u>#define MAX_STEPS (64)<u class="cr"></u>'
+unmod    62  55 ''
+del          56 '<u>\t</u>#define MIN_STEPS (<del>48</del>)'
+add      63     '<u>\t</u>#define MIN_STEPS (<ins>42</ins>)'
 '''