changeset 6918:e708b26819cd

diffs: move _escaper to a pure function. It used to count diff size - now it is a clean function.
author Mads Kiilerich <mads@kiilerich.com>
date Tue, 03 Oct 2017 00:14:40 +0200
parents 22074446ac5b
children ef6991dee3b1
files kallithea/lib/diffs.py
diffstat 1 files changed, 28 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/lib/diffs.py	Tue Oct 03 00:14:40 2017 +0200
+++ b/kallithea/lib/diffs.py	Tue Oct 03 00:14:40 2017 +0200
@@ -305,8 +305,6 @@
         (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
     """, re.VERBOSE | re.MULTILINE)
 
-    _escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)')
-
     def __init__(self, diff, vcs='hg', diff_limit=None, inline_diff=True):
         """
         :param diff:   a text in diff format
@@ -326,28 +324,6 @@
         self.vcs = vcs
         self.parsed = self._parse_gitdiff(inline_diff=inline_diff)
 
-    def _escaper(self, string):
-        """
-        Do HTML escaping/markup
-        """
-        def substitute(m):
-            groups = m.groups()
-            if groups[0]:
-                return '&amp;'
-            if groups[1]:
-                return '&lt;'
-            if groups[2]:
-                return '&gt;'
-            if groups[3]:
-                return '<u>\t</u>'
-            if groups[4]:
-                return '<u class="cr"></u>'
-            if groups[5]:
-                return ' <i></i>'
-            assert False
-
-        return self._escape_re.sub(substitute, safe_unicode(string))
-
     def _get_header(self, diff_chunk):
         """
         Parses a Git diff for a single file (header and chunks) and returns a tuple with:
@@ -371,7 +347,7 @@
         rest = diff_chunk[match.end():]
         if rest and not rest.startswith('@') and not rest.startswith('literal ') and not rest.startswith('delta '):
             raise Exception('cannot parse %s diff header: %r followed by %r' % (self.vcs, diff_chunk[:match.end()], rest[:1000]))
-        diff_lines = (self._escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
+        diff_lines = (_escaper(m.group(0)) for m in re.finditer(r'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
         return meta_info, diff_lines
 
     def _parse_gitdiff(self, inline_diff):
@@ -624,6 +600,33 @@
         return self.adds, self.removes
 
 
+_escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)')
+
+
+def _escaper(string):
+    """
+    Do HTML escaping/markup
+    """
+
+    def substitute(m):
+        groups = m.groups()
+        if groups[0]:
+            return '&amp;'
+        if groups[1]:
+            return '&lt;'
+        if groups[2]:
+            return '&gt;'
+        if groups[3]:
+            return '<u>\t</u>'
+        if groups[4]:
+            return '<u class="cr"></u>'
+        if groups[5]:
+            return ' <i></i>'
+        assert False
+
+    return _escape_re.sub(substitute, safe_unicode(string))
+
+
 # Used for inline highlighter word split, must match the substitutions in _escaper
 _token_re = re.compile(r'()(&amp;|&lt;|&gt;|<u>\t</u>|<u class="cr"></u>| <i></i>|\W+?)')