# HG changeset patch # User Thomas De Schampheleire # Date 1603104470 -7200 # Node ID 53142fd5af4e7f0a6425fd322be4b8634c06f4fc # Parent 3fb80ff77bda3a47d684a4f0e4ca8125c6aa24e1 lib/diffs: make sure that trailing tabs are indicated Between the initial submission and final version of commit f79c40759d6f, changes were made that turned out to be incorrect. The changes assume that the later match on trailing tabs will 'win' over the plain 'tab' match. However, the Python 're' documentation says: As the target string is scanned, REs separated by '|' are tried from left to right. When one pattern completely matches, that branch is accepted. This means that once A matches, B will not be tested further, even if it would produce a longer overall match. In other words, the '|' operator is never greedy. https://docs.python.org/3.8/library/re.html As a result, a trailing tab is seen as a plain tab and not highlighted in a special way. Unify the tab handling to make it unambiguous how tabs should be parsed. The diff mainly shows the 're' group numbers shifting. 
diff -r 3fb80ff77bda -r 53142fd5af4e kallithea/lib/diffs.py --- a/kallithea/lib/diffs.py Wed Oct 28 14:51:26 2020 +0100 +++ b/kallithea/lib/diffs.py Mon Oct 19 12:47:50 2020 +0200 @@ -445,7 +445,7 @@ return self.adds, self.removes -_escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|( $)|(\t$)') +_escape_re = re.compile(r'(&)|(<)|(>)|(\t)($)?|(\r)|( $)') def _escaper(diff_line): @@ -467,7 +467,7 @@ >>> _escaper(' foo\rbar\r') ' foobar' >>> _escaper(' foo\t') - ' foo\t' + ' foo\t' >>> _escaper(' foo ') ' foo ' >>> _escaper(' foo ') @@ -477,15 +477,15 @@ >>> _escaper(' ') ' ' >>> _escaper(' \t') - ' \t' + ' \t' >>> _escaper(' \t ') ' \t ' >>> _escaper(' \t') - ' \t' + ' \t' >>> _escaper(' \t\t ') ' \t\t ' >>> _escaper(' \t\t') - ' \t\t' + ' \t\t' >>> _escaper(' foo&bar ') ' foo&bar<baz> ' """ @@ -499,15 +499,15 @@ if groups[2]: return '>' if groups[3]: - return '\t' # Note: trailing tabs will get a longer match later - if groups[4]: + if groups[4] is not None: # end of line + return '\t' + return '\t' + if groups[5]: return '' - if groups[5]: + if groups[6]: if m.start() == 0: return ' ' # first column space shouldn't make empty lines show up as trailing space return ' ' - if groups[6]: - return '\t' assert False return _escape_re.sub(substitute, diff_line) diff -r 3fb80ff77bda -r 53142fd5af4e kallithea/tests/models/test_diff_parsers.py --- a/kallithea/tests/models/test_diff_parsers.py Wed Oct 28 14:51:26 2020 +0100 +++ b/kallithea/tests/models/test_diff_parsers.py Mon Oct 19 12:47:50 2020 +0200 @@ -297,13 +297,13 @@ assert s == r''' context '@@ -51,6 +51,13 @@' unmod 51 51 '\tbegin();' -unmod 52 52 '\t' +unmod 52 52 '\t' add 53 '\tint foo;' add 54 '\tint bar; ' add 55 '\tint baz;\t' add 56 '\tint space; ' -add 57 '\tint tab;\t' -add 58 '\t' +add 57 '\tint tab;\t' +add 58 '\t' unmod 59 53 ' ' del 54 '\t#define MAX_STEPS (48)' add 60 '\t'