Mercurial > kallithea
changeset 8768:fd61f678577f
diff: improved handling of Git diffs with " quoting
Kallithea would intentionally and explicitly fail with an ugly exception when
trying to parse Git diffs with quoted filenames.
Improve this by parsing quotes ... and ignore them, as long as they are
matching. The content inside the quotes might be \-escaped ... but that could
potentially also be the case without quoting. We will fix that later.
Adding some test cases that before would have failed to parse and raised an
exception.
Thanks to stypr of Flatt Security for raising this.
author | Mads Kiilerich <mads@kiilerich.com> |
---|---|
date | Sat, 14 Nov 2020 15:20:40 +0100 |
parents | 817dcce30d67 |
children | d35d14b05b82 |
files | kallithea/lib/diffs.py kallithea/tests/fixtures/git_diff_quoting.diff kallithea/tests/models/test_diff_parsers.py |
diffstat | 3 files changed, 112 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/kallithea/lib/diffs.py Sat Nov 14 15:18:05 2020 +0100 +++ b/kallithea/lib/diffs.py Sat Nov 14 15:20:40 2020 +0100 @@ -511,7 +511,7 @@ _git_header_re = re.compile(br""" - ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n + ^diff[ ]--git[ ](?P<a_path_quote>"?)a/(?P<a_path>.+?)(?P=a_path_quote)[ ](?P<b_path_quote>"?)b/(?P<b_path>.+?)(?P=a_path_quote)\n (?:^old[ ]mode[ ](?P<old_mode>\d+)\n ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))? (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n @@ -522,8 +522,8 @@ (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+) \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))? (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))? - (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))? - (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))? + (?:^---[ ](?P<a_file_quote>"?)(a/(?P<a_file>.+?)(?P=a_file_quote)|/dev/null)\t?(?:\n|$))? + (?:^\+\+\+[ ](?P<b_file_quote>"?)(b/(?P<b_file>.+?)(?P=b_file_quote)|/dev/null)\t?(?:\n|$))? """, re.VERBOSE | re.MULTILINE)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kallithea/tests/fixtures/git_diff_quoting.diff Sat Nov 14 15:20:40 2020 +0100 @@ -0,0 +1,53 @@ +diff --git "a/\"foo\"" "b/\"foo\"" +new file mode 100644 +index 0000000..8b13789 +--- /dev/null ++++ "b/\"foo\"" +@@ -0,0 +1 @@ ++ +diff --git a/'foo' b/'foo' +new file mode 100644 +index 0000000..8b13789 +--- /dev/null ++++ b/'foo' +@@ -0,0 +1 @@ ++ +diff --git "a/'foo'\"foo\"" "b/'foo'\"foo\"" +new file mode 100644 +index 0000000..8b13789 +--- /dev/null ++++ "b/'foo'\"foo\"" +@@ -0,0 +1 @@ ++ +diff --git "a/a\r\nb" "b/a\r\nb" +new file mode 100644 +index 0000000..30d74d2 +--- /dev/null ++++ "b/a\r\nb" +@@ -0,0 +1 @@ ++test +\ No newline at end of file +diff --git "a/foo\rfoo" "b/foo\rfoo" +new file mode 100644 +index 0000000..e69de29 +diff --git a/foo bar b/foo bar +new file mode 100644 +index 0000000..219ea2b +--- /dev/null ++++ b/foo bar +@@ -0,0 +1 @@ ++foo bar +\ No newline at end of file +diff --git a/test b/test +new file mode 100644 +index 0000000..9daeafb +--- /dev/null ++++ b/test +@@ -0,0 +1 @@ ++test +diff --git "a/esc\033foo" "b/esc\033foo" +new file mode 100644 +index 0000000..e69de29 +diff --git "a/tab\tfoo" "b/tab\tfoo" +new file mode 100644 +index 0000000..e69de29
--- a/kallithea/tests/models/test_diff_parsers.py Sat Nov 14 15:18:05 2020 +0100 +++ b/kallithea/tests/models/test_diff_parsers.py Sat Nov 14 15:20:40 2020 +0100 @@ -268,6 +268,62 @@ 'binary': False, 'ops': {RENAMED_FILENODE: 'file renamed from oh no to oh yes'}}), ], + 'git_diff_quoting.diff': [ + (r'\"foo\"', # TODO: quotes should not be escaped + 'added', + {'added': 1, + 'binary': False, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + ("'foo'", + 'added', + {'added': 1, + 'binary': False, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + ("'foo'" r'\"foo\"', # TODO: quotes should not be escaped + 'added', + {'added': 1, + 'binary': False, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + (r'a\r\nb', # TODO: escaped + 'added', + {'added': 1, + 'binary': False, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + (r'foo\rfoo', # TODO: escaped + 'added', + {'added': 0, + 'binary': True, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + ('foo bar', + 'added', + {'added': 1, + 'binary': False, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + ('test', + 'added', + {'added': 1, + 'binary': False, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + (r'esc\033foo', # TODO: escaped + 'added', + {'added': 0, + 'binary': True, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + (r'tab\tfoo', # TODO: escaped + 'added', + {'added': 0, + 'binary': True, + 'deleted': 0, + 'ops': {1: 'new file 100644'}}), + ], }