changeset 8768:fd61f678577f

diff: improved handling of Git diffs with " quoting Kallithea would intentionally and explicitly fail with an ugly exception when trying to parse Git diffs with quoted filenames. Improve this by parsing quotes ... and ignore them, as long as they are matching. The content inside the quotes might be \-escaped ... but that could potentially also be the case without quoting. We will fix that later. Adding some test cases that before would have failed to parse and raised an exception. Thanks to stypr of Flatt Security for raising this.
author Mads Kiilerich <mads@kiilerich.com>
date Sat, 14 Nov 2020 15:20:40 +0100
parents 817dcce30d67
children d35d14b05b82
files kallithea/lib/diffs.py kallithea/tests/fixtures/git_diff_quoting.diff kallithea/tests/models/test_diff_parsers.py
diffstat 3 files changed, 112 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/lib/diffs.py	Sat Nov 14 15:18:05 2020 +0100
+++ b/kallithea/lib/diffs.py	Sat Nov 14 15:20:40 2020 +0100
@@ -511,7 +511,7 @@
 
 
 _git_header_re = re.compile(br"""
-    ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
+    ^diff[ ]--git[ ](?P<a_path_quote>"?)a/(?P<a_path>.+?)(?P=a_path_quote)[ ](?P<b_path_quote>"?)b/(?P<b_path>.+?)(?P=a_path_quote)\n
     (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
        ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
     (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
@@ -522,8 +522,8 @@
     (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
         \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
     (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
-    (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
-    (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
+    (?:^---[ ](?P<a_file_quote>"?)(a/(?P<a_file>.+?)(?P=a_file_quote)|/dev/null)\t?(?:\n|$))?
+    (?:^\+\+\+[ ](?P<b_file_quote>"?)(b/(?P<b_file>.+?)(?P=b_file_quote)|/dev/null)\t?(?:\n|$))?
 """, re.VERBOSE | re.MULTILINE)
 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kallithea/tests/fixtures/git_diff_quoting.diff	Sat Nov 14 15:20:40 2020 +0100
@@ -0,0 +1,53 @@
+diff --git "a/\"foo\"" "b/\"foo\""
+new file mode 100644
+index 0000000..8b13789
+--- /dev/null
++++ "b/\"foo\""
+@@ -0,0 +1 @@
++
+diff --git a/'foo' b/'foo'
+new file mode 100644
+index 0000000..8b13789
+--- /dev/null
++++ b/'foo'
+@@ -0,0 +1 @@
++
+diff --git "a/'foo'\"foo\"" "b/'foo'\"foo\""
+new file mode 100644
+index 0000000..8b13789
+--- /dev/null
++++ "b/'foo'\"foo\""
+@@ -0,0 +1 @@
++
+diff --git "a/a\r\nb" "b/a\r\nb"
+new file mode 100644
+index 0000000..30d74d2
+--- /dev/null
++++ "b/a\r\nb"
+@@ -0,0 +1 @@
++test
+\ No newline at end of file
+diff --git "a/foo\rfoo" "b/foo\rfoo"
+new file mode 100644
+index 0000000..e69de29
+diff --git a/foo bar b/foo bar
+new file mode 100644
+index 0000000..219ea2b
+--- /dev/null
++++ b/foo bar	
+@@ -0,0 +1 @@
++foo  bar
+\ No newline at end of file
+diff --git a/test b/test
+new file mode 100644
+index 0000000..9daeafb
+--- /dev/null
++++ b/test
+@@ -0,0 +1 @@
++test
+diff --git "a/esc\033foo" "b/esc\033foo"
+new file mode 100644
+index 0000000..e69de29
+diff --git "a/tab\tfoo" "b/tab\tfoo"
+new file mode 100644
+index 0000000..e69de29
--- a/kallithea/tests/models/test_diff_parsers.py	Sat Nov 14 15:18:05 2020 +0100
+++ b/kallithea/tests/models/test_diff_parsers.py	Sat Nov 14 15:20:40 2020 +0100
@@ -268,6 +268,62 @@
           'binary': False,
           'ops': {RENAMED_FILENODE: 'file renamed from oh no to oh yes'}}),
     ],
+    'git_diff_quoting.diff': [
+        (r'\"foo\"',  # TODO: quotes should not be escaped
+         'added',
+         {'added': 1,
+          'binary': False,
+          'deleted': 0,
+          'ops': {1: 'new file 100644'}}),
+        ("'foo'",
+         'added',
+         {'added': 1,
+          'binary': False,
+          'deleted': 0,
+          'ops': {1: 'new file 100644'}}),
+        ("'foo'" r'\"foo\"',  # TODO: quotes should not be escaped
+         'added',
+         {'added': 1,
+          'binary': False,
+          'deleted': 0,
+          'ops': {1: 'new file 100644'}}),
+        (r'a\r\nb',  # TODO: escaped
+         'added',
+         {'added': 1,
+          'binary': False,
+          'deleted': 0,
+          'ops': {1: 'new file 100644'}}),
+        (r'foo\rfoo',  # TODO: escaped
+         'added',
+        {'added': 0,
+         'binary': True,
+         'deleted': 0,
+          'ops': {1: 'new file 100644'}}),
+        ('foo bar',
+         'added',
+         {'added': 1,
+          'binary': False,
+          'deleted': 0,
+          'ops': {1: 'new file 100644'}}),
+        ('test',
+         'added',
+         {'added': 1,
+          'binary': False,
+          'deleted': 0,
+          'ops': {1: 'new file 100644'}}),
+        (r'esc\033foo',  # TODO: escaped
+         'added',
+         {'added': 0,
+          'binary': True,
+          'deleted': 0,
+          'ops': {1: 'new file 100644'}}),
+        (r'tab\tfoo',  # TODO: escaped
+         'added',
+         {'added': 0,
+          'binary': True,
+          'deleted': 0,
+          'ops': {1: 'new file 100644'}}),
+    ],
 }