changeset 8765:d2f59de17bef

vcs: use urlparse in _check_url

This makes URL checking more standard and more explicit.
author Mads Kiilerich <mads@kiilerich.com>
date Thu, 03 Dec 2020 11:16:44 +0100
parents 526c8751d75b
children c3b6ad713a0f
files kallithea/lib/vcs/backends/git/repository.py kallithea/lib/vcs/backends/hg/repository.py
diffstat 2 files changed, 32 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/lib/vcs/backends/git/repository.py	Thu Dec 03 10:39:32 2020 +0100
+++ b/kallithea/lib/vcs/backends/git/repository.py	Thu Dec 03 11:16:44 2020 +0100
@@ -157,6 +157,10 @@
 
         >>> GitRepository._check_url('git://example.com/my%20fine repo')
 
+        >>> GitRepository._check_url('http://example.com:65537/repo')
+        Traceback (most recent call last):
+        ...
+        urllib.error.URLError: <urlopen error Error parsing URL: 'http://example.com:65537/repo'>
         >>> GitRepository._check_url('foo')
         Traceback (most recent call last):
         ...
@@ -190,26 +194,28 @@
         ...
         urllib.error.URLError: <urlopen error Invalid whitespace character in path: '\t'>
         """
+        try:
+            parsed_url = urllib.parse.urlparse(url)
+            parsed_url.port  # trigger netloc parsing which might raise ValueError
+        except ValueError:
+            raise urllib.error.URLError("Error parsing URL: %r" % url)
+
         # check first if it's not an local url
         if os.path.isabs(url) and os.path.isdir(url):
             return
 
-        if url.startswith('git://'):
-            try:
-                _git_colon, _empty, _host, path = url.split('/', 3)
-            except ValueError:
-                raise urllib.error.URLError("Invalid URL: %r" % url)
+        if parsed_url.scheme == 'git':
             # Mitigate problems elsewhere with incorrect handling of encoded paths.
             # Don't trust urllib.parse.unquote but be prepared for more flexible implementations elsewhere.
             # Space is the only allowed whitespace character - directly or % encoded. No other % or \ is allowed.
-            for c in path.replace('%20', ' '):
+            for c in parsed_url.path.replace('%20', ' '):
                 if c in '%\\':
                     raise urllib.error.URLError("Invalid escape character in path: '%s'" % c)
                 if c.isspace() and c != ' ':
                     raise urllib.error.URLError("Invalid whitespace character in path: %r" % c)
             return
 
-        if not url.startswith('http://') and not url.startswith('https://'):
+        if parsed_url.scheme not in ['http', 'https']:
             raise urllib.error.URLError("Unsupported protocol in URL %r" % url)
 
         url_obj = mercurial.util.url(safe_bytes(url))
--- a/kallithea/lib/vcs/backends/hg/repository.py	Thu Dec 03 10:39:32 2020 +0100
+++ b/kallithea/lib/vcs/backends/hg/repository.py	Thu Dec 03 11:16:44 2020 +0100
@@ -291,20 +291,35 @@
 
         On failures it'll raise urllib2.HTTPError, exception is also thrown
         when the return code is non 200
+
+        >>> MercurialRepository._check_url('file:///repo')
+
+        >>> MercurialRepository._check_url('http://example.com:65537/repo')
+        Traceback (most recent call last):
+        ...
+        urllib.error.URLError: <urlopen error Error parsing URL: 'http://example.com:65537/repo'>
         """
+        try:
+            parsed_url = urllib.parse.urlparse(url)
+            parsed_url.port  # trigger netloc parsing which might raise ValueError
+        except ValueError:
+            raise urllib.error.URLError("Error parsing URL: %r" % url)
+
         # check first if it's not an local url
-        url = safe_bytes(url)
-        if os.path.isdir(url) or url.startswith(b'file:'):
+        if os.path.isdir(url) or parsed_url.scheme == 'file':
+            # When creating repos, _get_url will use file protocol for local paths
             return
 
-        if url.startswith(b'ssh:'):
+        url = safe_bytes(url)
+
+        if parsed_url.scheme == 'ssh':
             # in case of invalid uri or authentication issues, sshpeer will
             # throw an exception.
             mercurial.sshpeer.instance(repoui or mercurial.ui.ui(), url, False).lookup(b'tip')
             return
 
         url_prefix = None
-        if b'+' in url[:url.find(b'://')]:
+        if '+' in parsed_url.scheme:
             url_prefix, url = url.split(b'+', 1)
 
         url_obj = mercurial.util.url(url)