# HG changeset patch # User Mads Kiilerich # Date 1606990604 -3600 # Node ID d2f59de17bef6d4a0d4ef2330a57d87fed27fde4 # Parent 526c8751d75bb094d929c0a4afb1486abe94de54 vcs: use urlparse in _check_url This makes URL checking more standard and more explicit. diff -r 526c8751d75b -r d2f59de17bef kallithea/lib/vcs/backends/git/repository.py --- a/kallithea/lib/vcs/backends/git/repository.py Thu Dec 03 10:39:32 2020 +0100 +++ b/kallithea/lib/vcs/backends/git/repository.py Thu Dec 03 11:16:44 2020 +0100 @@ -157,6 +157,10 @@ >>> GitRepository._check_url('git://example.com/my%20fine repo') + >>> GitRepository._check_url('http://example.com:65537/repo') + Traceback (most recent call last): + ... + urllib.error.URLError: >>> GitRepository._check_url('foo') Traceback (most recent call last): ... @@ -190,26 +194,28 @@ ... urllib.error.URLError: """ + try: + parsed_url = urllib.parse.urlparse(url) + parsed_url.port # trigger netloc parsing which might raise ValueError + except ValueError: + raise urllib.error.URLError("Error parsing URL: %r" % url) + # check first if it's not an local url if os.path.isabs(url) and os.path.isdir(url): return - if url.startswith('git://'): - try: - _git_colon, _empty, _host, path = url.split('/', 3) - except ValueError: - raise urllib.error.URLError("Invalid URL: %r" % url) + if parsed_url.scheme == 'git': # Mitigate problems elsewhere with incorrect handling of encoded paths. # Don't trust urllib.parse.unquote but be prepared for more flexible implementations elsewhere. # Space is the only allowed whitespace character - directly or % encoded. No other % or \ is allowed. - for c in path.replace('%20', ' '): + for c in parsed_url.path.replace('%20', ' '): if c in '%\\': raise urllib.error.URLError("Invalid escape character in path: '%s'" % c) if c.isspace() and c != ' ': raise urllib.error.URLError("Invalid whitespace character in path: %r" % c) return - if not url.startswith('http://') and not url.startswith('https://'): + if parsed_url.scheme not in ['http', 'https']: raise urllib.error.URLError("Unsupported protocol in URL %r" % url) url_obj = mercurial.util.url(safe_bytes(url)) diff -r 526c8751d75b -r d2f59de17bef kallithea/lib/vcs/backends/hg/repository.py --- a/kallithea/lib/vcs/backends/hg/repository.py Thu Dec 03 10:39:32 2020 +0100 +++ b/kallithea/lib/vcs/backends/hg/repository.py Thu Dec 03 11:16:44 2020 +0100 @@ -291,20 +291,35 @@ On failures it'll raise urllib2.HTTPError, exception is also thrown when the return code is non 200 + + >>> MercurialRepository._check_url('file:///repo') + + >>> MercurialRepository._check_url('http://example.com:65537/repo') + Traceback (most recent call last): + ... + urllib.error.URLError: """ + try: + parsed_url = urllib.parse.urlparse(url) + parsed_url.port # trigger netloc parsing which might raise ValueError + except ValueError: + raise urllib.error.URLError("Error parsing URL: %r" % url) + # check first if it's not an local url - url = safe_bytes(url) - if os.path.isdir(url) or url.startswith(b'file:'): + if os.path.isdir(url) or parsed_url.scheme == 'file': + # When creating repos, _get_url will use file protocol for local paths return - if url.startswith(b'ssh:'): + url = safe_bytes(url) + + if parsed_url.scheme == 'ssh': # in case of invalid uri or authentication issues, sshpeer will # throw an exception. mercurial.sshpeer.instance(repoui or mercurial.ui.ui(), url, False).lookup(b'tip') return url_prefix = None - if b'+' in url[:url.find(b'://')]: + if '+' in parsed_url.scheme: url_prefix, url = url.split(b'+', 1) url_obj = mercurial.util.url(url)