Mercurial > kallithea
changeset 8202:6c381371d106
py3: fix non-ASCII URLs - decode unicode correctly before passing them to controllers as unicode strings
This is needed for supporting localized repo path names in the path of URLs.
Some references:
https://www.python.org/dev/peps/pep-0333/#unicode-issues
https://bugs.python.org/issue16679
http://lucumr.pocoo.org/2010/5/25/wsgi-on-python-3/
https://bugs.launchpad.net/pecan/+bug/1451842
https://github.com/tipabu/eventlet/commit/a5a7751b013fe99b6d30acbca79e819770e9ae5d
author | Mads Kiilerich <mads@kiilerich.com> |
---|---|
date | Mon, 23 Dec 2019 00:56:45 +0100 |
parents | 620c13a373c5 |
children | c146a2ab50a8 |
files | kallithea/config/routing.py kallithea/lib/base.py kallithea/lib/middleware/permanent_repo_url.py |
diffstat | 3 files changed, 30 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/kallithea/config/routing.py Thu Dec 26 15:17:51 2019 +0100 +++ b/kallithea/config/routing.py Mon Dec 23 00:56:45 2019 +0100 @@ -19,14 +19,34 @@ refer to the routes manual at http://routes.groovie.org/docs/ """ -from routes import Mapper +import routes from tg import request +from kallithea.lib.utils2 import safe_str + # prefix for non repository related links needs to be prefixed with `/` ADMIN_PREFIX = '/_admin' +class Mapper(routes.Mapper): + """ + Subclassed Mapper with routematch patched to decode "unicode" str url to + *real* unicode str before applying matches and invoking controller methods. + """ + + def routematch(self, url=None, environ=None): + """ + routematch that also decodes url from "fake bytes" to real unicode + string before matching and invoking controllers. + """ + # Process url like get_path_info does ... but PATH_INFO has already + # been retrieved from environ and is passed, so - let's just use that + # instead. + url = safe_str(url.encode('latin1')) + return super().routematch(url=url, environ=environ) + + def make_map(config): """Create, configure and return the routes Mapper""" rmap = Mapper(directory=config['paths']['controllers'],
--- a/kallithea/lib/base.py Thu Dec 26 15:17:51 2019 +0100 +++ b/kallithea/lib/base.py Mon Dec 23 00:56:45 2019 +0100 @@ -97,12 +97,17 @@ def get_path_info(environ): - """Return unicode PATH_INFO from environ ... using tg.original_request if available. + """Return PATH_INFO from environ ... using tg.original_request if available. + + In Python 3 WSGI, PATH_INFO is a unicode str, but kind of contains encoded + bytes. The code points are guaranteed to only use the lower 8 bits, and + encoding the string with the 1:1 encoding latin1 will give the + corresponding byte string ... which then can be decoded to proper unicode. """ org_req = environ.get('tg.original_request') if org_req is not None: environ = org_req.environ - return safe_str(environ['PATH_INFO']) + return safe_str(environ['PATH_INFO'].encode('latin1')) def log_in_user(user, remember, is_external_auth, ip_addr):
--- a/kallithea/lib/middleware/permanent_repo_url.py Thu Dec 26 15:17:51 2019 +0100 +++ b/kallithea/lib/middleware/permanent_repo_url.py Mon Dec 23 00:56:45 2019 +0100 @@ -33,9 +33,9 @@ def __call__(self, environ, start_response): # Extract path_info as get_path_info does, but do it explicitly because # we also have to do the reverse operation when patching it back in - path_info = safe_str(environ['PATH_INFO']) + path_info = safe_str(environ['PATH_INFO'].encode('latin1')) if path_info.startswith('/'): # it must path_info = '/' + fix_repo_id_name(path_info[1:]) - environ['PATH_INFO'] = safe_bytes(path_info) + environ['PATH_INFO'] = safe_bytes(path_info).decode('latin1') return self.application(environ, start_response)