Mercurial > kallithea
changeset 8039:4e565c5d7b7d
lib: establish py3 compatible strategy for string handling: introducing safe_bytes and deprecating safe_str
The meaning of safe_str will change when moving to py3. All uses of safe_str are
thus tech debt that we have to chop off, mostly by moving to either
safe_unicode or safe_bytes ... or by dropping the call where we know what we are
doing and can rely on the improved type safety in py3.
author | Mads Kiilerich <mads@kiilerich.com> |
---|---|
date | Sun, 15 Dec 2019 20:00:38 +0100 |
parents | ed08a2117d8f |
children | 0f69b5c35b2b |
files | kallithea/lib/utils2.py kallithea/lib/vcs/utils/__init__.py |
diffstat | 2 files changed, 9 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/kallithea/lib/utils2.py Thu Dec 26 15:07:36 2019 +0100 +++ b/kallithea/lib/utils2.py Sun Dec 15 20:00:38 2019 +0100 @@ -43,7 +43,7 @@ from webhelpers2.text import collapse, remove_formatting, strip_tags from kallithea.lib.compat import json -from kallithea.lib.vcs.utils import safe_str, safe_unicode # re-export +from kallithea.lib.vcs.utils import safe_bytes, safe_str, safe_unicode # re-export from kallithea.lib.vcs.utils.lazy import LazyProperty
--- a/kallithea/lib/vcs/utils/__init__.py Thu Dec 26 15:07:36 2019 +0100 +++ b/kallithea/lib/vcs/utils/__init__.py Sun Dec 15 20:00:38 2019 +0100 @@ -76,7 +76,7 @@ if isinstance(s, unicode): return s - if not isinstance(s, str): # use __str__ / __unicode__ and don't expect UnicodeDecodeError + if not isinstance(s, bytes): # use __str__ / __unicode__ and don't expect UnicodeDecodeError return unicode(s) from kallithea.lib.vcs.conf import settings @@ -97,16 +97,16 @@ return unicode(s, settings.DEFAULT_ENCODINGS[0], 'replace') -def safe_str(s): +def safe_bytes(s): """ - Safe str function. Use a few tricks to turn s into bytes string: + Safe bytes function. Use a few tricks to turn s into bytes string: In case of UnicodeEncodeError with configured default encodings, fall back to first configured encoding with errors replaced. """ - if isinstance(s, str): + if isinstance(s, bytes): return s - assert isinstance(s, unicode), s # don't use safe_str to coerce non-strings + assert isinstance(s, unicode), repr(s) # bytes cannot coerse with __str__ or handle None or int from kallithea.lib.vcs.conf import settings for enc in settings.DEFAULT_ENCODINGS: @@ -118,6 +118,9 @@ return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace') +safe_str = safe_bytes # safe_str is deprecated - it will be redefined when changing to py3 + + # Regex taken from http://www.regular-expressions.info/email.html email_re = re.compile( r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@"""