changeset 8039:4e565c5d7b7d

lib: establish py3-compatible strategy for string handling: introducing safe_bytes and deprecating safe_str. The meaning of safe_str will change when moving to py3. All use of safe_str is thus tech debt that we have to chop off, mostly by moving to either safe_unicode or safe_bytes ... or dropping because we know what we are doing and rely on the improved type safety in py3.
author Mads Kiilerich <mads@kiilerich.com>
date Sun, 15 Dec 2019 20:00:38 +0100
parents ed08a2117d8f
children 0f69b5c35b2b
files kallithea/lib/utils2.py kallithea/lib/vcs/utils/__init__.py
diffstat 2 files changed, 9 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/lib/utils2.py	Thu Dec 26 15:07:36 2019 +0100
+++ b/kallithea/lib/utils2.py	Sun Dec 15 20:00:38 2019 +0100
@@ -43,7 +43,7 @@
 from webhelpers2.text import collapse, remove_formatting, strip_tags
 
 from kallithea.lib.compat import json
-from kallithea.lib.vcs.utils import safe_str, safe_unicode  # re-export
+from kallithea.lib.vcs.utils import safe_bytes, safe_str, safe_unicode  # re-export
 from kallithea.lib.vcs.utils.lazy import LazyProperty
 
 
--- a/kallithea/lib/vcs/utils/__init__.py	Thu Dec 26 15:07:36 2019 +0100
+++ b/kallithea/lib/vcs/utils/__init__.py	Sun Dec 15 20:00:38 2019 +0100
@@ -76,7 +76,7 @@
     if isinstance(s, unicode):
         return s
 
-    if not isinstance(s, str):  # use __str__ / __unicode__ and don't expect UnicodeDecodeError
+    if not isinstance(s, bytes):  # use __str__ / __unicode__ and don't expect UnicodeDecodeError
         return unicode(s)
 
     from kallithea.lib.vcs.conf import settings
@@ -97,16 +97,16 @@
     return unicode(s, settings.DEFAULT_ENCODINGS[0], 'replace')
 
 
-def safe_str(s):
+def safe_bytes(s):
     """
-    Safe str function. Use a few tricks to turn s into bytes string:
+    Safe bytes function. Use a few tricks to turn s into bytes string:
     In case of UnicodeEncodeError with configured default encodings, fall back
     to first configured encoding with errors replaced.
     """
-    if isinstance(s, str):
+    if isinstance(s, bytes):
         return s
 
-    assert isinstance(s, unicode), s  # don't use safe_str to coerce non-strings
+    assert isinstance(s, unicode), repr(s)  # bytes cannot coerce with __str__ or handle None or int
 
     from kallithea.lib.vcs.conf import settings
     for enc in settings.DEFAULT_ENCODINGS:
@@ -118,6 +118,9 @@
     return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace')
 
 
+safe_str = safe_bytes  # safe_str is deprecated - it will be redefined when changing to py3
+
+
 # Regex taken from http://www.regular-expressions.info/email.html
 email_re = re.compile(
     r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@"""