Mercurial > kallithea
changeset 8031:84847aa20d77
cleanup: sanitize safe_str and safe_unicode, while staying in py2 land
author | Mads Kiilerich <mads@kiilerich.com> |
---|---|
date | Sun, 24 Nov 2019 23:23:42 +0100 |
parents | e8e1bf4743df |
children | 287b5f1cb40a |
files | kallithea/lib/vcs/utils/__init__.py |
diffstat | 1 files changed, 31 insertions(+), 63 deletions(-) [+] |
line wrap: on
line diff
--- a/kallithea/lib/vcs/utils/__init__.py	Mon Dec 16 01:14:47 2019 +0100
+++ b/kallithea/lib/vcs/utils/__init__.py	Sun Nov 24 23:23:42 2019 +0100
@@ -66,89 +66,57 @@
     return val
 
 
-def safe_unicode(str_, from_encoding=None):
+def safe_unicode(s):
     """
-    safe unicode function. Does few trick to turn str_ into unicode
-
-    In case of UnicodeDecode error we try to return it with encoding detected
-    by chardet library if it fails fallback to unicode with errors replaced
-
-    :param str_: string to decode
-    :rtype: unicode
-    :returns: unicode object
+    Safe unicode function. Use a few tricks to turn s into unicode string:
+    In case of UnicodeDecodeError with configured default encodings, try to
+    detect encoding with chardet library, then fall back to first encoding with
+    errors replaced.
     """
-    if isinstance(str_, unicode):
-        return str_
+    if isinstance(s, unicode):
+        return s
 
-    if not from_encoding:
-        from kallithea.lib.vcs.conf import settings
-        from_encoding = settings.DEFAULT_ENCODINGS
-
-    if not isinstance(from_encoding, (list, tuple)):
-        from_encoding = [from_encoding]
+    if not isinstance(s, str):  # use __str__ / __unicode__ and don't expect UnicodeDecodeError
+        return unicode(s)
 
-    try:
-        return unicode(str_)
-    except UnicodeDecodeError:
-        pass
-
-    for enc in from_encoding:
+    from kallithea.lib.vcs.conf import settings
+    for enc in settings.DEFAULT_ENCODINGS:
         try:
-            return unicode(str_, enc)
+            return unicode(s, enc)
         except UnicodeDecodeError:
             pass
 
     try:
         import chardet
-        encoding = chardet.detect(str_)['encoding']
-        if encoding is None:
-            raise Exception()
-        return str_.decode(encoding)
-    except (ImportError, UnicodeDecodeError, Exception):
-        return unicode(str_, from_encoding[0], 'replace')
+        encoding = chardet.detect(s)['encoding']
+        if encoding is not None:
+            return s.decode(encoding)
+    except (ImportError, UnicodeDecodeError):
+        pass
+
+    return unicode(s, settings.DEFAULT_ENCODINGS[0], 'replace')
 
 
-def safe_str(unicode_, to_encoding=None):
+def safe_str(s):
     """
-    safe str function. Does few trick to turn unicode_ into string
-
-    In case of UnicodeEncodeError we try to return it with encoding detected
-    by chardet library if it fails fallback to string with errors replaced
-
-    :param unicode_: unicode to encode
-    :rtype: str
-    :returns: str object
+    Safe str function. Use a few tricks to turn s into bytes string:
+    In case of UnicodeEncodeError with configured default encodings, fall back
+    to first configured encoding with errors replaced.
     """
+    if isinstance(s, str):
+        return s
 
-    # if it's not basestr cast to str
-    if not isinstance(unicode_, basestring):
-        return str(unicode_)
-
-    if isinstance(unicode_, str):
-        return unicode_
+    if not isinstance(s, unicode):
+        return str(s)
 
-    if not to_encoding:
-        from kallithea.lib.vcs.conf import settings
-        to_encoding = settings.DEFAULT_ENCODINGS
-
-    if not isinstance(to_encoding, (list, tuple)):
-        to_encoding = [to_encoding]
-
-    for enc in to_encoding:
+    from kallithea.lib.vcs.conf import settings
+    for enc in settings.DEFAULT_ENCODINGS:
         try:
-            return unicode_.encode(enc)
+            return s.encode(enc)
         except UnicodeEncodeError:
             pass
 
-    try:
-        import chardet
-        encoding = chardet.detect(unicode_)['encoding']
-        if encoding is None:
-            raise UnicodeEncodeError()
-
-        return unicode_.encode(encoding)
-    except (ImportError, UnicodeEncodeError):
-        return unicode_.encode(to_encoding[0], 'replace')
+    return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace')
 
 
 # Regex taken from http://www.regular-expressions.info/email.html