Mercurial > kallithea
changeset 3008:6e76b4892d72 beta
Implemented #647, option to pass list of default encoding used to encode to/decode from unicode
author | Marcin Kuzminski <marcin@python-works.com> |
---|---|
date | Thu, 15 Nov 2012 00:57:52 +0100 |
parents | b13ca18ac527 |
children | f0e19116f154 |
files | development.ini production.ini rhodecode/config/deployment.ini_tmpl rhodecode/lib/utils2.py rhodecode/lib/vcs/utils/__init__.py |
diffstat | 5 files changed, 59 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/development.ini Wed Nov 14 23:22:38 2012 +0100 +++ b/development.ini Thu Nov 15 00:57:52 2012 +0100 @@ -76,6 +76,8 @@ container_auth_enabled = false proxypass_auth_enabled = false +## default encoding used to convert from and to unicode +## can also be a comma separated list of encodings in case of mixed encodings default_encoding = utf8 ## overwrite schema of clone url
--- a/production.ini Wed Nov 14 23:22:38 2012 +0100 +++ b/production.ini Thu Nov 15 00:57:52 2012 +0100 @@ -76,6 +76,8 @@ container_auth_enabled = false proxypass_auth_enabled = false +## default encoding used to convert from and to unicode +## can also be a comma separated list of encodings in case of mixed encodings default_encoding = utf8 ## overwrite schema of clone url
--- a/rhodecode/config/deployment.ini_tmpl Wed Nov 14 23:22:38 2012 +0100 +++ b/rhodecode/config/deployment.ini_tmpl Thu Nov 15 00:57:52 2012 +0100 @@ -76,6 +76,8 @@ container_auth_enabled = false proxypass_auth_enabled = false +## default encoding used to convert from and to unicode +## can also be a comma separated list of encodings in case of mixed encodings default_encoding = utf8 ## overwrite schema of clone url
--- a/rhodecode/lib/utils2.py Wed Nov 14 23:22:38 2012 +0100 +++ b/rhodecode/lib/utils2.py Thu Nov 15 00:57:52 2012 +0100 @@ -66,6 +66,7 @@ return dict(d) + def str2bool(_str): """ returs True/False value from given string, it tries to translate the @@ -83,6 +84,27 @@ return _str in ('t', 'true', 'y', 'yes', 'on', '1') +def aslist(obj, sep=None, strip=True): + """ + Returns given string separated by sep as list + + :param obj: + :param sep: + :param strip: + """ + if isinstance(obj, (basestring)): + lst = obj.split(sep) + if strip: + lst = [v.strip() for v in lst] + return lst + elif isinstance(obj, (list, tuple)): + return obj + elif obj is None: + return [] + else: + return [obj] + + def convert_line_endings(line, mode): """ Converts a given line "line end" accordingly to given mode @@ -182,18 +204,23 @@ if not from_encoding: import rhodecode - DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding','utf8') - from_encoding = DEFAULT_ENCODING + DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding', + 'utf8'), sep=',') + from_encoding = DEFAULT_ENCODINGS + + if not isinstance(from_encoding, (list, tuple)): + from_encoding = [from_encoding] try: return unicode(str_) except UnicodeDecodeError: pass - try: - return unicode(str_, from_encoding) - except UnicodeDecodeError: - pass + for enc in from_encoding: + try: + return unicode(str_, enc) + except UnicodeDecodeError: + pass try: import chardet @@ -202,7 +229,7 @@ raise Exception() return str_.decode(encoding) except (ImportError, UnicodeDecodeError, Exception): - return unicode(str_, from_encoding, 'replace') + return unicode(str_, from_encoding[0], 'replace') def safe_str(unicode_, to_encoding=None): @@ -226,13 +253,18 @@ if not to_encoding: import rhodecode - DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding','utf8') - to_encoding = DEFAULT_ENCODING + DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding', + 'utf8'), sep=',') + to_encoding = DEFAULT_ENCODINGS - try: - return 
unicode_.encode(to_encoding) - except UnicodeEncodeError: - pass + if not isinstance(to_encoding, (list, tuple)): + to_encoding = [to_encoding] + + for enc in to_encoding: + try: + return unicode_.encode(enc) + except UnicodeEncodeError: + pass try: import chardet @@ -242,7 +274,7 @@ return unicode_.encode(encoding) except (ImportError, UnicodeEncodeError): - return unicode_.encode(to_encoding, 'replace') + return unicode_.encode(to_encoding[0], 'replace') return safe_str
--- a/rhodecode/lib/vcs/utils/__init__.py Wed Nov 14 23:22:38 2012 +0100 +++ b/rhodecode/lib/vcs/utils/__init__.py Thu Nov 15 00:57:52 2012 +0100 @@ -38,12 +38,12 @@ :rtype: unicode :returns: unicode object """ + from rhodecode.lib.utils2 import safe_unicode + return safe_unicode(str_, from_encoding) + if isinstance(str_, unicode): return str_ - if not from_encoding: - import rhodecode - DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding', 'utf8') - from_encoding = DEFAULT_ENCODING + try: return unicode(str_) except UnicodeDecodeError: @@ -75,13 +75,12 @@ :rtype: str :returns: str object """ + from rhodecode.lib.utils2 import safe_str + return safe_str(unicode_, to_encoding) if isinstance(unicode_, str): return unicode_ - if not to_encoding: - import rhodecode - DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding', 'utf8') - to_encoding = DEFAULT_ENCODING + try: return unicode_.encode(to_encoding) except UnicodeEncodeError: