changeset 3008:6e76b4892d72 beta

Implemented #647, option to pass list of default encoding used to encode to/decode from unicode
author Marcin Kuzminski <marcin@python-works.com>
date Thu, 15 Nov 2012 00:57:52 +0100
parents b13ca18ac527
children f0e19116f154
files development.ini production.ini rhodecode/config/deployment.ini_tmpl rhodecode/lib/utils2.py rhodecode/lib/vcs/utils/__init__.py
diffstat 5 files changed, 59 insertions(+), 22 deletions(-) [+]
line wrap: on
line diff
--- a/development.ini	Wed Nov 14 23:22:38 2012 +0100
+++ b/development.ini	Thu Nov 15 00:57:52 2012 +0100
@@ -76,6 +76,8 @@
 
 container_auth_enabled = false
 proxypass_auth_enabled = false
+## default encoding used to convert from and to unicode
+## can also be a comma-separated list of encodings in case of mixed encodings
 default_encoding = utf8
 
 ## overwrite schema of clone url
--- a/production.ini	Wed Nov 14 23:22:38 2012 +0100
+++ b/production.ini	Thu Nov 15 00:57:52 2012 +0100
@@ -76,6 +76,8 @@
 
 container_auth_enabled = false
 proxypass_auth_enabled = false
+## default encoding used to convert from and to unicode
+## can also be a comma-separated list of encodings in case of mixed encodings
 default_encoding = utf8
 
 ## overwrite schema of clone url
--- a/rhodecode/config/deployment.ini_tmpl	Wed Nov 14 23:22:38 2012 +0100
+++ b/rhodecode/config/deployment.ini_tmpl	Thu Nov 15 00:57:52 2012 +0100
@@ -76,6 +76,8 @@
 
 container_auth_enabled = false
 proxypass_auth_enabled = false
+## default encoding used to convert from and to unicode
+## can also be a comma-separated list of encodings in case of mixed encodings
 default_encoding = utf8
 
 ## overwrite schema of clone url
--- a/rhodecode/lib/utils2.py	Wed Nov 14 23:22:38 2012 +0100
+++ b/rhodecode/lib/utils2.py	Thu Nov 15 00:57:52 2012 +0100
@@ -66,6 +66,7 @@
 
     return dict(d)
 
+
 def str2bool(_str):
     """
     returs True/False value from given string, it tries to translate the
@@ -83,6 +84,27 @@
     return _str in ('t', 'true', 'y', 'yes', 'on', '1')
 
 
+def aslist(obj, sep=None, strip=True):
+    """
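+    Returns obj as a list; a string is split on sep, None gives []
+
+    :param obj: string to split; a list/tuple is returned unchanged
+    :param sep: separator passed to split(), None splits on whitespace
+    :param strip: if True, strip whitespace from every split item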
+    """
+    if isinstance(obj, (basestring)):
+        lst = obj.split(sep)
+        if strip:
+            lst = [v.strip() for v in lst]
+        return lst
+    elif isinstance(obj, (list, tuple)):
+        return obj
+    elif obj is None:
+        return []
+    else:
+        return [obj]
+
+
 def convert_line_endings(line, mode):
     """
     Converts a given line  "line end" accordingly to given mode
@@ -182,18 +204,23 @@
 
     if not from_encoding:
         import rhodecode
-        DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding','utf8')
-        from_encoding = DEFAULT_ENCODING
+        DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding',
+                                                        'utf8'), sep=',')
+        from_encoding = DEFAULT_ENCODINGS
+
+    if not isinstance(from_encoding, (list, tuple)):
+        from_encoding = [from_encoding]
 
     try:
         return unicode(str_)
     except UnicodeDecodeError:
         pass
 
-    try:
-        return unicode(str_, from_encoding)
-    except UnicodeDecodeError:
-        pass
+    for enc in from_encoding:
+        try:
+            return unicode(str_, enc)
+        except UnicodeDecodeError:
+            pass
 
     try:
         import chardet
@@ -202,7 +229,7 @@
             raise Exception()
         return str_.decode(encoding)
     except (ImportError, UnicodeDecodeError, Exception):
-        return unicode(str_, from_encoding, 'replace')
+        return unicode(str_, from_encoding[0], 'replace')
 
 
 def safe_str(unicode_, to_encoding=None):
@@ -226,13 +253,18 @@
 
     if not to_encoding:
         import rhodecode
-        DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding','utf8')
-        to_encoding = DEFAULT_ENCODING
+        DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding',
+                                                        'utf8'), sep=',')
+        to_encoding = DEFAULT_ENCODINGS
 
-    try:
-        return unicode_.encode(to_encoding)
-    except UnicodeEncodeError:
-        pass
+    if not isinstance(to_encoding, (list, tuple)):
+        to_encoding = [to_encoding]
+
+    for enc in to_encoding:
+        try:
+            return unicode_.encode(enc)
+        except UnicodeEncodeError:
+            pass
 
     try:
         import chardet
@@ -242,7 +274,7 @@
 
         return unicode_.encode(encoding)
     except (ImportError, UnicodeEncodeError):
-        return unicode_.encode(to_encoding, 'replace')
+        return unicode_.encode(to_encoding[0], 'replace')
 
     return safe_str
 
--- a/rhodecode/lib/vcs/utils/__init__.py	Wed Nov 14 23:22:38 2012 +0100
+++ b/rhodecode/lib/vcs/utils/__init__.py	Thu Nov 15 00:57:52 2012 +0100
@@ -38,12 +38,12 @@
     :rtype: unicode
     :returns: unicode object
     """
+    from rhodecode.lib.utils2 import safe_unicode
+    return safe_unicode(str_, from_encoding)
+
     if isinstance(str_, unicode):
         return str_
-    if not from_encoding:
-        import rhodecode
-        DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding', 'utf8')
-        from_encoding = DEFAULT_ENCODING
+
     try:
         return unicode(str_)
     except UnicodeDecodeError:
@@ -75,13 +75,12 @@
     :rtype: str
     :returns: str object
     """
+    from rhodecode.lib.utils2 import safe_str
+    return safe_str(unicode_, to_encoding)
 
     if isinstance(unicode_, str):
         return unicode_
-    if not to_encoding:
-        import rhodecode
-        DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding', 'utf8')
-        to_encoding = DEFAULT_ENCODING
+
     try:
         return unicode_.encode(to_encoding)
     except UnicodeEncodeError: