Mercurial > kallithea
changeset 443:e5157e2a530e
added safe unicode function, and implemented it in whoosh indexer
author | Marcin Kuzminski <marcin@python-works.com> |
---|---|
date | Wed, 01 Sep 2010 23:38:03 +0200 |
parents | d66a7fa7689b |
children | 0668919c307c |
files | pylons_app/lib/helpers.py pylons_app/lib/indexers/daemon.py |
diffstat | 2 files changed, 18 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/pylons_app/lib/helpers.py Wed Sep 01 23:32:47 2010 +0200
+++ b/pylons_app/lib/helpers.py Wed Sep 01 23:38:03 2010 +0200
@@ -336,3 +336,19 @@
     gravatar_url += urllib.urlencode({'d':default, 's':str(size)})
 
     return gravatar_url
+
+def safe_unicode(str):
+    """safe unicode function. In case of UnicodeDecode error we try to return
+    unicode with errors replace, if this failes we return unicode with
+    string_escape decoding """
+
+    try:
+        u_str = unicode(str)
+    except UnicodeDecodeError:
+        try:
+            u_str = unicode(str, 'utf-8', 'replace')
+        except UnicodeDecodeError:
+            #incase we have a decode error just represent as byte string
+            u_str = unicode(str(str).encode('string_escape'))
+
+    return u_str
\ No newline at end of file
--- a/pylons_app/lib/indexers/daemon.py Wed Sep 01 23:32:47 2010 +0200
+++ b/pylons_app/lib/indexers/daemon.py Wed Sep 01 23:38:03 2010 +0200
@@ -36,6 +36,7 @@
 import traceback
 from pylons_app.config.environment import load_environment
 from pylons_app.model.hg_model import HgModel
+from pylons_app.lib.helpers import safe_unicode
 from whoosh.index import create_in, open_dir
 from shutil import rmtree
 from pylons_app.lib.indexers import ANALYZER, INDEX_EXTENSIONS, IDX_LOCATION, \
@@ -77,11 +78,7 @@
             fobj = open(path, 'rb')
             content = fobj.read()
             fobj.close()
-            try:
-                u_content = unicode(content)
-            except UnicodeDecodeError:
-                #incase we have a decode error just represent as byte string
-                u_content = unicode(str(content).encode('string_escape'))
+            u_content = safe_unicode(content)
         else:
             log.debug(' >> %s' % path)
             #just index file name without it's content