# HG changeset patch
# User Marcin Kuzminski
# Date 1283377083 -7200
# Node ID e5157e2a530e3c42298a229ccdcd9387b4822113
# Parent  d66a7fa7689b4796f153fcf40331b81bdf5eb29b
added safe unicode function, and implemented it in whoosh indexer

diff -r d66a7fa7689b -r e5157e2a530e pylons_app/lib/helpers.py
--- a/pylons_app/lib/helpers.py	Wed Sep 01 23:32:47 2010 +0200
+++ b/pylons_app/lib/helpers.py	Wed Sep 01 23:38:03 2010 +0200
@@ -336,3 +336,27 @@
     gravatar_url += urllib.urlencode({'d':default, 's':str(size)})
 
     return gravatar_url
+
+
+def safe_unicode(str):
+    """Return ``str`` converted to unicode without raising on bad bytes.
+
+    Conversion is attempted in three steps: the default codec first, then
+    UTF-8 with undecodable bytes replaced, and finally a ``string_escape``
+    representation of the raw byte string.
+
+    :param str: value to convert, typically a byte string read from a file
+    :returns: unicode object
+    """
+    try:
+        return unicode(str)
+    except UnicodeDecodeError:
+        pass
+
+    try:
+        return unicode(str, 'utf-8', 'replace')
+    except (UnicodeDecodeError, TypeError):
+        # The parameter shadows the ``str`` builtin, so ``str(str)`` would
+        # try to call the argument; ``'%s' %`` does the same conversion.
+        # TypeError covers non-string objects, which cannot be decoded.
+        return unicode(('%s' % (str,)).encode('string_escape'))
\ No newline at end of file
diff -r d66a7fa7689b -r e5157e2a530e pylons_app/lib/indexers/daemon.py
--- a/pylons_app/lib/indexers/daemon.py	Wed Sep 01 23:32:47 2010 +0200
+++ b/pylons_app/lib/indexers/daemon.py	Wed Sep 01 23:38:03 2010 +0200
@@ -36,6 +36,7 @@
 import traceback
 from pylons_app.config.environment import load_environment
 from pylons_app.model.hg_model import HgModel
+from pylons_app.lib.helpers import safe_unicode
 from whoosh.index import create_in, open_dir
 from shutil import rmtree
 from pylons_app.lib.indexers import ANALYZER, INDEX_EXTENSIONS, IDX_LOCATION, \
@@ -77,11 +78,7 @@
             fobj = open(path, 'rb')
             content = fobj.read()
             fobj.close()
-            try:
-                u_content = unicode(content)
-            except UnicodeDecodeError:
-                #incase we have a decode error just represent as byte string
-                u_content = unicode(str(content).encode('string_escape'))
+            u_content = safe_unicode(content)
         else:
             log.debug(' >> %s' % path)
             #just index file name without it's content