comparison pylons_app/lib/indexers/daemon.py @ 443:e5157e2a530e

added safe unicode function, and implemented it in whoosh indexer
author Marcin Kuzminski <marcin@python-works.com>
date Wed, 01 Sep 2010 23:38:03 +0200
parents c59c4d4323e7
children d726f62f886e
comparison
equal deleted inserted replaced
442:d66a7fa7689b 443:e5157e2a530e
34 34
35 from pidlock import LockHeld, DaemonLock 35 from pidlock import LockHeld, DaemonLock
36 import traceback 36 import traceback
37 from pylons_app.config.environment import load_environment 37 from pylons_app.config.environment import load_environment
38 from pylons_app.model.hg_model import HgModel 38 from pylons_app.model.hg_model import HgModel
39 from pylons_app.lib.helpers import safe_unicode
39 from whoosh.index import create_in, open_dir 40 from whoosh.index import create_in, open_dir
40 from shutil import rmtree 41 from shutil import rmtree
41 from pylons_app.lib.indexers import ANALYZER, INDEX_EXTENSIONS, IDX_LOCATION, \ 42 from pylons_app.lib.indexers import ANALYZER, INDEX_EXTENSIONS, IDX_LOCATION, \
42 SCHEMA, IDX_NAME 43 SCHEMA, IDX_NAME
43 44
75 if ext in INDEX_EXTENSIONS: 76 if ext in INDEX_EXTENSIONS:
76 log.debug(' >> %s [WITH CONTENT]' % path) 77 log.debug(' >> %s [WITH CONTENT]' % path)
77 fobj = open(path, 'rb') 78 fobj = open(path, 'rb')
78 content = fobj.read() 79 content = fobj.read()
79 fobj.close() 80 fobj.close()
80 try: 81 u_content = safe_unicode(content)
81 u_content = unicode(content)
82 except UnicodeDecodeError:
83 #incase we have a decode error just represent as byte string
84 u_content = unicode(str(content).encode('string_escape'))
85 else: 82 else:
86 log.debug(' >> %s' % path) 83 log.debug(' >> %s' % path)
87 #just index file name without it's content 84 #just index file name without it's content
88 u_content = u'' 85 u_content = u''
89 86