comparison rhodecode/lib/indexers/daemon.py @ 2101:df96adcbb1f7 beta

code garden
author Marcin Kuzminski <marcin@python-works.com>
date Sun, 04 Mar 2012 19:57:42 +0000
parents 324ac367a4da
children 8ecfed1d8f8b
comparison
equal deleted inserted replaced
2100:f0649c7cf94a 2101:df96adcbb1f7
46 from rhodecode.lib.vcs.exceptions import ChangesetError, RepositoryError, \ 46 from rhodecode.lib.vcs.exceptions import ChangesetError, RepositoryError, \
47 NodeDoesNotExistError 47 NodeDoesNotExistError
48 48
49 from whoosh.index import create_in, open_dir 49 from whoosh.index import create_in, open_dir
50 50
51 51 log = logging.getLogger('whoosh_indexer')
52 log = logging.getLogger('whooshIndexer')
53 # create logger
54 log.setLevel(logging.DEBUG)
55 log.propagate = False
56 # create console handler and set level to debug
57 ch = logging.StreamHandler()
58 ch.setLevel(logging.DEBUG)
59
60 # create formatter
61 formatter = logging.Formatter("%(asctime)s - %(name)s -"
62 " %(levelname)s - %(message)s")
63
64 # add formatter to ch
65 ch.setFormatter(formatter)
66
67 # add ch to logger
68 log.addHandler(ch)
69 52
70 53
71 class WhooshIndexingDaemon(object): 54 class WhooshIndexingDaemon(object):
72 """ 55 """
73 Daemon for atomic jobs 56 Daemon for atomic jobs
101 log.info('Cannot run incremental index since it does not' 84 log.info('Cannot run incremental index since it does not'
102 ' yet exist running full build') 85 ' yet exist running full build')
103 self.initial = True 86 self.initial = True
104 87
105 def get_paths(self, repo): 88 def get_paths(self, repo):
106 """recursive walk in root dir and return a set of all path in that dir 89 """
90 recursive walk in root dir and return a set of all path in that dir
107 based on repository walk function 91 based on repository walk function
108 """ 92 """
109 index_paths_ = set() 93 index_paths_ = set()
110 try: 94 try:
111 tip = repo.get_changeset('tip') 95 tip = repo.get_changeset('tip')
125 109
126 def get_node_mtime(self, node): 110 def get_node_mtime(self, node):
127 return mktime(node.last_changeset.date.timetuple()) 111 return mktime(node.last_changeset.date.timetuple())
128 112
129 def add_doc(self, writer, path, repo, repo_name): 113 def add_doc(self, writer, path, repo, repo_name):
130 """Adding doc to writer this function itself fetches data from 114 """
131 the instance of vcs backend""" 115 Adding doc to writer this function itself fetches data from
116 the instance of vcs backend
117 """
118
132 node = self.get_node(repo, path) 119 node = self.get_node(repo, path)
133 120
134 #we just index the content of chosen files, and skip binary files 121 # we just index the content of chosen files, and skip binary files
135 if node.extension in INDEX_EXTENSIONS and not node.is_binary: 122 if node.extension in INDEX_EXTENSIONS and not node.is_binary:
136 123
137 u_content = node.content 124 u_content = node.content
138 if not isinstance(u_content, unicode): 125 if not isinstance(u_content, unicode):
139 log.warning(' >> %s Could not get this content as unicode ' 126 log.warning(' >> %s Could not get this content as unicode '
140 'replacing with empty content', path) 127 'replacing with empty content' % path)
141 u_content = u'' 128 u_content = u''
142 else: 129 else:
143 log.debug(' >> %s [WITH CONTENT]' % path) 130 log.debug(' >> %s [WITH CONTENT]' % path)
144 131
145 else: 132 else:
146 log.debug(' >> %s' % path) 133 log.debug(' >> %s' % path)
147 #just index file name without it's content 134 # just index file name without it's content
148 u_content = u'' 135 u_content = u''
149 136
150 writer.add_document(owner=unicode(repo.contact), 137 writer.add_document(
151 repository=safe_unicode(repo_name), 138 owner=unicode(repo.contact),
152 path=safe_unicode(path), 139 repository=safe_unicode(repo_name),
153 content=u_content, 140 path=safe_unicode(path),
154 modtime=self.get_node_mtime(node), 141 content=u_content,
155 extension=node.extension) 142 modtime=self.get_node_mtime(node),
143 extension=node.extension
144 )
156 145
157 def build_index(self): 146 def build_index(self):
158 if os.path.exists(self.index_location): 147 if os.path.exists(self.index_location):
159 log.debug('removing previous index') 148 log.debug('removing previous index')
160 rmtree(self.index_location) 149 rmtree(self.index_location)