comparison pylons_app/lib/indexers/daemon.py @ 411:9b67cebe6609

some fixes to whoosh indexer daemon
author Marcin Kuzminski <marcin@python-works.com>
date Wed, 18 Aug 2010 19:41:08 +0200
parents 0c9dfae57107
children 28f19fa562df
comparison
equal deleted inserted replaced
410:9a7ae16ff53e 411:9b67cebe6609
23 @author: marcink 23 @author: marcink
24 A daemon will read from task table and run tasks 24 A daemon will read from task table and run tasks
25 """ 25 """
26 import sys 26 import sys
27 import os 27 import os
28 from pidlock import LockHeld, DaemonLock
29 import traceback
30
31 from os.path import dirname as dn 28 from os.path import dirname as dn
32 from os.path import join as jn 29 from os.path import join as jn
33 30
34 #to get the pylons_app import 31 #to get the pylons_app import
35 sys.path.append(dn(dn(dn(dn(os.path.realpath(__file__)))))) 32 project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
33 sys.path.append(project_path)
36 34
35 from pidlock import LockHeld, DaemonLock
36 import traceback
37 from pylons_app.config.environment import load_environment 37 from pylons_app.config.environment import load_environment
38 from pylons_app.model.hg_model import HgModel 38 from pylons_app.model.hg_model import HgModel
39 from whoosh.index import create_in, open_dir 39 from whoosh.index import create_in, open_dir
40 from shutil import rmtree 40 from shutil import rmtree
41 from pylons_app.lib.indexers import ANALYZER, EXCLUDE_EXTENSIONS, IDX_LOCATION, SCHEMA, IDX_NAME 41 from pylons_app.lib.indexers import ANALYZER, EXCLUDE_EXTENSIONS, IDX_LOCATION, \
42 SCHEMA, IDX_NAME
43
42 import logging 44 import logging
43 log = logging.getLogger(__name__) 45 import logging.config
44 46 logging.config.fileConfig(jn(project_path, 'development.ini'))
45 47 log = logging.getLogger('whooshIndexer')
46 location = '/home/marcink/python_workspace_dirty/*'
47 48
48 def scan_paths(root_location): 49 def scan_paths(root_location):
49 return HgModel.repo_scan('/', root_location, None, True) 50 return HgModel.repo_scan('/', root_location, None, True)
50 51
51 class WhooshIndexingDaemon(object): 52 class WhooshIndexingDaemon(object):
52 """Daemon for atomic jobs""" 53 """Daemon for atomic jobs"""
53 54
54 def __init__(self, indexname='HG_INDEX'): 55 def __init__(self, indexname='HG_INDEX', repo_location=None):
55 self.indexname = indexname 56 self.indexname = indexname
56 57 self.repo_location = repo_location
57 58
58 def get_paths(self, root_dir): 59 def get_paths(self, root_dir):
59 """recursive walk in root dir and return a set of all paths in that dir 60 """recursive walk in root dir and return a set of all paths in that dir
60 excluding files in .hg dir""" 61 excluding files in .hg dir"""
61 index_paths_ = set() 62 index_paths_ = set()
95 os.mkdir(IDX_LOCATION) 96 os.mkdir(IDX_LOCATION)
96 97
97 idx = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME) 98 idx = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME)
98 writer = idx.writer() 99 writer = idx.writer()
99 100
100 for cnt, repo in enumerate(scan_paths(location).values()): 101 for cnt, repo in enumerate(scan_paths(self.repo_location).values()):
101 log.debug('building index @ %s' % repo.path) 102 log.debug('building index @ %s' % repo.path)
102 103
103 for idx_path in self.get_paths(repo.path): 104 for idx_path in self.get_paths(repo.path):
104 log.debug(' >> %s' % idx_path) 105 log.debug(' >> %s' % idx_path)
105 self.add_doc(writer, idx_path, repo) 106 self.add_doc(writer, idx_path, repo)
147 #writer.commit() 148 #writer.commit()
148 149
149 # Loop over the files in the filesystem 150 # Loop over the files in the filesystem
150 # Assume we have a function that gathers the filenames of the 151 # Assume we have a function that gathers the filenames of the
151 # documents to be indexed 152 # documents to be indexed
152 for repo in scan_paths(location).values(): 153 for repo in scan_paths(self.repo_location).values():
153 for path in self.get_paths(repo.path): 154 for path in self.get_paths(repo.path):
154 if path in to_index or path not in indexed_paths: 155 if path in to_index or path not in indexed_paths:
155 # This is either a file that's changed, or a new file 156 # This is either a file that's changed, or a new file
156 # that wasn't indexed before. So index it! 157 # that wasn't indexed before. So index it!
157 self.add_doc(writer, path, repo) 158 self.add_doc(writer, path, repo)
167 self.build_index() 168 self.build_index()
168 else: 169 else:
169 self.update_index() 170 self.update_index()
170 171
171 if __name__ == "__main__": 172 if __name__ == "__main__":
173 repo_location = '/home/marcink/python_workspace_dirty/*'
172 174
173 #config = load_environment()
174 #print config
175 try: 175 try:
176 l = DaemonLock() 176 l = DaemonLock()
177 WhooshIndexingDaemon().run(full_index=True) 177 WhooshIndexingDaemon(repo_location=repo_location).run(full_index=True)
178 l.release() 178 l.release()
179 except LockHeld: 179 except LockHeld:
180 sys.exit(1) 180 sys.exit(1)
181 181