Mercurial > kallithea
comparison pylons_app/lib/indexers/daemon.py @ 411:9b67cebe6609
some fixes to whoosh indexer daemon
author | Marcin Kuzminski <marcin@python-works.com> |
---|---|
date | Wed, 18 Aug 2010 19:41:08 +0200 |
parents | 0c9dfae57107 |
children | 28f19fa562df |
comparison
equal
deleted
inserted
replaced
410:9a7ae16ff53e | 411:9b67cebe6609 |
---|---|
23 @author: marcink | 23 @author: marcink |
24 A daemon will read from task table and run tasks | 24 A daemon will read from task table and run tasks |
25 """ | 25 """ |
26 import sys | 26 import sys |
27 import os | 27 import os |
28 from pidlock import LockHeld, DaemonLock | |
29 import traceback | |
30 | |
31 from os.path import dirname as dn | 28 from os.path import dirname as dn |
32 from os.path import join as jn | 29 from os.path import join as jn |
33 | 30 |
34 #to get the pylons_app import | 31 #to get the pylons_app import |
35 sys.path.append(dn(dn(dn(dn(os.path.realpath(__file__)))))) | 32 project_path = dn(dn(dn(dn(os.path.realpath(__file__))))) |
33 sys.path.append(project_path) | |
36 | 34 |
35 from pidlock import LockHeld, DaemonLock | |
36 import traceback | |
37 from pylons_app.config.environment import load_environment | 37 from pylons_app.config.environment import load_environment |
38 from pylons_app.model.hg_model import HgModel | 38 from pylons_app.model.hg_model import HgModel |
39 from whoosh.index import create_in, open_dir | 39 from whoosh.index import create_in, open_dir |
40 from shutil import rmtree | 40 from shutil import rmtree |
41 from pylons_app.lib.indexers import ANALYZER, EXCLUDE_EXTENSIONS, IDX_LOCATION, SCHEMA, IDX_NAME | 41 from pylons_app.lib.indexers import ANALYZER, EXCLUDE_EXTENSIONS, IDX_LOCATION, \ |
42 SCHEMA, IDX_NAME | |
43 | |
42 import logging | 44 import logging |
43 log = logging.getLogger(__name__) | 45 import logging.config |
44 | 46 logging.config.fileConfig(jn(project_path, 'development.ini')) |
45 | 47 log = logging.getLogger('whooshIndexer') |
46 location = '/home/marcink/python_workspace_dirty/*' | |
47 | 48 |
48 def scan_paths(root_location): | 49 def scan_paths(root_location): |
49 return HgModel.repo_scan('/', root_location, None, True) | 50 return HgModel.repo_scan('/', root_location, None, True) |
50 | 51 |
51 class WhooshIndexingDaemon(object): | 52 class WhooshIndexingDaemon(object): |
52 """Daemon for atomic jobs""" | 53 """Daemon for atomic jobs""" |
53 | 54 |
54 def __init__(self, indexname='HG_INDEX'): | 55 def __init__(self, indexname='HG_INDEX', repo_location=None): |
55 self.indexname = indexname | 56 self.indexname = indexname |
56 | 57 self.repo_location = repo_location |
57 | 58 |
58 def get_paths(self, root_dir): | 59 def get_paths(self, root_dir): |
59 """recursive walk in root dir and return a set of all paths in that dir | 60 """recursive walk in root dir and return a set of all paths in that dir |
60 excluding files in .hg dir""" | 61 excluding files in .hg dir""" |
61 index_paths_ = set() | 62 index_paths_ = set() |
95 os.mkdir(IDX_LOCATION) | 96 os.mkdir(IDX_LOCATION) |
96 | 97 |
97 idx = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME) | 98 idx = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME) |
98 writer = idx.writer() | 99 writer = idx.writer() |
99 | 100 |
100 for cnt, repo in enumerate(scan_paths(location).values()): | 101 for cnt, repo in enumerate(scan_paths(self.repo_location).values()): |
101 log.debug('building index @ %s' % repo.path) | 102 log.debug('building index @ %s' % repo.path) |
102 | 103 |
103 for idx_path in self.get_paths(repo.path): | 104 for idx_path in self.get_paths(repo.path): |
104 log.debug(' >> %s' % idx_path) | 105 log.debug(' >> %s' % idx_path) |
105 self.add_doc(writer, idx_path, repo) | 106 self.add_doc(writer, idx_path, repo) |
147 #writer.commit() | 148 #writer.commit() |
148 | 149 |
149 # Loop over the files in the filesystem | 150 # Loop over the files in the filesystem |
150 # Assume we have a function that gathers the filenames of the | 151 # Assume we have a function that gathers the filenames of the |
151 # documents to be indexed | 152 # documents to be indexed |
152 for repo in scan_paths(location).values(): | 153 for repo in scan_paths(self.repo_location).values(): |
153 for path in self.get_paths(repo.path): | 154 for path in self.get_paths(repo.path): |
154 if path in to_index or path not in indexed_paths: | 155 if path in to_index or path not in indexed_paths: |
155 # This is either a file that's changed, or a new file | 156 # This is either a file that's changed, or a new file |
156 # that wasn't indexed before. So index it! | 157 # that wasn't indexed before. So index it! |
157 self.add_doc(writer, path, repo) | 158 self.add_doc(writer, path, repo) |
167 self.build_index() | 168 self.build_index() |
168 else: | 169 else: |
169 self.update_index() | 170 self.update_index() |
170 | 171 |
171 if __name__ == "__main__": | 172 if __name__ == "__main__": |
173 repo_location = '/home/marcink/python_workspace_dirty/*' | |
172 | 174 |
173 #config = load_environment() | |
174 #print config | |
175 try: | 175 try: |
176 l = DaemonLock() | 176 l = DaemonLock() |
177 WhooshIndexingDaemon().run(full_index=True) | 177 WhooshIndexingDaemon(repo_location=repo_location).run(full_index=True) |
178 l.release() | 178 l.release() |
179 except LockHeld: | 179 except LockHeld: |
180 sys.exit(1) | 180 sys.exit(1) |
181 | 181 |