Mercurial > kallithea
changeset 2373:1828eb7fa688 beta
#469 added --update-only option to whoosh to re-index only given list
of repos in index
author | Marcin Kuzminski <marcin@python-works.com> |
---|---|
date | Sat, 02 Jun 2012 18:01:56 +0200 |
parents | 95bea8088213 |
children | be2163ef127e |
files | docs/changelog.rst rhodecode/lib/indexers/__init__.py rhodecode/lib/indexers/daemon.py |
diffstat | 3 files changed, 41 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- a/docs/changelog.rst Sat Jun 02 16:53:21 2012 +0200 +++ b/docs/changelog.rst Sat Jun 02 18:01:56 2012 +0200 @@ -20,6 +20,8 @@ - new git repos are created as bare now by default - #464 added links to groups in permission box - #465 mentions autocomplete inside comments boxes +- #469 added --update-only option to whoosh to re-index only given list + of repos in index fixes +++++
--- a/rhodecode/lib/indexers/__init__.py Sat Jun 02 16:53:21 2012 +0200 +++ b/rhodecode/lib/indexers/__init__.py Sat Jun 02 18:01:56 2012 +0200 @@ -93,6 +93,8 @@ if self.options.repo_location else RepoModel().repos_path repo_list = map(strip, self.options.repo_list.split(',')) \ if self.options.repo_list else None + repo_update_list = map(strip, self.options.repo_update_list.split(',')) \ + if self.options.repo_update_list else None load_rcextensions(config['here']) #====================================================================== # WHOOSH DAEMON @@ -103,7 +105,8 @@ l = DaemonLock(file_=jn(dn(dn(index_location)), 'make_index.lock')) WhooshIndexingDaemon(index_location=index_location, repo_location=repo_location, - repo_list=repo_list,)\ + repo_list=repo_list, + repo_update_list=repo_update_list)\ .run(full_index=self.options.full_index) l.release() except LockHeld: @@ -119,7 +122,14 @@ action='store', dest='repo_list', help="Specifies a comma separated list of repositores " - "to build index on OPTIONAL", + "to build index on. If not given all repositories " + "are scanned for indexing. OPTIONAL", + ) + self.parser.add_option('--update-only', + action='store', + dest='repo_update_list', + help="Specifies a comma separated list of repositores " + "to re-build index on. OPTIONAL", ) self.parser.add_option('-f', action='store_true',
--- a/rhodecode/lib/indexers/daemon.py Sat Jun 02 16:53:21 2012 +0200 +++ b/rhodecode/lib/indexers/daemon.py Sat Jun 02 18:01:56 2012 +0200 @@ -53,11 +53,12 @@ class WhooshIndexingDaemon(object): """ - Daemon for atomic jobs + Daemon for atomic indexing jobs """ def __init__(self, indexname=IDX_NAME, index_location=None, - repo_location=None, sa=None, repo_list=None): + repo_location=None, sa=None, repo_list=None, + repo_update_list=None): self.indexname = indexname self.index_location = index_location @@ -70,13 +71,23 @@ self.repo_paths = ScmModel(sa).repo_scan(self.repo_location) + #filter repo list if repo_list: - filtered_repo_paths = {} + self.filtered_repo_paths = {} for repo_name, repo in self.repo_paths.items(): if repo_name in repo_list: - filtered_repo_paths[repo_name] = repo + self.filtered_repo_paths[repo_name] = repo + + self.repo_paths = self.filtered_repo_paths - self.repo_paths = filtered_repo_paths + #filter update repo list + self.filtered_repo_update_paths = {} + if repo_update_list: + self.filtered_repo_update_paths = {} + for repo_name, repo in self.repo_paths.items(): + if repo_name in repo_update_list: + self.filtered_repo_update_paths[repo_name] = repo + self.repo_paths = self.filtered_repo_update_paths self.initial = False if not os.path.isdir(self.index_location): @@ -172,8 +183,8 @@ log.debug('>>> FINISHED BUILDING INDEX <<<') def update_index(self): - log.debug(('STARTING INCREMENTAL INDEXING UPDATE FOR EXTENSIONS %s ' - 'AND REPOS %s') % (INDEX_EXTENSIONS, self.repo_paths)) + log.debug((u'STARTING INCREMENTAL INDEXING UPDATE FOR EXTENSIONS %s ' + 'AND REPOS %s') % (INDEX_EXTENSIONS, self.repo_paths.keys())) idx = open_dir(self.index_location, indexname=self.indexname) # The set of all paths in the index @@ -187,18 +198,16 @@ # Loop over the stored fields in the index for fields in reader.all_stored_fields(): indexed_path = fields['path'] + indexed_repo_path = fields['repository'] indexed_paths.add(indexed_path) - repo = self.repo_paths[fields['repository']] + if not indexed_repo_path in self.filtered_repo_update_paths: + continue + + repo = self.repo_paths[indexed_repo_path] try: node = self.get_node(repo, indexed_path) - except (ChangesetError, NodeDoesNotExistError): - # This file was deleted since it was indexed - log.debug('removing from index %s' % indexed_path) - writer.delete_by_term('path', indexed_path) - - else: # Check if this file was changed since it was indexed indexed_time = fields['modtime'] mtime = self.get_node_mtime(node) @@ -208,6 +217,10 @@ log.debug('adding to reindex list %s' % indexed_path) writer.delete_by_term('path', indexed_path) to_index.add(indexed_path) + except (ChangesetError, NodeDoesNotExistError): + # This file was deleted since it was indexed + log.debug('removing from index %s' % indexed_path) + writer.delete_by_term('path', indexed_path) # Loop over the files in the filesystem # Assume we have a function that gathers the filenames of the