changeset 2373:1828eb7fa688 beta

#469 added --update-only option to whoosh to re-index only the given list of repos in the index
author Marcin Kuzminski <marcin@python-works.com>
date Sat, 02 Jun 2012 18:01:56 +0200
parents 95bea8088213
children be2163ef127e
files docs/changelog.rst rhodecode/lib/indexers/__init__.py rhodecode/lib/indexers/daemon.py
diffstat 3 files changed, 41 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/docs/changelog.rst	Sat Jun 02 16:53:21 2012 +0200
+++ b/docs/changelog.rst	Sat Jun 02 18:01:56 2012 +0200
@@ -20,6 +20,8 @@
 - new git repos are created as bare now by default
 - #464 added links to groups in permission box
 - #465 mentions autocomplete inside comments boxes
+- #469 added --update-only option to whoosh to re-index only the given list
+  of repos in the index
 
 fixes
 +++++
--- a/rhodecode/lib/indexers/__init__.py	Sat Jun 02 16:53:21 2012 +0200
+++ b/rhodecode/lib/indexers/__init__.py	Sat Jun 02 18:01:56 2012 +0200
@@ -93,6 +93,8 @@
             if self.options.repo_location else RepoModel().repos_path
         repo_list = map(strip, self.options.repo_list.split(',')) \
             if self.options.repo_list else None
+        repo_update_list = map(strip, self.options.repo_update_list.split(',')) \
+            if self.options.repo_update_list else None
         load_rcextensions(config['here'])
         #======================================================================
         # WHOOSH DAEMON
@@ -103,7 +105,8 @@
             l = DaemonLock(file_=jn(dn(dn(index_location)), 'make_index.lock'))
             WhooshIndexingDaemon(index_location=index_location,
                                  repo_location=repo_location,
-                                 repo_list=repo_list,)\
+                                 repo_list=repo_list,
+                                 repo_update_list=repo_update_list)\
                 .run(full_index=self.options.full_index)
             l.release()
         except LockHeld:
@@ -119,7 +122,14 @@
                           action='store',
                           dest='repo_list',
                           help="Specifies a comma separated list of repositores "
-                                "to build index on OPTIONAL",
+                                "to build index on. If not given all repositories "
+                                "are scanned for indexing. OPTIONAL",
+                          )
+        self.parser.add_option('--update-only',
+                          action='store',
+                          dest='repo_update_list',
+                          help="Specifies a comma separated list of repositores "
+                                "to re-build index on. OPTIONAL",
                           )
         self.parser.add_option('-f',
                           action='store_true',
--- a/rhodecode/lib/indexers/daemon.py	Sat Jun 02 16:53:21 2012 +0200
+++ b/rhodecode/lib/indexers/daemon.py	Sat Jun 02 18:01:56 2012 +0200
@@ -53,11 +53,12 @@
 
 class WhooshIndexingDaemon(object):
     """
-    Daemon for atomic jobs
+    Daemon for atomic indexing jobs
     """
 
     def __init__(self, indexname=IDX_NAME, index_location=None,
-                 repo_location=None, sa=None, repo_list=None):
+                 repo_location=None, sa=None, repo_list=None,
+                 repo_update_list=None):
         self.indexname = indexname
 
         self.index_location = index_location
@@ -70,13 +71,23 @@
 
         self.repo_paths = ScmModel(sa).repo_scan(self.repo_location)
 
+        #filter repo list
         if repo_list:
-            filtered_repo_paths = {}
+            self.filtered_repo_paths = {}
             for repo_name, repo in self.repo_paths.items():
                 if repo_name in repo_list:
-                    filtered_repo_paths[repo_name] = repo
+                    self.filtered_repo_paths[repo_name] = repo
+
+            self.repo_paths = self.filtered_repo_paths
 
-            self.repo_paths = filtered_repo_paths
+        #filter update repo list
+        self.filtered_repo_update_paths = {}
+        if repo_update_list:
+            self.filtered_repo_update_paths = {}
+            for repo_name, repo in self.repo_paths.items():
+                if repo_name in repo_update_list:
+                    self.filtered_repo_update_paths[repo_name] = repo
+            self.repo_paths = self.filtered_repo_update_paths
 
         self.initial = False
         if not os.path.isdir(self.index_location):
@@ -172,8 +183,8 @@
         log.debug('>>> FINISHED BUILDING INDEX <<<')
 
     def update_index(self):
-        log.debug(('STARTING INCREMENTAL INDEXING UPDATE FOR EXTENSIONS %s '
-                   'AND REPOS %s') % (INDEX_EXTENSIONS, self.repo_paths))
+        log.debug((u'STARTING INCREMENTAL INDEXING UPDATE FOR EXTENSIONS %s '
+                   'AND REPOS %s') % (INDEX_EXTENSIONS, self.repo_paths.keys()))
 
         idx = open_dir(self.index_location, indexname=self.indexname)
         # The set of all paths in the index
@@ -187,18 +198,16 @@
         # Loop over the stored fields in the index
         for fields in reader.all_stored_fields():
             indexed_path = fields['path']
+            indexed_repo_path = fields['repository']
             indexed_paths.add(indexed_path)
 
-            repo = self.repo_paths[fields['repository']]
+            if not indexed_repo_path in self.filtered_repo_update_paths:
+                continue
+
+            repo = self.repo_paths[indexed_repo_path]
 
             try:
                 node = self.get_node(repo, indexed_path)
-            except (ChangesetError, NodeDoesNotExistError):
-                # This file was deleted since it was indexed
-                log.debug('removing from index %s' % indexed_path)
-                writer.delete_by_term('path', indexed_path)
-
-            else:
                 # Check if this file was changed since it was indexed
                 indexed_time = fields['modtime']
                 mtime = self.get_node_mtime(node)
@@ -208,6 +217,10 @@
                     log.debug('adding to reindex list %s' % indexed_path)
                     writer.delete_by_term('path', indexed_path)
                     to_index.add(indexed_path)
+            except (ChangesetError, NodeDoesNotExistError):
+                # This file was deleted since it was indexed
+                log.debug('removing from index %s' % indexed_path)
+                writer.delete_by_term('path', indexed_path)
 
         # Loop over the files in the filesystem
         # Assume we have a function that gathers the filenames of the