kallithea: rhodecode/lib/indexers/daemon.py comparison

comparison rhodecode/lib/indexers/daemon.py @ 2165:dc2584ba5fbc

merged beta into default branch

author	Marcin Kuzminski <marcin@python-works.com>
date	Wed, 28 Mar 2012 19:54:16 +0200
parents	82a88013a3fd 8ecfed1d8f8b
children	63e58ef80ef1

comparison

Legend: equal | deleted | inserted | replaced

-:8fd6650bb436
+:dc2584ba5fbc
 #to get the rhodecode import
 project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
 sys.path.append(project_path)
+from rhodecode.config.conf import INDEX_EXTENSIONS
 from rhodecode.model.scm import ScmModel
-from rhodecode.lib import safe_unicode
+from rhodecode.lib.utils2 import safe_unicode
-from rhodecode.lib.indexers import INDEX_EXTENSIONS, SCHEMA, IDX_NAME
+from rhodecode.lib.indexers import SCHEMA, IDX_NAME
 from rhodecode.lib.vcs.exceptions import ChangesetError, RepositoryError, \
 NodeDoesNotExistError
 from whoosh.index import create_in, open_dir
+log = logging.getLogger('whoosh_indexer')
-log = logging.getLogger('whooshIndexer')
-# create logger
-log.setLevel(logging.DEBUG)
-log.propagate = False
-# create console handler and set level to debug
-ch = logging.StreamHandler()
-ch.setLevel(logging.DEBUG)
-# create formatter
-formatter = logging.Formatter("%(asctime)s - %(name)s -"
-" %(levelname)s - %(message)s")
-# add formatter to ch
-ch.setFormatter(formatter)
-# add ch to logger
-log.addHandler(ch)
 class WhooshIndexingDaemon(object):
 """
 Daemon for atomic jobs
 log.info('Cannot run incremental index since it does not'
 ' yet exist running full build')
 self.initial = True
 def get_paths(self, repo):
-"""recursive walk in root dir and return a set of all path in that dir
+"""
+recursive walk in root dir and return a set of all path in that dir
 based on repository walk function
 """
 index_paths_ = set()
 try:
 tip = repo.get_changeset('tip')
 def get_node_mtime(self, node):
 return mktime(node.last_changeset.date.timetuple())
 def add_doc(self, writer, path, repo, repo_name):
-"""Adding doc to writer this function itself fetches data from
+"""
-the instance of vcs backend"""
+Adding doc to writer this function itself fetches data from
+the instance of vcs backend
+"""
 node = self.get_node(repo, path)
+indexed = indexed_w_content = 0
-#we just index the content of chosen files, and skip binary files
+# we just index the content of chosen files, and skip binary files
 if node.extension in INDEX_EXTENSIONS and not node.is_binary:
 u_content = node.content
 if not isinstance(u_content, unicode):
 log.warning('  >> %s Could not get this content as unicode '
-'replacing with empty content', path)
+'replacing with empty content' % path)
 u_content = u''
 else:
 log.debug('    >> %s [WITH CONTENT]' % path)
+indexed_w_content += 1
 else:
 log.debug('    >> %s' % path)
-#just index file name without it's content
+# just index file name without it's content
 u_content = u''
+indexed += 1
-writer.add_document(owner=unicode(repo.contact),
-repository=safe_unicode(repo_name),
+writer.add_document(
-path=safe_unicode(path),
+owner=unicode(repo.contact),
-content=u_content,
+repository=safe_unicode(repo_name),
-modtime=self.get_node_mtime(node),
+path=safe_unicode(path),
-extension=node.extension)
+content=u_content,
+modtime=self.get_node_mtime(node),
+extension=node.extension
+)
+return indexed, indexed_w_content
 def build_index(self):
 if os.path.exists(self.index_location):
 log.debug('removing previous index')
 rmtree(self.index_location)
 if not os.path.exists(self.index_location):
 os.mkdir(self.index_location)
 idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME)
 writer = idx.writer()
+log.debug('BUILDIN INDEX FOR EXTENSIONS %s' % INDEX_EXTENSIONS)
 for repo_name, repo in self.repo_paths.items():
 log.debug('building index @ %s' % repo.path)
+i_cnt = iwc_cnt = 0
 for idx_path in self.get_paths(repo):
-self.add_doc(writer, idx_path, repo, repo_name)
+i, iwc = self.add_doc(writer, idx_path, repo, repo_name)
+i_cnt += i
+iwc_cnt += iwc
+log.debug('added %s files %s with content for repo %s' % (
+i_cnt + iwc_cnt, iwc_cnt, repo.path)
+)
 log.debug('>> COMMITING CHANGES <<')
 writer.commit(merge=True)
 log.debug('>>> FINISHED BUILDING INDEX <<<')
 def update_index(self):
-log.debug('STARTING INCREMENTAL INDEXING UPDATE')
+log.debug('STARTING INCREMENTAL INDEXING UPDATE FOR EXTENSIONS %s' %
+INDEX_EXTENSIONS)
 idx = open_dir(self.index_location, indexname=self.indexname)
 # The set of all paths in the index
 indexed_paths = set()
 # The set of all paths we need to re-index
 to_index.add(indexed_path)
 # Loop over the files in the filesystem
 # Assume we have a function that gathers the filenames of the
 # documents to be indexed
+ri_cnt = riwc_cnt = 0
 for repo_name, repo in self.repo_paths.items():
 for path in self.get_paths(repo):
 if path in to_index or path not in indexed_paths:
 # This is either a file that's changed, or a new file
 # that wasn't indexed before. So index it!
-self.add_doc(writer, path, repo, repo_name)
+i, iwc = self.add_doc(writer, path, repo, repo_name)
 log.debug('re indexing %s' % path)
+ri_cnt += i
+riwc_cnt += iwc
+log.debug('added %s files %s with content for repo %s' % (
+ri_cnt + riwc_cnt, riwc_cnt, repo.path)
+)
 log.debug('>> COMMITING CHANGES <<')
 writer.commit(merge=True)
 log.debug('>>> FINISHED REBUILDING INDEX <<<')
 def run(self, full_index=False):

Mercurial > kallithea

comparison rhodecode/lib/indexers/daemon.py @ 2165:dc2584ba5fbc