changeset 2388:a0ef98f2520b beta

#453 added an ID field to the Whoosh SCHEMA, which solves the issue of reindexing modified files
author Marcin Kuzminski <marcin@python-works.com>
date Tue, 05 Jun 2012 00:27:59 +0200
parents 7d517a35b6c9
children 324b838250c9
files rhodecode/lib/indexers/__init__.py rhodecode/lib/indexers/daemon.py
diffstat 2 files changed, 10 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/rhodecode/lib/indexers/__init__.py	Mon Jun 04 22:03:40 2012 +0200
+++ b/rhodecode/lib/indexers/__init__.py	Tue Jun 05 00:27:59 2012 +0200
@@ -57,6 +57,7 @@
 
 #INDEX SCHEMA DEFINITION
 SCHEMA = Schema(
+    fileid=ID(unique=True),
     owner=TEXT(),
     repository=TEXT(stored=True),
     path=TEXT(stored=True),
--- a/rhodecode/lib/indexers/daemon.py	Mon Jun 04 22:03:40 2012 +0200
+++ b/rhodecode/lib/indexers/daemon.py	Tue Jun 05 00:27:59 2012 +0200
@@ -146,10 +146,12 @@
             u_content = u''
             indexed += 1
 
+        p = safe_unicode(path)
         writer.add_document(
+            fileid=p,
             owner=unicode(repo.contact),
             repository=safe_unicode(repo_name),
-            path=safe_unicode(path),
+            path=p,
             content=u_content,
             modtime=self.get_node_mtime(node),
             extension=node.extension
@@ -214,8 +216,11 @@
                 if mtime > indexed_time:
                     # The file has changed, delete it and add it to the list of
                     # files to reindex
-                    log.debug('adding to reindex list %s' % indexed_path)
-                    writer.delete_by_term('path', indexed_path)
+                    log.debug('adding to reindex list %s mtime: %s vs %s' % (
+                                    indexed_path, mtime, indexed_time)
+                    )
+                    writer.delete_by_term('fileid', indexed_path)
+
                     to_index.add(indexed_path)
             except (ChangesetError, NodeDoesNotExistError):
                 # This file was deleted since it was indexed
@@ -230,6 +235,7 @@
             for path in self.get_paths(repo):
                 path = safe_unicode(path)
                 if path in to_index or path not in indexed_paths:
+
                     # This is either a file that's changed, or a new file
                     # that wasn't indexed before. So index it!
                     i, iwc = self.add_doc(writer, path, repo, repo_name)