Mercurial > kallithea

diff rhodecode/lib/indexers/__init__.py @ 2640:5f21a9dcb09d beta
create an index for commit messages and the ability to search them and see results
author: Indra Talip <indra.talip@gmail.com>
date: Fri, 20 Jul 2012 12:50:56 +0200
parents: 324b838250c9
children: 88b0e82bcba4
--- a/rhodecode/lib/indexers/__init__.py	Wed Jul 18 22:07:46 2012 +0200
+++ b/rhodecode/lib/indexers/__init__.py	Fri Jul 20 12:50:56 2012 +0200
@@ -35,7 +35,7 @@
 from shutil import rmtree
 
 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
-from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
+from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType
 from whoosh.index import create_in, open_dir
 from whoosh.formats import Characters
 from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter
@@ -51,10 +51,11 @@
 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache,\
     load_rcextensions
 
+log = logging.getLogger(__name__)
+
 # CUSTOM ANALYZER wordsplit + lowercase filter
 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
 
-
 #INDEX SCHEMA DEFINITION
 SCHEMA = Schema(
     fileid=ID(unique=True),
@@ -71,6 +72,22 @@
 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
 FRAGMENTER = ContextFragmenter(200)
 
+CHGSETS_SCHEMA = Schema(
+    path=ID(unique=True, stored=True),
+    revision=NUMERIC(unique=True, stored=True),
+    last=BOOLEAN(),
+    owner=TEXT(),
+    repository=ID(unique=True, stored=True),
+    author=TEXT(stored=True),
+    message=FieldType(format=Characters(), analyzer=ANALYZER,
+                      scorable=True, stored=True),
+    parents=TEXT(),
+    added=TEXT(),
+    removed=TEXT(),
+    changed=TEXT(),
+)
+
+CHGSET_IDX_NAME = 'CHGSET_INDEX'
 
 class MakeIndex(BasePasterCommand):
 
@@ -191,14 +208,20 @@
 
     def get_full_content(self, docid):
         res = self.searcher.stored_fields(docid[0])
+        log.debug('result: %s' % res)
         full_repo_path = jn(self.repo_location, res['repository'])
         f_path = res['path'].split(full_repo_path)[-1]
         f_path = f_path.lstrip(os.sep)
+        res.update({'f_path': f_path})
 
-        content_short = self.get_short_content(res, docid[1])
-        res.update({'content_short': content_short,
-                    'content_short_hl': self.highlight(content_short),
-                    'f_path': f_path})
+        if self.search_type == 'content':
+            content_short = self.get_short_content(res, docid[1])
+            res.update({'content_short': content_short,
+                        'content_short_hl': self.highlight(content_short)})
+        elif self.search_type == 'message':
+            res.update({'message_hl': self.highlight(res['message'])})
+
+        log.debug('result: %s' % res)
 
         return res
 
@@ -216,19 +239,20 @@
         :param size:
         """
         memory = [(0, 0)]
-        for span in self.matcher.spans():
-            start = span.startchar or 0
-            end = span.endchar or 0
-            start_offseted = max(0, start - self.fragment_size)
-            end_offseted = end + self.fragment_size
+        if self.matcher.supports('positions'): 
+            for span in self.matcher.spans():
+                start = span.startchar or 0
+                end = span.endchar or 0
+                start_offseted = max(0, start - self.fragment_size)
+                end_offseted = end + self.fragment_size
 
-            if start_offseted < memory[-1][1]:
-                start_offseted = memory[-1][1]
-            memory.append((start_offseted, end_offseted,))
-            yield (start_offseted, end_offseted,)
+                if start_offseted < memory[-1][1]:
+                    start_offseted = memory[-1][1]
+                memory.append((start_offseted, end_offseted,))
+                yield (start_offseted, end_offseted,)
 
     def highlight(self, content, top=5):
-        if self.search_type != 'content':
+        if self.search_type not in ['content', 'message']:
             return ''
         hl = highlight(
             text=content,
author	Indra Talip <indra.talip@gmail.com>
date	Fri, 20 Jul 2012 12:50:56 +0200
parents	324b838250c9
children	88b0e82bcba4