kallithea: rhodecode/lib/indexers/__init__.py

comparison rhodecode/lib/indexers/__init__.py @ 2640:5f21a9dcb09d beta

create an index for commit messages and the ability to search them and see results

author	Indra Talip <indra.talip@gmail.com>
date	Fri, 20 Jul 2012 12:50:56 +0200
parents	324b838250c9
children	88b0e82bcba4

comparison

equal deleted inserted replaced

-:f597cfb492f9
+:5f21a9dcb09d
 from string import strip
 from shutil import rmtree
 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
-from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
+from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType
 from whoosh.index import create_in, open_dir
 from whoosh.formats import Characters
 from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter
 from webhelpers.html.builder import escape, literal
 from rhodecode.config.environment import load_environment
 from rhodecode.lib.utils2 import LazyProperty
 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache,\
 load_rcextensions
+log = logging.getLogger(__name__)
 # CUSTOM ANALYZER wordsplit + lowercase filter
 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
 #INDEX SCHEMA DEFINITION
 SCHEMA = Schema(
 fileid=ID(unique=True),
 owner=TEXT(),
 IDX_NAME = 'HG_INDEX'
 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
 FRAGMENTER = ContextFragmenter(200)
+CHGSETS_SCHEMA = Schema(
+path=ID(unique=True, stored=True),
+revision=NUMERIC(unique=True, stored=True),
+last=BOOLEAN(),
+owner=TEXT(),
+repository=ID(unique=True, stored=True),
+author=TEXT(stored=True),
+message=FieldType(format=Characters(), analyzer=ANALYZER,
+scorable=True, stored=True),
+parents=TEXT(),
+added=TEXT(),
+removed=TEXT(),
+changed=TEXT(),
+)
+CHGSET_IDX_NAME = 'CHGSET_INDEX'
 class MakeIndex(BasePasterCommand):
 max_args = 1
 min_args = 1
 slices.append(self.get_full_content(docid))
 return slices
 def get_full_content(self, docid):
 res = self.searcher.stored_fields(docid[0])
+log.debug('result: %s' % res)
 full_repo_path = jn(self.repo_location, res['repository'])
 f_path = res['path'].split(full_repo_path)[-1]
 f_path = f_path.lstrip(os.sep)
+res.update({'f_path': f_path})
-content_short = self.get_short_content(res, docid[1])
-res.update({'content_short': content_short,
+if self.search_type == 'content':
-'content_short_hl': self.highlight(content_short),
+content_short = self.get_short_content(res, docid[1])
-'f_path': f_path})
+res.update({'content_short': content_short,
+'content_short_hl': self.highlight(content_short)})
+elif self.search_type == 'message':
+res.update({'message_hl': self.highlight(res['message'])})
+log.debug('result: %s' % res)
 return res
 def get_short_content(self, res, chunks):
 :param matcher:
 :param size:
 """
 memory = [(0, 0)]
-for span in self.matcher.spans():
+if self.matcher.supports('positions'):
-start = span.startchar or 0
+for span in self.matcher.spans():
-end = span.endchar or 0
+start = span.startchar or 0
-start_offseted = max(0, start - self.fragment_size)
+end = span.endchar or 0
-end_offseted = end + self.fragment_size
+start_offseted = max(0, start - self.fragment_size)
+end_offseted = end + self.fragment_size
-if start_offseted < memory[-1][1]:
-start_offseted = memory[-1][1]
+if start_offseted < memory[-1][1]:
-memory.append((start_offseted, end_offseted,))
+start_offseted = memory[-1][1]
-yield (start_offseted, end_offseted,)
+memory.append((start_offseted, end_offseted,))
+yield (start_offseted, end_offseted,)
 def highlight(self, content, top=5):
-if self.search_type != 'content':
+if self.search_type not in ['content', 'message']:
 return ''
 hl = highlight(
 text=content,
 terms=self.highlight_items,
 analyzer=ANALYZER,

Mercurial > kallithea

comparison rhodecode/lib/indexers/__init__.py @ 2640:5f21a9dcb09d beta

comparison rhodecode/lib/indexers/__init__.py @ 2640:5f21a9dcb09d beta