kallithea: rhodecode/lib/indexers/__init__.py

comparison rhodecode/lib/indexers/__init__.py @ 2031:82a88013a3fd

merge 1.3 into stable

author	Marcin Kuzminski <marcin@python-works.com>
date	Sun, 26 Feb 2012 17:25:09 +0200
parents	752b0a7b7679 b6c902d88472
children	dc2584ba5fbc

comparison

equal deleted inserted replaced

-:ab0e122b38a7
+:82a88013a3fd
 Whoosh indexing module for RhodeCode
 :created_on: Aug 17, 2010
 :author: marcink
-:copyright: (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
+:copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
 :license: GPLv3, see COPYING for more details.
 """
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
 from whoosh.index import create_in, open_dir
 from whoosh.formats import Characters
-from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
+from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter
 from webhelpers.html.builder import escape
 from sqlalchemy import engine_from_config
-from vcs.utils.lazy import LazyProperty
 from rhodecode.model import init_model
 from rhodecode.model.scm import ScmModel
 from rhodecode.model.repo import RepoModel
 from rhodecode.config.environment import load_environment
-from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
+from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP, LazyProperty
 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
-#EXTENSIONS WE WANT TO INDEX CONTENT OFF
+# EXTENSIONS WE WANT TO INDEX CONTENT OFF
 INDEX_EXTENSIONS = LANGUAGES_EXTENSIONS_MAP.keys()
-#CUSTOM ANALYZER wordsplit + lowercase filter
+# CUSTOM ANALYZER wordsplit + lowercase filter
 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
 #INDEX SCHEMA DEFINITION
-SCHEMA = Schema(owner=TEXT(),
+SCHEMA = Schema(
-repository=TEXT(stored=True),
+owner=TEXT(),
-path=TEXT(stored=True),
+repository=TEXT(stored=True),
-content=FieldType(format=Characters(ANALYZER),
+path=TEXT(stored=True),
-scorable=True, stored=True),
+content=FieldType(format=Characters(), analyzer=ANALYZER,
-modtime=STORED(), extension=TEXT(stored=True))
+scorable=True, stored=True),
+modtime=STORED(),
+extension=TEXT(stored=True)
+)
 IDX_NAME = 'HG_INDEX'
 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
-FRAGMENTER = SimpleFragmenter(200)
+FRAGMENTER = ContextFragmenter(200)
 class MakeIndex(BasePasterCommand):
 max_args = 1
 dest='full_index',
 help="Specifies that index should be made full i.e"
 " destroy old and build from scratch",
 default=False)
 class ResultWrapper(object):
 def __init__(self, search_type, searcher, matcher, highlight_items):
 self.search_type = search_type
 self.searcher = searcher
 self.matcher = matcher
 self.highlight_items = highlight_items
-self.fragment_size = 200 / 2
+self.fragment_size = 200
 @LazyProperty
 def doc_ids(self):
 docs_id = []
 while self.matcher.is_active():
 """
 Slicing of resultWrapper
 """
 i, j = key.start, key.stop
-slice = []
+slices = []
 for docid in self.doc_ids[i:j]:
-slice.append(self.get_full_content(docid))
+slices.append(self.get_full_content(docid))
-return slice
+return slices
 def get_full_content(self, docid):
 res = self.searcher.stored_fields(docid[0])
 f_path = res['path'][res['path'].find(res['repository']) \
 + len(res['repository']):].lstrip('/')
 content_short = self.get_short_content(res, docid[1])
-res.update({'content_short':content_short,
+res.update({'content_short': content_short,
-'content_short_hl':self.highlight(content_short),
+'content_short_hl': self.highlight(content_short),
-'f_path':f_path})
+'f_path': f_path})
 return res
 def get_short_content(self, res, chunks):
 def get_chunks(self):
 """
 Smart function that implements chunking the content
 but not overlap chunks so it doesn't highlight the same
 close occurrences twice.
 :param matcher:
 :param size:
 """
 memory = [(0, 0)]
 for span in self.matcher.spans():
 yield (start_offseted, end_offseted,)
 def highlight(self, content, top=5):
 if self.search_type != 'content':
 return ''
-hl = highlight(escape(content),
+hl = highlight(
-self.highlight_items,
+text=escape(content),
-analyzer=ANALYZER,
+terms=self.highlight_items,
-fragmenter=FRAGMENTER,
+analyzer=ANALYZER,
-formatter=FORMATTER,
+fragmenter=FRAGMENTER,
-top=top)
+formatter=FORMATTER,
+top=top
+)
 return hl

Mercurial > kallithea

comparison rhodecode/lib/indexers/__init__.py @ 2031:82a88013a3fd

comparison rhodecode/lib/indexers/__init__.py @ 2031:82a88013a3fd