Mercurial > kallithea
comparison rhodecode/lib/indexers/__init__.py @ 2640:5f21a9dcb09d beta
create an index for commit messages and the ability to search them and see results
author | Indra Talip <indra.talip@gmail.com> |
---|---|
date | Fri, 20 Jul 2012 12:50:56 +0200 |
parents | 324b838250c9 |
children | 88b0e82bcba4 |
comparison
equal
deleted
inserted
replaced
2631:f597cfb492f9 | 2640:5f21a9dcb09d |
---|---|
33 | 33 |
34 from string import strip | 34 from string import strip |
35 from shutil import rmtree | 35 from shutil import rmtree |
36 | 36 |
37 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter | 37 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter |
38 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType | 38 from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType |
39 from whoosh.index import create_in, open_dir | 39 from whoosh.index import create_in, open_dir |
40 from whoosh.formats import Characters | 40 from whoosh.formats import Characters |
41 from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter | 41 from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter |
42 | 42 |
43 from webhelpers.html.builder import escape, literal | 43 from webhelpers.html.builder import escape, literal |
49 from rhodecode.config.environment import load_environment | 49 from rhodecode.config.environment import load_environment |
50 from rhodecode.lib.utils2 import LazyProperty | 50 from rhodecode.lib.utils2 import LazyProperty |
51 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache,\ | 51 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache,\ |
52 load_rcextensions | 52 load_rcextensions |
53 | 53 |
54 log = logging.getLogger(__name__) | |
55 | |
54 # CUSTOM ANALYZER wordsplit + lowercase filter | 56 # CUSTOM ANALYZER wordsplit + lowercase filter |
55 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter() | 57 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter() |
56 | |
57 | 58 |
58 #INDEX SCHEMA DEFINITION | 59 #INDEX SCHEMA DEFINITION |
59 SCHEMA = Schema( | 60 SCHEMA = Schema( |
60 fileid=ID(unique=True), | 61 fileid=ID(unique=True), |
61 owner=TEXT(), | 62 owner=TEXT(), |
69 | 70 |
70 IDX_NAME = 'HG_INDEX' | 71 IDX_NAME = 'HG_INDEX' |
71 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n') | 72 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n') |
72 FRAGMENTER = ContextFragmenter(200) | 73 FRAGMENTER = ContextFragmenter(200) |
73 | 74 |
75 CHGSETS_SCHEMA = Schema( | |
76 path=ID(unique=True, stored=True), | |
77 revision=NUMERIC(unique=True, stored=True), | |
78 last=BOOLEAN(), | |
79 owner=TEXT(), | |
80 repository=ID(unique=True, stored=True), | |
81 author=TEXT(stored=True), | |
82 message=FieldType(format=Characters(), analyzer=ANALYZER, | |
83 scorable=True, stored=True), | |
84 parents=TEXT(), | |
85 added=TEXT(), | |
86 removed=TEXT(), | |
87 changed=TEXT(), | |
88 ) | |
89 | |
90 CHGSET_IDX_NAME = 'CHGSET_INDEX' | |
74 | 91 |
75 class MakeIndex(BasePasterCommand): | 92 class MakeIndex(BasePasterCommand): |
76 | 93 |
77 max_args = 1 | 94 max_args = 1 |
78 min_args = 1 | 95 min_args = 1 |
189 slices.append(self.get_full_content(docid)) | 206 slices.append(self.get_full_content(docid)) |
190 return slices | 207 return slices |
191 | 208 |
192 def get_full_content(self, docid): | 209 def get_full_content(self, docid): |
193 res = self.searcher.stored_fields(docid[0]) | 210 res = self.searcher.stored_fields(docid[0]) |
211 log.debug('result: %s' % res) | |
194 full_repo_path = jn(self.repo_location, res['repository']) | 212 full_repo_path = jn(self.repo_location, res['repository']) |
195 f_path = res['path'].split(full_repo_path)[-1] | 213 f_path = res['path'].split(full_repo_path)[-1] |
196 f_path = f_path.lstrip(os.sep) | 214 f_path = f_path.lstrip(os.sep) |
197 | 215 res.update({'f_path': f_path}) |
198 content_short = self.get_short_content(res, docid[1]) | 216 |
199 res.update({'content_short': content_short, | 217 if self.search_type == 'content': |
200 'content_short_hl': self.highlight(content_short), | 218 content_short = self.get_short_content(res, docid[1]) |
201 'f_path': f_path}) | 219 res.update({'content_short': content_short, |
220 'content_short_hl': self.highlight(content_short)}) | |
221 elif self.search_type == 'message': | |
222 res.update({'message_hl': self.highlight(res['message'])}) | |
223 | |
224 log.debug('result: %s' % res) | |
202 | 225 |
203 return res | 226 return res |
204 | 227 |
205 def get_short_content(self, res, chunks): | 228 def get_short_content(self, res, chunks): |
206 | 229 |
214 | 237 |
215 :param matcher: | 238 :param matcher: |
216 :param size: | 239 :param size: |
217 """ | 240 """ |
218 memory = [(0, 0)] | 241 memory = [(0, 0)] |
219 for span in self.matcher.spans(): | 242 if self.matcher.supports('positions'): |
220 start = span.startchar or 0 | 243 for span in self.matcher.spans(): |
221 end = span.endchar or 0 | 244 start = span.startchar or 0 |
222 start_offseted = max(0, start - self.fragment_size) | 245 end = span.endchar or 0 |
223 end_offseted = end + self.fragment_size | 246 start_offseted = max(0, start - self.fragment_size) |
224 | 247 end_offseted = end + self.fragment_size |
225 if start_offseted < memory[-1][1]: | 248 |
226 start_offseted = memory[-1][1] | 249 if start_offseted < memory[-1][1]: |
227 memory.append((start_offseted, end_offseted,)) | 250 start_offseted = memory[-1][1] |
228 yield (start_offseted, end_offseted,) | 251 memory.append((start_offseted, end_offseted,)) |
252 yield (start_offseted, end_offseted,) | |
229 | 253 |
230 def highlight(self, content, top=5): | 254 def highlight(self, content, top=5): |
231 if self.search_type != 'content': | 255 if self.search_type not in ['content', 'message']: |
232 return '' | 256 return '' |
233 hl = highlight( | 257 hl = highlight( |
234 text=content, | 258 text=content, |
235 terms=self.highlight_items, | 259 terms=self.highlight_items, |
236 analyzer=ANALYZER, | 260 analyzer=ANALYZER, |