Mercurial > kallithea
annotate rhodecode/lib/indexers/__init__.py @ 639:45e1fdc0082c
Version bump,freeze of dependent libs.
Little test update
author | Marcin Kuzminski <marcin@python-works.com> |
---|---|
date | Tue, 02 Nov 2010 20:19:05 +0100 |
parents | 5cc96df705b9 |
children | 7e536d1af60d |
rev | line source |
---|---|
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
1 from os.path import dirname as dn, join as jn |
547
1e757ac98988
renamed project to rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents:
497
diff
changeset
|
2 from rhodecode.config.environment import load_environment |
1e757ac98988
renamed project to rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents:
497
diff
changeset
|
3 from rhodecode.model.hg_model import HgModel |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
4 from shutil import rmtree |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
5 from webhelpers.html.builder import escape |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
6 from vcs.utils.lazy import LazyProperty |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
7 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
8 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
9 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
10 from whoosh.index import create_in, open_dir |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
11 from whoosh.formats import Characters |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
12 from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
13 |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
14 import os |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
15 import sys |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
16 import traceback |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
17 |
547
1e757ac98988
renamed project to rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents:
497
diff
changeset
|
18 #to get the rhodecode import |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
19 sys.path.append(dn(dn(dn(os.path.realpath(__file__))))) |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
20 |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
21 |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
22 #LOCATION WHERE WE KEEP THE INDEX |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
23 IDX_LOCATION = jn(dn(dn(dn(dn(os.path.abspath(__file__))))), 'data', 'index') |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
24 |
436
28f19fa562df
updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents:
406
diff
changeset
|
25 #EXTENSIONS WE WANT TO INDEX CONTENT OF |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
26 INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c', |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
27 'cfg', 'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl', |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
28 'h', 'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp', |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
29 'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3', |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
30 'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql', |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
31 'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml', 'xsl', 'xslt', |
436
28f19fa562df
updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents:
406
diff
changeset
|
32 'yaws'] |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
33 |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
34 #CUSTOM ANALYZER wordsplit + lowercase filter |
436
28f19fa562df
updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents:
406
diff
changeset
|
35 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter() |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
36 |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
37 |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
38 #INDEX SCHEMA DEFINITION |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
39 SCHEMA = Schema(owner=TEXT(), |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
40 repository=TEXT(stored=True), |
556
65b2f150beb7
Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents:
547
diff
changeset
|
41 path=TEXT(stored=True), |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
42 content=FieldType(format=Characters(ANALYZER), |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
43 scorable=True, stored=True), |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
44 modtime=STORED(), extension=TEXT(stored=True)) |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
45 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
46 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
47 IDX_NAME = 'HG_INDEX' |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
48 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n') |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
49 FRAGMENTER = SimpleFragmenter(200) |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
50 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
51 class ResultWrapper(object): |
556
65b2f150beb7
Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents:
547
diff
changeset
|
52 def __init__(self, search_type, searcher, matcher, highlight_items): |
65b2f150beb7
Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents:
547
diff
changeset
|
53 self.search_type = search_type |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
54 self.searcher = searcher |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
55 self.matcher = matcher |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
56 self.highlight_items = highlight_items |
479
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
57 self.fragment_size = 200 / 2 |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
58 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
59 @LazyProperty |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
60 def doc_ids(self): |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
61 docs_id = [] |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
62 while self.matcher.is_active(): |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
63 docnum = self.matcher.id() |
479
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
64 chunks = [offsets for offsets in self.get_chunks()] |
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
65 docs_id.append([docnum, chunks]) |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
66 self.matcher.next() |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
67 return docs_id |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
68 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
69 def __str__(self): |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
70 return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids)) |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
71 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
72 def __repr__(self): |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
73 return self.__str__() |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
74 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
75 def __len__(self): |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
76 return len(self.doc_ids) |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
77 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
78 def __iter__(self): |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
79 """ |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
80 Allows iteration over results, and lazily generates content |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
81 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
82 *Requires* implementation of ``__getitem__`` method. |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
83 """ |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
84 for docid in self.doc_ids: |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
85 yield self.get_full_content(docid) |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
86 |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
87 def __getslice__(self, i, j): |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
88 """ |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
89 Slicing of ResultWrapper |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
90 """ |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
91 slice = [] |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
92 for docid in self.doc_ids[i:j]: |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
93 slice.append(self.get_full_content(docid)) |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
94 return slice |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
95 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
96 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
97 def get_full_content(self, docid): |
479
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
98 res = self.searcher.stored_fields(docid[0]) |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
99 f_path = res['path'][res['path'].find(res['repository']) \ |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
100 + len(res['repository']):].lstrip('/') |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
101 |
479
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
102 content_short = self.get_short_content(res, docid[1]) |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
103 res.update({'content_short':content_short, |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
104 'content_short_hl':self.highlight(content_short), |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
105 'f_path':f_path}) |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
106 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
107 return res |
479
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
108 |
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
109 def get_short_content(self, res, chunks): |
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
110 |
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
111 return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks]) |
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
112 |
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
113 def get_chunks(self): |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
114 """ |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
115 Smart function that implements chunking the content |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
116 without overlapping chunks, so it doesn't highlight the same |
556
65b2f150beb7
Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents:
547
diff
changeset
|
117 close occurrences twice. |
604
5cc96df705b9
fixed @repo into :repo for docs
Marcin Kuzminski <marcin@python-works.com>
parents:
556
diff
changeset
|
118 :param matcher: |
5cc96df705b9
fixed @repo into :repo for docs
Marcin Kuzminski <marcin@python-works.com>
parents:
556
diff
changeset
|
119 :param size: |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
120 """ |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
121 memory = [(0, 0)] |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
122 for span in self.matcher.spans(): |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
123 start = span.startchar or 0 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
124 end = span.endchar or 0 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
125 start_offseted = max(0, start - self.fragment_size) |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
126 end_offseted = end + self.fragment_size |
479
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
127 |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
128 if start_offseted < memory[-1][1]: |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
129 start_offseted = memory[-1][1] |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
130 memory.append((start_offseted, end_offseted,)) |
479
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
131 yield (start_offseted, end_offseted,) |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
132 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
133 def highlight(self, content, top=5): |
556
65b2f150beb7
Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents:
547
diff
changeset
|
134 if self.search_type != 'content': |
65b2f150beb7
Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents:
547
diff
changeset
|
135 return '' |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
136 hl = highlight(escape(content), |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
137 self.highlight_items, |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
138 analyzer=ANALYZER, |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
139 fragmenter=FRAGMENTER, |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
140 formatter=FORMATTER, |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
141 top=top) |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
142 return hl |