annotate rhodecode/lib/indexers/__init__.py @ 560:3072935bdeed

rewrote whoosh indexing to run internal repository.walk() instead of filesystem. Disabled default hg update hook (not needed since whoosh is not dependent on file system files to index)
author Marcin Kuzminski <marcin@python-works.com>
date Sat, 09 Oct 2010 00:22:19 +0200
parents 65b2f150beb7
children 5cc96df705b9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
1 from os.path import dirname as dn, join as jn
547
1e757ac98988 renamed project to rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents: 497
diff changeset
2 from rhodecode.config.environment import load_environment
1e757ac98988 renamed project to rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents: 497
diff changeset
3 from rhodecode.model.hg_model import HgModel
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
4 from shutil import rmtree
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
5 from webhelpers.html.builder import escape
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
6 from vcs.utils.lazy import LazyProperty
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
7
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
8 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
9 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
10 from whoosh.index import create_in, open_dir
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
11 from whoosh.formats import Characters
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
12 from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
13
406
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
14 import os
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
15 import sys
406
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
16 import traceback
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
17
547
1e757ac98988 renamed project to rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents: 497
diff changeset
18 #to get the rhodecode import
406
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
19 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
20
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
21
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
22 #LOCATION WE KEEP THE INDEX
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
23 IDX_LOCATION = jn(dn(dn(dn(dn(os.path.abspath(__file__))))), 'data', 'index')
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
24
436
28f19fa562df updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents: 406
diff changeset
25 #EXTENSIONS WE WANT TO INDEX CONTENT OFF
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
26 INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c',
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
27 'cfg', 'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl',
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
28 'h', 'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp',
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
29 'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3',
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
30 'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql',
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
31 'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml', 'xsl', 'xslt',
436
28f19fa562df updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents: 406
diff changeset
32 'yaws']
406
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
33
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
34 #CUSTOM ANALYZER wordsplit + lowercase filter
436
28f19fa562df updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents: 406
diff changeset
35 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
406
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
36
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
37
406
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
38 #INDEX SCHEMA DEFINITION
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
39 SCHEMA = Schema(owner=TEXT(),
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
40 repository=TEXT(stored=True),
556
65b2f150beb7 Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents: 547
diff changeset
41 path=TEXT(stored=True),
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
42 content=FieldType(format=Characters(ANALYZER),
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
43 scorable=True, stored=True),
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
44 modtime=STORED(), extension=TEXT(stored=True))
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
45
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
46
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
47 IDX_NAME = 'HG_INDEX'
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
48 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
49 FRAGMENTER = SimpleFragmenter(200)
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
50
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
51 class ResultWrapper(object):
556
65b2f150beb7 Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents: 547
diff changeset
52 def __init__(self, search_type, searcher, matcher, highlight_items):
65b2f150beb7 Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents: 547
diff changeset
53 self.search_type = search_type
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
54 self.searcher = searcher
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
55 self.matcher = matcher
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
56 self.highlight_items = highlight_items
479
149940ba96d9 fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents: 478
diff changeset
57 self.fragment_size = 200 / 2
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
58
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
59 @LazyProperty
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
60 def doc_ids(self):
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
61 docs_id = []
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
62 while self.matcher.is_active():
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
63 docnum = self.matcher.id()
479
149940ba96d9 fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents: 478
diff changeset
64 chunks = [offsets for offsets in self.get_chunks()]
149940ba96d9 fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents: 478
diff changeset
65 docs_id.append([docnum, chunks])
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
66 self.matcher.next()
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
67 return docs_id
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
68
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
69 def __str__(self):
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
70 return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
71
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
72 def __repr__(self):
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
73 return self.__str__()
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
74
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
75 def __len__(self):
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
76 return len(self.doc_ids)
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
77
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
78 def __iter__(self):
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
79 """
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
80 Allows Iteration over results,and lazy generate content
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
81
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
82 *Requires* implementation of ``__getitem__`` method.
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
83 """
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
84 for docid in self.doc_ids:
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
85 yield self.get_full_content(docid)
406
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
86
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
87 def __getslice__(self, i, j):
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
88 """
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
89 Slicing of resultWrapper
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
90 """
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
91 slice = []
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
92 for docid in self.doc_ids[i:j]:
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
93 slice.append(self.get_full_content(docid))
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
94 return slice
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
95
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
96
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
97 def get_full_content(self, docid):
479
149940ba96d9 fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents: 478
diff changeset
98 res = self.searcher.stored_fields(docid[0])
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
99 f_path = res['path'][res['path'].find(res['repository']) \
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
100 + len(res['repository']):].lstrip('/')
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
101
479
149940ba96d9 fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents: 478
diff changeset
102 content_short = self.get_short_content(res, docid[1])
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
103 res.update({'content_short':content_short,
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
104 'content_short_hl':self.highlight(content_short),
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
105 'f_path':f_path})
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
106
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
107 return res
479
149940ba96d9 fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents: 478
diff changeset
108
149940ba96d9 fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents: 478
diff changeset
109 def get_short_content(self, res, chunks):
149940ba96d9 fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents: 478
diff changeset
110
149940ba96d9 fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents: 478
diff changeset
111 return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks])
149940ba96d9 fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents: 478
diff changeset
112
149940ba96d9 fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents: 478
diff changeset
113 def get_chunks(self):
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
114 """
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
115 Smart function that implements chunking the content
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
116 but not overlap chunks so it doesn't highlight the same
556
65b2f150beb7 Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents: 547
diff changeset
117 close occurrences twice.
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
118 @param matcher:
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
119 @param size:
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
120 """
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
121 memory = [(0, 0)]
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
122 for span in self.matcher.spans():
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
123 start = span.startchar or 0
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
124 end = span.endchar or 0
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
125 start_offseted = max(0, start - self.fragment_size)
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
126 end_offseted = end + self.fragment_size
479
149940ba96d9 fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents: 478
diff changeset
127
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
128 if start_offseted < memory[-1][1]:
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
129 start_offseted = memory[-1][1]
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
130 memory.append((start_offseted, end_offseted,))
479
149940ba96d9 fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents: 478
diff changeset
131 yield (start_offseted, end_offseted,)
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
132
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
133 def highlight(self, content, top=5):
556
65b2f150beb7 Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents: 547
diff changeset
134 if self.search_type != 'content':
65b2f150beb7 Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents: 547
diff changeset
135 return ''
478
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
136 hl = highlight(escape(content),
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
137 self.highlight_items,
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
138 analyzer=ANALYZER,
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
139 fragmenter=FRAGMENTER,
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
140 formatter=FORMATTER,
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
141 top=top)
7010af6efde5 Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents: 436
diff changeset
142 return hl