Mercurial > kallithea
comparison rhodecode/lib/indexers/__init__.py @ 3915:a42bfe8a9335 beta
moved make-index command to paster_commands module
- optimized imports and code
author | Marcin Kuzminski <marcin@python-works.com> |
---|---|
date | Thu, 30 May 2013 00:01:16 +0200 |
parents | b76a595b7a5e |
children | 5293d4bbb1ea |
comparison legend:
- equal
- deleted
- inserted
- replaced
3914:424b6c711a7f | 3915:a42bfe8a9335 |
---|---|
22 # | 22 # |
23 # You should have received a copy of the GNU General Public License | 23 # You should have received a copy of the GNU General Public License |
24 # along with this program. If not, see <http://www.gnu.org/licenses/>. | 24 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
25 import os | 25 import os |
26 import sys | 26 import sys |
27 import traceback | |
28 import logging | 27 import logging |
29 from os.path import dirname as dn, join as jn | 28 from os.path import dirname as dn, join as jn |
30 | 29 |
31 #to get the rhodecode import | 30 #to get the rhodecode import |
32 sys.path.append(dn(dn(dn(os.path.realpath(__file__))))) | 31 sys.path.append(dn(dn(dn(os.path.realpath(__file__))))) |
33 | 32 |
34 from string import strip | |
35 from shutil import rmtree | |
36 | |
37 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter | 33 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter |
38 from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType, DATETIME | 34 from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType, DATETIME |
39 from whoosh.index import create_in, open_dir | |
40 from whoosh.formats import Characters | 35 from whoosh.formats import Characters |
41 from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter | 36 from whoosh.highlight import highlight as whoosh_highlight, HtmlFormatter, ContextFragmenter |
42 | |
43 from webhelpers.html.builder import escape, literal | |
44 from sqlalchemy import engine_from_config | |
45 | |
46 from rhodecode.model import init_model | |
47 from rhodecode.model.scm import ScmModel | |
48 from rhodecode.model.repo import RepoModel | |
49 from rhodecode.config.environment import load_environment | |
50 from rhodecode.lib.utils2 import LazyProperty | 37 from rhodecode.lib.utils2 import LazyProperty |
51 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache,\ | |
52 load_rcextensions | |
53 | 38 |
54 log = logging.getLogger(__name__) | 39 log = logging.getLogger(__name__) |
55 | 40 |
56 # CUSTOM ANALYZER wordsplit + lowercase filter | 41 # CUSTOM ANALYZER wordsplit + lowercase filter |
57 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter() | 42 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter() |
95 date=DATETIME(), | 80 date=DATETIME(), |
96 action=TEXT(), | 81 action=TEXT(), |
97 repository=TEXT(), | 82 repository=TEXT(), |
98 ip=TEXT(), | 83 ip=TEXT(), |
99 ) | 84 ) |
100 | |
101 | |
102 class MakeIndex(BasePasterCommand): | |
103 | |
104 max_args = 1 | |
105 min_args = 1 | |
106 | |
107 usage = "CONFIG_FILE" | |
108 summary = "Creates or update full text search index" | |
109 group_name = "RhodeCode" | |
110 takes_config_file = -1 | |
111 parser = Command.standard_parser(verbose=True) | |
112 | |
113 def command(self): | |
114 logging.config.fileConfig(self.path_to_ini_file) | |
115 from pylons import config | |
116 add_cache(config) | |
117 engine = engine_from_config(config, 'sqlalchemy.db1.') | |
118 init_model(engine) | |
119 index_location = config['index_dir'] | |
120 repo_location = self.options.repo_location \ | |
121 if self.options.repo_location else RepoModel().repos_path | |
122 repo_list = map(strip, self.options.repo_list.split(',')) \ | |
123 if self.options.repo_list else None | |
124 repo_update_list = map(strip, self.options.repo_update_list.split(',')) \ | |
125 if self.options.repo_update_list else None | |
126 load_rcextensions(config['here']) | |
127 #====================================================================== | |
128 # WHOOSH DAEMON | |
129 #====================================================================== | |
130 from rhodecode.lib.pidlock import LockHeld, DaemonLock | |
131 from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon | |
132 try: | |
133 l = DaemonLock(file_=jn(dn(dn(index_location)), 'make_index.lock')) | |
134 WhooshIndexingDaemon(index_location=index_location, | |
135 repo_location=repo_location, | |
136 repo_list=repo_list, | |
137 repo_update_list=repo_update_list)\ | |
138 .run(full_index=self.options.full_index) | |
139 l.release() | |
140 except LockHeld: | |
141 sys.exit(1) | |
142 | |
143 def update_parser(self): | |
144 self.parser.add_option('--repo-location', | |
145 action='store', | |
146 dest='repo_location', | |
147 help="Specifies repositories location to index OPTIONAL", | |
148 ) | |
149 self.parser.add_option('--index-only', | |
150 action='store', | |
151 dest='repo_list', | |
152 help="Specifies a comma separated list of repositores " | |
153 "to build index on. If not given all repositories " | |
154 "are scanned for indexing. OPTIONAL", | |
155 ) | |
156 self.parser.add_option('--update-only', | |
157 action='store', | |
158 dest='repo_update_list', | |
159 help="Specifies a comma separated list of repositores " | |
160 "to re-build index on. OPTIONAL", | |
161 ) | |
162 self.parser.add_option('-f', | |
163 action='store_true', | |
164 dest='full_index', | |
165 help="Specifies that index should be made full i.e" | |
166 " destroy old and build from scratch", | |
167 default=False) | |
168 | 85 |
169 | 86 |
170 class WhooshResultWrapper(object): | 87 class WhooshResultWrapper(object): |
171 def __init__(self, search_type, searcher, matcher, highlight_items, | 88 def __init__(self, search_type, searcher, matcher, highlight_items, |
172 repo_location): | 89 repo_location): |
247 def get_chunks(self): | 164 def get_chunks(self): |
248 """ | 165 """ |
249 Smart function that implements chunking the content | 166 Smart function that implements chunking the content |
250 but not overlap chunks so it doesn't highlight the same | 167 but not overlap chunks so it doesn't highlight the same |
251 close occurrences twice. | 168 close occurrences twice. |
252 | |
253 :param matcher: | |
254 :param size: | |
255 """ | 169 """ |
256 memory = [(0, 0)] | 170 memory = [(0, 0)] |
257 if self.matcher.supports('positions'): | 171 if self.matcher.supports('positions'): |
258 for span in self.matcher.spans(): | 172 for span in self.matcher.spans(): |
259 start = span.startchar or 0 | 173 start = span.startchar or 0 |
267 yield (start_offseted, end_offseted,) | 181 yield (start_offseted, end_offseted,) |
268 | 182 |
269 def highlight(self, content, top=5): | 183 def highlight(self, content, top=5): |
270 if self.search_type not in ['content', 'message']: | 184 if self.search_type not in ['content', 'message']: |
271 return '' | 185 return '' |
272 hl = highlight( | 186 hl = whoosh_highlight( |
273 text=content, | 187 text=content, |
274 terms=self.highlight_items, | 188 terms=self.highlight_items, |
275 analyzer=ANALYZER, | 189 analyzer=ANALYZER, |
276 fragmenter=FRAGMENTER, | 190 fragmenter=FRAGMENTER, |
277 formatter=FORMATTER, | 191 formatter=FORMATTER, |