comparison rhodecode/lib/indexers/__init__.py @ 3915:a42bfe8a9335 beta

moved make-index command to paster_commands module - optimized imports and code
author Marcin Kuzminski <marcin@python-works.com>
date Thu, 30 May 2013 00:01:16 +0200
parents b76a595b7a5e
children 5293d4bbb1ea
comparison
equal deleted inserted replaced
3914:424b6c711a7f 3915:a42bfe8a9335
22 # 22 #
23 # You should have received a copy of the GNU General Public License 23 # You should have received a copy of the GNU General Public License
24 # along with this program. If not, see <http://www.gnu.org/licenses/>. 24 # along with this program. If not, see <http://www.gnu.org/licenses/>.
25 import os 25 import os
26 import sys 26 import sys
27 import traceback
28 import logging 27 import logging
29 from os.path import dirname as dn, join as jn 28 from os.path import dirname as dn, join as jn
30 29
31 #to get the rhodecode import 30 #to get the rhodecode import
32 sys.path.append(dn(dn(dn(os.path.realpath(__file__))))) 31 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
33 32
34 from string import strip
35 from shutil import rmtree
36
37 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter 33 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
38 from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType, DATETIME 34 from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType, DATETIME
39 from whoosh.index import create_in, open_dir
40 from whoosh.formats import Characters 35 from whoosh.formats import Characters
41 from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter 36 from whoosh.highlight import highlight as whoosh_highlight, HtmlFormatter, ContextFragmenter
42
43 from webhelpers.html.builder import escape, literal
44 from sqlalchemy import engine_from_config
45
46 from rhodecode.model import init_model
47 from rhodecode.model.scm import ScmModel
48 from rhodecode.model.repo import RepoModel
49 from rhodecode.config.environment import load_environment
50 from rhodecode.lib.utils2 import LazyProperty 37 from rhodecode.lib.utils2 import LazyProperty
51 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache,\
52 load_rcextensions
53 38
54 log = logging.getLogger(__name__) 39 log = logging.getLogger(__name__)
55 40
56 # CUSTOM ANALYZER wordsplit + lowercase filter 41 # CUSTOM ANALYZER wordsplit + lowercase filter
57 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter() 42 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
95 date=DATETIME(), 80 date=DATETIME(),
96 action=TEXT(), 81 action=TEXT(),
97 repository=TEXT(), 82 repository=TEXT(),
98 ip=TEXT(), 83 ip=TEXT(),
99 ) 84 )
100
101
102 class MakeIndex(BasePasterCommand):
103
104 max_args = 1
105 min_args = 1
106
107 usage = "CONFIG_FILE"
108 summary = "Creates or update full text search index"
109 group_name = "RhodeCode"
110 takes_config_file = -1
111 parser = Command.standard_parser(verbose=True)
112
113 def command(self):
114 logging.config.fileConfig(self.path_to_ini_file)
115 from pylons import config
116 add_cache(config)
117 engine = engine_from_config(config, 'sqlalchemy.db1.')
118 init_model(engine)
119 index_location = config['index_dir']
120 repo_location = self.options.repo_location \
121 if self.options.repo_location else RepoModel().repos_path
122 repo_list = map(strip, self.options.repo_list.split(',')) \
123 if self.options.repo_list else None
124 repo_update_list = map(strip, self.options.repo_update_list.split(',')) \
125 if self.options.repo_update_list else None
126 load_rcextensions(config['here'])
127 #======================================================================
128 # WHOOSH DAEMON
129 #======================================================================
130 from rhodecode.lib.pidlock import LockHeld, DaemonLock
131 from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
132 try:
133 l = DaemonLock(file_=jn(dn(dn(index_location)), 'make_index.lock'))
134 WhooshIndexingDaemon(index_location=index_location,
135 repo_location=repo_location,
136 repo_list=repo_list,
137 repo_update_list=repo_update_list)\
138 .run(full_index=self.options.full_index)
139 l.release()
140 except LockHeld:
141 sys.exit(1)
142
143 def update_parser(self):
144 self.parser.add_option('--repo-location',
145 action='store',
146 dest='repo_location',
147 help="Specifies repositories location to index OPTIONAL",
148 )
149 self.parser.add_option('--index-only',
150 action='store',
151 dest='repo_list',
152 help="Specifies a comma separated list of repositores "
153 "to build index on. If not given all repositories "
154 "are scanned for indexing. OPTIONAL",
155 )
156 self.parser.add_option('--update-only',
157 action='store',
158 dest='repo_update_list',
159 help="Specifies a comma separated list of repositores "
160 "to re-build index on. OPTIONAL",
161 )
162 self.parser.add_option('-f',
163 action='store_true',
164 dest='full_index',
165 help="Specifies that index should be made full i.e"
166 " destroy old and build from scratch",
167 default=False)
168 85
169 86
170 class WhooshResultWrapper(object): 87 class WhooshResultWrapper(object):
171 def __init__(self, search_type, searcher, matcher, highlight_items, 88 def __init__(self, search_type, searcher, matcher, highlight_items,
172 repo_location): 89 repo_location):
247 def get_chunks(self): 164 def get_chunks(self):
248 """ 165 """
249 Smart function that implements chunking the content 166 Smart function that implements chunking the content
250 but not overlap chunks so it doesn't highlight the same 167 but not overlap chunks so it doesn't highlight the same
251 close occurrences twice. 168 close occurrences twice.
252
253 :param matcher:
254 :param size:
255 """ 169 """
256 memory = [(0, 0)] 170 memory = [(0, 0)]
257 if self.matcher.supports('positions'): 171 if self.matcher.supports('positions'):
258 for span in self.matcher.spans(): 172 for span in self.matcher.spans():
259 start = span.startchar or 0 173 start = span.startchar or 0
267 yield (start_offseted, end_offseted,) 181 yield (start_offseted, end_offseted,)
268 182
269 def highlight(self, content, top=5): 183 def highlight(self, content, top=5):
270 if self.search_type not in ['content', 'message']: 184 if self.search_type not in ['content', 'message']:
271 return '' 185 return ''
272 hl = highlight( 186 hl = whoosh_highlight(
273 text=content, 187 text=content,
274 terms=self.highlight_items, 188 terms=self.highlight_items,
275 analyzer=ANALYZER, 189 analyzer=ANALYZER,
276 fragmenter=FRAGMENTER, 190 fragmenter=FRAGMENTER,
277 formatter=FORMATTER, 191 formatter=FORMATTER,