Mercurial > kallithea
annotate rhodecode/lib/indexers/__init__.py @ 2468:6af1e0d5ff9d beta
Fix repo_root for grouped repos
author | hppj <hppj@postmage.biz> |
---|---|
date | Thu, 14 Jun 2012 22:41:06 -0700 |
parents | 324b838250c9 |
children | 5f21a9dcb09d |
rev | line source |
---|---|
903
04c9bb9ca6d6
code docs, updates
Marcin Kuzminski <marcin@python-works.com>
parents:
894
diff
changeset
|
1 # -*- coding: utf-8 -*- |
04c9bb9ca6d6
code docs, updates
Marcin Kuzminski <marcin@python-works.com>
parents:
894
diff
changeset
|
2 """ |
04c9bb9ca6d6
code docs, updates
Marcin Kuzminski <marcin@python-works.com>
parents:
894
diff
changeset
|
3 rhodecode.lib.indexers.__init__ |
04c9bb9ca6d6
code docs, updates
Marcin Kuzminski <marcin@python-works.com>
parents:
894
diff
changeset
|
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
04c9bb9ca6d6
code docs, updates
Marcin Kuzminski <marcin@python-works.com>
parents:
894
diff
changeset
|
5 |
04c9bb9ca6d6
code docs, updates
Marcin Kuzminski <marcin@python-works.com>
parents:
894
diff
changeset
|
6 Whoosh indexing module for RhodeCode |
1203
6832ef664673
source code cleanup: remove trailing white space, normalize file endings
Marcin Kuzminski <marcin@python-works.com>
parents:
1198
diff
changeset
|
7 |
903
04c9bb9ca6d6
code docs, updates
Marcin Kuzminski <marcin@python-works.com>
parents:
894
diff
changeset
|
8 :created_on: Aug 17, 2010 |
04c9bb9ca6d6
code docs, updates
Marcin Kuzminski <marcin@python-works.com>
parents:
894
diff
changeset
|
9 :author: marcink |
1824
89efedac4e6c
2012 copyrights
Marcin Kuzminski <marcin@python-works.com>
parents:
1810
diff
changeset
|
10 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com> |
903
04c9bb9ca6d6
code docs, updates
Marcin Kuzminski <marcin@python-works.com>
parents:
894
diff
changeset
|
11 :license: GPLv3, see COPYING for more details. |
04c9bb9ca6d6
code docs, updates
Marcin Kuzminski <marcin@python-works.com>
parents:
894
diff
changeset
|
12 """ |
1206
a671db5bdd58
fixed license issue #149
Marcin Kuzminski <marcin@python-works.com>
parents:
1203
diff
changeset
|
13 # This program is free software: you can redistribute it and/or modify |
a671db5bdd58
fixed license issue #149
Marcin Kuzminski <marcin@python-works.com>
parents:
1203
diff
changeset
|
14 # it under the terms of the GNU General Public License as published by |
a671db5bdd58
fixed license issue #149
Marcin Kuzminski <marcin@python-works.com>
parents:
1203
diff
changeset
|
15 # the Free Software Foundation, either version 3 of the License, or |
a671db5bdd58
fixed license issue #149
Marcin Kuzminski <marcin@python-works.com>
parents:
1203
diff
changeset
|
16 # (at your option) any later version. |
1203
6832ef664673
source code cleanup: remove trailing white space, normalize file endings
Marcin Kuzminski <marcin@python-works.com>
parents:
1198
diff
changeset
|
17 # |
903
04c9bb9ca6d6
code docs, updates
Marcin Kuzminski <marcin@python-works.com>
parents:
894
diff
changeset
|
18 # This program is distributed in the hope that it will be useful, |
04c9bb9ca6d6
code docs, updates
Marcin Kuzminski <marcin@python-works.com>
parents:
894
diff
changeset
|
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
04c9bb9ca6d6
code docs, updates
Marcin Kuzminski <marcin@python-works.com>
parents:
894
diff
changeset
|
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
04c9bb9ca6d6
code docs, updates
Marcin Kuzminski <marcin@python-works.com>
parents:
894
diff
changeset
|
21 # GNU General Public License for more details. |
1203
6832ef664673
source code cleanup: remove trailing white space, normalize file endings
Marcin Kuzminski <marcin@python-works.com>
parents:
1198
diff
changeset
|
22 # |
903
04c9bb9ca6d6
code docs, updates
Marcin Kuzminski <marcin@python-works.com>
parents:
894
diff
changeset
|
23 # You should have received a copy of the GNU General Public License |
1206
a671db5bdd58
fixed license issue #149
Marcin Kuzminski <marcin@python-works.com>
parents:
1203
diff
changeset
|
24 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
25 import os |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
26 import sys |
785
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
27 import traceback |
2102
04d26165c3d9
Whoosh logging is now controlled by the .ini files logging setup
Marcin Kuzminski <marcin@python-works.com>
parents:
1995
diff
changeset
|
28 import logging |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
29 from os.path import dirname as dn, join as jn |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
30 |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
31 #to get the rhodecode import |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
32 sys.path.append(dn(dn(dn(os.path.realpath(__file__))))) |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
33 |
894
1fed3c9161bb
fixes #90 + docs update
Marcin Kuzminski <marcin@python-works.com>
parents:
785
diff
changeset
|
34 from string import strip |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
35 from shutil import rmtree |
785
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
36 |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
37 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
38 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
39 from whoosh.index import create_in, open_dir |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
40 from whoosh.formats import Characters |
1995
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
41 from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
42 |
2389
324b838250c9
UI fixes for searching
Marcin Kuzminski <marcin@python-works.com>
parents:
2388
diff
changeset
|
43 from webhelpers.html.builder import escape, literal |
1302
f0e904651f21
moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map
Marcin Kuzminski <marcin@python-works.com>
parents:
1206
diff
changeset
|
44 from sqlalchemy import engine_from_config |
f0e904651f21
moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map
Marcin Kuzminski <marcin@python-works.com>
parents:
1206
diff
changeset
|
45 |
f0e904651f21
moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map
Marcin Kuzminski <marcin@python-works.com>
parents:
1206
diff
changeset
|
46 from rhodecode.model import init_model |
f0e904651f21
moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map
Marcin Kuzminski <marcin@python-works.com>
parents:
1206
diff
changeset
|
47 from rhodecode.model.scm import ScmModel |
1407
2744f5b01d00
Allowing indexing job to resolve repos path on its own if not given.
Jared Bunting <jared.bunting@peachjean.com>
parents:
1354
diff
changeset
|
48 from rhodecode.model.repo import RepoModel |
1302
f0e904651f21
moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map
Marcin Kuzminski <marcin@python-works.com>
parents:
1206
diff
changeset
|
49 from rhodecode.config.environment import load_environment |
2109 | 50 from rhodecode.lib.utils2 import LazyProperty |
51 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache,\ | |
52 load_rcextensions | |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
53 |
1995
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
54 # CUSTOM ANALYZER wordsplit + lowercase filter |
436
28f19fa562df
updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents:
406
diff
changeset
|
55 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter() |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
56 |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
57 |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
58 #INDEX SCHEMA DEFINITION |
1995
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
59 SCHEMA = Schema( |
2388
a0ef98f2520b
#453 added ID field in whoosh SCHEMA that solves the issue of reindexing modified files
Marcin Kuzminski <marcin@python-works.com>
parents:
2373
diff
changeset
|
60 fileid=ID(unique=True), |
1995
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
61 owner=TEXT(), |
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
62 repository=TEXT(stored=True), |
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
63 path=TEXT(stored=True), |
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
64 content=FieldType(format=Characters(), analyzer=ANALYZER, |
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
65 scorable=True, stored=True), |
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
66 modtime=STORED(), |
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
67 extension=TEXT(stored=True) |
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
68 ) |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
69 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
70 IDX_NAME = 'HG_INDEX' |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
71 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n') |
1995
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
72 FRAGMENTER = ContextFragmenter(200) |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
73 |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
74 |
785
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
75 class MakeIndex(BasePasterCommand): |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
76 |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
77 max_args = 1 |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
78 min_args = 1 |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
79 |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
80 usage = "CONFIG_FILE" |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
81 summary = "Creates index for full text search given configuration file" |
683
341beaa9edba
Implemented whoosh index building as paster command.
Marcin Kuzminski <marcin@python-works.com>
parents:
631
diff
changeset
|
82 group_name = "RhodeCode" |
341beaa9edba
Implemented whoosh index building as paster command.
Marcin Kuzminski <marcin@python-works.com>
parents:
631
diff
changeset
|
83 takes_config_file = -1 |
785
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
84 parser = Command.standard_parser(verbose=True) |
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
85 |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
86 def command(self): |
2102
04d26165c3d9
Whoosh logging is now controlled by the .ini files logging setup
Marcin Kuzminski <marcin@python-works.com>
parents:
1995
diff
changeset
|
87 logging.config.fileConfig(self.path_to_ini_file) |
785
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
88 from pylons import config |
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
89 add_cache(config) |
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
90 engine = engine_from_config(config, 'sqlalchemy.db1.') |
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
91 init_model(engine) |
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
92 index_location = config['index_dir'] |
1409
c3172bc09503
Updated contributors and fixed index line length
Marcin Kuzminski <marcin@python-works.com>
parents:
1408
diff
changeset
|
93 repo_location = self.options.repo_location \ |
c3172bc09503
Updated contributors and fixed index line length
Marcin Kuzminski <marcin@python-works.com>
parents:
1408
diff
changeset
|
94 if self.options.repo_location else RepoModel().repos_path |
1183
514efe34c255
fixes issue #146
Marcin Kuzminski <marcin@python-works.com>
parents:
903
diff
changeset
|
95 repo_list = map(strip, self.options.repo_list.split(',')) \ |
514efe34c255
fixes issue #146
Marcin Kuzminski <marcin@python-works.com>
parents:
903
diff
changeset
|
96 if self.options.repo_list else None |
2373
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2319
diff
changeset
|
97 repo_update_list = map(strip, self.options.repo_update_list.split(',')) \ |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2319
diff
changeset
|
98 if self.options.repo_update_list else None |
2109 | 99 load_rcextensions(config['here']) |
683
341beaa9edba
Implemented whoosh index building as paster command.
Marcin Kuzminski <marcin@python-works.com>
parents:
631
diff
changeset
|
100 #====================================================================== |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
101 # WHOOSH DAEMON |
683
341beaa9edba
Implemented whoosh index building as paster command.
Marcin Kuzminski <marcin@python-works.com>
parents:
631
diff
changeset
|
102 #====================================================================== |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
103 from rhodecode.lib.pidlock import LockHeld, DaemonLock |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
104 from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
105 try: |
1540
191f3f08236d
fixes #258 RhodeCode 1.2 assumes egg folder is writable
Marcin Kuzminski <marcin@python-works.com>
parents:
1409
diff
changeset
|
106 l = DaemonLock(file_=jn(dn(dn(index_location)), 'make_index.lock')) |
683
341beaa9edba
Implemented whoosh index building as paster command.
Marcin Kuzminski <marcin@python-works.com>
parents:
631
diff
changeset
|
107 WhooshIndexingDaemon(index_location=index_location, |
894
1fed3c9161bb
fixes #90 + docs update
Marcin Kuzminski <marcin@python-works.com>
parents:
785
diff
changeset
|
108 repo_location=repo_location, |
2373
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2319
diff
changeset
|
109 repo_list=repo_list, |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2319
diff
changeset
|
110 repo_update_list=repo_update_list)\ |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
111 .run(full_index=self.options.full_index) |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
112 l.release() |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
113 except LockHeld: |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
114 sys.exit(1) |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
115 |
785
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
116 def update_parser(self): |
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
117 self.parser.add_option('--repo-location', |
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
118 action='store', |
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
119 dest='repo_location', |
1408
93cffcb6fd54
Adding documentation for indexer's self-resolving repos location.
Jared Bunting <jared.bunting@peachjean.com>
parents:
1407
diff
changeset
|
120 help="Specifies repositories location to index OPTIONAL", |
785
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
121 ) |
894
1fed3c9161bb
fixes #90 + docs update
Marcin Kuzminski <marcin@python-works.com>
parents:
785
diff
changeset
|
122 self.parser.add_option('--index-only', |
1fed3c9161bb
fixes #90 + docs update
Marcin Kuzminski <marcin@python-works.com>
parents:
785
diff
changeset
|
123 action='store', |
1fed3c9161bb
fixes #90 + docs update
Marcin Kuzminski <marcin@python-works.com>
parents:
785
diff
changeset
|
124 dest='repo_list', |
1fed3c9161bb
fixes #90 + docs update
Marcin Kuzminski <marcin@python-works.com>
parents:
785
diff
changeset
|
125 help="Specifies a comma separated list of repositores " |
2373
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2319
diff
changeset
|
126 "to build index on. If not given all repositories " |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2319
diff
changeset
|
127 "are scanned for indexing. OPTIONAL", |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2319
diff
changeset
|
128 ) |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2319
diff
changeset
|
129 self.parser.add_option('--update-only', |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2319
diff
changeset
|
130 action='store', |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2319
diff
changeset
|
131 dest='repo_update_list', |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2319
diff
changeset
|
132 help="Specifies a comma separated list of repositores " |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2319
diff
changeset
|
133 "to re-build index on. OPTIONAL", |
894
1fed3c9161bb
fixes #90 + docs update
Marcin Kuzminski <marcin@python-works.com>
parents:
785
diff
changeset
|
134 ) |
785
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
135 self.parser.add_option('-f', |
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
136 action='store_true', |
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
137 dest='full_index', |
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
138 help="Specifies that index should be made full i.e" |
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
139 " destroy old and build from scratch", |
277427ac29a9
complete rewrite of paster commands,
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
140 default=False) |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
141 |
1810
203af05539e0
implements #330 api method for listing nodes at particular revision
Marcin Kuzminski <marcin@python-works.com>
parents:
1540
diff
changeset
|
142 |
2319
4c239e0dcbb7
fixes issue #454 Search results under Windows include preceeding backslash
Marcin Kuzminski <marcin@python-works.com>
parents:
2109
diff
changeset
|
143 class WhooshResultWrapper(object): |
4c239e0dcbb7
fixes issue #454 Search results under Windows include preceeding backslash
Marcin Kuzminski <marcin@python-works.com>
parents:
2109
diff
changeset
|
144 def __init__(self, search_type, searcher, matcher, highlight_items, |
4c239e0dcbb7
fixes issue #454 Search results under Windows include preceeding backslash
Marcin Kuzminski <marcin@python-works.com>
parents:
2109
diff
changeset
|
145 repo_location): |
556
65b2f150beb7
Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents:
547
diff
changeset
|
146 self.search_type = search_type |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
147 self.searcher = searcher |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
148 self.matcher = matcher |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
149 self.highlight_items = highlight_items |
1995
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
150 self.fragment_size = 200 |
2319
4c239e0dcbb7
fixes issue #454 Search results under Windows include preceeding backslash
Marcin Kuzminski <marcin@python-works.com>
parents:
2109
diff
changeset
|
151 self.repo_location = repo_location |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
152 |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
153 @LazyProperty |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
154 def doc_ids(self): |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
155 docs_id = [] |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
156 while self.matcher.is_active(): |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
157 docnum = self.matcher.id() |
479
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
158 chunks = [offsets for offsets in self.get_chunks()] |
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
159 docs_id.append([docnum, chunks]) |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
160 self.matcher.next() |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
161 return docs_id |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
162 |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
163 def __str__(self): |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
164 return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids)) |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
165 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
166 def __repr__(self): |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
167 return self.__str__() |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
168 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
169 def __len__(self): |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
170 return len(self.doc_ids) |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
171 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
172 def __iter__(self): |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
173 """ |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
174 Allows Iteration over results,and lazy generate content |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
175 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
176 *Requires* implementation of ``__getitem__`` method. |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
177 """ |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
178 for docid in self.doc_ids: |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
179 yield self.get_full_content(docid) |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
180 |
1198
02a7f263a849
fixed issue with latest webhelpers pagination module
Marcin Kuzminski <marcin@python-works.com>
parents:
1183
diff
changeset
|
181 def __getitem__(self, key): |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
182 """ |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
183 Slicing of resultWrapper |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
184 """ |
1198
02a7f263a849
fixed issue with latest webhelpers pagination module
Marcin Kuzminski <marcin@python-works.com>
parents:
1183
diff
changeset
|
185 i, j = key.start, key.stop |
02a7f263a849
fixed issue with latest webhelpers pagination module
Marcin Kuzminski <marcin@python-works.com>
parents:
1183
diff
changeset
|
186 |
1995
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
187 slices = [] |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
188 for docid in self.doc_ids[i:j]: |
1995
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
189 slices.append(self.get_full_content(docid)) |
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
190 return slices |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
191 |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
192 def get_full_content(self, docid): |
479
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
193 res = self.searcher.stored_fields(docid[0]) |
2319
4c239e0dcbb7
fixes issue #454 Search results under Windows include preceeding backslash
Marcin Kuzminski <marcin@python-works.com>
parents:
2109
diff
changeset
|
194 full_repo_path = jn(self.repo_location, res['repository']) |
4c239e0dcbb7
fixes issue #454 Search results under Windows include preceeding backslash
Marcin Kuzminski <marcin@python-works.com>
parents:
2109
diff
changeset
|
195 f_path = res['path'].split(full_repo_path)[-1] |
4c239e0dcbb7
fixes issue #454 Search results under Windows include preceeding backslash
Marcin Kuzminski <marcin@python-works.com>
parents:
2109
diff
changeset
|
196 f_path = f_path.lstrip(os.sep) |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
197 |
479
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
198 content_short = self.get_short_content(res, docid[1]) |
1995
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
199 res.update({'content_short': content_short, |
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
200 'content_short_hl': self.highlight(content_short), |
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
201 'f_path': f_path}) |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
202 |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
203 return res |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
204 |
479
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
205 def get_short_content(self, res, chunks): |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
206 |
479
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
207 return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks]) |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
208 |
479
149940ba96d9
fixed search chunking bug and optimized chunk size
Marcin Kuzminski <marcin@python-works.com>
parents:
478
diff
changeset
|
209 def get_chunks(self): |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
210 """ |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
211 Smart function that implements chunking the content |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
212 but not overlap chunks so it doesn't highlight the same |
556
65b2f150beb7
Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents:
547
diff
changeset
|
213 close occurrences twice. |
1810
203af05539e0
implements #330 api method for listing nodes at particular revision
Marcin Kuzminski <marcin@python-works.com>
parents:
1540
diff
changeset
|
214 |
1302
f0e904651f21
moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map
Marcin Kuzminski <marcin@python-works.com>
parents:
1206
diff
changeset
|
215 :param matcher: |
f0e904651f21
moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map
Marcin Kuzminski <marcin@python-works.com>
parents:
1206
diff
changeset
|
216 :param size: |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
217 """ |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
218 memory = [(0, 0)] |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
219 for span in self.matcher.spans(): |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
220 start = span.startchar or 0 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
221 end = span.endchar or 0 |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
222 start_offseted = max(0, start - self.fragment_size) |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
223 end_offseted = end + self.fragment_size |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
224 |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
225 if start_offseted < memory[-1][1]: |
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
226 start_offseted = memory[-1][1] |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
227 memory.append((start_offseted, end_offseted,)) |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
228 yield (start_offseted, end_offseted,) |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
229 |
478
7010af6efde5
Reimplemented searching for speed on large files and added paging for search results
Marcin Kuzminski <marcin@python-works.com>
parents:
436
diff
changeset
|
230 def highlight(self, content, top=5): |
556
65b2f150beb7
Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents:
547
diff
changeset
|
231 if self.search_type != 'content': |
65b2f150beb7
Added searching for file names within the repository in rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents:
547
diff
changeset
|
232 return '' |
1995
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
233 hl = highlight( |
2389
324b838250c9
UI fixes for searching
Marcin Kuzminski <marcin@python-works.com>
parents:
2388
diff
changeset
|
234 text=content, |
1995
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
235 terms=self.highlight_items, |
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
236 analyzer=ANALYZER, |
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
237 fragmenter=FRAGMENTER, |
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
238 formatter=FORMATTER, |
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
239 top=top |
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
240 ) |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
241 return hl |