Mercurial > kallithea
annotate rhodecode/lib/indexers/daemon.py @ 2913:bfffaa9aff58 beta
#589 search urlgenerator didn't properly escape special chars on url
author | Marcin Kuzminski <marcin@python-works.com> |
---|---|
date | Tue, 09 Oct 2012 19:39:26 +0200 |
parents | 2fa3c09f63e0 |
children | d998cc84cf72 b3c8a3a5ce5f |
rev | line source |
---|---|
885
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
1 # -*- coding: utf-8 -*- |
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
2 """ |
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
3 rhodecode.lib.indexers.daemon |
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
5 |
1377
78e5853df5c8
fixed daemon typos
Marcin Kuzminski <marcin@python-works.com>
parents:
1206
diff
changeset
|
6 A daemon that reads from the task table and runs tasks |
947
99850ac883d1
Fixed whoosh daemon, for depracated walk method
Marcin Kuzminski <marcin@python-works.com>
parents:
902
diff
changeset
|
7 |
885
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
8 :created_on: Jan 26, 2010 |
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
9 :author: marcink |
1824
89efedac4e6c
2012 copyrights
Marcin Kuzminski <marcin@python-works.com>
parents:
1711
diff
changeset
|
10 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com> |
885
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
11 :license: GPLv3, see COPYING for more details. |
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
12 """ |
1206
a671db5bdd58
fixed license issue #149
Marcin Kuzminski <marcin@python-works.com>
parents:
1183
diff
changeset
|
13 # This program is free software: you can redistribute it and/or modify |
a671db5bdd58
fixed license issue #149
Marcin Kuzminski <marcin@python-works.com>
parents:
1183
diff
changeset
|
14 # it under the terms of the GNU General Public License as published by |
a671db5bdd58
fixed license issue #149
Marcin Kuzminski <marcin@python-works.com>
parents:
1183
diff
changeset
|
15 # the Free Software Foundation, either version 3 of the License, or |
a671db5bdd58
fixed license issue #149
Marcin Kuzminski <marcin@python-works.com>
parents:
1183
diff
changeset
|
16 # (at your option) any later version. |
947
99850ac883d1
Fixed whoosh daemon, for depracated walk method
Marcin Kuzminski <marcin@python-works.com>
parents:
902
diff
changeset
|
17 # |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
18 # This program is distributed in the hope that it will be useful, |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
21 # GNU General Public License for more details. |
947
99850ac883d1
Fixed whoosh daemon, for depracated walk method
Marcin Kuzminski <marcin@python-works.com>
parents:
902
diff
changeset
|
22 # |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
23 # You should have received a copy of the GNU General Public License |
1206
a671db5bdd58
fixed license issue #149
Marcin Kuzminski <marcin@python-works.com>
parents:
1183
diff
changeset
|
24 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
2641
cfcd981d6679
import with_statment to make daemon.py python 2.5 compatible
Indra Talip <indra.talip@gmail.com>
parents:
2640
diff
changeset
|
25 from __future__ import with_statement |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
26 |
1154
36fe593dfe4b
simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function
Marcin Kuzminski <marcin@python-works.com>
parents:
1036
diff
changeset
|
27 import os |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
28 import sys |
1154
36fe593dfe4b
simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function
Marcin Kuzminski <marcin@python-works.com>
parents:
1036
diff
changeset
|
29 import logging |
885
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
30 import traceback |
1154
36fe593dfe4b
simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function
Marcin Kuzminski <marcin@python-works.com>
parents:
1036
diff
changeset
|
31 |
36fe593dfe4b
simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function
Marcin Kuzminski <marcin@python-works.com>
parents:
1036
diff
changeset
|
32 from shutil import rmtree |
36fe593dfe4b
simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function
Marcin Kuzminski <marcin@python-works.com>
parents:
1036
diff
changeset
|
33 from time import mktime |
36fe593dfe4b
simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function
Marcin Kuzminski <marcin@python-works.com>
parents:
1036
diff
changeset
|
34 |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
35 from os.path import dirname as dn |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
36 from os.path import join as jn |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
37 |
547
1e757ac98988
renamed project to rhodecode
Marcin Kuzminski <marcin@python-works.com>
parents:
497
diff
changeset
|
38 #to get the rhodecode import |
411
9b67cebe6609
some fixes to whoosh indexer daemon
Marcin Kuzminski <marcin@python-works.com>
parents:
407
diff
changeset
|
39 project_path = dn(dn(dn(dn(os.path.realpath(__file__))))) |
9b67cebe6609
some fixes to whoosh indexer daemon
Marcin Kuzminski <marcin@python-works.com>
parents:
407
diff
changeset
|
40 sys.path.append(project_path) |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
41 |
2109 | 42 from rhodecode.config.conf import INDEX_EXTENSIONS |
691
7486da5f0628
Refactor codes for scm model
Marcin Kuzminski <marcin@python-works.com>
parents:
683
diff
changeset
|
43 from rhodecode.model.scm import ScmModel |
2109 | 44 from rhodecode.lib.utils2 import safe_unicode |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
45 from rhodecode.lib.indexers import SCHEMA, IDX_NAME, CHGSETS_SCHEMA, \ |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
46 CHGSET_IDX_NAME |
411
9b67cebe6609
some fixes to whoosh indexer daemon
Marcin Kuzminski <marcin@python-works.com>
parents:
407
diff
changeset
|
47 |
2007
324ac367a4da
Added VCS into rhodecode core for faster and easier deployments of new versions
Marcin Kuzminski <marcin@python-works.com>
parents:
1995
diff
changeset
|
48 from rhodecode.lib.vcs.exceptions import ChangesetError, RepositoryError, \ |
1711
b369bec5d468
fixes issue with whoosh reindexing files that were removed or renamed
Marcin Kuzminski <marcin@python-works.com>
parents:
1451
diff
changeset
|
49 NodeDoesNotExistError |
560
3072935bdeed
rewrote whoosh indexing to run internal repository.walk() instead of filesystem.
Marcin Kuzminski <marcin@python-works.com>
parents:
557
diff
changeset
|
50 |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
51 from whoosh.index import create_in, open_dir, exists_in |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
52 from whoosh.query import * |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
53 from whoosh.qparser import QueryParser |
1154
36fe593dfe4b
simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function
Marcin Kuzminski <marcin@python-works.com>
parents:
1036
diff
changeset
|
54 |
2101 | 55 log = logging.getLogger('whoosh_indexer') |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
56 |
1995
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
57 |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
58 class WhooshIndexingDaemon(object): |
560
3072935bdeed
rewrote whoosh indexing to run internal repository.walk() instead of filesystem.
Marcin Kuzminski <marcin@python-works.com>
parents:
557
diff
changeset
|
59 """ |
2373
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
60 Daemon for atomic indexing jobs |
560
3072935bdeed
rewrote whoosh indexing to run internal repository.walk() instead of filesystem.
Marcin Kuzminski <marcin@python-works.com>
parents:
557
diff
changeset
|
61 """ |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
62 |
1995
b6c902d88472
bumbed whoosh to 2.3.X series
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
63 def __init__(self, indexname=IDX_NAME, index_location=None, |
2373
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
64 repo_location=None, sa=None, repo_list=None, |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
65 repo_update_list=None): |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
66 self.indexname = indexname |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
67 |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
68 self.index_location = index_location |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
69 if not index_location: |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
70 raise Exception('You have to provide index location') |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
71 |
411
9b67cebe6609
some fixes to whoosh indexer daemon
Marcin Kuzminski <marcin@python-works.com>
parents:
407
diff
changeset
|
72 self.repo_location = repo_location |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
73 if not repo_location: |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
74 raise Exception('You have to provide repositories location') |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
75 |
1036
405b80e4ccd5
Major refactoring, removed when possible calls to app globals.
Marcin Kuzminski <marcin@python-works.com>
parents:
947
diff
changeset
|
76 self.repo_paths = ScmModel(sa).repo_scan(self.repo_location) |
894
1fed3c9161bb
fixes #90 + docs update
Marcin Kuzminski <marcin@python-works.com>
parents:
886
diff
changeset
|
77 |
2373
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
78 #filter repo list |
894
1fed3c9161bb
fixes #90 + docs update
Marcin Kuzminski <marcin@python-works.com>
parents:
886
diff
changeset
|
79 if repo_list: |
2841
2fa3c09f63e0
fixed problems with re-indexing non-ascii names of repositories
Marcin Kuzminski <marcin@python-works.com>
parents:
2840
diff
changeset
|
80 #Fix non-ascii repo names to unicode |
2fa3c09f63e0
fixed problems with re-indexing non-ascii names of repositories
Marcin Kuzminski <marcin@python-works.com>
parents:
2840
diff
changeset
|
81 repo_list = map(safe_unicode, repo_list) |
2373
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
82 self.filtered_repo_paths = {} |
894
1fed3c9161bb
fixes #90 + docs update
Marcin Kuzminski <marcin@python-works.com>
parents:
886
diff
changeset
|
83 for repo_name, repo in self.repo_paths.items(): |
1fed3c9161bb
fixes #90 + docs update
Marcin Kuzminski <marcin@python-works.com>
parents:
886
diff
changeset
|
84 if repo_name in repo_list: |
2373
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
85 self.filtered_repo_paths[repo_name] = repo |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
86 |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
87 self.repo_paths = self.filtered_repo_paths |
894
1fed3c9161bb
fixes #90 + docs update
Marcin Kuzminski <marcin@python-works.com>
parents:
886
diff
changeset
|
88 |
2373
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
89 #filter update repo list |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
90 self.filtered_repo_update_paths = {} |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
91 if repo_update_list: |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
92 self.filtered_repo_update_paths = {} |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
93 for repo_name, repo in self.repo_paths.items(): |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
94 if repo_name in repo_update_list: |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
95 self.filtered_repo_update_paths[repo_name] = repo |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
96 self.repo_paths = self.filtered_repo_update_paths |
894
1fed3c9161bb
fixes #90 + docs update
Marcin Kuzminski <marcin@python-works.com>
parents:
886
diff
changeset
|
97 |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
98 self.initial = True |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
99 if not os.path.isdir(self.index_location): |
763
0dad296d2a57
extended trending languages to more entries, implemented new faster and "fancy"
Marcin Kuzminski <marcin@python-works.com>
parents:
691
diff
changeset
|
100 os.makedirs(self.index_location) |
465
e01a85f9fc90
fixed initial whoosh indexer. Build full index on first run even with incremental flag
Marcin Kuzminski <marcin@python-works.com>
parents:
452
diff
changeset
|
101 log.info('Cannot run incremental index since it does not' |
e01a85f9fc90
fixed initial whoosh indexer. Build full index on first run even with incremental flag
Marcin Kuzminski <marcin@python-works.com>
parents:
452
diff
changeset
|
102 ' yet exist running full build') |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
103 elif not exists_in(self.index_location, IDX_NAME): |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
104 log.info('Running full index build as the file content' |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
105 ' index does not exist') |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
106 elif not exists_in(self.index_location, CHGSET_IDX_NAME): |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
107 log.info('Running full index build as the changeset' |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
108 ' index does not exist') |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
109 else: |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
110 self.initial = False |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
111 |
561
5f3b967d9d10
fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.
Marcin Kuzminski <marcin@python-works.com>
parents:
560
diff
changeset
|
112 def get_paths(self, repo): |
2101 | 113 """ |
114 recursive walk in root dir and return a set of all paths in that dir | |
560
3072935bdeed
rewrote whoosh indexing to run internal repository.walk() instead of filesystem.
Marcin Kuzminski <marcin@python-works.com>
parents:
557
diff
changeset
|
115 based on repository walk function |
3072935bdeed
rewrote whoosh indexing to run internal repository.walk() instead of filesystem.
Marcin Kuzminski <marcin@python-works.com>
parents:
557
diff
changeset
|
116 """ |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
117 index_paths_ = set() |
567
80dc0a23edf7
fixed whoosh failure on new repository
Marcin Kuzminski <marcin@python-works.com>
parents:
561
diff
changeset
|
118 try: |
947
99850ac883d1
Fixed whoosh daemon, for depracated walk method
Marcin Kuzminski <marcin@python-works.com>
parents:
902
diff
changeset
|
119 tip = repo.get_changeset('tip') |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
120 for _topnode, _dirs, files in tip.walk('/'): |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
121 for f in files: |
561
5f3b967d9d10
fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.
Marcin Kuzminski <marcin@python-works.com>
parents:
560
diff
changeset
|
122 index_paths_.add(jn(repo.path, f.path)) |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
123 |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
124 except RepositoryError: |
885
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
125 log.debug(traceback.format_exc()) |
567
80dc0a23edf7
fixed whoosh failure on new repository
Marcin Kuzminski <marcin@python-works.com>
parents:
561
diff
changeset
|
126 pass |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
127 return index_paths_ |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
128 |
561
5f3b967d9d10
fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.
Marcin Kuzminski <marcin@python-works.com>
parents:
560
diff
changeset
|
129 def get_node(self, repo, path): |
5f3b967d9d10
fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.
Marcin Kuzminski <marcin@python-works.com>
parents:
560
diff
changeset
|
130 n_path = path[len(repo.path) + 1:] |
5f3b967d9d10
fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.
Marcin Kuzminski <marcin@python-works.com>
parents:
560
diff
changeset
|
131 node = repo.get_changeset().get_node(n_path) |
5f3b967d9d10
fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.
Marcin Kuzminski <marcin@python-works.com>
parents:
560
diff
changeset
|
132 return node |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
133 |
561
5f3b967d9d10
fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.
Marcin Kuzminski <marcin@python-works.com>
parents:
560
diff
changeset
|
134 def get_node_mtime(self, node): |
5f3b967d9d10
fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.
Marcin Kuzminski <marcin@python-works.com>
parents:
560
diff
changeset
|
135 return mktime(node.last_changeset.date.timetuple()) |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
136 |
1171
2ab211e0aecd
changes for #56
Marcin Kuzminski <marcin@python-works.com>
parents:
1154
diff
changeset
|
137 def add_doc(self, writer, path, repo, repo_name): |
2101 | 138 """ |
139 Adds a doc to the writer; this function itself fetches data from | |
140 the instance of vcs backend | |
141 """ | |
142 | |
561
5f3b967d9d10
fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.
Marcin Kuzminski <marcin@python-works.com>
parents:
560
diff
changeset
|
143 node = self.get_node(repo, path) |
2109 | 144 indexed = indexed_w_content = 0 |
2101 | 145 # we just index the content of chosen files, and skip binary files |
886
0736230c7d91
#92 removed content of binary files for whoosh indexer
Marcin Kuzminski <marcin@python-works.com>
parents:
885
diff
changeset
|
146 if node.extension in INDEX_EXTENSIONS and not node.is_binary: |
560
3072935bdeed
rewrote whoosh indexing to run internal repository.walk() instead of filesystem.
Marcin Kuzminski <marcin@python-works.com>
parents:
557
diff
changeset
|
147 u_content = node.content |
885
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
148 if not isinstance(u_content, unicode): |
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
149 log.warning(' >> %s Could not get this content as unicode ' |
2101 | 150 'replacing with empty content' % path) |
885
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
151 u_content = u'' |
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
152 else: |
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
153 log.debug(' >> %s [WITH CONTENT]' % path) |
2109 | 154 indexed_w_content += 1 |
885
94f7585af8a1
fixes to #92, updated changelog
Marcin Kuzminski <marcin@python-works.com>
parents:
777
diff
changeset
|
155 |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
156 else: |
436
28f19fa562df
updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents:
411
diff
changeset
|
157 log.debug(' >> %s' % path) |
2101 | 158 # just index file name without its content |
436
28f19fa562df
updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents:
411
diff
changeset
|
159 u_content = u'' |
2109 | 160 indexed += 1 |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
161 |
2388
a0ef98f2520b
#453 added ID field in whoosh SCHEMA that solves the issue of reindexing modified files
Marcin Kuzminski <marcin@python-works.com>
parents:
2373
diff
changeset
|
162 p = safe_unicode(path) |
2101 | 163 writer.add_document( |
2388
a0ef98f2520b
#453 added ID field in whoosh SCHEMA that solves the issue of reindexing modified files
Marcin Kuzminski <marcin@python-works.com>
parents:
2373
diff
changeset
|
164 fileid=p, |
2101 | 165 owner=unicode(repo.contact), |
166 repository=safe_unicode(repo_name), | |
2388
a0ef98f2520b
#453 added ID field in whoosh SCHEMA that solves the issue of reindexing modified files
Marcin Kuzminski <marcin@python-works.com>
parents:
2373
diff
changeset
|
167 path=p, |
2101 | 168 content=u_content, |
169 modtime=self.get_node_mtime(node), | |
170 extension=node.extension | |
171 ) | |
2109 | 172 return indexed, indexed_w_content |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
173 |
2643
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
174 def index_changesets(self, writer, repo_name, repo, start_rev=None): |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
175 """ |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
176 Add all changesets in the vcs repo starting at start_rev |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
177 to the index writer |
2643
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
178 |
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
179 :param writer: the whoosh index writer to add to |
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
180 :param repo_name: name of the repository from whence the |
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
181 changeset originates including the repository group |
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
182 :param repo: the vcs repository instance to index changesets for, |
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
183 the presumption is the repo has changesets to index |
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
184 :param start_rev=None: the full sha id to start indexing from |
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
185 if start_rev is None then index from the first changeset in |
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
186 the repo |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
187 """ |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
188 |
2643
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
189 if start_rev is None: |
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
190 start_rev = repo[0].raw_id |
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
191 |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
192 log.debug('indexing changesets in %s starting at rev: %s' % |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
193 (repo_name, start_rev)) |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
194 |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
195 indexed = 0 |
2643
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
196 for cs in repo.get_changesets(start=start_rev): |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
197 log.debug(' >> %s' % cs) |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
198 writer.add_document( |
2642
88b0e82bcba4
rename changeset index key to match raw_id rather than path for greater consistency
Indra Talip <indra.talip@gmail.com>
parents:
2641
diff
changeset
|
199 raw_id=unicode(cs.raw_id), |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
200 owner=unicode(repo.contact), |
2693
66c778b8cb54
Extended commit search schema with date of commit
Marcin Kuzminski <marcin@python-works.com>
parents:
2648
diff
changeset
|
201 date=cs._timestamp, |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
202 repository=safe_unicode(repo_name), |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
203 author=cs.author, |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
204 message=cs.message, |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
205 last=cs.last, |
2763
81624c8a1035
#548 Fixed issue with non-ascii paths in whoosh indexer
Marcin Kuzminski <marcin@python-works.com>
parents:
2693
diff
changeset
|
206 added=u' '.join([safe_unicode(node.path) for node in cs.added]).lower(), |
81624c8a1035
#548 Fixed issue with non-ascii paths in whoosh indexer
Marcin Kuzminski <marcin@python-works.com>
parents:
2693
diff
changeset
|
207 removed=u' '.join([safe_unicode(node.path) for node in cs.removed]).lower(), |
81624c8a1035
#548 Fixed issue with non-ascii paths in whoosh indexer
Marcin Kuzminski <marcin@python-works.com>
parents:
2693
diff
changeset
|
208 changed=u' '.join([safe_unicode(node.path) for node in cs.changed]).lower(), |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
209 parents=u' '.join([cs.raw_id for cs in cs.parents]), |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
210 ) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
211 indexed += 1 |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
212 |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
213 log.debug('indexed %d changesets for repo %s' % (indexed, repo_name)) |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
214 return indexed |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
215 |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
216 def index_files(self, file_idx_writer, repo_name, repo): |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
217 """ |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
218 Index files for given repo_name |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
219 |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
220 :param file_idx_writer: the whoosh index writer to add to |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
221 :param repo_name: name of the repository we're indexing |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
222 :param repo: instance of vcs repo |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
223 """ |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
224 i_cnt = iwc_cnt = 0 |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
225 log.debug('building index for [%s]' % repo.path) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
226 for idx_path in self.get_paths(repo): |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
227 i, iwc = self.add_doc(file_idx_writer, idx_path, repo, repo_name) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
228 i_cnt += i |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
229 iwc_cnt += iwc |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
230 |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
231 log.debug('added %s files %s with content for repo %s' % |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
232 (i_cnt + iwc_cnt, iwc_cnt, repo.path)) |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
233 return i_cnt, iwc_cnt |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
234 |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
235 def update_changeset_index(self): |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
236 idx = open_dir(self.index_location, indexname=CHGSET_IDX_NAME) |
2569
b98fd6fc67f9
Little better logging in whoosh indexer
Marcin Kuzminski <marcin@python-works.com>
parents:
2388
diff
changeset
|
237 |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
238 with idx.searcher() as searcher: |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
239 writer = idx.writer() |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
240 writer_is_dirty = False |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
241 try: |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
242 indexed_total = 0 |
2839
c0ddc86b4654
Fix possible exception about repo_name not defined, on whoosh indexer when using index-only option
Marcin Kuzminski <marcin@python-works.com>
parents:
2763
diff
changeset
|
243 repo_name = None |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
244 for repo_name, repo in self.repo_paths.items(): |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
245 # skip indexing if there aren't any revs in the repo |
2643
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
246 num_of_revs = len(repo) |
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
247 if num_of_revs < 1: |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
248 continue |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
249 |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
250 qp = QueryParser('repository', schema=CHGSETS_SCHEMA) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
251 q = qp.parse(u"last:t AND %s" % repo_name) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
252 |
2643
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
253 results = searcher.search(q) |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
254 |
2643
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
255 # default to scanning the entire repo |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
256 last_rev = 0 |
2643
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
257 start_id = None |
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
258 |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
259 if len(results) > 0: |
2643
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
260 # assuming that there is only one result, if not this |
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
261 # may require a full re-index. |
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
262 start_id = results[0]['raw_id'] |
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
263 last_rev = repo.get_changeset(revision=start_id).revision |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
264 |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
265 # there are new changesets to index or a new repo to index |
2643
2ad50c44b025
when indexing changesets use the raw_id to locate the point from
Indra Talip <indra.talip@gmail.com>
parents:
2642
diff
changeset
|
266 if last_rev == 0 or num_of_revs > last_rev + 1: |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
267 # delete the docs in the index for the previous |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
268 # last changeset(s) |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
269 for hit in results: |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
270 q = qp.parse(u"last:t AND %s AND raw_id:%s" % |
2642
88b0e82bcba4
rename changeset index key to match raw_id rather than path for greater consistency
Indra Talip <indra.talip@gmail.com>
parents:
2641
diff
changeset
|
271 (repo_name, hit['raw_id'])) |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
272 writer.delete_by_query(q) |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
273 |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
274 # index from the previous last changeset + all new ones |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
275 indexed_total += self.index_changesets(writer, |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
276 repo_name, repo, start_id) |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
277 writer_is_dirty = True |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
278 log.debug('indexed %s changesets for repo %s' % ( |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
279 indexed_total, repo_name) |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
280 ) |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
281 finally: |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
282 if writer_is_dirty: |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
283 log.debug('>> COMMITING CHANGES TO CHANGESET INDEX<<') |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
284 writer.commit(merge=True) |
2840
c7c5825299fe
fixed logging messages on whoosh indexer
Marcin Kuzminski <marcin@python-works.com>
parents:
2839
diff
changeset
|
285 log.debug('>>> FINISHED REBUILDING CHANGESET INDEX <<<') |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
286 else: |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
287 writer.cancel() |
2840
c7c5825299fe
fixed logging messages on whoosh indexer
Marcin Kuzminski <marcin@python-works.com>
parents:
2839
diff
changeset
|
288 log.debug('>> NOTHING TO COMMIT TO CHANGESET INDEX<<') |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
289 |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
290 def update_file_index(self): |
2373
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
291 log.debug((u'STARTING INCREMENTAL INDEXING UPDATE FOR EXTENSIONS %s ' |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
292 'AND REPOS %s') % (INDEX_EXTENSIONS, self.repo_paths.keys())) |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
293 |
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
294 idx = open_dir(self.index_location, indexname=self.indexname) |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
295 # The set of all paths in the index |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
296 indexed_paths = set() |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
297 # The set of all paths we need to re-index |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
298 to_index = set() |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
299 |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
300 writer = idx.writer() |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
301 writer_is_dirty = False |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
302 try: |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
303 with idx.reader() as reader: |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
304 |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
305 # Loop over the stored fields in the index |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
306 for fields in reader.all_stored_fields(): |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
307 indexed_path = fields['path'] |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
308 indexed_repo_path = fields['repository'] |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
309 indexed_paths.add(indexed_path) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
310 |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
311 if not indexed_repo_path in self.filtered_repo_update_paths: |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
312 continue |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
313 |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
314 repo = self.repo_paths[indexed_repo_path] |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
315 |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
316 try: |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
317 node = self.get_node(repo, indexed_path) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
318 # Check if this file was changed since it was indexed |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
319 indexed_time = fields['modtime'] |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
320 mtime = self.get_node_mtime(node) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
321 if mtime > indexed_time: |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
322 # The file has changed, delete it and add it to |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
323 # the list of files to reindex |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
324 log.debug( |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
325 'adding to reindex list %s mtime: %s vs %s' % ( |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
326 indexed_path, mtime, indexed_time) |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
327 ) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
328 writer.delete_by_term('fileid', indexed_path) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
329 writer_is_dirty = True |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
330 |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
331 to_index.add(indexed_path) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
332 except (ChangesetError, NodeDoesNotExistError): |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
333 # This file was deleted since it was indexed |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
334 log.debug('removing from index %s' % indexed_path) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
335 writer.delete_by_term('path', indexed_path) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
336 writer_is_dirty = True |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
337 |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
338 # Loop over the files in the filesystem |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
339 # Assume we have a function that gathers the filenames of the |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
340 # documents to be indexed |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
341 ri_cnt_total = 0 # indexed |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
342 riwc_cnt_total = 0 # indexed with content |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
343 for repo_name, repo in self.repo_paths.items(): |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
344 # skip indexing if there aren't any revisions |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
345 if len(repo) < 1: |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
346 continue |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
347 ri_cnt = 0 # indexed |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
348 riwc_cnt = 0 # indexed with content |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
349 for path in self.get_paths(repo): |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
350 path = safe_unicode(path) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
351 if path in to_index or path not in indexed_paths: |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
352 |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
353 # This is either a file that's changed, or a new file |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
354 # that wasn't indexed before. So index it! |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
355 i, iwc = self.add_doc(writer, path, repo, repo_name) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
356 writer_is_dirty = True |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
357 log.debug('re indexing %s' % path) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
358 ri_cnt += i |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
359 ri_cnt_total += 1 |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
360 riwc_cnt += iwc |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
361 riwc_cnt_total += iwc |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
362 log.debug('added %s files %s with content for repo %s' % ( |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
363 ri_cnt + riwc_cnt, riwc_cnt, repo.path) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
364 ) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
365 log.debug('indexed %s files in total and %s with content' % ( |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
366 ri_cnt_total, riwc_cnt_total) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
367 ) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
368 finally: |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
369 if writer_is_dirty: |
2840
c7c5825299fe
fixed logging messages on whoosh indexer
Marcin Kuzminski <marcin@python-works.com>
parents:
2839
diff
changeset
|
370 log.debug('>> COMMITING CHANGES TO FILE INDEX <<') |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
371 writer.commit(merge=True) |
2840
c7c5825299fe
fixed logging messages on whoosh indexer
Marcin Kuzminski <marcin@python-works.com>
parents:
2839
diff
changeset
|
372 log.debug('>>> FINISHED REBUILDING FILE INDEX <<<') |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
373 else: |
2840
c7c5825299fe
fixed logging messages on whoosh indexer
Marcin Kuzminski <marcin@python-works.com>
parents:
2839
diff
changeset
|
374 log.debug('>> NOTHING TO COMMIT TO FILE INDEX <<') |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
375 writer.cancel() |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
376 |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
377 def build_indexes(self): |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
378 if os.path.exists(self.index_location): |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
379 log.debug('removing previous index') |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
380 rmtree(self.index_location) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
381 |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
382 if not os.path.exists(self.index_location): |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
383 os.mkdir(self.index_location) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
384 |
2648
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
385 chgset_idx = create_in(self.index_location, CHGSETS_SCHEMA, |
0911cf6940af
little code cleanup
Marcin Kuzminski <marcin@python-works.com>
parents:
2643
diff
changeset
|
386 indexname=CHGSET_IDX_NAME) |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
387 chgset_idx_writer = chgset_idx.writer() |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
388 |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
389 file_idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
390 file_idx_writer = file_idx.writer() |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
391 log.debug('BUILDING INDEX FOR EXTENSIONS %s ' |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
392 'AND REPOS %s' % (INDEX_EXTENSIONS, self.repo_paths.keys())) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
393 |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
394 for repo_name, repo in self.repo_paths.items(): |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
395 # skip indexing if there aren't any revisions |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
396 if len(repo) < 1: |
2373
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
397 continue |
1828eb7fa688
#469 added --update-only option to whoosh to re-index only given list
Marcin Kuzminski <marcin@python-works.com>
parents:
2372
diff
changeset
|
398 |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
399 self.index_files(file_idx_writer, repo_name, repo) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
400 self.index_changesets(chgset_idx_writer, repo_name, repo) |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
401 |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
402 log.debug('>> COMMITING CHANGES <<') |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
403 file_idx_writer.commit(merge=True) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
404 chgset_idx_writer.commit(merge=True) |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
405 log.debug('>>> FINISHED BUILDING INDEX <<<') |
2388
a0ef98f2520b
#453 added ID field in whoosh SCHEMA that solves the issue of reindexing modified files
Marcin Kuzminski <marcin@python-works.com>
parents:
2373
diff
changeset
|
406 |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
407 def update_indexes(self): |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
408 self.update_file_index() |
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
409 self.update_changeset_index() |
631
05528ad948c4
Hacking for git support,and new faster repo scan
Marcin Kuzminski <marcin@python-works.com>
parents:
629
diff
changeset
|
410 |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
411 def run(self, full_index=False): |
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
412 """Run daemon""" |
465
e01a85f9fc90
fixed initial whoosh indexer. Build full index on first run even with incremental flag
Marcin Kuzminski <marcin@python-works.com>
parents:
452
diff
changeset
|
413 if full_index or self.initial: |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
414 self.build_indexes() |
406
b153a51b1d3b
Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
415 else: |
2640
5f21a9dcb09d
create an index for commit messages and the ability to search them and see results
Indra Talip <indra.talip@gmail.com>
parents:
2569
diff
changeset
|
416 self.update_indexes() |