annotate pylons_app/lib/indexers/multiprocessing_indexer.py @ 497:fb0c3af6031b celery

Implemented locking for task, to prevent for running the same tasks, moved out pidlock library. Added dirsize display
author Marcin Kuzminski <marcin@python-works.com>
date Thu, 23 Sep 2010 01:08:33 +0200
parents b153a51b1d3b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
406
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
1 from multiprocessing import Process, Queue, cpu_count, Lock
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
2 import socket, sys
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
3 import time
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
4 import os
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
5 import sys
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
6 from os.path import dirname as dn
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
7 from multiprocessing.dummy import current_process
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
8 from shutil import rmtree
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
9
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
10 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
11
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
12 from pylons_app.model.hg_model import HgModel
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
13 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
14 from whoosh.fields import TEXT, ID, STORED, Schema
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
15 from whoosh.index import create_in, open_dir
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
16 from datetime import datetime
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
17 from multiprocessing.process import current_process
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
18 from multiprocessing import Array, Value
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
19
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
20 root = dn(dn(os.path.dirname(os.path.abspath(__file__))))
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
21 idx_location = os.path.join(root, 'data', 'index')
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
22 root_path = '/home/marcink/python_workspace_dirty/*'
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
23
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
24 exclude_extensions = ['pyc', 'mo', 'png', 'jpg', 'jpeg', 'gif', 'swf',
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
25 'dll', 'ttf', 'psd', 'svg', 'pdf', 'bmp', 'dll']
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
26
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
27 my_analyzer = RegexTokenizer() | LowercaseFilter()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
28 def scan_paths(root_location):
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
29 return HgModel.repo_scan('/', root_location, None, True)
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
30
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
31 def index_paths(root_dir):
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
32 index_paths_ = set()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
33 for path, dirs, files in os.walk(root_dir):
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
34 if path.find('.hg') == -1:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
35 #if path.find('.hg') == -1 and path.find('bel-epa') != -1:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
36 for f in files:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
37 index_paths_.add(os.path.join(path, f))
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
38
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
39 return index_paths_
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
40
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
41 def get_schema():
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
42 return Schema(owner=TEXT(),
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
43 repository=TEXT(stored=True),
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
44 path=ID(stored=True, unique=True),
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
45 content=TEXT(stored=True, analyzer=my_analyzer),
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
46 modtime=STORED())
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
47
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
48 def add_doc(writer, path, repo_name, contact):
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
49 """
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
50 Adding doc to writer
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
51 @param writer:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
52 @param path:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
53 @param repo:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
54 @param fname:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
55 """
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
56
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
57 #we don't won't to read excluded file extensions just index them
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
58 if path.split('/')[-1].split('.')[-1].lower() not in exclude_extensions:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
59 fobj = open(path, 'rb')
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
60 content = fobj.read()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
61 fobj.close()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
62 try:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
63 u_content = unicode(content)
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
64 except UnicodeDecodeError:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
65 #incase we have a decode error just represent as byte string
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
66 u_content = unicode(str(content).encode('string_escape'))
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
67 else:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
68 u_content = u''
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
69 writer.add_document(repository=u"%s" % repo_name,
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
70 owner=unicode(contact),
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
71 path=u"%s" % path,
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
72 content=u_content,
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
73 modtime=os.path.getmtime(path))
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
74
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
75
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
76 class MultiProcessIndexer(object):
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
77 """ multiprocessing whoosh indexer """
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
78
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
79 def __init__(self, idx, work_set=set(), nr_processes=cpu_count()):
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
80 q = Queue()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
81 l = Lock()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
82 work_set = work_set
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
83 writer = None
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
84 #writer = idx.writer()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
85
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
86 for q_task in work_set:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
87 q.put(q_task)
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
88
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
89 q.put('COMMIT')
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
90
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
91 #to stop all processes we have to put STOP to queue and
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
92 #break the loop for each process
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
93 for _ in xrange(nr_processes):
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
94 q.put('STOP')
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
95
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
96
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
97 for _ in xrange(nr_processes):
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
98 p = Process(target=self.work_func, args=(q, l, idx, writer))
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
99 p.start()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
100
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
101
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
102
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
103 def work_func(self, q, l, idx, writer):
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
104 """ worker class invoked by process """
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
105
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
106
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
107 writer = idx.writer()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
108
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
109 while True:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
110 q_task = q.get()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
111 proc = current_process()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
112
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
113 # if q_task == 'COMMIT':
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
114 # l.acquire()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
115 # sys.stdout.write('%s commiting and STOP\n' % proc._name)
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
116 # writer.commit(merge=False)
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
117 # l.release()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
118 # break
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
119 # l.acquire()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
120 # writer = idx.writer()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
121 # l.release()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
122
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
123 if q_task == 'STOP':
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
124 sys.stdout.write('%s STOP\n' % proc._name)
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
125 break
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
126
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
127 if q_task != 'COMMIT':
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
128 l.acquire()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
129
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
130 sys.stdout.write(' >> %s %s %s @ ' % q_task)
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
131 sys.stdout.write(' %s \n' % proc._name)
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
132
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
133 l.release()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
134 add_doc(writer, q_task[0], q_task[1], q_task[2])
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
135
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
136 l.acquire()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
137 writer.commit(merge=True)
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
138 l.release()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
139
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
140
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
141 if __name__ == "__main__":
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
142 #build queue
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
143 do = True if len(sys.argv) > 1 else False
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
144 q_tasks = []
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
145
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
146 if os.path.exists(idx_location):
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
147 rmtree(idx_location)
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
148
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
149 if not os.path.exists(idx_location):
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
150 os.mkdir(idx_location)
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
151
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
152 idx = create_in(idx_location, get_schema() , indexname='HG_INDEX')
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
153
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
154
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
155 if do:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
156 sys.stdout.write('Building queue...')
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
157 for cnt, repo in enumerate(scan_paths(root_path).values()):
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
158 if repo.name != 'evoice_py':
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
159 continue
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
160 q_tasks.extend([(idx_path, repo.name, repo.contact) for idx_path in index_paths(repo.path)])
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
161 if cnt == 4:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
162 break
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
163
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
164 sys.stdout.write('done\n')
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
165
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
166 mpi = MultiProcessIndexer(idx, q_tasks)
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
167
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
168
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
169 else:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
170 print 'checking index'
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
171 reader = idx.reader()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
172 all = reader.all_stored_fields()
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
173 #print all
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
174 for fields in all:
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
175 print fields['path']
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
176