comparison pylons_app/lib/indexers/__init__.py @ 406:b153a51b1d3b

Implemented search using whoosh. Still as experimental option.
author Marcin Kuzminski <marcin@python-works.com>
date Tue, 17 Aug 2010 23:15:36 +0200
parents
children 28f19fa562df
comparison
equal deleted inserted replaced
405:bec06654d67b 406:b153a51b1d3b
import sys
import os
from pidlock import LockHeld, DaemonLock
import traceback

from os.path import dirname as dn
from os.path import join as jn

# Make the ``pylons_app`` imports below resolvable. This file lives in
# pylons_app/lib/indexers/, so three levels up is the ``pylons_app`` package
# directory itself, while ``import pylons_app`` needs the directory that
# *contains* the package (four levels up) on sys.path. The original
# three-level entry is kept so anything relying on it keeps working.
sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
sys.path.append(dn(dn(dn(dn(os.path.realpath(__file__))))))

from pylons_app.config.environment import load_environment
from pylons_app.model.hg_model import HgModel
from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
from whoosh.fields import TEXT, ID, STORED, Schema
from whoosh.index import create_in, open_dir
from shutil import rmtree

# Location where the search index is kept: the ``data/index`` directory under
# the path four directory levels above this file.
IDX_LOCATION = jn(dn(dn(dn(dn(os.path.abspath(__file__))))), 'data', 'index')

# File extensions (without the leading dot) whose content is not read.
# Each extension is listed exactly once.
EXCLUDE_EXTENSIONS = ['pyc', 'mo', 'png', 'jpg', 'jpeg', 'gif', 'swf',
                      'dll', 'ttf', 'psd', 'svg', 'pdf', 'bmp']

# Custom analyzer: split the text into tokens with RegexTokenizer (default
# settings), then lowercase every token. StopFilter is not part of this chain,
# so no stop words are removed here.
ANALYZER = RegexTokenizer() | LowercaseFilter()

# Index schema definition - one document per indexed file.
#   owner      - TEXT field, not stored
#   repository - TEXT field, stored with the document
#   path       - ID field, stored and declared unique
#   content    - TEXT field, stored, analyzed with ANALYZER above
#   modtime    - STORED field
SCHEMA = Schema(owner=TEXT(),
                repository=TEXT(stored=True),
                path=ID(stored=True, unique=True),
                content=TEXT(stored=True, analyzer=ANALYZER),
                modtime=STORED())

# Name of the index; presumably passed as the index name to
# create_in()/open_dir() by the indexing code - confirm against callers.
IDX_NAME = 'HG_INDEX'