annotate pylons_app/lib/indexers/__init__.py @ 465:e01a85f9fc90

fixed initial whoosh indexer. Build full index on first run even with incremental flag
author Marcin Kuzminski <marcin@python-works.com>
date Wed, 08 Sep 2010 01:33:38 +0200
parents 28f19fa562df
children 7010af6efde5
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
406
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
1 import sys
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
2 import os
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
3 from pidlock import LockHeld, DaemonLock
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
4 import traceback
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
5
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
6 from os.path import dirname as dn
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
7 from os.path import join as jn
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
8
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
9 #extend sys.path so that pylons_app can be imported
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
10 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
11
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
12 from pylons_app.config.environment import load_environment
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
13 from pylons_app.model.hg_model import HgModel
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
14 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
15 from whoosh.fields import TEXT, ID, STORED, Schema
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
16 from whoosh.index import create_in, open_dir
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
17 from shutil import rmtree
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
18
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
19 #LOCATION WHERE WE KEEP THE INDEX
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
20 IDX_LOCATION = jn(dn(dn(dn(dn(os.path.abspath(__file__))))), 'data', 'index')
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
21
436
28f19fa562df updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents: 406
diff changeset
22 #EXTENSIONS WE WANT TO INDEX CONTENT OF
28f19fa562df updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents: 406
diff changeset
23 INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c',
28f19fa562df updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents: 406
diff changeset
24 'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl', 'h',
28f19fa562df updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents: 406
diff changeset
25 'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp',
28f19fa562df updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents: 406
diff changeset
26 'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3',
28f19fa562df updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents: 406
diff changeset
27 'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql',
28f19fa562df updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents: 406
diff changeset
28 'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml','xsl','xslt',
28f19fa562df updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents: 406
diff changeset
29 'yaws']
406
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
30
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
31 #CUSTOM ANALYZER wordsplit + lowercase filter
436
28f19fa562df updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents: 406
diff changeset
32 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
406
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
33
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
34 #INDEX SCHEMA DEFINITION
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
35 SCHEMA = Schema(owner=TEXT(),
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
36 repository=TEXT(stored=True),
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
37 path=ID(stored=True, unique=True),
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
38 content=TEXT(stored=True, analyzer=ANALYZER),
436
28f19fa562df updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents: 406
diff changeset
39 modtime=STORED(),extension=TEXT(stored=True))
406
b153a51b1d3b Implemented search using whoosh. Still as experimental option.
Marcin Kuzminski <marcin@python-works.com>
parents:
diff changeset
40
436
28f19fa562df updated config files,
Marcin Kuzminski <marcin@python-works.com>
parents: 406
diff changeset
41 IDX_NAME = 'HG_INDEX'