comparison pylons_app/controllers/search.py @ 406:b153a51b1d3b

Implemented search using whoosh. Still as experimental option.
author Marcin Kuzminski <marcin@python-works.com>
date Tue, 17 Aug 2010 23:15:36 +0200
parents
children 9a7ae16ff53e
comparison
equal deleted inserted replaced
405:bec06654d67b 406:b153a51b1d3b
1 #!/usr/bin/env python
2 # encoding: utf-8
3 # search controller for pylons
4 # Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
5 #
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; version 2
9 # of the License or (at your option) any later version of the license.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 # MA 02110-1301, USA.
20 """
21 Created on Aug 7, 2010
22 search controller for pylons
23 @author: marcink
24 """
25 from pylons import request, response, session, tmpl_context as c, url
26 from pylons.controllers.util import abort, redirect
27 from pylons_app.lib.auth import LoginRequired
28 from pylons_app.lib.base import BaseController, render
29 from pylons_app.lib.indexers import ANALYZER, IDX_LOCATION, SCHEMA
30 from webhelpers.html.builder import escape
31 from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter, \
32 ContextFragmenter
33 from whoosh.index import open_dir, EmptyIndexError
34 from whoosh.qparser import QueryParser, QueryParserError
35 from whoosh.query import Phrase
36 import logging
37 import traceback
38
39 log = logging.getLogger(__name__)
40
class SearchController(BaseController):
    """Pylons controller exposing the whoosh-backed search page."""

    @LoginRequired()
    def __before__(self):
        """Run the base controller's pre-request hook behind LoginRequired."""
        super(SearchController, self).__before__()

    def index(self):
        """Search the whoosh index for the ``q`` GET parameter and render it.

        Populates the template context with:

        * ``c.cur_query`` -- the raw query string (or None when absent),
        * ``c.runtime`` -- a status line: either the hit count and search
          time, or an error message for an invalid query / missing index,
        * ``c.formated_results`` -- one dict per hit holding the hit's
          fields plus ``content_short`` (highlighted excerpt) and
          ``f_path`` (path with the leading repository part stripped).

        Returns the rendered ``/search/search.html`` template.
        """
        c.formated_results = []
        c.runtime = ''
        c.cur_query = request.GET.get('q', None)

        if c.cur_query:
            # The query is parsed in lower case; the original casing is
            # kept in c.cur_query for the template.
            cur_query = c.cur_query.lower()
            search_items = set()
            searcher = None

            try:
                idx = open_dir(IDX_LOCATION, indexname='HG_INDEX')
                searcher = idx.searcher()

                qp = QueryParser("content", schema=SCHEMA)
                try:
                    query = qp.parse(unicode(cur_query))

                    # Collect the plain terms of the query; they are the
                    # words that get highlighted in the excerpts below.
                    if isinstance(query, Phrase):
                        search_items.update(query.words)
                    else:
                        for term in query.all_terms():
                            search_items.add(term[1])

                    log.debug(query)
                    log.debug(search_items)
                    results = searcher.search(query)
                    c.runtime = '%s results (%.3f seconds)' \
                                % (len(results), results.runtime)

                    analyzer = ANALYZER
                    formatter = HtmlFormatter('span',
                        between='\n<span class="break">...</span>\n')

                    # how the parts are split within the same text part
                    fragmenter = SimpleFragmenter(200)

                    for res in results:
                        d = {}
                        d.update(res)
                        # Escape the stored content before highlighting so
                        # that only the formatter's own markup is emitted
                        # as HTML.
                        hl = highlight(escape(res['content']), search_items,
                                       analyzer=analyzer,
                                       fragmenter=fragmenter,
                                       formatter=formatter,
                                       top=5)

                        # Strip everything up to and including the
                        # repository name. str.find() returns -1 when the
                        # name is absent; in that case keep the whole path
                        # instead of slicing from a bogus offset.
                        path = res['path']
                        repo_name = res['repository']
                        repo_pos = path.find(repo_name)
                        if repo_pos >= 0:
                            f_path = path[repo_pos + len(repo_name):]
                        else:
                            f_path = path
                        f_path = f_path.lstrip('/')

                        d.update({'content_short': hl,
                                  'f_path': f_path})
                        c.formated_results.append(d)

                except QueryParserError:
                    c.runtime = 'Invalid search query. Try quoting it.'

            except (EmptyIndexError, IOError):
                log.error(traceback.format_exc())
                log.error('Empty Index data')
                c.runtime = 'There is no index to search in. Please run whoosh indexer'
            finally:
                # Release the searcher on every path; it was previously
                # left open after each request.
                if searcher is not None:
                    searcher.close()

        # Return a rendered template
        return render('/search/search.html')