changeset 6475:caef0be39948
search: make "repository:" condition work as expected
Before this revision, a "repository:foo" condition when searching for
"File contents" or "File names" showed files in the repositories below:
- foo
- foo/bar
- foo-bar
- and so on ...
The Whoosh library, which is used to parse text both for indexing and
for searching, does the following:
- treats almost all non-alphanumeric characters as delimiters, both
  when indexing search items and when parsing search conditions
- indexes each field of a search item by multiple values
For example, files in the "foo/bar" repository are indexed under both
"foo" and "bar" in the "repository" field. This tokenization makes the
"repository:foo" search condition match files in the "foo/bar"
repository, too.
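This splitting behavior can be sketched with the standard library alone
(an illustrative approximation of Whoosh's RegexTokenizer with the
pattern r"\w+" piped through LowercaseFilter, not Whoosh itself):

```python
import re

def tokenize(text):
    # Every non-alphanumeric character acts as a token boundary,
    # so both "/" and "-" split the repository name, and tokens
    # are lowercased afterwards.
    return [t.lower() for t in re.findall(r"\w+", text)]

print(tokenize("foo/bar"))  # ['foo', 'bar']
print(tokenize("foo-bar"))  # ['foo', 'bar']
```

Both repositories end up indexed under the term "foo", which is why
"repository:foo" matches files in all of them.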
In addition, using plain TEXT causes "stop words" in search conditions
to be unintentionally ignored. For example, "this", "a", "you", and so
on are dropped both at indexing time and at parsing time, because they
are considered too generic (from the point of view of generic text
search).
This issue can't be resolved by using ID instead of TEXT for the
"repository" field of SCHEMA, as previous revisions did for
JOURNAL_SCHEMA, because:
- highlighting file content requires SCHEMA to support the "positions"
  feature, but using ID instead of TEXT disables it
- using ID violates the current case-insensitive search policy,
  because it preserves the case of the text
To make the "repository:" condition work as expected, this revision
explicitly specifies an "analyzer" which:
- avoids tokenization
- matches case-insensitively
- avoids removing "stop words" from text
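The behavior of the new analyzer (IDTokenizer() | LowercaseFilter() in
the diff below) can be sketched in plain Python; this is an
approximation of what the Whoosh pipeline produces, not the real
implementation:

```python
def icase_id_analyze(text):
    # IDTokenizer emits the whole input as a single token;
    # LowercaseFilter then lowercases it. No splitting, no
    # stop-word removal.
    return [text.lower()]

print(icase_id_analyze("Foo/Bar"))  # ['foo/bar']
```

With this analyzer, "repository:foo" matches only the repository whose
whole name is "foo" (compared case-insensitively).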
This revision requires a full rebuild of the index tables, because the
indexing schema has changed.
By the way, the "repository:" condition when searching for "Commit
messages" uses CHGSETS_SCHEMA instead of SCHEMA. The former uses ID
for "repository", which:
- avoids the issues caused by tokenization and stop-word removal
- disables the "positions" feature of CHGSETS_SCHEMA
  But highlighting file content isn't needed when searching for
  "Commit messages", so this can be ignored.
- preserves the case of the text
  This violates the current case-insensitive search policy. This issue
  will be fixed in a subsequent revision, because fixing it isn't
  simple.
author   | FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
date     | Mon, 23 Jan 2017 02:17:38 +0900
parents  | 2ff913970025
children | 8b7c0ef62427
files    | kallithea/lib/indexers/__init__.py kallithea/tests/functional/test_search_indexing.py
diffstat | 2 files changed, 13 insertions(+), 4 deletions(-)
--- a/kallithea/lib/indexers/__init__.py	Mon Jan 23 02:17:38 2017 +0900
+++ b/kallithea/lib/indexers/__init__.py	Mon Jan 23 02:17:38 2017 +0900
@@ -33,7 +33,7 @@
 # Add location of top level folder to sys.path
 sys.path.append(dirname(dirname(dirname(os.path.realpath(__file__)))))
 
-from whoosh.analysis import RegexTokenizer, LowercaseFilter
+from whoosh.analysis import RegexTokenizer, LowercaseFilter, IDTokenizer
 from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType, DATETIME
 from whoosh.formats import Characters
 from whoosh.highlight import highlight as whoosh_highlight, HtmlFormatter, ContextFragmenter
@@ -44,11 +44,20 @@
 # CUSTOM ANALYZER wordsplit + lowercase filter
 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
 
+# CUSTOM ANALYZER raw-string + lowercase filter
+#
+# This is useful to:
+# - avoid tokenization
+# - avoid removing "stop words" from text
+# - search case-insensitively
+#
+ICASEIDANALYZER = IDTokenizer() | LowercaseFilter()
+
 #INDEX SCHEMA DEFINITION
 SCHEMA = Schema(
     fileid=ID(unique=True),
     owner=TEXT(),
-    repository=TEXT(stored=True),
+    repository=TEXT(stored=True, analyzer=ICASEIDANALYZER),
     path=TEXT(stored=True),
     content=FieldType(format=Characters(), analyzer=ANALYZER,
                       scorable=True, stored=True),
--- a/kallithea/tests/functional/test_search_indexing.py	Mon Jan 23 02:17:38 2017 +0900
+++ b/kallithea/tests/functional/test_search_indexing.py	Mon Jan 23 02:17:38 2017 +0900
@@ -113,9 +113,9 @@
         (u'group/*'),
     ])
     @parametrize('searchtype,query,hit', [
-        #('content', 'this_should_be_unique_content', 1),
+        ('content', 'this_should_be_unique_content', 1),
         ('commit', 'this_should_be_unique_commit_log', 1),
-        #('path', 'this_should_be_unique_filename.txt', 1),
+        ('path', 'this_should_be_unique_filename.txt', 1),
     ])
     def test_repository_tokenization(self, reponame, searchtype, query, hit):
         self.log_user()