Mercurial > kallithea

--- a/kallithea/lib/indexers/__init__.py	Mon Jan 23 02:17:38 2017 +0900
+++ b/kallithea/lib/indexers/__init__.py	Mon Jan 23 02:17:38 2017 +0900
@@ -61,6 +61,14 @@
 #
 IDANALYZER = IDTokenizer()

+# CUSTOM ANALYZER wordsplit + lowercase filter, for pathname-like text
+#
+# This is useful to:
+# - avoid removing "stop words" from text
+# - search case-insensitively
+#
+PATHANALYZER = RegexTokenizer() | LowercaseFilter()
+
 #INDEX SCHEMA DEFINITION
 SCHEMA = Schema(
     fileid=ID(unique=True),
@@ -68,11 +76,11 @@
     # this field preserves case of repository name for exact matching
     repository_rawname=TEXT(analyzer=IDANALYZER),
     repository=TEXT(stored=True, analyzer=ICASEIDANALYZER),
-    path=TEXT(stored=True),
+    path=TEXT(stored=True, analyzer=PATHANALYZER),
     content=FieldType(format=Characters(), analyzer=ANALYZER,
                       scorable=True, stored=True),
     modtime=STORED(),
-    extension=TEXT(stored=True)
+    extension=TEXT(stored=True, analyzer=PATHANALYZER)
 )

 IDX_NAME = 'HG_INDEX'
@@ -92,9 +100,9 @@
     message=FieldType(format=Characters(), analyzer=ANALYZER,
                       scorable=True, stored=True),
     parents=TEXT(),
-    added=TEXT(),
-    removed=TEXT(),
-    changed=TEXT(),
+    added=TEXT(analyzer=PATHANALYZER),
+    removed=TEXT(analyzer=PATHANALYZER),
+    changed=TEXT(analyzer=PATHANALYZER),
 )

 CHGSET_IDX_NAME = 'CHGSET_INDEX'
--- a/kallithea/tests/functional/test_search_indexing.py	Mon Jan 23 02:17:38 2017 +0900
+++ b/kallithea/tests/functional/test_search_indexing.py	Mon Jan 23 02:17:38 2017 +0900
@@ -156,19 +156,19 @@

         # confirm that there is no matching against lower name repository
         assert uname in response
-        #assert lname not in response
+        assert lname not in response

     @parametrize('searchtype,query,hit', [
-        ('content', 'path:this/is/it def test', 37),
-        ('commit', 'added:this/is/it bother to ask where', 4),
+        ('content', 'path:this/is/it def test', 1),
+        ('commit', 'added:this/is/it bother to ask where', 1),
         # this condition matches against files below, because
         # "path:" condition is also applied on "repository path".
         # - "this/is/it" in "stopword_test" repo
         # - "this_should_be_unique_filename.txt" in "this-is-it" repo
-        ('path', 'this/is/it', 0),
+        ('path', 'this/is/it', 2),

-        ('content', 'extension:us', 0),
-        ('path', 'extension:us', 0),
+        ('content', 'extension:us', 1),
+        ('path', 'extension:us', 1),
     ])
     def test_filename_stopword(self, searchtype, query, hit):
         response = self.app.get(url(controller='search', action='index'),