Mercurial > kallithea

--- a/kallithea/controllers/search.py	Mon Jan 23 02:17:38 2017 +0900
+++ b/kallithea/controllers/search.py	Mon Jan 23 02:17:38 2017 +0900
@@ -94,7 +94,9 @@

                 qp = QueryParser(search_type, schema=schema_defn)
                 if c.repo_name:
-                    cur_query = u'repository:%s %s' % (c.repo_name, cur_query)
+                    # use "repository_rawname:" instead of "repository:"
+                    # for case-sensitive matching
+                    cur_query = u'repository_rawname:%s %s' % (c.repo_name, cur_query)
                 try:
                     query = qp.parse(unicode(cur_query))
                     # extract words for highlight
--- a/kallithea/lib/indexers/__init__.py	Mon Jan 23 02:17:38 2017 +0900
+++ b/kallithea/lib/indexers/__init__.py	Mon Jan 23 02:17:38 2017 +0900
@@ -53,10 +53,20 @@
 #
 ICASEIDANALYZER = IDTokenizer() | LowercaseFilter()

+# CUSTOM ANALYZER raw-string
+#
+# This is useful to:
+# - avoid tokenization
+# - avoid removing "stop words" from text
+#
+IDANALYZER = IDTokenizer()
+
 #INDEX SCHEMA DEFINITION
 SCHEMA = Schema(
     fileid=ID(unique=True),
     owner=TEXT(),
+    # this field preserves case of repository name for exact matching
+    repository_rawname=TEXT(analyzer=IDANALYZER),
     repository=TEXT(stored=True, analyzer=ICASEIDANALYZER),
     path=TEXT(stored=True),
     content=FieldType(format=Characters(), analyzer=ANALYZER,
@@ -74,7 +84,10 @@
     date=NUMERIC(stored=True),
     last=BOOLEAN(),
     owner=TEXT(),
-    repository=ID(unique=True, stored=True),
+    # this field preserves case of repository name for exact matching
+    # and uniqueness in the index table
+    repository_rawname=ID(unique=True),
+    repository=ID(stored=True, analyzer=ICASEIDANALYZER),
     author=TEXT(stored=True),
     message=FieldType(format=Characters(), analyzer=ANALYZER,
                       scorable=True, stored=True),
--- a/kallithea/lib/indexers/daemon.py	Mon Jan 23 02:17:38 2017 +0900
+++ b/kallithea/lib/indexers/daemon.py	Mon Jan 23 02:17:38 2017 +0900
@@ -203,6 +203,7 @@
         writer.add_document(
             fileid=p,
             owner=unicode(repo.contact),
+            repository_rawname=repo.name_unicode,
             repository=safe_unicode(repo_name),
             path=p,
             content=u_content,
@@ -241,6 +242,7 @@
                 raw_id=unicode(cs.raw_id),
                 owner=unicode(repo.contact),
                 date=cs._timestamp,
+                repository_rawname=repo.name_unicode,
                 repository=safe_unicode(repo_name),
                 author=cs.author,
                 message=cs.message,
--- a/kallithea/tests/functional/test_search_indexing.py	Mon Jan 23 02:17:38 2017 +0900
+++ b/kallithea/tests/functional/test_search_indexing.py	Mon Jan 23 02:17:38 2017 +0900
@@ -126,9 +126,9 @@
         response.mustcontain('>%d results' % hit)

     @parametrize('searchtype,query,hit', [
-        ('content', 'this_should_be_unique_content', 2),
+        ('content', 'this_should_be_unique_content', 1),
         ('commit', 'this_should_be_unique_commit_log', 1),
-        ('path', 'this_should_be_unique_filename.txt', 2),
+        ('path', 'this_should_be_unique_filename.txt', 1),
     ])
     def test_repository_case_sensitivity(self, searchtype, query, hit):
         self.log_user()
@@ -142,7 +142,7 @@
         response = self.app.get(url(controller='search', action='index'),
                                 {'q': q, 'type': searchtype})

-        response.mustcontain('>%d results' % hit)
+        response.mustcontain('>%d results' % (hit * 2))

         # (2) on the other hand, searching under the specific
         # repository should return results only for that repository,