changeset 6472:e6224a7c3d4e

tests: introduce more test coverage of whoosh filtering. This has been extracted from other changesets by Mads Kiilerich to establish a test baseline, so we can clearly see what the following fixes are fixing. Some of these tests will thus demonstrate bad behaviour, which will be fixed later.
author FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
date Mon, 23 Jan 2017 02:17:38 +0900
parents 8b4c09e740a7
children 73e3599971da
files kallithea/tests/fixtures/journal_dump.csv kallithea/tests/functional/test_admin.py kallithea/tests/functional/test_search_indexing.py
diffstat 3 files changed, 262 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/tests/fixtures/journal_dump.csv	Mon Jan 23 02:17:38 2017 +0900
+++ b/kallithea/tests/fixtures/journal_dump.csv	Mon Jan 23 02:17:38 2017 +0900
@@ -2033,3 +2033,5 @@
 2097,5,natosha.bard,414,natosha-repo1-fork,"",user_created_fork:natosha-repo1-fork,2012-12-05 16:47:24.113866
 2098,5,natosha.bard,414,natosha-repo1-fork,62.116.219.97,"push:a25c825fd81d069596d614efcf92505aed46227a,ac513595518923aca8b39f0a1c33c4c6e0f9d83a,d395e22e8e16373a1fffbe66b322581d69a7db17",2012-12-05 16:47:44.701535
 2099,2,admin,38,code-review-test,"",user_commented_pull_request:73,2012-12-05 17:23:18.059481
+2100,390,this-is-it,415,this,"",user_created_repo,2017-01-22 00:00:00.000000
+2101,390,this-is-it,416,this/is-it,"",user_created_repo,2017-01-22 00:00:01.000000
--- a/kallithea/tests/functional/test_admin.py	Mon Jan 23 02:17:38 2017 +0900
+++ b/kallithea/tests/functional/test_admin.py	Mon Jan 23 02:17:38 2017 +0900
@@ -56,94 +56,140 @@
     def test_filter_all_entries(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',))
-        response.mustcontain('2034 Entries')
+        response.mustcontain(' 2036 Entries')
 
     def test_filter_journal_filter_exact_match_on_repository(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='repository:xxx'))
-        response.mustcontain('3 Entries')
+        response.mustcontain(' 3 Entries')
 
     def test_filter_journal_filter_exact_match_on_repository_CamelCase(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='repository:XxX'))
-        response.mustcontain('3 Entries')
+        response.mustcontain(' 3 Entries')
 
     def test_filter_journal_filter_wildcard_on_repository(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='repository:*test*'))
-        response.mustcontain('862 Entries')
+        response.mustcontain(' 862 Entries')
 
     def test_filter_journal_filter_prefix_on_repository(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='repository:test*'))
-        response.mustcontain('257 Entries')
+        response.mustcontain(' 257 Entries')
 
     def test_filter_journal_filter_prefix_on_repository_CamelCase(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='repository:Test*'))
-        response.mustcontain('257 Entries')
+        response.mustcontain(' 257 Entries')
 
     def test_filter_journal_filter_prefix_on_repository_and_user(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='repository:test* AND username:demo'))
-        response.mustcontain('130 Entries')
+        response.mustcontain(' 130 Entries')
 
     def test_filter_journal_filter_prefix_on_repository_or_other_repo(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='repository:test* OR repository:xxx'))
-        response.mustcontain('260 Entries')  # 257 + 3
+        response.mustcontain(' 260 Entries')  # 257 + 3
 
     def test_filter_journal_filter_exact_match_on_username(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='username:demo'))
-        response.mustcontain('1087 Entries')
+        response.mustcontain(' 1087 Entries')
 
     def test_filter_journal_filter_exact_match_on_username_camelCase(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='username:DemO'))
-        response.mustcontain('1087 Entries')
+        response.mustcontain(' 1087 Entries')
 
     def test_filter_journal_filter_wildcard_on_username(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='username:*test*'))
-        response.mustcontain('100 Entries')
+        response.mustcontain(' 100 Entries')
 
     def test_filter_journal_filter_prefix_on_username(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='username:demo*'))
-        response.mustcontain('1101 Entries')
+        response.mustcontain(' 1101 Entries')
 
     def test_filter_journal_filter_prefix_on_user_or_other_user(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='username:demo OR username:volcan'))
-        response.mustcontain('1095 Entries')  # 1087 + 8
+        response.mustcontain(' 1095 Entries')  # 1087 + 8
 
     def test_filter_journal_filter_wildcard_on_action(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='action:*pull_request*'))
-        response.mustcontain('187 Entries')
+        response.mustcontain(' 187 Entries')
 
     def test_filter_journal_filter_on_date(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='date:20121010'))
-        response.mustcontain('47 Entries')
+        response.mustcontain(' 47 Entries')
 
     def test_filter_journal_filter_on_date_2(self):
         self.log_user()
         response = self.app.get(url(controller='admin/admin', action='index',
                                     filter='date:20121020'))
-        response.mustcontain('17 Entries')
+        response.mustcontain(' 17 Entries')
+
+    @parametrize('filter,hit', [
+        #### "repository:" filtering
+        # "/" is used for grouping
+        ('repository:group/test', 0),
+        # "-" is often used for "-fork"
+        ('repository:fork-test1', 0),
+        # using "stop words"
+        ('repository:this', 2036),
+        ('repository:this/is-it', 2036),
+
+        ## additional tests to quickly find out regression in the future
+        ## (and check case-insensitive search, too)
+        # non-alphanumeric characters "." and "_"
+        ('repository:TESTIES1.2.3', 4),
+        ('repository:test_git_repo', 2),
+        # combination with wildcard "*"
+        ('repository:GROUP/*', 182),
+        ('repository:*/test', 7),
+        ('repository:fork-*', 273),
+        ('repository:*-Test1', 5),
+
+        #### "username:" filtering
+        # "-" is valid character
+        ('username:peso-xxx', 0),
+        # using "stop words"
+        ('username:this-is-it', 2036),
+
+        ## additional tests to quickly find out regression in the future
+        ## (and check case-insensitive search, too)
+        # non-alphanumeric characters "." and "_"
+        ('username:ADMIN_xanroot', 6),
+        ('username:robert.Zaremba', 3),
+        # combination with wildcard "*"
+        ('username:THIS-*', 2),
+        ('username:*-IT', 2),
+    ])
+    def test_filter_journal_filter_tokenization(self, filter, hit):
+        self.log_user()
+
+        response = self.app.get(url(controller='admin/admin', action='index',
+                                    filter=filter))
+        if hit != 1:
+            response.mustcontain(' %s Entries' % hit)
+        else:
+            response.mustcontain(' 1 Entry')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kallithea/tests/functional/test_search_indexing.py	Mon Jan 23 02:17:38 2017 +0900
@@ -0,0 +1,198 @@
+import mock
+
+from kallithea import CONFIG
+from kallithea.config.conf import INDEX_FILENAMES
+from kallithea.model.meta import Session
+from kallithea.model.repo import RepoModel
+from kallithea.model.repo_group import RepoGroupModel
+from kallithea.tests.base import *
+from kallithea.tests.fixture import create_test_index, Fixture
+
+fixture = Fixture()
+
+def init_indexing_test(repo):
+    prev = fixture.commit_change(repo.repo_name,
+                                 filename='this_should_be_unique_filename.txt',
+                                 content='this_should_be_unique_content\n',
+                                 message='this_should_be_unique_commit_log',
+                                 vcs_type='hg',
+                                 newfile=True)
+
+def init_stopword_test(repo):
+    prev = fixture.commit_change(repo.repo_name,
+                                 filename='this/is/it',
+                                 content='def test\n',
+                                 message='bother to ask where - in folder',
+                                 vcs_type='hg',
+                                 newfile=True)
+    prev = fixture.commit_change(repo.repo_name,
+                                 filename='join.us',
+                                 content='def test\n',
+                                 message='bother to ask where - top level',
+                                 author='this is it <this-is-it@foo.bar.com>',
+                                 vcs_type='hg',
+                                 parent=prev,
+                                 newfile=True)
+
+repos = [
+    # reponame,              init func or fork base, groupname
+    (u'indexing_test',       init_indexing_test,     None),
+    (u'indexing_test-fork',  u'indexing_test',       None),
+    (u'group/indexing_test', u'indexing_test',       u'group'),
+    (u'this-is-it',          u'indexing_test',       None),
+    (u'indexing_test-foo',   u'indexing_test',       None),
+    (u'indexing_test-FOO',   u'indexing_test',       None),
+    (u'stopword_test',       init_stopword_test,     None),
+]
+
+# map: name => id
+repoids = {}
+groupids = {}
+
+def rebuild_index(full_index):
+    with mock.patch('kallithea.lib.indexers.daemon.log.debug',
+                    lambda *args, **kwargs: None):
+        # The more revisions the managed repositories have, the more
+        # memory is needed to capture "log.debug()" output from
+        # "indexers.daemon". This may cause unintended failures of
+        # subsequent tests if ENOMEM when forking "git" prevents the
+        # search index from being rebuilt.
+        # Therefore, "log.debug()" is disabled regardless of logging
+        # level while rebuilding the index.
+        # (FYI, ENOMEM occurs when forking "git" with python 2.7.3,
+        # Linux 3.2.78-1 x86_64, 3GB memory, and no ulimit
+        # configuration for memory)
+        create_test_index(TESTS_TMP_PATH, CONFIG, full_index=full_index)
+
+
+class TestSearchControllerIndexing(TestController):
+    @classmethod
+    def setup_class(cls):
+        for reponame, init_or_fork, groupname in repos:
+            if groupname and groupname not in groupids:
+                group = fixture.create_repo_group(groupname)
+                groupids[groupname] = group.group_id
+            if callable(init_or_fork):
+                repo = fixture.create_repo(reponame,
+                                           repo_group=groupname)
+                init_or_fork(repo)
+            else:
+                repo = fixture.create_fork(init_or_fork, reponame,
+                                           repo_group=groupname)
+            repoids[reponame] = repo.repo_id
+
+        # treat "it" as indexable filename
+        filenames_mock = list(INDEX_FILENAMES)
+        filenames_mock.append('it')
+        with mock.patch('kallithea.lib.indexers.daemon.INDEX_FILENAMES',
+                        filenames_mock):
+            rebuild_index(full_index=False) # only for newly added repos
+
+    @classmethod
+    def teardown_class(cls):
+        # delete in reverse order, so that fork destinations are deleted first
+        for reponame, init_or_fork, groupname in reversed(repos):
+            RepoModel().delete(repoids[reponame])
+
+        for reponame, init_or_fork, groupname in reversed(repos):
+            if groupname in groupids:
+                RepoGroupModel().delete(groupids.pop(groupname),
+                                        force_delete=True)
+
+        Session().commit()
+        Session.remove()
+
+        rebuild_index(full_index=True) # rebuild fully for subsequent tests
+
+    @parametrize('reponame', [
+        (u'indexing_test'),
+        (u'indexing_test-fork'),
+        (u'group/indexing_test'),
+        (u'this-is-it'),
+        (u'*-fork'),
+        (u'group/*'),
+    ])
+    @parametrize('searchtype,query,hit', [
+        #('content', 'this_should_be_unique_content', 1),
+        ('commit', 'this_should_be_unique_commit_log', 1),
+        #('path', 'this_should_be_unique_filename.txt', 1),
+    ])
+    def test_repository_tokenization(self, reponame, searchtype, query, hit):
+        self.log_user()
+
+        q = 'repository:%s %s' % (reponame, query)
+        response = self.app.get(url(controller='search', action='index'),
+                                {'q': q, 'type': searchtype})
+        response.mustcontain('>%d results' % hit)
+
+    @parametrize('searchtype,query,hit', [
+        ('content', 'this_should_be_unique_content', 2),
+        ('commit', 'this_should_be_unique_commit_log', 1),
+        ('path', 'this_should_be_unique_filename.txt', 2),
+    ])
+    def test_repository_case_sensitivity(self, searchtype, query, hit):
+        self.log_user()
+
+        lname = u'indexing_test-foo'
+        uname = u'indexing_test-FOO'
+
+        # (1) "repository:REPONAME" condition should match against
+        # repositories case-insensitively
+        q = 'repository:%s %s' % (lname, query)
+        response = self.app.get(url(controller='search', action='index'),
+                                {'q': q, 'type': searchtype})
+
+        response.mustcontain('>%d results' % hit)
+
+        # (2) on the other hand, searching under the specific
+        # repository should return results only for that repository,
+        # even if specified name matches against another repository
+        # case-insensitively.
+        response = self.app.get(url(controller='search', action='index',
+                                    repo_name=uname),
+                                {'q': query, 'type': searchtype})
+
+        response.mustcontain('>%d results' % hit)
+
+        # confirm that there is no matching against lower name repository
+        assert uname in response
+        #assert lname not in response
+
+    @parametrize('searchtype,query,hit', [
+        ('content', 'path:this/is/it def test', 37),
+        ('commit', 'added:this/is/it bother to ask where', 4),
+        # this condition matches the files below, because the
+        # "path:" condition is also applied to the "repository path".
+        # - "this/is/it" in "stopword_test" repo
+        # - "this_should_be_unique_filename.txt" in "this-is-it" repo
+        ('path', 'this/is/it', 0),
+
+        ('content', 'extension:us', 0),
+        ('path', 'extension:us', 0),
+    ])
+    def test_filename_stopword(self, searchtype, query, hit):
+        response = self.app.get(url(controller='search', action='index'),
+                                {'q': query, 'type': searchtype})
+
+        response.mustcontain('>%d results' % hit)
+
+    @parametrize('searchtype,query,hit', [
+        # matching against both 2 files
+        ('content', 'owner:"this is it"', 0),
+        ('content', 'owner:this-is-it', 0),
+        ('path', 'owner:"this is it"', 0),
+        ('path', 'owner:this-is-it', 0),
+
+        # matching against both 2 revisions
+        ('commit', 'owner:"this is it"', 0),
+        ('commit', 'owner:"this-is-it"', 0),
+
+        # matching against only 1 revision
+        ('commit', 'author:"this is it"', 0),
+        ('commit', 'author:"this-is-it"', 0),
+    ])
+    def test_mailaddr_stopword(self, searchtype, query, hit):
+        response = self.app.get(url(controller='search', action='index'),
+                                {'q': query, 'type': searchtype})
+
+        response.mustcontain('>%d results' % hit)