Mercurial > kallithea
changeset 6472:e6224a7c3d4e
tests: introduce more test coverage of whoosh filtering
This has been extracted from other changesets by Mads Kiilerich to establish a
test baseline, so that we can clearly see what the following fixes actually fix.
Some of these tests will thus demonstrate bad behaviour, which will be fixed
later.
author | FUJIWARA Katsunori <foozy@lares.dti.ne.jp> |
---|---|
date | Mon, 23 Jan 2017 02:17:38 +0900 |
parents | 8b4c09e740a7 |
children | 73e3599971da |
files | kallithea/tests/fixtures/journal_dump.csv kallithea/tests/functional/test_admin.py kallithea/tests/functional/test_search_indexing.py |
diffstat | 3 files changed, 262 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- a/kallithea/tests/fixtures/journal_dump.csv Mon Jan 23 02:17:38 2017 +0900 +++ b/kallithea/tests/fixtures/journal_dump.csv Mon Jan 23 02:17:38 2017 +0900 @@ -2033,3 +2033,5 @@ 2097,5,natosha.bard,414,natosha-repo1-fork,"",user_created_fork:natosha-repo1-fork,2012-12-05 16:47:24.113866 2098,5,natosha.bard,414,natosha-repo1-fork,62.116.219.97,"push:a25c825fd81d069596d614efcf92505aed46227a,ac513595518923aca8b39f0a1c33c4c6e0f9d83a,d395e22e8e16373a1fffbe66b322581d69a7db17",2012-12-05 16:47:44.701535 2099,2,admin,38,code-review-test,"",user_commented_pull_request:73,2012-12-05 17:23:18.059481 +2100,390,this-is-it,415,this,"",user_created_repo,2017-01-22 00:00:00.000000 +2101,390,this-is-it,416,this/is-it,"",user_created_repo,2017-01-22 00:00:01.000000
--- a/kallithea/tests/functional/test_admin.py Mon Jan 23 02:17:38 2017 +0900 +++ b/kallithea/tests/functional/test_admin.py Mon Jan 23 02:17:38 2017 +0900 @@ -56,94 +56,140 @@ def test_filter_all_entries(self): self.log_user() response = self.app.get(url(controller='admin/admin', action='index',)) - response.mustcontain('2034 Entries') + response.mustcontain(' 2036 Entries') def test_filter_journal_filter_exact_match_on_repository(self): self.log_user() response = self.app.get(url(controller='admin/admin', action='index', filter='repository:xxx')) - response.mustcontain('3 Entries') + response.mustcontain(' 3 Entries') def test_filter_journal_filter_exact_match_on_repository_CamelCase(self): self.log_user() response = self.app.get(url(controller='admin/admin', action='index', filter='repository:XxX')) - response.mustcontain('3 Entries') + response.mustcontain(' 3 Entries') def test_filter_journal_filter_wildcard_on_repository(self): self.log_user() response = self.app.get(url(controller='admin/admin', action='index', filter='repository:*test*')) - response.mustcontain('862 Entries') + response.mustcontain(' 862 Entries') def test_filter_journal_filter_prefix_on_repository(self): self.log_user() response = self.app.get(url(controller='admin/admin', action='index', filter='repository:test*')) - response.mustcontain('257 Entries') + response.mustcontain(' 257 Entries') def test_filter_journal_filter_prefix_on_repository_CamelCase(self): self.log_user() response = self.app.get(url(controller='admin/admin', action='index', filter='repository:Test*')) - response.mustcontain('257 Entries') + response.mustcontain(' 257 Entries') def test_filter_journal_filter_prefix_on_repository_and_user(self): self.log_user() response = self.app.get(url(controller='admin/admin', action='index', filter='repository:test* AND username:demo')) - response.mustcontain('130 Entries') + response.mustcontain(' 130 Entries') def test_filter_journal_filter_prefix_on_repository_or_other_repo(self): 
self.log_user() response = self.app.get(url(controller='admin/admin', action='index', filter='repository:test* OR repository:xxx')) - response.mustcontain('260 Entries') # 257 + 3 + response.mustcontain(' 260 Entries') # 257 + 3 def test_filter_journal_filter_exact_match_on_username(self): self.log_user() response = self.app.get(url(controller='admin/admin', action='index', filter='username:demo')) - response.mustcontain('1087 Entries') + response.mustcontain(' 1087 Entries') def test_filter_journal_filter_exact_match_on_username_camelCase(self): self.log_user() response = self.app.get(url(controller='admin/admin', action='index', filter='username:DemO')) - response.mustcontain('1087 Entries') + response.mustcontain(' 1087 Entries') def test_filter_journal_filter_wildcard_on_username(self): self.log_user() response = self.app.get(url(controller='admin/admin', action='index', filter='username:*test*')) - response.mustcontain('100 Entries') + response.mustcontain(' 100 Entries') def test_filter_journal_filter_prefix_on_username(self): self.log_user() response = self.app.get(url(controller='admin/admin', action='index', filter='username:demo*')) - response.mustcontain('1101 Entries') + response.mustcontain(' 1101 Entries') def test_filter_journal_filter_prefix_on_user_or_other_user(self): self.log_user() response = self.app.get(url(controller='admin/admin', action='index', filter='username:demo OR username:volcan')) - response.mustcontain('1095 Entries') # 1087 + 8 + response.mustcontain(' 1095 Entries') # 1087 + 8 def test_filter_journal_filter_wildcard_on_action(self): self.log_user() response = self.app.get(url(controller='admin/admin', action='index', filter='action:*pull_request*')) - response.mustcontain('187 Entries') + response.mustcontain(' 187 Entries') def test_filter_journal_filter_on_date(self): self.log_user() response = self.app.get(url(controller='admin/admin', action='index', filter='date:20121010')) - response.mustcontain('47 Entries') + 
response.mustcontain(' 47 Entries') def test_filter_journal_filter_on_date_2(self): self.log_user() response = self.app.get(url(controller='admin/admin', action='index', filter='date:20121020')) - response.mustcontain('17 Entries') + response.mustcontain(' 17 Entries') + + @parametrize('filter,hit', [ + #### "repository:" filtering + # "/" is used for grouping + ('repository:group/test', 0), + # "-" is often used for "-fork" + ('repository:fork-test1', 0), + # using "stop words" + ('repository:this', 2036), + ('repository:this/is-it', 2036), + + ## additional tests to quickly find out regression in the future + ## (and check case-insensitive search, too) + # non-ascii character "." and "-" + ('repository:TESTIES1.2.3', 4), + ('repository:test_git_repo', 2), + # combination with wildcard "*" + ('repository:GROUP/*', 182), + ('repository:*/test', 7), + ('repository:fork-*', 273), + ('repository:*-Test1', 5), + + #### "username:" filtering + # "-" is valid character + ('username:peso-xxx', 0), + # using "stop words" + ('username:this-is-it', 2036), + + ## additional tests to quickly find out regression in the future + ## (and check case-insensitive search, too) + # non-ascii character "." and "-" + ('username:ADMIN_xanroot', 6), + ('username:robert.Zaremba', 3), + # combination with wildcard "*" + ('username:THIS-*', 2), + ('username:*-IT', 2), + ]) + def test_filter_journal_filter_tokenization(self, filter, hit): + self.log_user() + + response = self.app.get(url(controller='admin/admin', action='index', + filter=filter)) + if hit != 1: + response.mustcontain(' %s Entries' % hit) + else: + response.mustcontain(' 1 Entry')
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kallithea/tests/functional/test_search_indexing.py Mon Jan 23 02:17:38 2017 +0900 @@ -0,0 +1,198 @@ +import mock + +from kallithea import CONFIG +from kallithea.config.conf import INDEX_FILENAMES +from kallithea.model.meta import Session +from kallithea.model.repo import RepoModel +from kallithea.model.repo_group import RepoGroupModel +from kallithea.tests.base import * +from kallithea.tests.fixture import create_test_index, Fixture + +fixture = Fixture() + +def init_indexing_test(repo): + prev = fixture.commit_change(repo.repo_name, + filename='this_should_be_unique_filename.txt', + content='this_should_be_unique_content\n', + message='this_should_be_unique_commit_log', + vcs_type='hg', + newfile=True) + +def init_stopword_test(repo): + prev = fixture.commit_change(repo.repo_name, + filename='this/is/it', + content='def test\n', + message='bother to ask where - in folder', + vcs_type='hg', + newfile=True) + prev = fixture.commit_change(repo.repo_name, + filename='join.us', + content='def test\n', + message='bother to ask where - top level', + author='this is it <this-is-it@foo.bar.com>', + vcs_type='hg', + parent=prev, + newfile=True) + +repos = [ + # reponame, init func or fork base, groupname + (u'indexing_test', init_indexing_test, None), + (u'indexing_test-fork', u'indexing_test', None), + (u'group/indexing_test', u'indexing_test', u'group'), + (u'this-is-it', u'indexing_test', None), + (u'indexing_test-foo', u'indexing_test', None), + (u'indexing_test-FOO', u'indexing_test', None), + (u'stopword_test', init_stopword_test, None), +] + +# map: name => id +repoids = {} +groupids = {} + +def rebuild_index(full_index): + with mock.patch('kallithea.lib.indexers.daemon.log.debug', + lambda *args, **kwargs: None): + # The more revisions managed repositories have, the more + # memory capturing "log.debug()" output in "indexers.daemon" + # requires. 
This may cause unintentional failure of subsequent + # tests, if ENOMEM at forking "git" prevents from rebuilding + # index for search. + # Therefore, "log.debug()" is disabled regardless of logging + # level while rebuilding index. + # (FYI, ENOMEM occurs at forking "git" with python 2.7.3, + # Linux 3.2.78-1 x86_64, 3GB memory, and no ulimit + # configuration for memory) + create_test_index(TESTS_TMP_PATH, CONFIG, full_index=full_index) + + +class TestSearchControllerIndexing(TestController): + @classmethod + def setup_class(cls): + for reponame, init_or_fork, groupname in repos: + if groupname and groupname not in groupids: + group = fixture.create_repo_group(groupname) + groupids[groupname] = group.group_id + if callable(init_or_fork): + repo = fixture.create_repo(reponame, + repo_group=groupname) + init_or_fork(repo) + else: + repo = fixture.create_fork(init_or_fork, reponame, + repo_group=groupname) + repoids[reponame] = repo.repo_id + + # treat "it" as indexable filename + filenames_mock = list(INDEX_FILENAMES) + filenames_mock.append('it') + with mock.patch('kallithea.lib.indexers.daemon.INDEX_FILENAMES', + filenames_mock): + rebuild_index(full_index=False) # only for newly added repos + + @classmethod + def teardown_class(cls): + # delete in reversed order, to delete fork destination at first + for reponame, init_or_fork, groupname in reversed(repos): + RepoModel().delete(repoids[reponame]) + + for reponame, init_or_fork, groupname in reversed(repos): + if groupname in groupids: + RepoGroupModel().delete(groupids.pop(groupname), + force_delete=True) + + Session().commit() + Session.remove() + + rebuild_index(full_index=True) # rebuild fully for subsequent tests + + @parametrize('reponame', [ + (u'indexing_test'), + (u'indexing_test-fork'), + (u'group/indexing_test'), + (u'this-is-it'), + (u'*-fork'), + (u'group/*'), + ]) + @parametrize('searchtype,query,hit', [ + #('content', 'this_should_be_unique_content', 1), + ('commit', 
'this_should_be_unique_commit_log', 1), + #('path', 'this_should_be_unique_filename.txt', 1), + ]) + def test_repository_tokenization(self, reponame, searchtype, query, hit): + self.log_user() + + q = 'repository:%s %s' % (reponame, query) + response = self.app.get(url(controller='search', action='index'), + {'q': q, 'type': searchtype}) + response.mustcontain('>%d results' % hit) + + @parametrize('searchtype,query,hit', [ + ('content', 'this_should_be_unique_content', 2), + ('commit', 'this_should_be_unique_commit_log', 1), + ('path', 'this_should_be_unique_filename.txt', 2), + ]) + def test_repository_case_sensitivity(self, searchtype, query, hit): + self.log_user() + + lname = u'indexing_test-foo' + uname = u'indexing_test-FOO' + + # (1) "repository:REPONAME" condition should match against + # repositories case-insensitively + q = 'repository:%s %s' % (lname, query) + response = self.app.get(url(controller='search', action='index'), + {'q': q, 'type': searchtype}) + + response.mustcontain('>%d results' % hit) + + # (2) on the other hand, searching under the specific + # repository should return results only for that repository, + # even if specified name matches against another repository + # case-insensitively. + response = self.app.get(url(controller='search', action='index', + repo_name=uname), + {'q': query, 'type': searchtype}) + + response.mustcontain('>%d results' % hit) + + # confirm that there is no matching against lower name repository + assert uname in response + #assert lname not in response + + @parametrize('searchtype,query,hit', [ + ('content', 'path:this/is/it def test', 37), + ('commit', 'added:this/is/it bother to ask where', 4), + # this condition matches against files below, because + # "path:" condition is also applied on "repository path". 
+ # - "this/is/it" in "stopword_test" repo + # - "this_should_be_unique_filename.txt" in "this-is-it" repo + ('path', 'this/is/it', 0), + + ('content', 'extension:us', 0), + ('path', 'extension:us', 0), + ]) + def test_filename_stopword(self, searchtype, query, hit): + response = self.app.get(url(controller='search', action='index'), + {'q': query, 'type': searchtype}) + + response.mustcontain('>%d results' % hit) + + @parametrize('searchtype,query,hit', [ + # matching against both 2 files + ('content', 'owner:"this is it"', 0), + ('content', 'owner:this-is-it', 0), + ('path', 'owner:"this is it"', 0), + ('path', 'owner:this-is-it', 0), + + # matching against both 2 revisions + ('commit', 'owner:"this is it"', 0), + ('commit', 'owner:"this-is-it"', 0), + + # matching against only 1 revision + ('commit', 'author:"this is it"', 0), + ('commit', 'author:"this-is-it"', 0), + ]) + def test_mailaddr_stopword(self, searchtype, query, hit): + response = self.app.get(url(controller='search', action='index'), + {'q': query, 'type': searchtype}) + + response.mustcontain('>%d results' % hit)