changeset 8344:aec1b9c9ffe6

db: drop Repository CacheInvalidation The benefit of this functionality is questionable. Especially in bigger setups with multiple front-end instances all serving the same multitude of repositories, making the hit rate very low. And the overhead of storing cache invalidation data *in* the database is non-trivial. We preserve a small cache in Repository SA records, but should probably just in general know what we are doing and not ask for the same information multiple times in each request.
author Mads Kiilerich <mads@kiilerich.com>
date Sun, 20 Oct 2019 04:57:04 +0200
parents a67bcc6f9118
children f6be760c786c
files kallithea/config/routing.py kallithea/controllers/admin/repos.py kallithea/model/db.py kallithea/templates/admin/repos/repo_edit.html kallithea/templates/admin/repos/repo_edit_caches.html kallithea/tests/api/api_base.py kallithea/tests/other/test_vcs_operations.py
diffstat 7 files changed, 13 insertions(+), 261 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/config/routing.py	Sun Oct 20 22:06:26 2019 +0200
+++ b/kallithea/config/routing.py	Sun Oct 20 04:57:04 2019 +0200
@@ -563,13 +563,6 @@
                  controller='admin/repos', action="edit_advanced_fork",
                  conditions=dict(method=["POST"], function=check_repo))
 
-    rmap.connect("edit_repo_caches", "/{repo_name:.*?}/settings/caches",
-                 controller='admin/repos', action="edit_caches",
-                 conditions=dict(method=["GET"], function=check_repo))
-    rmap.connect("update_repo_caches", "/{repo_name:.*?}/settings/caches",
-                 controller='admin/repos', action="edit_caches",
-                 conditions=dict(method=["POST"], function=check_repo))
-
     rmap.connect("edit_repo_remote", "/{repo_name:.*?}/settings/remote",
                  controller='admin/repos', action="edit_remote",
                  conditions=dict(method=["GET"], function=check_repo))
--- a/kallithea/controllers/admin/repos.py	Sun Oct 20 22:06:26 2019 +0200
+++ b/kallithea/controllers/admin/repos.py	Sun Oct 20 04:57:04 2019 +0200
@@ -479,24 +479,6 @@
         raise HTTPFound(location=url('edit_repo_advanced', repo_name=repo_name))
 
     @HasRepoPermissionLevelDecorator('admin')
-    def edit_caches(self, repo_name):
-        c.repo_info = self._load_repo()
-        c.active = 'caches'
-        if request.POST:
-            try:
-                ScmModel().mark_for_invalidation(repo_name)
-                Session().commit()
-                h.flash(_('Cache invalidation successful'),
-                        category='success')
-            except Exception as e:
-                log.error(traceback.format_exc())
-                h.flash(_('An error occurred during cache invalidation'),
-                        category='error')
-
-            raise HTTPFound(location=url('edit_repo_caches', repo_name=c.repo_name))
-        return render('admin/repos/repo_edit.html')
-
-    @HasRepoPermissionLevelDecorator('admin')
     def edit_remote(self, repo_name):
         c.repo_info = self._load_repo()
         c.active = 'remote'
--- a/kallithea/model/db.py	Sun Oct 20 22:06:26 2019 +0200
+++ b/kallithea/model/db.py	Sun Oct 20 04:57:04 2019 +0200
@@ -37,7 +37,6 @@
 
 import ipaddr
 import sqlalchemy
-from beaker.cache import cache_region, region_invalidate
 from sqlalchemy import Boolean, Column, DateTime, Float, ForeignKey, Index, Integer, LargeBinary, String, Unicode, UnicodeText, UniqueConstraint
 from sqlalchemy.ext.hybrid import hybrid_property
 from sqlalchemy.orm import class_mapper, joinedload, relationship, validates
@@ -1104,16 +1103,6 @@
         p += self.repo_name.split(URL_SEP)
         return os.path.join(*p)
 
-    @property
-    def cache_keys(self):
-        """
-        Returns associated cache keys for that repo
-        """
-        return CacheInvalidation.query() \
-            .filter(CacheInvalidation.cache_args == self.repo_name) \
-            .order_by(CacheInvalidation.cache_key) \
-            .all()
-
     def get_new_name(self, repo_name):
         """
         returns new full repository name based on assigned group and new new
@@ -1336,33 +1325,21 @@
 
     def set_invalidate(self):
         """
-        Mark caches of this repo as invalid.
+        Flush SA session caches of instances of on disk repo.
         """
-        CacheInvalidation.set_invalidate(self.repo_name)
+        try:
+            del self._scm_instance
+        except AttributeError:
+            pass
 
-    _scm_instance = None
+    _scm_instance = None  # caching inside lifetime of SA session
 
     @property
     def scm_instance(self):
         if self._scm_instance is None:
-            self._scm_instance = self.scm_instance_cached()
+            return self.scm_instance_no_cache()  # will populate self._scm_instance
         return self._scm_instance
 
-    def scm_instance_cached(self, valid_cache_keys=None):
-        @cache_region('long_term', 'scm_instance_cached')
-        def _c(repo_name): # repo_name is just for the cache key
-            log.debug('Creating new %s scm_instance and populating cache', repo_name)
-            return self.scm_instance_no_cache()
-        rn = self.repo_name
-
-        valid = CacheInvalidation.test_and_set_valid(rn, None, valid_cache_keys=valid_cache_keys)
-        if not valid:
-            log.debug('Cache for %s invalidated, getting new object', rn)
-            region_invalidate(_c, None, 'scm_instance_cached', rn)
-        else:
-            log.debug('Trying to get scm_instance of %s from cache', rn)
-        return _c(rn)
-
     def scm_instance_no_cache(self):
         repo_full_path = self.repo_full_path
         alias = get_scm(repo_full_path)[0]
@@ -1371,12 +1348,11 @@
         backend = get_backend(alias)
 
         if alias == 'hg':
-            repo = backend(repo_full_path, create=False,
-                           baseui=self._ui)
+            self._scm_instance = backend(repo_full_path, create=False, baseui=self._ui)
         else:
-            repo = backend(repo_full_path, create=False)
+            self._scm_instance = backend(repo_full_path, create=False)
 
-        return repo
+        return self._scm_instance
 
     def __json__(self):
         return dict(
@@ -1947,130 +1923,6 @@
         return cls.query().filter(cls.follows_repository_id == repo_id)
 
 
-class CacheInvalidation(Base, BaseDbModel):
-    __tablename__ = 'cache_invalidation'
-    __table_args__ = (
-        Index('key_idx', 'cache_key'),
-        _table_args_default_dict,
-    )
-
-    # cache_id, not used
-    cache_id = Column(Integer(), primary_key=True)
-    # cache_key as created by _get_cache_key
-    cache_key = Column(Unicode(255), nullable=False, unique=True)
-    # cache_args is a repo_name
-    cache_args = Column(Unicode(255), nullable=False)
-    # instance sets cache_active True when it is caching, other instances set
-    # cache_active to False to indicate that this cache is invalid
-    cache_active = Column(Boolean(), nullable=False, default=False)
-
-    def __init__(self, cache_key, repo_name=''):
-        self.cache_key = cache_key
-        self.cache_args = repo_name
-        self.cache_active = False
-
-    def __repr__(self):
-        return "<%s %s: %s=%s" % (
-            self.__class__.__name__,
-            self.cache_id, self.cache_key, self.cache_active)
-
-    def _cache_key_partition(self):
-        prefix, repo_name, suffix = self.cache_key.partition(self.cache_args)
-        return prefix, repo_name, suffix
-
-    def get_prefix(self):
-        """
-        get prefix that might have been used in _get_cache_key to
-        generate self.cache_key. Only used for informational purposes
-        in repo_edit.html.
-        """
-        # prefix, repo_name, suffix
-        return self._cache_key_partition()[0]
-
-    def get_suffix(self):
-        """
-        get suffix that might have been used in _get_cache_key to
-        generate self.cache_key. Only used for informational purposes
-        in repo_edit.html.
-        """
-        # prefix, repo_name, suffix
-        return self._cache_key_partition()[2]
-
-    @classmethod
-    def clear_cache(cls):
-        """
-        Delete all cache keys from database.
-        Should only be run when all instances are down and all entries thus stale.
-        """
-        cls.query().delete()
-        Session().commit()
-
-    @classmethod
-    def _get_cache_key(cls, key):
-        """
-        Wrapper for generating a unique cache key for this instance and "key".
-        key must / will start with a repo_name which will be stored in .cache_args .
-        """
-        prefix = kallithea.CONFIG.get('instance_id', '')
-        return "%s%s" % (prefix, key)
-
-    @classmethod
-    def set_invalidate(cls, repo_name):
-        """
-        Mark all caches of a repo as invalid in the database.
-        """
-        inv_objs = Session().query(cls).filter(cls.cache_args == repo_name).all()
-        log.debug('for repo %s got %s invalidation objects',
-                  repo_name, inv_objs)
-
-        for inv_obj in inv_objs:
-            log.debug('marking %s key for invalidation based on repo_name=%s',
-                      inv_obj, repo_name)
-            Session().delete(inv_obj)
-        Session().commit()
-
-    @classmethod
-    def test_and_set_valid(cls, repo_name, kind, valid_cache_keys=None):
-        """
-        Mark this cache key as active and currently cached.
-        Return True if the existing cache registration still was valid.
-        Return False to indicate that it had been invalidated and caches should be refreshed.
-        """
-
-        key = (repo_name + '_' + kind) if kind else repo_name
-        cache_key = cls._get_cache_key(key)
-
-        if valid_cache_keys and cache_key in valid_cache_keys:
-            return True
-
-        inv_obj = cls.query().filter(cls.cache_key == cache_key).scalar()
-        if inv_obj is None:
-            inv_obj = cls(cache_key, repo_name)
-            Session().add(inv_obj)
-        elif inv_obj.cache_active:
-            return True
-        inv_obj.cache_active = True
-        try:
-            Session().commit()
-        except sqlalchemy.exc.IntegrityError:
-            log.error('commit of CacheInvalidation failed - retrying')
-            Session().rollback()
-            inv_obj = cls.query().filter(cls.cache_key == cache_key).scalar()
-            if inv_obj is None:
-                log.error('failed to create CacheInvalidation entry')
-                # TODO: fail badly?
-            # else: TOCTOU - another thread added the key at the same time; no further action required
-        return False
-
-    @classmethod
-    def get_valid_cache_keys(cls):
-        """
-        Return opaque object with information of which caches still are valid
-        and can be used without checking for invalidation.
-        """
-        return set(inv_obj.cache_key for inv_obj in cls.query().filter(cls.cache_active).all())
-
-
 class ChangesetComment(Base, BaseDbModel):
     __tablename__ = 'changeset_comments'
     __table_args__ = (
--- a/kallithea/templates/admin/repos/repo_edit.html	Sun Oct 20 22:06:26 2019 +0200
+++ b/kallithea/templates/admin/repos/repo_edit.html	Sun Oct 20 04:57:04 2019 +0200
@@ -33,9 +33,6 @@
           <li class="${'active' if c.active=='fields' else ''}">
               <a href="${h.url('edit_repo_fields', repo_name=c.repo_name)}">${_('Extra Fields')}</a>
           </li>
-          <li class="${'active' if c.active=='caches' else ''}">
-              <a href="${h.url('edit_repo_caches', repo_name=c.repo_name)}">${_('Caches')}</a>
-          </li>
           <li class="${'active' if c.active=='remote' else ''}">
               <a href="${h.url('edit_repo_remote', repo_name=c.repo_name)}">${_('Remote')}</a>
           </li>
--- a/kallithea/templates/admin/repos/repo_edit_caches.html	Sun Oct 20 22:06:26 2019 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-${h.form(url('update_repo_caches', repo_name=c.repo_name))}
-<div class="form">
-   <div>
-       ${h.submit('reset_cache_%s' % c.repo_info.repo_name,_('Invalidate Repository Cache'),class_="btn btn-default btn-sm")}
-      <div class="text-muted">
-        ${_('Manually invalidate cache for this repository. On first access, the repository will be cached again.')}
-      </div>
-      <div>
-        <h5>${_('List of Cached Values')}</h5>
-        <table class="table">
-          <tr>
-            <th>${_('Prefix')}</th>
-            <th>${_('Key')}</th>
-            <th>${_('Active')}</th>
-          </tr>
-          %for cache in c.repo_info.cache_keys:
-              <tr>
-                <td>${cache.get_prefix() or '-'}</td>
-                <td>${cache.cache_key}</td>
-                <td>${h.boolicon(cache.cache_active)}</td>
-              </tr>
-          %endfor
-        </table>
-      </div>
-   </div>
-</div>
-${h.end_form()}
--- a/kallithea/tests/api/api_base.py	Sun Oct 20 22:06:26 2019 +0200
+++ b/kallithea/tests/api/api_base.py	Sun Oct 20 04:57:04 2019 +0200
@@ -357,40 +357,6 @@
         expected = 'Error occurred during rescan repositories action'
         self._compare_error(id_, expected, given=response.body)
 
-    def test_api_invalidate_cache(self):
-        repo = RepoModel().get_by_repo_name(self.REPO)
-        repo.scm_instance_cached()  # seed cache
-
-        id_, params = _build_data(self.apikey, 'invalidate_cache',
-                                  repoid=self.REPO)
-        response = api_call(self, params)
-
-        expected = {
-            'msg': "Cache for repository `%s` was invalidated" % (self.REPO,),
-            'repository': self.REPO
-        }
-        self._compare_ok(id_, expected, given=response.body)
-
-    @mock.patch.object(ScmModel, 'mark_for_invalidation', crash)
-    def test_api_invalidate_cache_error(self):
-        id_, params = _build_data(self.apikey, 'invalidate_cache',
-                                  repoid=self.REPO)
-        response = api_call(self, params)
-
-        expected = 'Error occurred during cache invalidation action'
-        self._compare_error(id_, expected, given=response.body)
-
-    def test_api_invalidate_cache_regular_user_no_permission(self):
-        repo = RepoModel().get_by_repo_name(self.REPO)
-        repo.scm_instance_cached() # seed cache
-
-        id_, params = _build_data(self.apikey_regular, 'invalidate_cache',
-                                  repoid=self.REPO)
-        response = api_call(self, params)
-
-        expected = "repository `%s` does not exist" % (self.REPO,)
-        self._compare_error(id_, expected, given=response.body)
-
     def test_api_create_existing_user(self):
         id_, params = _build_data(self.apikey, 'create_user',
                                   username=base.TEST_USER_ADMIN_LOGIN,
--- a/kallithea/tests/other/test_vcs_operations.py	Sun Oct 20 22:06:26 2019 +0200
+++ b/kallithea/tests/other/test_vcs_operations.py	Sun Oct 20 04:57:04 2019 +0200
@@ -38,7 +38,7 @@
 
 from kallithea import CONFIG
 from kallithea.lib.utils2 import ascii_bytes, safe_str
-from kallithea.model.db import CacheInvalidation, Repository, Ui, User, UserIpMap, UserLog
+from kallithea.model.db import Repository, Ui, User, UserIpMap, UserLog
 from kallithea.model.meta import Session
 from kallithea.model.ssh_key import SshKeyModel
 from kallithea.model.user import UserModel
@@ -442,31 +442,20 @@
     def test_push_invalidates_cache(self, webserver, testfork, vt):
         pre_cached_tip = [repo.get_api_data()['last_changeset']['short_id'] for repo in Repository.query().filter(Repository.repo_name == testfork[vt.repo_type])]
 
-        key = CacheInvalidation.query().filter(CacheInvalidation.cache_key
-                                               == testfork[vt.repo_type]).scalar()
-        if not key:
-            key = CacheInvalidation(testfork[vt.repo_type], testfork[vt.repo_type])
-            Session().add(key)
-
-        key.cache_active = True
-        Session().commit()
-
         dest_dir = _get_tmp_dir()
         clone_url = vt.repo_url_param(webserver, testfork[vt.repo_type])
         stdout, stderr = Command(base.TESTS_TMP_PATH).execute(vt.repo_type, 'clone', clone_url, dest_dir)
 
         stdout, stderr = _add_files_and_push(webserver, vt, dest_dir, files_no=1, clone_url=clone_url)
 
+        Session().commit()  # expire test session to make sure SA fetch new Repository instances after last_changeset has been updated server side hook in other process
+
         if vt.repo_type == 'git':
             _check_proper_git_push(stdout, stderr)
 
         post_cached_tip = [repo.get_api_data()['last_changeset']['short_id'] for repo in Repository.query().filter(Repository.repo_name == testfork[vt.repo_type])]
         assert pre_cached_tip != post_cached_tip
 
-        key = CacheInvalidation.query().filter(CacheInvalidation.cache_key
-                                               == testfork[vt.repo_type]).all()
-        assert key == []
-
     @parametrize_vcs_test_http
     def test_push_wrong_credentials(self, webserver, vt):
         dest_dir = _get_tmp_dir()