changeset 7714:ce2a4ef8cd5f

middleware: move handling of permanent repo URLs to separate middleware This is about the handling of repo URLs like '_123' for the repo with repo_id 123. The de-mangling of such URLs was spread out across multiple layers. It fits much more nicely as a middleware layer. The code in routing and simplehg / simplegit can thus be removed. The base _get_by_id function was confusing - fix it by removing it. To do that, refactor utils introducing fix_repo_id_name to replace get_repo_by_id. We now assume in the application that we never have any extra leading '/' in URL paths. And while trailing extra '/' might be fine in actual URLs, they must be handled at the routing level, not propagated through all layers. This changeset is not really changing that.
author Mads Kiilerich <mads@kiilerich.com>
date Mon, 07 Jan 2019 01:50:56 +0100
parents cba155768085
children a444c46a0649
files kallithea/config/app_cfg.py kallithea/config/routing.py kallithea/lib/base.py kallithea/lib/middleware/permanent_repo_url.py kallithea/lib/middleware/simplegit.py kallithea/lib/middleware/simplehg.py kallithea/lib/utils.py kallithea/tests/other/test_libs.py
diffstat 8 files changed, 96 insertions(+), 76 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/config/app_cfg.py	Mon Jan 07 00:00:18 2019 +0100
+++ b/kallithea/config/app_cfg.py	Mon Jan 07 01:50:56 2019 +0100
@@ -30,6 +30,7 @@
 from sqlalchemy import create_engine
 import mercurial
 
+from kallithea.lib.middleware.permanent_repo_url import PermanentRepoUrl
 from kallithea.lib.middleware.https_fixup import HttpsFixup
 from kallithea.lib.middleware.simplegit import SimpleGit
 from kallithea.lib.middleware.simplehg import SimpleHg
@@ -208,6 +209,8 @@
     # Enable https redirects based on HTTP_X_URL_SCHEME set by proxy
     if any(asbool(config.get(x)) for x in ['https_fixup', 'force_https', 'use_htsts']):
         app = HttpsFixup(app, config)
+
+    app = PermanentRepoUrl(app, config)
     return app
 
 
--- a/kallithea/config/routing.py	Mon Jan 07 00:00:18 2019 +0100
+++ b/kallithea/config/routing.py	Mon Jan 07 01:50:56 2019 +0100
@@ -33,28 +33,18 @@
     rmap.minimization = False
     rmap.explicit = False
 
-    from kallithea.lib.utils import (is_valid_repo, is_valid_repo_group,
-                                     get_repo_by_id)
+    from kallithea.lib.utils import is_valid_repo, is_valid_repo_group
 
     def check_repo(environ, match_dict):
         """
-        check for valid repository for proper 404 handling
-
-        :param environ:
-        :param match_dict:
+        Check for valid repository for proper 404 handling.
+        Also, a bit of side effect modifying match_dict ...
         """
-        repo_name = match_dict.get('repo_name')
-
         if match_dict.get('f_path'):
             # fix for multiple initial slashes that causes errors
             match_dict['f_path'] = match_dict['f_path'].lstrip('/')
 
-        by_id_match = get_repo_by_id(repo_name)
-        if by_id_match:
-            repo_name = by_id_match
-            match_dict['repo_name'] = repo_name
-
-        return is_valid_repo(repo_name, config['base_path'])
+        return is_valid_repo(match_dict['repo_name'], config['base_path'])
 
     def check_group(environ, match_dict):
         """
--- a/kallithea/lib/base.py	Mon Jan 07 00:00:18 2019 +0100
+++ b/kallithea/lib/base.py	Mon Jan 07 01:50:56 2019 +0100
@@ -266,23 +266,6 @@
     def _handle_request(self, environ, start_response):
         raise NotImplementedError()
 
-    def _get_by_id(self, repo_name):
-        """
-        Gets a special pattern _<ID> from clone url and tries to replace it
-        with a repository_name for support of _<ID> permanent URLs
-
-        :param repo_name:
-        """
-
-        data = repo_name.split('/')
-        if len(data) >= 2:
-            from kallithea.lib.utils import get_repo_by_id
-            by_id_match = get_repo_by_id(repo_name)
-            if by_id_match:
-                data[1] = safe_str(by_id_match)
-
-        return '/'.join(data)
-
     def _check_permission(self, action, authuser, repo_name):
         """
         Checks permissions using action (push/pull) user and repository
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kallithea/lib/middleware/permanent_repo_url.py	Mon Jan 07 01:50:56 2019 +0100
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+kallithea.lib.middleware.permanent_repo_url
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+middleware to handle permanent repo URLs, replacing PATH_INFO '/_123/yada' with
+'/name/of/repo/yada' after looking 123 up in the database.
+"""
+
+
+from kallithea.lib.utils import safe_str, fix_repo_id_name
+
+
+class PermanentRepoUrl(object):
+
+    def __init__(self, app, config):
+        self.application = app
+        self.config = config
+
+    def __call__(self, environ, start_response):
+        path_info = environ['PATH_INFO']
+        if path_info.startswith('/'): # it must
+            path_info = '/' + safe_str(fix_repo_id_name(path_info[1:]))
+            environ['PATH_INFO'] = path_info
+
+        return self.application(environ, start_response)
--- a/kallithea/lib/middleware/simplegit.py	Mon Jan 07 00:00:18 2019 +0100
+++ b/kallithea/lib/middleware/simplegit.py	Mon Jan 07 01:50:56 2019 +0100
@@ -141,14 +141,11 @@
         :param environ: environ where PATH_INFO is stored
         """
         try:
-            environ['PATH_INFO'] = self._get_by_id(environ['PATH_INFO'])
-            repo_name = GIT_PROTO_PAT.match(environ['PATH_INFO']).group(1)
+            return GIT_PROTO_PAT.match(environ['PATH_INFO']).group(1)
         except Exception:
             log.error(traceback.format_exc())
             raise
 
-        return repo_name
-
     def __get_action(self, environ):
         """
         Maps Git request commands into a pull or push command.
--- a/kallithea/lib/middleware/simplehg.py	Mon Jan 07 00:00:18 2019 +0100
+++ b/kallithea/lib/middleware/simplehg.py	Mon Jan 07 01:50:56 2019 +0100
@@ -167,16 +167,13 @@
         :param environ: environ where PATH_INFO is stored
         """
         try:
-            environ['PATH_INFO'] = self._get_by_id(environ['PATH_INFO'])
-            repo_name = '/'.join(environ['PATH_INFO'].split('/')[1:])
-            if repo_name.endswith('/'):
-                repo_name = repo_name.rstrip('/')
+            path_info = environ['PATH_INFO']
+            if path_info.startswith('/'):
+                return path_info[1:].rstrip('/')
         except Exception:
             log.error(traceback.format_exc())
             raise
 
-        return repo_name
-
     def __get_action(self, environ):
         """
         Maps Mercurial request commands into 'pull' or 'push'.
--- a/kallithea/lib/utils.py	Mon Jan 07 00:00:18 2019 +0100
+++ b/kallithea/lib/utils.py	Mon Jan 07 01:50:56 2019 +0100
@@ -78,29 +78,35 @@
     return None
 
 
-def _extract_id_from_repo_name(repo_name):
-    if repo_name.startswith('/'):
-        repo_name = repo_name.lstrip('/')
-    by_id_match = re.match(r'^_(\d{1,})', repo_name)
-    if by_id_match:
-        return by_id_match.groups()[0]
+def _get_permanent_id(s):
+    """Helper for decoding stable URLs with repo ID. For a string like '_123'
+    return 123.
+    """
+    by_id_match = re.match(r'^_(\d+)$', s)
+    if by_id_match is None:
+        return None
+    return int(by_id_match.group(1))
 
 
-def get_repo_by_id(repo_name):
+def fix_repo_id_name(path):
     """
-    Extracts repo_name by id from special urls. Example url is _11/repo_name
+    Rewrite repo_name for _<ID> permanent URLs.
 
-    :param repo_name:
-    :return: repo_name if matched else None
+    Given a path, if the first path element is like _<ID>, return the path with
+    this part expanded to the corresponding full repo name, else return the
+    provided path.
     """
-    _repo_id = _extract_id_from_repo_name(repo_name)
-    if _repo_id:
+    first, rest = path, ''
+    if '/' in path:
+        first, rest_ = path.split('/', 1)
+        rest = '/' + rest_
+    repo_id = _get_permanent_id(first)
+    if repo_id is not None:
         from kallithea.model.db import Repository
-        repo = Repository.get(_repo_id)
-        if repo:
-            # TODO: return repo instead of reponame? or would that be a layering violation?
-            return repo.repo_name
-    return None
+        repo = Repository.get(repo_id)
+        if repo is not None:
+            return repo.repo_name + rest
+    return path
 
 
 def action_logger(user, action, repo, ipaddr='', commit=False):
--- a/kallithea/tests/other/test_libs.py	Mon Jan 07 00:00:18 2019 +0100
+++ b/kallithea/tests/other/test_libs.py	Mon Jan 07 01:50:56 2019 +0100
@@ -532,26 +532,32 @@
 
     @parametrize('test,expected', [
       ("", None),
-      ("/_2", '2'),
-      ("_2", '2'),
-      ("/_2/", '2'),
-      ("_2/", '2'),
-
-      ("/_21", '21'),
-      ("_21", '21'),
-      ("/_21/", '21'),
-      ("_21/", '21'),
+      ("/_2", None),
+      ("_2", 2),
+      ("_2/", None),
+    ])
+    def test_get_permanent_id(self, test, expected):
+        from kallithea.lib.utils import _get_permanent_id
+        extracted = _get_permanent_id(test)
+        assert extracted == expected, 'url:%s, got:`%s` expected: `%s`' % (test, _test, expected)
 
-      ("/_21/foobar", '21'),
-      ("_21/121", '21'),
-      ("/_21/_12", '21'),
-      ("_21/prefix/foo", '21'),
+    @parametrize('test,expected', [
+      ("", ""),
+      ("/", "/"),
+      ("/_ID", '/_ID'),
+      ("ID", "ID"),
+      ("_ID", 'NAME'),
+      ("_ID/", 'NAME/'),
+      ("_ID/1/2", 'NAME/1/2'),
+      ("_IDa", '_IDa'),
     ])
-    def test_get_repo_by_id(self, test, expected):
-        from kallithea.lib.utils import _extract_id_from_repo_name
-        _test = _extract_id_from_repo_name(test)
-        assert _test == expected, 'url:%s, got:`%s` expected: `%s`' % (test, _test, expected)
-
+    def test_fix_repo_id_name(self, test, expected):
+        repo = Repository.get_by_repo_name(HG_REPO)
+        test = test.replace('ID', str(repo.repo_id))
+        expected = expected.replace('NAME', repo.repo_name).replace('ID', str(repo.repo_id))
+        from kallithea.lib.utils import fix_repo_id_name
+        replaced = fix_repo_id_name(test)
+        assert replaced == expected, 'url:%s, got:`%s` expected: `%s`' % (test, replaced, expected)
 
     @parametrize('canonical,test,expected', [
         ('http://www.example.org/', '/abc/xyz', 'http://www.example.org/abc/xyz'),