changeset 5182:0e2d450feb03

git: run external commands as list of strings so we really get correct quoting (Issue #135) a6dfd14d4b20 from https://bitbucket.org/conservancy/kallithea/pull-request/17/add-quotes-to-repo-urls-for-git-backend fixed that issue but did not make it "safe". The vcs git backend still used command strings but tried to quote them correctly ... but that approach is almost impossible to get right. Instead, pass a string list all the way to the subprocess module and let it do the quoting. This also makes some of the code more simple.
author Mads Kiilerich <madski@unity3d.com>
date Tue, 09 Jun 2015 22:53:24 +0200
parents e3840a1ec58c
children 53d68f201e46
files kallithea/controllers/compare.py kallithea/lib/hooks.py kallithea/lib/middleware/pygrack.py kallithea/lib/utils.py kallithea/lib/vcs/backends/git/changeset.py kallithea/lib/vcs/backends/git/repository.py kallithea/lib/vcs/subprocessio.py kallithea/tests/vcs/test_git.py
diffstat 8 files changed, 58 insertions(+), 92 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/controllers/compare.py	Sun May 31 21:39:22 2015 +0200
+++ b/kallithea/controllers/compare.py	Tue Jun 09 22:53:24 2015 +0200
@@ -130,13 +130,13 @@
 
             else:
                 so, se = org_repo.run_git_command(
-                    'log --reverse --pretty="format: %%H" -s %s..%s'
-                        % (org_rev, other_rev)
+                    ['log', '--reverse', '--pretty=format:%H',
+                     '-s', '%s..%s' % (org_rev, other_rev)]
                 )
                 other_changesets = [org_repo.get_changeset(cs)
                               for cs in re.findall(r'[0-9a-fA-F]{40}', so)]
                 so, se = org_repo.run_git_command(
-                    'merge-base %s %s' % (org_rev, other_rev)
+                    ['merge-base', org_rev, other_rev]
                 )
                 ancestor = re.findall(r'[0-9a-fA-F]{40}', so)[0]
             org_changesets = []
--- a/kallithea/lib/hooks.py	Sun May 31 21:39:22 2015 +0200
+++ b/kallithea/lib/hooks.py	Tue Jun 09 22:53:24 2015 +0200
@@ -447,21 +447,21 @@
                         repo._repo.refs.set_symbolic_ref('HEAD',
                                             'refs/heads/%s' % push_ref['name'])
 
-                    cmd = "for-each-ref --format='%(refname)' 'refs/heads/*'"
+                    cmd = ['for-each-ref', '--format=%(refname)','refs/heads/*']
                     heads = repo.run_git_command(cmd)[0]
+                    cmd = ['log', push_ref['new_rev'],
+                           '--reverse', '--pretty=format:%H', '--not']
                     heads = heads.replace(push_ref['ref'], '')
-                    heads = ' '.join(map(lambda c: c.strip('\n').strip(),
-                                         heads.splitlines()))
-                    cmd = (('log %(new_rev)s' % push_ref) +
-                           ' --reverse --pretty=format:"%H" --not ' + heads)
+                    for l in heads.splitlines():
+                        cmd.append(l.strip())
                     git_revs += repo.run_git_command(cmd)[0].splitlines()
 
                 elif push_ref['new_rev'] == EmptyChangeset().raw_id:
                     #delete branch case
                     git_revs += ['delete_branch=>%s' % push_ref['name']]
                 else:
-                    cmd = (('log %(old_rev)s..%(new_rev)s' % push_ref) +
-                           ' --reverse --pretty=format:"%H"')
+                    cmd = ['log', '%(old_rev)s..%(new_rev)s' % push_ref,
+                           '--reverse', '--pretty=format:%H']
                     git_revs += repo.run_git_command(cmd)[0].splitlines()
 
             elif _type == 'tags':
--- a/kallithea/lib/middleware/pygrack.py	Sun May 31 21:39:22 2015 +0200
+++ b/kallithea/lib/middleware/pygrack.py	Tue Jun 09 22:53:24 2015 +0200
@@ -82,13 +82,12 @@
         server_advert = '# service=%s' % git_command
         packet_len = str(hex(len(server_advert) + 4)[2:].rjust(4, '0')).lower()
         _git_path = kallithea.CONFIG.get('git_path', 'git')
+        cmd = [_git_path, git_command[4:],
+               '--stateless-rpc', '--advertise-refs', self.content_path]
+        log.debug('handling cmd %s', cmd)
         try:
-            out = subprocessio.SubprocessIOChunker(
-                r'%s %s --stateless-rpc --advertise-refs "%s"' % (
-                    _git_path, git_command[4:], self.content_path),
-                starting_values=[
-                    packet_len + server_advert + '0000'
-                ]
+            out = subprocessio.SubprocessIOChunker(cmd,
+                starting_values=[packet_len + server_advert + '0000']
             )
         except EnvironmentError, e:
             log.error(traceback.format_exc())
@@ -118,21 +117,17 @@
         else:
             inputstream = environ['wsgi.input']
 
+        gitenv = dict(os.environ)
+        # forget all configs
+        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
+        cmd = [_git_path, git_command[4:], '--stateless-rpc', self.content_path]
+        log.debug('handling cmd %s', cmd)
         try:
-            gitenv = os.environ
-            # forget all configs
-            gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
-            opts = dict(
-                env=gitenv,
-                cwd=self.content_path,
-            )
-            cmd = r'%s %s --stateless-rpc "%s"' % (_git_path, git_command[4:],
-                                                   self.content_path),
-            log.debug('handling cmd %s' % cmd)
             out = subprocessio.SubprocessIOChunker(
                 cmd,
                 inputstream=inputstream,
-                **opts
+                env=gitenv,
+                cwd=self.content_path,
             )
         except EnvironmentError, e:
             log.error(traceback.format_exc())
--- a/kallithea/lib/utils.py	Sun May 31 21:39:22 2015 +0200
+++ b/kallithea/lib/utils.py	Tue Jun 09 22:53:24 2015 +0200
@@ -799,7 +799,7 @@
     if 'git' not in BACKENDS:
         return None
 
-    stdout, stderr = GitRepository._run_git_command('--version', _bare=True,
+    stdout, stderr = GitRepository._run_git_command(['--version'], _bare=True,
                                                     _safe=True)
 
     m = re.search("\d+.\d+.\d+", stdout)
--- a/kallithea/lib/vcs/backends/git/changeset.py	Sun May 31 21:39:22 2015 +0200
+++ b/kallithea/lib/vcs/backends/git/changeset.py	Tue Jun 09 22:53:24 2015 +0200
@@ -189,7 +189,7 @@
         """
         rev_filter = settings.GIT_REV_FILTER
         so, se = self.repository.run_git_command(
-            "rev-list %s --children" % (rev_filter)
+            ['rev-list', rev_filter, '--children']
         )
 
         children = []
@@ -288,12 +288,12 @@
         f_path = safe_str(path)
 
         if limit:
-            cmd = 'log -n %s --pretty="format: %%H" -s %s -- "%s"' % (
-                      safe_int(limit, 0), cs_id, f_path)
+            cmd = ['log', '-n', str(safe_int(limit, 0)),
+                   '--pretty=format:%H', '-s', cs_id, '--', f_path]
 
         else:
-            cmd = 'log --pretty="format: %%H" -s %s -- "%s"' % (
-                      cs_id, f_path)
+            cmd = ['log',
+                   '--pretty=format:%H', '-s', cs_id, '--', f_path]
         so, se = self.repository.run_git_command(cmd)
         ids = re.findall(r'[0-9a-fA-F]{40}', so)
         return [self.repository.get_changeset(sha) for sha in ids]
@@ -321,7 +321,7 @@
         generally not good. Should be replaced with algorithm iterating
         commits.
         """
-        cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path)
+        cmd = ['blame', '-l', '--root', '-r', self.id, '--', path]
         # -l     ==> outputs long shas (and we need all 40 characters)
         # --root ==> doesn't put '^' character for boundaries
         # -r sha ==> blames for the given revision
@@ -471,8 +471,8 @@
     def _diff_name_status(self):
         output = []
         for parent in self.parents:
-            cmd = 'diff --name-status %s %s --encoding=utf8' % (parent.raw_id,
-                                                                self.raw_id)
+            cmd = ['diff', '--name-status', parent.raw_id, self.raw_id,
+                   '--encoding=utf8']
             so, se = self.repository.run_git_command(cmd)
             output.append(so.strip())
         return '\n'.join(output)
--- a/kallithea/lib/vcs/backends/git/repository.py	Sun May 31 21:39:22 2015 +0200
+++ b/kallithea/lib/vcs/backends/git/repository.py	Tue Jun 09 22:53:24 2015 +0200
@@ -18,18 +18,6 @@
 import logging
 import posixpath
 import string
-import sys
-if sys.platform == "win32":
-    from subprocess import list2cmdline
-    def quote(s):
-        return list2cmdline([s])
-else:
-    try:
-        # Python <=2.7
-        from pipes import quote
-    except ImportError:
-        # Python 3.3+
-        from shlex import quote
 
 from dulwich.objects import Tag
 from dulwich.repo import Repo, NotGitRepository
@@ -135,10 +123,7 @@
             del opts['_safe']
             safe_call = True
 
-        _str_cmd = False
-        if isinstance(cmd, basestring):
-            cmd = [cmd]
-            _str_cmd = True
+        assert isinstance(cmd, list), cmd
 
         gitenv = os.environ
         # need to clean fix GIT_DIR !
@@ -148,13 +133,11 @@
 
         _git_path = settings.GIT_EXECUTABLE_PATH
         cmd = [_git_path] + _copts + cmd
-        if _str_cmd:
-            cmd = ' '.join(cmd)
 
         try:
             _opts = dict(
                 env=gitenv,
-                shell=True,
+                shell=False,
             )
             _opts.update(opts)
             p = subprocessio.SubprocessIOChunker(cmd, **_opts)
@@ -268,7 +251,7 @@
             return []
 
         rev_filter = settings.GIT_REV_FILTER
-        cmd = 'rev-list %s --reverse --date-order' % (rev_filter)
+        cmd = ['rev-list', rev_filter, '--reverse', '--date-order']
         try:
             so, se = self.run_git_command(cmd)
         except RepositoryError:
@@ -551,22 +534,16 @@
 
         # %H at format means (full) commit hash, initial hashes are retrieved
         # in ascending date order
-        cmd_template = 'log --date-order --reverse --pretty=format:"%H"'
-        cmd_params = {}
+        cmd = ['log', '--date-order', '--reverse', '--pretty=format:%H']
         if start_date:
-            cmd_template += ' --since "$since"'
-            cmd_params['since'] = start_date.strftime('%m/%d/%y %H:%M:%S')
+            cmd += ['--since', start_date.strftime('%m/%d/%y %H:%M:%S')]
         if end_date:
-            cmd_template += ' --until "$until"'
-            cmd_params['until'] = end_date.strftime('%m/%d/%y %H:%M:%S')
+            cmd += ['--until', end_date.strftime('%m/%d/%y %H:%M:%S')]
         if branch_name:
-            cmd_template += ' $branch_name'
-            cmd_params['branch_name'] = branch_name
+            cmd.append(branch_name)
         else:
-            rev_filter = settings.GIT_REV_FILTER
-            cmd_template += ' %s' % (rev_filter)
+            cmd.append(settings.GIT_REV_FILTER)
 
-        cmd = string.Template(cmd_template).safe_substitute(**cmd_params)
         revs = self.run_git_command(cmd)[0].splitlines()
         start_pos = 0
         end_pos = len(revs)
@@ -622,14 +599,14 @@
 
         if rev1 == self.EMPTY_CHANGESET:
             rev2 = self.get_changeset(rev2).raw_id
-            cmd = ' '.join(['show'] + flags + [rev2])
+            cmd = ['show'] + flags + [rev2]
         else:
             rev1 = self.get_changeset(rev1).raw_id
             rev2 = self.get_changeset(rev2).raw_id
-            cmd = ' '.join(['diff'] + flags + [rev1, rev2])
+            cmd = ['diff'] + flags + [rev1, rev2]
 
         if path:
-            cmd += ' -- "%s"' % path
+            cmd += ['--', path]
 
         stdout, stderr = self.run_git_command(cmd)
         # TODO: don't ignore stderr
@@ -663,8 +640,7 @@
             cmd.append('--bare')
         elif not update_after_clone:
             cmd.append('--no-checkout')
-        cmd += ['--', quote(url), quote(self.path)]
-        cmd = ' '.join(cmd)
+        cmd += ['--', url, self.path]
         # If error occurs run_git_command raises RepositoryError already
         self.run_git_command(cmd)
 
@@ -673,8 +649,7 @@
         Tries to pull changes from external location.
         """
         url = self._get_url(url)
-        cmd = ['pull', "--ff-only", quote(url)]
-        cmd = ' '.join(cmd)
+        cmd = ['pull', '--ff-only', url]
         # If error occurs run_git_command raises RepositoryError already
         self.run_git_command(cmd)
 
@@ -683,13 +658,11 @@
         Tries to pull changes from external location.
         """
         url = self._get_url(url)
-        so, se = self.run_git_command('ls-remote -h %s' % quote(url))
-        refs = []
+        so, se = self.run_git_command(['ls-remote', '-h', url])
+        cmd = ['fetch', url, '--']
         for line in (x for x in so.splitlines()):
             sha, ref = line.split('\t')
-            refs.append(ref)
-        refs = ' '.join(('+%s:%s' % (r, r) for r in refs))
-        cmd = '''fetch %s -- %s''' % (quote(url), refs)
+            cmd.append('+%s:%s' % (ref, ref))
         self.run_git_command(cmd)
 
     def _update_server_info(self):
--- a/kallithea/lib/vcs/subprocessio.py	Sun May 31 21:39:22 2015 +0200
+++ b/kallithea/lib/vcs/subprocessio.py	Tue Jun 09 22:53:24 2015 +0200
@@ -342,11 +342,9 @@
             input_streamer.start()
             inputstream = input_streamer.output
 
-        _shell = kwargs.get('shell', True)
-        if isinstance(cmd, (list, tuple)):
-            cmd = ' '.join(cmd)
+        # Note: fragile cmd mangling has been removed for use in Kallithea
+        assert isinstance(cmd, list), cmd
 
-        kwargs['shell'] = _shell
         _p = subprocess.Popen(cmd, bufsize=-1,
                               stdin=inputstream,
                               stdout=subprocess.PIPE,
--- a/kallithea/tests/vcs/test_git.py	Sun May 31 21:39:22 2015 +0200
+++ b/kallithea/tests/vcs/test_git.py	Tue Jun 09 22:53:24 2015 +0200
@@ -682,34 +682,34 @@
             'base')
 
     def test_workdir_get_branch(self):
-        self.repo.run_git_command('checkout -b production')
+        self.repo.run_git_command(['checkout', '-b', 'production'])
         # Regression test: one of following would fail if we don't check
         # .git/HEAD file
-        self.repo.run_git_command('checkout production')
+        self.repo.run_git_command(['checkout', 'production'])
         self.assertEqual(self.repo.workdir.get_branch(), 'production')
-        self.repo.run_git_command('checkout master')
+        self.repo.run_git_command(['checkout', 'master'])
         self.assertEqual(self.repo.workdir.get_branch(), 'master')
 
     def test_get_diff_runs_git_command_with_hashes(self):
         self.repo.run_git_command = mock.Mock(return_value=['', ''])
         self.repo.get_diff(0, 1)
         self.repo.run_git_command.assert_called_once_with(
-          'diff -U%s --full-index --binary -p -M --abbrev=40 %s %s' %
-            (3, self.repo._get_revision(0), self.repo._get_revision(1)))
+            ['diff', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40',
+             self.repo._get_revision(0), self.repo._get_revision(1)])
 
     def test_get_diff_runs_git_command_with_str_hashes(self):
         self.repo.run_git_command = mock.Mock(return_value=['', ''])
         self.repo.get_diff(self.repo.EMPTY_CHANGESET, 1)
         self.repo.run_git_command.assert_called_once_with(
-            'show -U%s --full-index --binary -p -M --abbrev=40 %s' %
-            (3, self.repo._get_revision(1)))
+            ['show', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40',
+             self.repo._get_revision(1)])
 
     def test_get_diff_runs_git_command_with_path_if_its_given(self):
         self.repo.run_git_command = mock.Mock(return_value=['', ''])
         self.repo.get_diff(0, 1, 'foo')
         self.repo.run_git_command.assert_called_once_with(
-          'diff -U%s --full-index --binary -p -M --abbrev=40 %s %s -- "foo"'
-            % (3, self.repo._get_revision(0), self.repo._get_revision(1)))
+            ['diff', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40',
+             self.repo._get_revision(0), self.repo._get_revision(1), '--', 'foo'])
 
 
 class GitRegressionTest(_BackendTestMixin, unittest.TestCase):