Mercurial > kallithea
changeset 5182:0e2d450feb03
git: run external commands as list of strings so we really get correct quoting (Issue #135)
a6dfd14d4b20 from
https://bitbucket.org/conservancy/kallithea/pull-request/17/add-quotes-to-repo-urls-for-git-backend
fixed that issue but did not make it "safe". The vcs git backend still used
command strings but tried to quote them correctly ... but that approach is
almost impossible to get right.
Instead, pass a string list all the way to the subprocess module and let it do
the quoting. This also makes some of the code more simple.
author | Mads Kiilerich <madski@unity3d.com> |
---|---|
date | Tue, 09 Jun 2015 22:53:24 +0200 |
parents | e3840a1ec58c |
children | 53d68f201e46 |
files | kallithea/controllers/compare.py kallithea/lib/hooks.py kallithea/lib/middleware/pygrack.py kallithea/lib/utils.py kallithea/lib/vcs/backends/git/changeset.py kallithea/lib/vcs/backends/git/repository.py kallithea/lib/vcs/subprocessio.py kallithea/tests/vcs/test_git.py |
diffstat | 8 files changed, 58 insertions(+), 92 deletions(-) [+] |
line wrap: on
line diff
--- a/kallithea/controllers/compare.py Sun May 31 21:39:22 2015 +0200 +++ b/kallithea/controllers/compare.py Tue Jun 09 22:53:24 2015 +0200 @@ -130,13 +130,13 @@ else: so, se = org_repo.run_git_command( - 'log --reverse --pretty="format: %%H" -s %s..%s' - % (org_rev, other_rev) + ['log', '--reverse', '--pretty=format:%H', + '-s', '%s..%s' % (org_rev, other_rev)] ) other_changesets = [org_repo.get_changeset(cs) for cs in re.findall(r'[0-9a-fA-F]{40}', so)] so, se = org_repo.run_git_command( - 'merge-base %s %s' % (org_rev, other_rev) + ['merge-base', org_rev, other_rev] ) ancestor = re.findall(r'[0-9a-fA-F]{40}', so)[0] org_changesets = []
--- a/kallithea/lib/hooks.py Sun May 31 21:39:22 2015 +0200 +++ b/kallithea/lib/hooks.py Tue Jun 09 22:53:24 2015 +0200 @@ -447,21 +447,21 @@ repo._repo.refs.set_symbolic_ref('HEAD', 'refs/heads/%s' % push_ref['name']) - cmd = "for-each-ref --format='%(refname)' 'refs/heads/*'" + cmd = ['for-each-ref', '--format=%(refname)','refs/heads/*'] heads = repo.run_git_command(cmd)[0] + cmd = ['log', push_ref['new_rev'], + '--reverse', '--pretty=format:%H', '--not'] heads = heads.replace(push_ref['ref'], '') - heads = ' '.join(map(lambda c: c.strip('\n').strip(), - heads.splitlines())) - cmd = (('log %(new_rev)s' % push_ref) + - ' --reverse --pretty=format:"%H" --not ' + heads) + for l in heads.splitlines(): + cmd.append(l.strip()) git_revs += repo.run_git_command(cmd)[0].splitlines() elif push_ref['new_rev'] == EmptyChangeset().raw_id: #delete branch case git_revs += ['delete_branch=>%s' % push_ref['name']] else: - cmd = (('log %(old_rev)s..%(new_rev)s' % push_ref) + - ' --reverse --pretty=format:"%H"') + cmd = ['log', '%(old_rev)s..%(new_rev)s' % push_ref, + '--reverse', '--pretty=format:%H'] git_revs += repo.run_git_command(cmd)[0].splitlines() elif _type == 'tags':
--- a/kallithea/lib/middleware/pygrack.py Sun May 31 21:39:22 2015 +0200 +++ b/kallithea/lib/middleware/pygrack.py Tue Jun 09 22:53:24 2015 +0200 @@ -82,13 +82,12 @@ server_advert = '# service=%s' % git_command packet_len = str(hex(len(server_advert) + 4)[2:].rjust(4, '0')).lower() _git_path = kallithea.CONFIG.get('git_path', 'git') + cmd = [_git_path, git_command[4:], + '--stateless-rpc', '--advertise-refs', self.content_path] + log.debug('handling cmd %s', cmd) try: - out = subprocessio.SubprocessIOChunker( - r'%s %s --stateless-rpc --advertise-refs "%s"' % ( - _git_path, git_command[4:], self.content_path), - starting_values=[ - packet_len + server_advert + '0000' - ] + out = subprocessio.SubprocessIOChunker(cmd, + starting_values=[packet_len + server_advert + '0000'] ) except EnvironmentError, e: log.error(traceback.format_exc()) @@ -118,21 +117,17 @@ else: inputstream = environ['wsgi.input'] + gitenv = dict(os.environ) + # forget all configs + gitenv['GIT_CONFIG_NOGLOBAL'] = '1' + cmd = [_git_path, git_command[4:], '--stateless-rpc', self.content_path] + log.debug('handling cmd %s', cmd) try: - gitenv = os.environ - # forget all configs - gitenv['GIT_CONFIG_NOGLOBAL'] = '1' - opts = dict( - env=gitenv, - cwd=self.content_path, - ) - cmd = r'%s %s --stateless-rpc "%s"' % (_git_path, git_command[4:], - self.content_path), - log.debug('handling cmd %s' % cmd) out = subprocessio.SubprocessIOChunker( cmd, inputstream=inputstream, - **opts + env=gitenv, + cwd=self.content_path, ) except EnvironmentError, e: log.error(traceback.format_exc())
--- a/kallithea/lib/utils.py Sun May 31 21:39:22 2015 +0200 +++ b/kallithea/lib/utils.py Tue Jun 09 22:53:24 2015 +0200 @@ -799,7 +799,7 @@ if 'git' not in BACKENDS: return None - stdout, stderr = GitRepository._run_git_command('--version', _bare=True, + stdout, stderr = GitRepository._run_git_command(['--version'], _bare=True, _safe=True) m = re.search("\d+.\d+.\d+", stdout)
--- a/kallithea/lib/vcs/backends/git/changeset.py Sun May 31 21:39:22 2015 +0200 +++ b/kallithea/lib/vcs/backends/git/changeset.py Tue Jun 09 22:53:24 2015 +0200 @@ -189,7 +189,7 @@ """ rev_filter = settings.GIT_REV_FILTER so, se = self.repository.run_git_command( - "rev-list %s --children" % (rev_filter) + ['rev-list', rev_filter, '--children'] ) children = [] @@ -288,12 +288,12 @@ f_path = safe_str(path) if limit: - cmd = 'log -n %s --pretty="format: %%H" -s %s -- "%s"' % ( - safe_int(limit, 0), cs_id, f_path) + cmd = ['log', '-n', str(safe_int(limit, 0)), + '--pretty=format:%H', '-s', cs_id, '--', f_path] else: - cmd = 'log --pretty="format: %%H" -s %s -- "%s"' % ( - cs_id, f_path) + cmd = ['log', + '--pretty=format:%H', '-s', cs_id, '--', f_path] so, se = self.repository.run_git_command(cmd) ids = re.findall(r'[0-9a-fA-F]{40}', so) return [self.repository.get_changeset(sha) for sha in ids] @@ -321,7 +321,7 @@ generally not good. Should be replaced with algorithm iterating commits. """ - cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path) + cmd = ['blame', '-l', '--root', '-r', self.id, '--', path] # -l ==> outputs long shas (and we need all 40 characters) # --root ==> doesn't put '^' character for boundaries # -r sha ==> blames for the given revision @@ -471,8 +471,8 @@ def _diff_name_status(self): output = [] for parent in self.parents: - cmd = 'diff --name-status %s %s --encoding=utf8' % (parent.raw_id, - self.raw_id) + cmd = ['diff', '--name-status', parent.raw_id, self.raw_id, + '--encoding=utf8'] so, se = self.repository.run_git_command(cmd) output.append(so.strip()) return '\n'.join(output)
--- a/kallithea/lib/vcs/backends/git/repository.py Sun May 31 21:39:22 2015 +0200 +++ b/kallithea/lib/vcs/backends/git/repository.py Tue Jun 09 22:53:24 2015 +0200 @@ -18,18 +18,6 @@ import logging import posixpath import string -import sys -if sys.platform == "win32": - from subprocess import list2cmdline - def quote(s): - return list2cmdline([s]) -else: - try: - # Python <=2.7 - from pipes import quote - except ImportError: - # Python 3.3+ - from shlex import quote from dulwich.objects import Tag from dulwich.repo import Repo, NotGitRepository @@ -135,10 +123,7 @@ del opts['_safe'] safe_call = True - _str_cmd = False - if isinstance(cmd, basestring): - cmd = [cmd] - _str_cmd = True + assert isinstance(cmd, list), cmd gitenv = os.environ # need to clean fix GIT_DIR ! @@ -148,13 +133,11 @@ _git_path = settings.GIT_EXECUTABLE_PATH cmd = [_git_path] + _copts + cmd - if _str_cmd: - cmd = ' '.join(cmd) try: _opts = dict( env=gitenv, - shell=True, + shell=False, ) _opts.update(opts) p = subprocessio.SubprocessIOChunker(cmd, **_opts) @@ -268,7 +251,7 @@ return [] rev_filter = settings.GIT_REV_FILTER - cmd = 'rev-list %s --reverse --date-order' % (rev_filter) + cmd = ['rev-list', rev_filter, '--reverse', '--date-order'] try: so, se = self.run_git_command(cmd) except RepositoryError: @@ -551,22 +534,16 @@ # %H at format means (full) commit hash, initial hashes are retrieved # in ascending date order - cmd_template = 'log --date-order --reverse --pretty=format:"%H"' - cmd_params = {} + cmd = ['log', '--date-order', '--reverse', '--pretty=format:%H'] if start_date: - cmd_template += ' --since "$since"' - cmd_params['since'] = start_date.strftime('%m/%d/%y %H:%M:%S') + cmd += ['--since', start_date.strftime('%m/%d/%y %H:%M:%S')] if end_date: - cmd_template += ' --until "$until"' - cmd_params['until'] = end_date.strftime('%m/%d/%y %H:%M:%S') + cmd += ['--until', end_date.strftime('%m/%d/%y %H:%M:%S')] if branch_name: - cmd_template += ' $branch_name' - cmd_params['branch_name'] = branch_name + cmd.append(branch_name) else: - rev_filter = settings.GIT_REV_FILTER - cmd_template += ' %s' % (rev_filter) + cmd.append(settings.GIT_REV_FILTER) - cmd = string.Template(cmd_template).safe_substitute(**cmd_params) revs = self.run_git_command(cmd)[0].splitlines() start_pos = 0 end_pos = len(revs) @@ -622,14 +599,14 @@ if rev1 == self.EMPTY_CHANGESET: rev2 = self.get_changeset(rev2).raw_id - cmd = ' '.join(['show'] + flags + [rev2]) + cmd = ['show'] + flags + [rev2] else: rev1 = self.get_changeset(rev1).raw_id rev2 = self.get_changeset(rev2).raw_id - cmd = ' '.join(['diff'] + flags + [rev1, rev2]) + cmd = ['diff'] + flags + [rev1, rev2] if path: - cmd += ' -- "%s"' % path + cmd += ['--', path] stdout, stderr = self.run_git_command(cmd) # TODO: don't ignore stderr @@ -663,8 +640,7 @@ cmd.append('--bare') elif not update_after_clone: cmd.append('--no-checkout') - cmd += ['--', quote(url), quote(self.path)] - cmd = ' '.join(cmd) + cmd += ['--', url, self.path] # If error occurs run_git_command raises RepositoryError already self.run_git_command(cmd) @@ -673,8 +649,7 @@ Tries to pull changes from external location. """ url = self._get_url(url) - cmd = ['pull', "--ff-only", quote(url)] - cmd = ' '.join(cmd) + cmd = ['pull', '--ff-only', url] # If error occurs run_git_command raises RepositoryError already self.run_git_command(cmd) @@ -683,13 +658,11 @@ Tries to pull changes from external location. """ url = self._get_url(url) - so, se = self.run_git_command('ls-remote -h %s' % quote(url)) - refs = [] + so, se = self.run_git_command(['ls-remote', '-h', url]) + cmd = ['fetch', url, '--'] for line in (x for x in so.splitlines()): sha, ref = line.split('\t') - refs.append(ref) - refs = ' '.join(('+%s:%s' % (r, r) for r in refs)) - cmd = '''fetch %s -- %s''' % (quote(url), refs) + cmd.append('+%s:%s' % (ref, ref)) self.run_git_command(cmd) def _update_server_info(self):
--- a/kallithea/lib/vcs/subprocessio.py Sun May 31 21:39:22 2015 +0200 +++ b/kallithea/lib/vcs/subprocessio.py Tue Jun 09 22:53:24 2015 +0200 @@ -342,11 +342,9 @@ input_streamer.start() inputstream = input_streamer.output - _shell = kwargs.get('shell', True) - if isinstance(cmd, (list, tuple)): - cmd = ' '.join(cmd) + # Note: fragile cmd mangling has been removed for use in Kallithea + assert isinstance(cmd, list), cmd - kwargs['shell'] = _shell _p = subprocess.Popen(cmd, bufsize=-1, stdin=inputstream, stdout=subprocess.PIPE,
--- a/kallithea/tests/vcs/test_git.py Sun May 31 21:39:22 2015 +0200 +++ b/kallithea/tests/vcs/test_git.py Tue Jun 09 22:53:24 2015 +0200 @@ -682,34 +682,34 @@ 'base') def test_workdir_get_branch(self): - self.repo.run_git_command('checkout -b production') + self.repo.run_git_command(['checkout', '-b', 'production']) # Regression test: one of following would fail if we don't check # .git/HEAD file - self.repo.run_git_command('checkout production') + self.repo.run_git_command(['checkout', 'production']) self.assertEqual(self.repo.workdir.get_branch(), 'production') - self.repo.run_git_command('checkout master') + self.repo.run_git_command(['checkout', 'master']) self.assertEqual(self.repo.workdir.get_branch(), 'master') def test_get_diff_runs_git_command_with_hashes(self): self.repo.run_git_command = mock.Mock(return_value=['', '']) self.repo.get_diff(0, 1) self.repo.run_git_command.assert_called_once_with( - 'diff -U%s --full-index --binary -p -M --abbrev=40 %s %s' % - (3, self.repo._get_revision(0), self.repo._get_revision(1))) + ['diff', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40', + self.repo._get_revision(0), self.repo._get_revision(1)]) def test_get_diff_runs_git_command_with_str_hashes(self): self.repo.run_git_command = mock.Mock(return_value=['', '']) self.repo.get_diff(self.repo.EMPTY_CHANGESET, 1) self.repo.run_git_command.assert_called_once_with( - 'show -U%s --full-index --binary -p -M --abbrev=40 %s' % - (3, self.repo._get_revision(1))) + ['show', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40', + self.repo._get_revision(1)]) def test_get_diff_runs_git_command_with_path_if_its_given(self): self.repo.run_git_command = mock.Mock(return_value=['', '']) self.repo.get_diff(0, 1, 'foo') self.repo.run_git_command.assert_called_once_with( - 'diff -U%s --full-index --binary -p -M --abbrev=40 %s %s -- "foo"' - % (3, self.repo._get_revision(0), self.repo._get_revision(1))) + ['diff', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40', + self.repo._get_revision(0), self.repo._get_revision(1), '--', 'foo']) class GitRegressionTest(_BackendTestMixin, unittest.TestCase):