comparison rhodecode/lib/vcs/backends/git/repository.py @ 2007:324ac367a4da beta

Added VCS into rhodecode core for faster and easier deployments of new versions
author Marcin Kuzminski <marcin@python-works.com>
date Mon, 20 Feb 2012 23:00:54 +0200
parents
children 092080cd96ba
comparison
equal deleted inserted replaced
2006:34d009e5147a 2007:324ac367a4da
1 # -*- coding: utf-8 -*-
2 """
3 vcs.backends.git
4 ~~~~~~~~~~~~~~~~
5
6 Git backend implementation.
7
8 :created_on: Apr 8, 2010
9 :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
10 """
11
12 import os
13 import re
14 import time
15 import posixpath
16 from dulwich.repo import Repo, NotGitRepository
17 #from dulwich.config import ConfigFile
18 from string import Template
19 from subprocess import Popen, PIPE
20 from rhodecode.lib.vcs.backends.base import BaseRepository
21 from rhodecode.lib.vcs.exceptions import BranchDoesNotExistError
22 from rhodecode.lib.vcs.exceptions import ChangesetDoesNotExistError
23 from rhodecode.lib.vcs.exceptions import EmptyRepositoryError
24 from rhodecode.lib.vcs.exceptions import RepositoryError
25 from rhodecode.lib.vcs.exceptions import TagAlreadyExistError
26 from rhodecode.lib.vcs.exceptions import TagDoesNotExistError
27 from rhodecode.lib.vcs.utils import safe_unicode, makedate, date_fromtimestamp
28 from rhodecode.lib.vcs.utils.lazy import LazyProperty
29 from rhodecode.lib.vcs.utils.ordered_dict import OrderedDict
30 from rhodecode.lib.vcs.utils.paths import abspath
31 from rhodecode.lib.vcs.utils.paths import get_user_home
32 from .workdir import GitWorkdir
33 from .changeset import GitChangeset
34 from .inmemory import GitInMemoryChangeset
35 from .config import ConfigFile
36
37
class GitRepository(BaseRepository):
    """
    Git repository backend.

    Refs and objects are accessed through a dulwich ``Repo`` instance
    (``self._repo``); history listing, diffs and cloning shell out to the
    ``git`` executable via :meth:`run_git_command`.
    """
    DEFAULT_BRANCH_NAME = 'master'
    scm = 'git'

    # Changeset ids accepted by ``_get_revision``: exactly 12 or exactly 40
    # hexadecimal characters.
    _SHA_PATTERN = re.compile(r'^(?:[0-9a-fA-F]{12}|[0-9a-fA-F]{40})$')

    def __init__(self, repo_path, create=False, src_url=None,
                 update_after_clone=False, bare=False):
        """
        Opens, creates or clones a repository located at ``repo_path``.

        :param repo_path: filesystem location of the repository
        :param create: if ``True``, a new repository is created at
          ``repo_path`` (which must not exist yet)
        :param src_url: if given (requires ``create=True``), repository is
          cloned from that url instead of being initialized empty
        :param update_after_clone: if ``False``, working directory is not
          checked out after clone
        :param bare: if ``True``, repository is created/cloned as bare one

        :raises RepositoryError: see ``_get_repo``
        """
        self.path = abspath(repo_path)
        self._repo = self._get_repo(create, src_url, update_after_clone, bare)
        try:
            self.head = self._repo.head()
        except KeyError:
            # No HEAD target yet (i.e. repository without commits)
            self.head = None

        # Lookup order used by ``get_config_value``: repository config first,
        # then user's ``~/.gitconfig``.
        self._config_files = [
            abspath(self._get_control_dir(), 'config'),
            abspath(get_user_home(), '.gitconfig'),
        ]

    def _get_control_dir(self):
        """
        Returns directory holding git's own files (``HEAD``, ``config``,
        ``description``...): ``<path>/.git`` if such directory exists,
        otherwise ``<path>`` itself (layout of a bare repository).
        """
        dot_git = os.path.join(self.path, '.git')
        if os.path.isdir(dot_git):
            return dot_git
        return self.path

    @LazyProperty
    def revisions(self):
        """
        Returns list of revisions' ids, in ascending order.  Being lazy
        attribute allows external tools to inject shas from cache.
        """
        return self._get_all_revisions()

    def run_git_command(self, cmd):
        """
        Runs given ``cmd`` as git command and returns tuple
        (stdout, stderr).

        .. note::
           This method exists only until log/blame functionality is
           implemented at Dulwich (see
           https://bugs.launchpad.net/bugs/645142). Parsing os command's
           output is road to hell...

        :param cmd: git command to be executed (without leading ``git``);
          a string is executed through the shell, a sequence of arguments
          is executed directly

        :raises RepositoryError: if command could not be started, or if it
          exited with non-zero code (unless stderr reports
          ``bad default revision 'HEAD'``, which is tolerated)
        """
        # NOTE(review): string commands go through the shell - callers are
        # responsible for quoting anything they interpolate into ``cmd``.
        use_shell = isinstance(cmd, basestring)
        if use_shell:
            cmd = 'git %s' % cmd
        else:
            cmd = ['git'] + list(cmd)

        opts = dict(shell=use_shell, stdout=PIPE, stderr=PIPE)
        # Directory may not exist yet (i.e. when cloning into ``self.path``)
        if os.path.isdir(self.path):
            opts['cwd'] = self.path
        try:
            p = Popen(cmd, **opts)
        except OSError as err:
            raise RepositoryError("Couldn't run git command (%s).\n"
                                  "Original error was:%s" % (cmd, err))
        so, se = p.communicate()
        if p.returncode != 0 and \
                not se.startswith("fatal: bad default revision 'HEAD'"):
            raise RepositoryError("Couldn't run git command (%s).\n"
                                  "stderr:\n%s" % (cmd, se))
        return so, se

    def _check_url(self, url):
        """
        Placeholder for verification that given ``url`` is a valid link
        before cloning from it (a remote may issue a basic auth request
        that can cause whole API to hang when used from python or other
        external calls).

        Not implemented yet: currently does nothing and returns ``None``.
        """
        #TODO: implement this
        pass

    def _get_repo(self, create, src_url=None, update_after_clone=False,
                  bare=False):
        """
        Returns dulwich ``Repo`` for ``self.path``; depending on parameters
        repository is opened, initialized or cloned first.

        :raises RepositoryError: if ``create`` is set but location already
          exists, if ``src_url`` is given without ``create``, or if
          underlying operation fails (not a git repository, OS error)
        """
        if create and os.path.exists(self.path):
            raise RepositoryError("Location already exist")
        if src_url and not create:
            raise RepositoryError("Create should be set to True if src_url is "
                                  "given (clone operation creates repository)")
        try:
            if create and src_url:
                self._check_url(src_url)
                self.clone(src_url, update_after_clone, bare)
                return Repo(self.path)
            elif create:
                os.mkdir(self.path)
                if bare:
                    return Repo.init_bare(self.path)
                else:
                    return Repo.init(self.path)
            else:
                return Repo(self.path)
        except (NotGitRepository, OSError) as err:
            raise RepositoryError(err)

    def _get_all_revisions(self):
        """
        Returns list of all commit ids as reported by ``git rev-list --all
        --date-order``, reversed so that oldest commit comes first.  Empty
        list is returned if the command fails.
        """
        cmd = 'rev-list --all --date-order'
        try:
            so = self.run_git_command(cmd)[0]
        except RepositoryError:
            # Can be raised for empty repositories
            return []
        revisions = so.splitlines()
        revisions.reverse()
        return revisions

    def _get_revision(self, revision):
        """
        Resolves ``revision`` into a changeset id taken from
        ``self.revisions``.

        :param revision: one of ``None``, ``''``, ``'tip'``, ``'HEAD'``,
          ``'head'`` or ``-1`` (last revision); an integer or a digit-only
          string shorter than 12 characters (index into
          ``self.revisions``); an all-zero string (treated as index ``0``);
          or a changeset id that must be present in ``self.revisions``

        :raises EmptyRepositoryError: if there are no revisions at all
        :raises ChangesetDoesNotExistError: if revision cannot be resolved
        """
        pattern = self._SHA_PATTERN

        if len(self.revisions) == 0:
            raise EmptyRepositoryError("There are no changesets yet")

        if revision in (None, '', 'tip', 'HEAD', 'head', -1):
            revision = self.revisions[-1]

        is_bstr = isinstance(revision, basestring)
        # Short digit-only strings are indexes, not (abbreviated) ids; a
        # string made only of zeros is converted with int() as well.
        is_index = isinstance(revision, int) or (is_bstr and (
            (revision.isdigit() and len(revision) < 12)
            or len(revision) == revision.count('0')))

        if is_index:
            try:
                revision = self.revisions[int(revision)]
            except (IndexError, ValueError, TypeError):
                raise ChangesetDoesNotExistError("Revision %r does not exist "
                    "for this repository %s" % (revision, self))

        elif is_bstr:
            if not pattern.match(revision) or revision not in self.revisions:
                raise ChangesetDoesNotExistError("Revision %r does not exist "
                    "for this repository %s" % (revision, self))

        # Ensure we return full id
        if not pattern.match(str(revision)):
            raise ChangesetDoesNotExistError("Given revision %r not recognized"
                % revision)
        return revision

    def _get_archives(self, archive_name='tip'):
        """
        Yields one dict (keys ``type``, ``extension``, ``node``) per
        supported archive format: zip, gz and bz2.
        """
        for kind, extension in [('zip', '.zip'), ('gz', '.tar.gz'),
                                ('bz2', '.tar.bz2')]:
            yield {"type": kind, "extension": extension,
                   "node": archive_name}

    def _get_url(self, url):
        """
        Returns normalized url. If schema is not given, would fall to
        filesystem (``file:///``) schema.
        """
        url = str(url)
        if url != 'default' and '://' not in url:
            url = ':///'.join(('file', url))
        return url

    @LazyProperty
    def name(self):
        """
        Returns last component of repository's path.
        """
        return os.path.basename(self.path)

    @LazyProperty
    def last_change(self):
        """
        Returns last change made on this repository as datetime object
        """
        return date_fromtimestamp(self._get_mtime(), makedate()[1])

    def _get_mtime(self):
        """
        Returns timestamp of the most recent changeset; if it cannot be
        retrieved, falls back to modification time of git's ``index`` file
        or, if there is none, of ``HEAD`` file.
        """
        try:
            return time.mktime(self.get_changeset().date.timetuple())
        except RepositoryError:
            # fallback to filesystem
            control_dir = self._get_control_dir()
            in_path = os.path.join(control_dir, "index")
            he_path = os.path.join(control_dir, "HEAD")
            if os.path.exists(in_path):
                return os.stat(in_path).st_mtime
            else:
                return os.stat(he_path).st_mtime

    @LazyProperty
    def description(self):
        """
        Returns content of git's ``description`` file as unicode, or
        ``u'unknown'`` if there is no such file.
        """
        undefined_description = u'unknown'
        description_path = os.path.join(self._get_control_dir(),
                                        'description')
        if os.path.isfile(description_path):
            with open(description_path) as description_file:
                return safe_unicode(description_file.read())
        else:
            return undefined_description

    @LazyProperty
    def contact(self):
        """
        Always returns ``u'Unknown'``.
        """
        undefined_contact = u'Unknown'
        return undefined_contact

    @property
    def branches(self):
        """
        Returns ``OrderedDict`` mapping branch name to its head id, sorted
        by name.  Both ``refs/heads/*`` and ``refs/remotes/*`` are included
        (remote refs ending with ``/HEAD`` are skipped); names are ref
        paths stripped of their first two components.
        """
        if not self.revisions:
            return {}
        refs = self._repo.refs.as_dict()
        sortkey = lambda ctx: ctx[0]
        _branches = [('/'.join(ref.split('/')[2:]), head)
                     for ref, head in refs.items()
                     if ref.startswith('refs/heads/') or
                     (ref.startswith('refs/remotes/')
                      and not ref.endswith('/HEAD'))]
        return OrderedDict(sorted(_branches, key=sortkey, reverse=False))

    def _get_tags(self):
        """
        Returns ``OrderedDict`` mapping tag name to the id its ref points
        at, sorted by name in descending order.
        """
        if not self.revisions:
            return {}
        sortkey = lambda ctx: ctx[0]
        _tags = [('/'.join(ref.split('/')[2:]), head) for ref, head in
                 self._repo.get_refs().items() if ref.startswith('refs/tags/')]
        return OrderedDict(sorted(_tags, key=sortkey, reverse=True))

    @LazyProperty
    def tags(self):
        """
        Returns tags of this repository (see ``_get_tags``); computed once
        and refreshed by ``tag`` and ``remove_tag``.
        """
        return self._get_tags()

    def tag(self, name, user, revision=None, message=None, date=None,
            **kwargs):
        """
        Creates and returns a tag for the given ``revision``.

        Tag is created as a plain ref pointing at the commit (lightweight
        tag), therefore ``user``, ``message`` and ``date`` are accepted but
        not recorded.

        :param name: name for new tag
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
        :param revision: changeset id for which new tag would be created
        :param message: message of the tag's commit
        :param date: date of tag's commit

        :raises TagAlreadyExistError: if tag with same name already exists
        """
        if name in self.tags:
            raise TagAlreadyExistError("Tag %s already exists" % name)
        changeset = self.get_changeset(revision)
        self._repo.refs["refs/tags/%s" % name] = changeset._commit.id

        # Refresh lazily computed tags
        self.tags = self._get_tags()
        return changeset

    def remove_tag(self, name, user, message=None, date=None):
        """
        Removes tag with the given ``name``.

        Tag's ref file is deleted from the filesystem; ``user``,
        ``message`` and ``date`` are accepted but not used.

        :param name: name of the tag to be removed
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
        :param message: message of the tag's removal commit
        :param date: date of tag's removal commit

        :raises TagDoesNotExistError: if tag with given name does not exists
        :raises RepositoryError: if ref file could not be removed
        """
        if name not in self.tags:
            raise TagDoesNotExistError("Tag %s does not exist" % name)
        tagpath = posixpath.join(self._repo.refs.path, 'refs', 'tags', name)
        try:
            os.remove(tagpath)
            self.tags = self._get_tags()
        except OSError as e:
            raise RepositoryError(e.strerror)

    def get_changeset(self, revision=None):
        """
        Returns ``GitChangeset`` object representing commit from git repository
        at the given revision or head (most recent commit) if None given.
        """
        if isinstance(revision, GitChangeset):
            return revision
        revision = self._get_revision(revision)
        changeset = GitChangeset(repository=self, revision=revision)
        return changeset

    def get_changesets(self, start=None, end=None, start_date=None,
                       end_date=None, branch_name=None, reverse=False):
        """
        Returns iterator of ``GitChangeset`` objects from start to end (both
        are inclusive), in ascending date order (unless ``reverse`` is set).

        :param start: changeset ID, as str; first returned changeset
        :param end: changeset ID, as str; last returned changeset
        :param start_date: if specified, changesets with commit date less than
          ``start_date`` would be filtered out from returned set
        :param end_date: if specified, changesets with commit date greater than
          ``end_date`` would be filtered out from returned set
        :param branch_name: if specified, changesets not reachable from given
          branch would be filtered out from returned set
        :param reverse: if ``True``, returned generator would be reversed
          (meaning that returned changesets would have descending date order)

        :raise BranchDoesNotExistError: If given ``branch_name`` does not
            exist.
        :raise ChangesetDoesNotExistError: If changeset for given ``start`` or
          ``end`` could not be found.
        :raise RepositoryError: If ``start`` is located after ``end``.
        """
        if branch_name and branch_name not in self.branches:
            raise BranchDoesNotExistError("Branch '%s' not found"
                                          % branch_name)
        # %H at format means (full) commit hash, initial hashes are retrieved
        # in ascending date order
        cmd_template = 'log --date-order --reverse --pretty=format:"%H"'
        cmd_params = {}
        if start_date:
            cmd_template += ' --since "$since"'
            cmd_params['since'] = start_date.strftime('%m/%d/%y %H:%M:%S')
        if end_date:
            cmd_template += ' --until "$until"'
            cmd_params['until'] = end_date.strftime('%m/%d/%y %H:%M:%S')
        if branch_name:
            # Quoted like other interpolated values, as the command is run
            # through the shell.
            # NOTE(review): double quotes do not neutralize ``$(...)`` or
            # backticks - consider passing the command as a list.
            cmd_template += ' "$branch_name"'
            cmd_params['branch_name'] = branch_name
        else:
            cmd_template += ' --all'

        cmd = Template(cmd_template).safe_substitute(**cmd_params)
        revs = self.run_git_command(cmd)[0].splitlines()
        start_pos = 0
        end_pos = len(revs)
        if start:
            _start = self._get_revision(start)
            try:
                start_pos = revs.index(_start)
            except ValueError:
                # Not among listed revisions - keep default boundary
                pass

        if end is not None:
            _end = self._get_revision(end)
            try:
                end_pos = revs.index(_end)
            except ValueError:
                # Not among listed revisions - keep default boundary
                pass

        if None not in [start, end] and start_pos > end_pos:
            raise RepositoryError('start cannot be after end')

        # ``end`` is inclusive (slicing past the end of list is harmless)
        end_pos += 1

        revs = revs[start_pos:end_pos]
        if reverse:
            revs = reversed(revs)
        for rev in revs:
            yield self.get_changeset(rev)

    def get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
                 context=3):
        """
        Returns (git like) *diff*, as plain text. Shows changes introduced by
        ``rev2`` since ``rev1``.

        :param rev1: Entry point from which diff is shown. Can be
          ``self.EMPTY_CHANGESET`` - in this case, patch showing all
          the changes since empty state of the repository until ``rev2``
        :param rev2: Until which revision changes should be shown.
        :param path: If given, diff is limited to that path.
        :param ignore_whitespace: If set to ``True``, would not show whitespace
          changes. Defaults to ``False``.
        :param context: How many lines before/after changed lines should be
          shown. Defaults to ``3``.
        """
        flags = ['-U%s' % context]
        if ignore_whitespace:
            flags.append('-w')

        from_empty = rev1 == self.EMPTY_CHANGESET
        if from_empty:
            rev2 = self.get_changeset(rev2).raw_id
            cmd = ' '.join(['show'] + flags + [rev2])
        else:
            rev1 = self.get_changeset(rev1).raw_id
            rev2 = self.get_changeset(rev2).raw_id
            cmd = ' '.join(['diff'] + flags + [rev1, rev2])

        if path:
            cmd += ' -- "%s"' % path
        stdout = self.run_git_command(cmd)[0]
        # If we used 'show' command, strip first few lines (until actual diff
        # starts)
        if from_empty:
            lines = stdout.splitlines()
            first_diff = len(lines)
            for idx, line in enumerate(lines):
                if line.startswith('diff'):
                    first_diff = idx
                    break
            # Append new line just like 'diff' command do
            stdout = '\n'.join(lines[first_diff:]) + '\n'
        return stdout

    @LazyProperty
    def in_memory_changeset(self):
        """
        Returns ``GitInMemoryChangeset`` object for this repository.
        """
        return GitInMemoryChangeset(self)

    def clone(self, url, update_after_clone=True, bare=False):
        """
        Tries to clone changes from external location.

        :param url: location to clone from; normalized with ``_get_url``
        :param update_after_clone: If set to ``False``, git won't checkout
          working directory
        :param bare: If set to ``True``, repository would be cloned into
          *bare* git repository (no working directory at all).

        :raises RepositoryError: if ``git clone`` fails
        """
        url = self._get_url(url)
        cmd = ['clone']
        if bare:
            cmd.append('--bare')
        elif not update_after_clone:
            cmd.append('--no-checkout')
        cmd += ['--', '"%s"' % url, '"%s"' % self.path]
        cmd = ' '.join(cmd)
        # If error occurs run_git_command raises RepositoryError already
        self.run_git_command(cmd)

    @LazyProperty
    def workdir(self):
        """
        Returns ``Workdir`` instance for this repository.
        """
        return GitWorkdir(self)

    def get_config_value(self, section, name, config_file=None):
        """
        Returns configuration value for a given [``section``] and ``name``.

        Files are consulted in order: given ``config_file`` path(s) first,
        then ``self._config_files``; first file defining the value wins.
        Files which cannot be read or parsed are skipped.  ``None`` is
        returned if value is not found.

        :param section: Section we want to retrieve value from
        :param name: Name of configuration we want to retrieve
        :param config_file: A path to file which should be used to retrieve
          configuration from (might also be a list of file paths)
        """
        if config_file is None:
            config_file = []
        elif isinstance(config_file, basestring):
            config_file = [config_file]

        def gen_configs():
            for path in list(config_file) + self._config_files:
                try:
                    yield ConfigFile.from_path(path)
                except (IOError, OSError, ValueError):
                    continue

        for config in gen_configs():
            try:
                return config.get(section, name)
            except KeyError:
                continue
        return None

    def get_user_name(self, config_file=None):
        """
        Returns user's name (``user.name``) from configuration files, as
        looked up by ``get_config_value``.

        :param config_file: A path to file which should be used to retrieve
          configuration from (might also be a list of file paths)
        """
        return self.get_config_value('user', 'name', config_file)

    def get_user_email(self, config_file=None):
        """
        Returns user's email (``user.email``) from configuration files, as
        looked up by ``get_config_value``.

        :param config_file: A path to file which should be used to retrieve
          configuration from (might also be a list of file paths)
        """
        return self.get_config_value('user', 'email', config_file)