# HG changeset patch # User Marcin Kuzminski # Date 1354215587 -3600 # Node ID c57a37430dc9abf5171fd771472acc9908f0baaa # Parent b61824e61e68461a30e73d1280c04b5a50fef43e fixes #652 switch to generator approach when doing file annotation to prevent huge memory consumption when executed on large files. Thanks to Alexey Larikov for the patch. - added pure dulwich method for getting file history. diff -r b61824e61e68 -r c57a37430dc9 rhodecode/lib/annotate.py --- a/rhodecode/lib/annotate.py Thu Nov 29 19:29:33 2012 +0100 +++ b/rhodecode/lib/annotate.py Thu Nov 29 19:59:47 2012 +0100 @@ -142,14 +142,15 @@ lines.append('') ls = '\n'.join(lines) - annotate_changesets = [tup[1] for tup in self.filenode.annotate] - # If pygments cropped last lines break we need do that too - ln_cs = len(annotate_changesets) - ln_ = len(ls.splitlines()) - if ln_cs > ln_: - annotate_changesets = annotate_changesets[:ln_ - ln_cs] - annotate = ''.join((self.annotate_from_changeset(changeset) - for changeset in annotate_changesets)) +# annotate_changesets = [tup[1] for tup in self.filenode.annotate] +## TODO: not sure what that fixes +# # If pygments cropped last lines break we need do that too +# ln_cs = len(annotate_changesets) +# ln_ = len(ls.splitlines()) +# if ln_cs > ln_: +# annotate_changesets = annotate_changesets[:ln_ - ln_cs] + annotate = ''.join((self.annotate_from_changeset(el[2]()) + for el in self.filenode.annotate)) # in case you wonder about the seemingly redundant <div>
here: # since the content in the other cell also is wrapped in a div, # some browsers in some configurations seem to mess up the formatting. diff -r b61824e61e68 -r c57a37430dc9 rhodecode/lib/vcs/backends/git/changeset.py --- a/rhodecode/lib/vcs/backends/git/changeset.py Thu Nov 29 19:29:33 2012 +0100 +++ b/rhodecode/lib/vcs/backends/git/changeset.py Thu Nov 29 19:59:47 2012 +0100 @@ -272,6 +272,7 @@ iterating commits. """ self._get_filectx(path) + cmd = 'log --pretty="format: %%H" -s -p %s -- "%s"' % ( self.id, path ) @@ -279,9 +280,24 @@ ids = re.findall(r'[0-9a-fA-F]{40}', so) return [self.repository.get_changeset(id) for id in ids] + def get_file_history_2(self, path): + """ + Returns history of file as reversed list of ``Changeset`` objects for + which file at given ``path`` has been modified. + + """ + self._get_filectx(path) + from dulwich.walk import Walker + include = [self.id] + walker = Walker(self.repository._repo.object_store, include, + paths=[path], max_entries=1) + return [self.repository.get_changeset(sha) + for sha in (x.commit.id for x in walker)] + def get_file_annotate(self, path): """ - Returns a list of three element tuples with lineno,changeset and line + Returns a generator of four element tuples with + lineno, sha, changeset lazy loader and line TODO: This function now uses os underlying 'git' command which is generally not good. 
Should be replaced with algorithm iterating @@ -293,12 +309,10 @@ # -r sha ==> blames for the given revision so, se = self.repository.run_git_command(cmd) - annotate = [] for i, blame_line in enumerate(so.split('\n')[:-1]): ln_no = i + 1 - id, line = re.split(r' ', blame_line, 1) - annotate.append((ln_no, self.repository.get_changeset(id), line)) - return annotate + sha, line = re.split(r' ', blame_line, 1) + yield (ln_no, sha, lambda: self.repository.get_changeset(sha), line) def fill_archive(self, stream=None, kind='tgz', prefix=None, subrepos=False): diff -r b61824e61e68 -r c57a37430dc9 rhodecode/lib/vcs/backends/hg/changeset.py --- a/rhodecode/lib/vcs/backends/hg/changeset.py Thu Nov 29 19:29:33 2012 +0100 +++ b/rhodecode/lib/vcs/backends/hg/changeset.py Thu Nov 29 19:59:47 2012 +0100 @@ -235,17 +235,15 @@ def get_file_annotate(self, path): """ - Returns a list of three element tuples with lineno,changeset and line + Returns a generator of four element tuples with + lineno, sha, changeset lazy loader and line """ + fctx = self._get_filectx(path) - annotate = [] for i, annotate_data in enumerate(fctx.annotate()): ln_no = i + 1 - annotate.append((ln_no, self.repository\ - .get_changeset(hex(annotate_data[0].node())), - annotate_data[1],)) - - return annotate + sha = hex(annotate_data[0].node()) + yield (ln_no, sha, lambda: self.repository.get_changeset(sha), annotate_data[1],) def fill_archive(self, stream=None, kind='tgz', prefix=None, subrepos=False):