changeset 3044:c57a37430dc9 beta

fixes #652: switch to a generator approach when doing file annotation to prevent huge memory consumption when executed on large files. Thanks to Alexey Larikov for the patch. - added a pure dulwich method for getting file history.
author Marcin Kuzminski <marcin@python-works.com>
date Thu, 29 Nov 2012 19:59:47 +0100
parents b61824e61e68
children b81680c97494
files rhodecode/lib/annotate.py rhodecode/lib/vcs/backends/git/changeset.py rhodecode/lib/vcs/backends/hg/changeset.py
diffstat 3 files changed, 33 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/rhodecode/lib/annotate.py	Thu Nov 29 19:29:33 2012 +0100
+++ b/rhodecode/lib/annotate.py	Thu Nov 29 19:59:47 2012 +0100
@@ -142,14 +142,15 @@
                     lines.append('')
             ls = '\n'.join(lines)
 
-        annotate_changesets = [tup[1] for tup in self.filenode.annotate]
-        # If pygments cropped last lines break we need do that too
-        ln_cs = len(annotate_changesets)
-        ln_ = len(ls.splitlines())
-        if  ln_cs > ln_:
-            annotate_changesets = annotate_changesets[:ln_ - ln_cs]
-        annotate = ''.join((self.annotate_from_changeset(changeset)
-            for changeset in annotate_changesets))
+#        annotate_changesets = [tup[1] for tup in self.filenode.annotate]
+##        TODO: not sure what that fixes
+#        # If pygments cropped last lines break we need do that too
+#        ln_cs = len(annotate_changesets)
+#        ln_ = len(ls.splitlines())
+#        if  ln_cs > ln_:
+#            annotate_changesets = annotate_changesets[:ln_ - ln_cs]
+        annotate = ''.join((self.annotate_from_changeset(el[2]())
+                            for el in self.filenode.annotate))
         # in case you wonder about the seemingly redundant <div> here:
         # since the content in the other cell also is wrapped in a div,
         # some browsers in some configurations seem to mess up the formatting.
--- a/rhodecode/lib/vcs/backends/git/changeset.py	Thu Nov 29 19:29:33 2012 +0100
+++ b/rhodecode/lib/vcs/backends/git/changeset.py	Thu Nov 29 19:59:47 2012 +0100
@@ -272,6 +272,7 @@
         iterating commits.
         """
         self._get_filectx(path)
+
         cmd = 'log --pretty="format: %%H" -s -p %s -- "%s"' % (
                   self.id, path
                )
@@ -279,9 +280,24 @@
         ids = re.findall(r'[0-9a-fA-F]{40}', so)
         return [self.repository.get_changeset(id) for id in ids]
 
+    def get_file_history_2(self, path):
+        """
+        Returns history of file as reversed list of ``Changeset`` objects for
+        which file at given ``path`` has been modified.
+
+        """
+        self._get_filectx(path)
+        from dulwich.walk import Walker
+        include = [self.id]
+        walker = Walker(self.repository._repo.object_store, include,
+                        paths=[path], max_entries=1)
+        return [self.repository.get_changeset(sha) 
+                for sha in (x.commit.id for x in walker)]
+
     def get_file_annotate(self, path):
         """
-        Returns a list of three element tuples with lineno,changeset and line
+        Returns a generator of four element tuples with
+            lineno, sha, changeset lazy loader and line
 
         TODO: This function now uses os underlying 'git' command which is
         generally not good. Should be replaced with algorithm iterating
@@ -293,12 +309,10 @@
         # -r sha ==> blames for the given revision
         so, se = self.repository.run_git_command(cmd)
 
-        annotate = []
         for i, blame_line in enumerate(so.split('\n')[:-1]):
             ln_no = i + 1
-            id, line = re.split(r' ', blame_line, 1)
-            annotate.append((ln_no, self.repository.get_changeset(id), line))
-        return annotate
+            sha, line = re.split(r' ', blame_line, 1)
+            yield (ln_no, sha, lambda: self.repository.get_changeset(sha), line)
 
     def fill_archive(self, stream=None, kind='tgz', prefix=None,
                      subrepos=False):
--- a/rhodecode/lib/vcs/backends/hg/changeset.py	Thu Nov 29 19:29:33 2012 +0100
+++ b/rhodecode/lib/vcs/backends/hg/changeset.py	Thu Nov 29 19:59:47 2012 +0100
@@ -235,17 +235,15 @@
 
     def get_file_annotate(self, path):
         """
-        Returns a list of three element tuples with lineno,changeset and line
+        Returns a generator of four element tuples with
+            lineno, sha, changeset lazy loader and line
         """
+
         fctx = self._get_filectx(path)
-        annotate = []
         for i, annotate_data in enumerate(fctx.annotate()):
             ln_no = i + 1
-            annotate.append((ln_no, self.repository\
-                             .get_changeset(hex(annotate_data[0].node())),
-                             annotate_data[1],))
-
-        return annotate
+            sha = hex(annotate_data[0].node())
+            yield (ln_no, sha, lambda: self.repository.get_changeset(sha), annotate_data[1],)
 
     def fill_archive(self, stream=None, kind='tgz', prefix=None,
                      subrepos=False):