changeset 8814:4a18e6bf6b87

model: simplify how the get_commits_stats task groups on author. Avoid using the caching h.person. We want to get rid of the model dependency on helpers. The stats are persisted, and any temporary incorrectness in the long-term cached h.person will thus remain forever. It is thus arguably better to avoid using it in this place. get_commits_stats is also a long-running task, so speed is not *that* critical. And generally, processing commits in order will have a lot of the same committers, so a local cache will have a good hit rate. (Alternatively, h.person could perhaps be in the user model ... but that's not how it is now.)
author Mads Kiilerich <mads@kiilerich.com>
date Fri, 18 Dec 2020 22:03:10 +0100
parents a36a8804e7be
children 1b683a4eb9fc
files kallithea/model/async_tasks.py scripts/deps.py
diffstat 2 files changed, 21 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/model/async_tasks.py	Fri Dec 18 22:24:48 2020 +0100
+++ b/kallithea/model/async_tasks.py	Fri Dec 18 22:03:10 2020 +0100
@@ -40,11 +40,10 @@
 from tg import config
 
 import kallithea
-import kallithea.lib.helpers as h
 from kallithea.lib import celerylib, conf, ext_json, hooks
 from kallithea.lib.indexers.daemon import WhooshIndexingDaemon
 from kallithea.lib.utils2 import asbool, ascii_bytes
-from kallithea.lib.vcs.utils import author_email
+from kallithea.lib.vcs.utils import author_email, author_name
 from kallithea.model import db, repo, userlog
 
 
@@ -66,6 +65,19 @@
                          .run(full_index=full_index)
 
 
+def _author_username(author):
+    """Return the username of the user whose email matches the email part of the
+    'author' string; otherwise fall back to the author name, or else the email.
+    Kind of similar to h.person()."""
+    email = author_email(author)
+    if email:
+        user = db.User.get_by_email(email)
+        if user is not None:
+            return user.username
+    # No email or no matching user: pass back the author name if any, else the email
+    return author_name(author) or email
+
+
 @celerylib.task
 @celerylib.dbsession
 def get_commits_stats(repo_name, ts_min_y, ts_max_y, recurse_limit=100):
@@ -124,13 +136,19 @@
         log.debug('Getting revisions from %s to %s',
              last_rev, last_rev + parse_limit
         )
+        usernames_cache = {}
         for cs in scm_repo[last_rev:last_rev + parse_limit]:
             log.debug('parsing %s', cs)
             last_cs = cs  # remember last parsed changeset
             tt = cs.date.timetuple()
             k = mktime(tt[:3] + (0, 0, 0, 0, 0, 0))
 
-            username = h.person(cs.author)
+            # get username from author - similar to what h.person does
+            username = usernames_cache.get(cs.author)
+            if username is None:
+                username = _author_username(cs.author)
+                usernames_cache[cs.author] = username
+
             if username in co_day_auth_aggr:
                 try:
                     l = [timegetter(x) for x in
--- a/scripts/deps.py	Fri Dec 18 22:24:48 2020 +0100
+++ b/scripts/deps.py	Fri Dec 18 22:03:10 2020 +0100
@@ -158,7 +158,6 @@
 ('kallithea.lib.utils', 'kallithea.model'),  # clean up utils
 ('kallithea.lib.utils', 'kallithea.model.db'),
 ('kallithea.lib.utils', 'kallithea.model.scm'),
-('kallithea.model.async_tasks', 'kallithea.lib.helpers'),
 ('kallithea.model.async_tasks', 'kallithea.lib.hooks'),
 ('kallithea.model.async_tasks', 'kallithea.lib.indexers'),
 ('kallithea.model.async_tasks', 'kallithea.model'),