changeset 8772:52816813cbec

docs: describe, visualize, and verify internal code structure and layering Try to describe something that isn't entirely there yet. deps.py will help track and minimize violations through checks and visualization in deps.svg .
author Mads Kiilerich <mads@kiilerich.com>
date Fri, 13 Nov 2020 01:04:30 +0100
parents f8971422795e
children c6b4788337b5
files .hgignore docs/contributing.rst scripts/deps.py scripts/run-all-cleanup
diffstat 4 files changed, 389 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/.hgignore	Sat Nov 07 02:29:41 2020 +0100
+++ b/.hgignore	Fri Nov 13 01:04:30 2020 +0100
@@ -53,3 +53,6 @@
 ^\.pytest_cache$
 ^venv$
 /__pycache__$
+^deps\.dot$
+^deps\.svg$
+^deps\.txt$
--- a/docs/contributing.rst	Sat Nov 07 02:29:41 2020 +0100
+++ b/docs/contributing.rst	Fri Nov 13 01:04:30 2020 +0100
@@ -72,6 +72,94 @@
 .. _contributing-tests:
 
 
+Internal dependencies
+---------------------
+
+We try to keep the code base clean and modular and avoid circular dependencies.
+Code should only invoke code in layers below itself.
+
+Imports should import whole modules ``from`` their parent module, perhaps
+``as`` a shortened name. Avoid imports ``from`` modules.
+
+To avoid cycles and partially initialized modules, ``__init__.py`` should *not*
+contain any non-trivial imports. The top level of a module should *not* be a
+facade for the module functionality.
+
+Common code for a module is often in ``base.py``.
+
+The important part of the dependency graph is approximately linear. In the
+following list, modules may only depend on modules below them:
+
+``tests``
+  Just get the job done - anything goes.
+
+``bin/`` & ``config/`` & ``alembic/``
+  The main entry points, defined in ``setup.py``. Note: The TurboGears template
+  use ``config`` for the high WSGI application - this is not for low level
+  configuration.
+
+``controllers/``
+  The top level web application, with TurboGears using the ``root`` controller
+  as entry point, and ``routing`` dispatching to other controllers.
+
+``templates/**.html``
+  The "view", rendering to HTML. Invoked by controllers which can pass them
+  anything from lower layers - especially ``helpers`` available as ``h`` will
+  cut through all layers, and ``c`` gives access to global variables.
+
+``lib/helpers.py``
+  High level helpers, exposing everything to templates as ``h``. It depends on
+  everything and has a huge dependency chain, so it should not be used for
+  anything else. TODO.
+
+``controlles/base.py``
+  The base class of controllers, with lots of model knowledge.
+
+``lib/auth.py``
+  All things related to authentication. TODO.
+
+``lib/utils.py``
+  High level utils with lots of model knowledge. TODO.
+
+``lib/hooks.py``
+  Hooks into "everything" to give centralized logging to database, cache
+  invalidation, and extension handling. TODO.
+
+``model/``
+  Convenience business logic wrappers around database models.
+
+``model/db.py``
+  Defines the database schema and provides some additional logic.
+
+``model/scm.py``
+  All things related to anything. TODO.
+
+SQLAlchemy
+  Database session and transaction in thread-local variables.
+
+``lib/utils2.py``
+  Low level utils specific to Kallithea.
+
+``lib/webutils.py``
+  Low level generic utils with awareness of the TurboGears environment.
+
+TurboGears
+  Request, response and state like i18n gettext in thread-local variables.
+  External dependency with global state - usage should be minimized.
+
+``lib/vcs/``
+  Previously an independent library. No awareness of web, database, or state.
+
+``lib/*``
+  Various "pure" functionality not depending on anything else.
+
+``__init__``
+  Very basic Kallithea constants - some of them are set very early based on ``.ini``.
+
+This is not exactly how it is right now, but we aim for something like that.
+Especially the areas marked as TODO have some problems that need untangling.
+
+
 Running tests
 -------------
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/deps.py	Fri Nov 13 01:04:30 2020 +0100
@@ -0,0 +1,295 @@
+#!/usr/bin/env python3
+
+
+import re
+import sys
+
+
+ignored_modules = set('''
+argparse
+base64
+bcrypt
+binascii
+bleach
+calendar
+celery
+celery
+chardet
+click
+collections
+configparser
+copy
+csv
+ctypes
+datetime
+dateutil
+decimal
+decorator
+difflib
+distutils
+docutils
+email
+errno
+fileinput
+functools
+getpass
+grp
+hashlib
+hmac
+html
+http
+imp
+importlib
+inspect
+io
+ipaddr
+IPython
+isapi_wsgi
+itertools
+json
+kajiki
+ldap
+logging
+mako
+markdown
+mimetypes
+mock
+msvcrt
+multiprocessing
+operator
+os
+paginate
+paginate_sqlalchemy
+pam
+paste
+pkg_resources
+platform
+posixpath
+pprint
+pwd
+pyflakes
+pytest
+pytest_localserver
+random
+re
+routes
+setuptools
+shlex
+shutil
+smtplib
+socket
+ssl
+stat
+string
+struct
+subprocess
+sys
+tarfile
+tempfile
+textwrap
+tgext
+threading
+time
+traceback
+traitlets
+types
+urllib
+urlobject
+uuid
+warnings
+webhelpers2
+webob
+webtest
+whoosh
+win32traceutil
+zipfile
+'''.split())
+
+top_modules = set('''
+kallithea.alembic
+kallithea.bin
+kallithea.config
+kallithea.controllers
+kallithea.templates.py
+scripts
+'''.split())
+
+bottom_external_modules = set('''
+tg
+mercurial
+sqlalchemy
+alembic
+formencode
+pygments
+dulwich
+beaker
+psycopg2
+docs
+setup
+conftest
+'''.split())
+
+normal_modules = set('''
+kallithea
+kallithea.lib.celerylib.tasks
+kallithea.lib
+kallithea.lib.auth
+kallithea.lib.auth_modules
+kallithea.lib.base
+kallithea.lib.celerylib
+kallithea.lib.db_manage
+kallithea.lib.helpers
+kallithea.lib.hooks
+kallithea.lib.indexers
+kallithea.lib.utils
+kallithea.lib.utils2
+kallithea.lib.vcs
+kallithea.lib.webutils
+kallithea.model
+kallithea.model.scm
+kallithea.templates.py
+'''.split())
+
+shown_modules = normal_modules | top_modules
+
+# break the chains somehow - this is a cleanup TODO list
+known_violations = [
+('kallithea.lib.auth_modules', 'kallithea.lib.auth'),  # needs base&facade
+('kallithea.lib.utils', 'kallithea.model'),  # clean up utils
+('kallithea.lib.utils', 'kallithea.model.db'),
+('kallithea.lib.utils', 'kallithea.model.scm'),
+('kallithea.lib.celerylib.tasks', 'kallithea.lib.helpers'),
+('kallithea.lib.celerylib.tasks', 'kallithea.lib.hooks'),
+('kallithea.lib.celerylib.tasks', 'kallithea.lib.indexers'),
+('kallithea.lib.celerylib.tasks', 'kallithea.model'),
+('kallithea.model', 'kallithea.lib.auth'),  # auth.HasXXX
+('kallithea.model', 'kallithea.lib.auth_modules'),  # validators
+('kallithea.model', 'kallithea.lib.helpers'),
+('kallithea.model', 'kallithea.lib.hooks'),  # clean up hooks
+('kallithea.model', 'kallithea.model.scm'),
+('kallithea.model.scm', 'kallithea.lib.hooks'),
+]
+
+extra_edges = [
+('kallithea.config', 'kallithea.controllers'),  # through TG
+('kallithea.lib.auth', 'kallithea.lib.auth_modules'),  # custom loader
+]
+
+
+def normalize(s):
+    """Given a string with dot path, return the string it should be shown as."""
+    parts = s.replace('.__init__', '').split('.')
+    short_2 = '.'.join(parts[:2])
+    short_3 = '.'.join(parts[:3])
+    short_4 = '.'.join(parts[:4])
+    if parts[0] in ['scripts', 'contributor_data', 'i18n_utils']:
+        return 'scripts'
+    if short_3 == 'kallithea.model.meta':
+        return 'kallithea.model.db'
+    if parts[:4] == ['kallithea', 'lib', 'vcs', 'ssh']:
+        return 'kallithea.lib.vcs.ssh'
+    if short_4 in shown_modules:
+        return short_4
+    if short_3 in shown_modules:
+        return short_3
+    if short_2 in shown_modules:
+        return short_2
+    if short_2 == 'kallithea.tests':
+        return None
+    if parts[0] in ignored_modules:
+        return None
+    assert parts[0] in bottom_external_modules, parts
+    return parts[0]
+
+
+def main(filenames):
+    if not filenames or filenames[0].startswith('-'):
+        print('''\
+Usage:
+    hg files 'set:!binary()&grep("^#!.*python")' 'set:**.py' | xargs scripts/deps.py
+    dot -Tsvg deps.dot > deps.svg
+        ''')
+        raise SystemExit(1)
+
+    files_imports = dict()  # map filenames to its imports
+    import_deps = set()  # set of tuples with module name and its imports
+    for fn in filenames:
+        with open(fn) as f:
+            s = f.read()
+
+        dot_name = (fn[:-3] if fn.endswith('.py') else fn).replace('/', '.')
+        file_imports = set()
+        for m in re.finditer(r'^ *(?:from ([^ ]*) import (?:([a-zA-Z].*)|\(([^)]*)\))|import (.*))$', s, re.MULTILINE):
+            m_from, m_from_import, m_from_import2, m_import = m.groups()
+            if m_from:
+                pre = m_from + '.'
+                if pre.startswith('.'):
+                    pre = dot_name.rsplit('.', 1)[0] + pre
+                importlist = m_from_import or m_from_import2
+            else:
+                pre = ''
+                importlist = m_import
+            for imp in importlist.split('#', 1)[0].split(','):
+                full_imp = pre + imp.strip().split(' as ', 1)[0]
+                file_imports.add(full_imp)
+                import_deps.add((dot_name, full_imp))
+        files_imports[fn] = file_imports
+
+    # dump out all deps for debugging and analysis
+    with open('deps.txt', 'w') as f:
+        for fn, file_imports in sorted(files_imports.items()):
+            for file_import in sorted(file_imports):
+                if file_import.split('.', 1)[0] in ignored_modules:
+                    continue
+                f.write('%s: %s\n' % (fn, file_import))
+
+    # find leafs that haven't been ignored - they are the important external dependencies and shown in the bottom row
+    only_imported = set(
+        set(normalize(b) for a, b in import_deps) -
+        set(normalize(a) for a, b in import_deps) -
+        set([None, 'kallithea'])
+    )
+
+    normalized_dep_edges = set()
+    for dot_name, full_imp in import_deps:
+        a = normalize(dot_name)
+        b = normalize(full_imp)
+        if a is None or b is None or a == b:
+            continue
+        normalized_dep_edges.add((a, b))
+        #print((dot_name, full_imp, a, b))
+    normalized_dep_edges.update(extra_edges)
+
+    unseen_shown_modules = shown_modules.difference(a for a, b in normalized_dep_edges).difference(b for a, b in normalized_dep_edges)
+    assert not unseen_shown_modules, unseen_shown_modules
+
+    with open('deps.dot', 'w') as f:
+        f.write('digraph {\n')
+        f.write('subgraph { rank = same; %s}\n' % ''.join('"%s"; ' % s for s in sorted(top_modules)))
+        f.write('subgraph { rank = same; %s}\n' % ''.join('"%s"; ' % s for s in sorted(only_imported)))
+        for a, b in sorted(normalized_dep_edges):
+            f.write('  "%s" -> "%s"%s\n' % (a, b, ' [color=red]' if (a, b) in known_violations else ' [color=green]' if (a, b) in extra_edges else ''))
+        f.write('}\n')
+
+    # verify dependencies by untangling dependency chain bottom-up:
+    todo = set(normalized_dep_edges)
+    for x in known_violations:
+        todo.remove(x)
+
+    while todo:
+        depending = set(a for a, b in todo)
+        depended = set(b for a, b in todo)
+        drop = depended - depending
+        if not drop:
+            print('ERROR: cycles:', len(todo))
+            for x in sorted(todo):
+                print('%s,' % (x,))
+            raise SystemExit(1)
+        #for do_b in sorted(drop):
+        #    print('Picking', do_b, '- unblocks:', ' '.join(a for a, b in sorted((todo)) if b == do_b))
+        todo = set((a, b) for a, b in todo if b in depending)
+        #print()
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
--- a/scripts/run-all-cleanup	Sat Nov 07 02:29:41 2020 +0100
+++ b/scripts/run-all-cleanup	Fri Nov 13 01:04:30 2020 +0100
@@ -5,6 +5,9 @@
 set -e
 set -x
 
+hg files 'set:!binary()&grep("^#!.*python")' 'set:**.py' | xargs scripts/deps.py
+dot -Tsvg deps.dot > deps.svg
+
 scripts/docs-headings.py
 scripts/generate-ini.py
 scripts/whitespacecleanup.sh