# HG changeset patch # User Mads Kiilerich # Date 1476826163 -7200 # Node ID 5cc6a3308a8fe5f62ceec8f0466b60e69c1ff6e0 # Parent 1013af35fa60be41427f4f04806b027e73f08c64 repo-scan: rewrite get_filesystem_repos to use os.walk instead of stupid recursion I think this is more readable. It is also faster. Perhaps because the more readable implementation makes it easier to optimize. diff -r 1013af35fa60 -r 5cc6a3308a8f kallithea/lib/paster_commands/repo_scan.py --- a/kallithea/lib/paster_commands/repo_scan.py Fri Jun 10 01:19:58 2016 +0200 +++ b/kallithea/lib/paster_commands/repo_scan.py Tue Oct 18 23:29:23 2016 +0200 @@ -61,7 +61,9 @@ remove_obsolete=rm_obsolete) added = ', '.join(added) or '-' removed = ', '.join(removed) or '-' - print 'Scan completed added: %s removed: %s' % (added, removed) + print 'Scan completed.' + print 'Added: %s' % added + print 'Removed: %s' % removed def update_parser(self): self.parser.add_option( diff -r 1013af35fa60 -r 5cc6a3308a8f kallithea/lib/utils.py --- a/kallithea/lib/utils.py Fri Jun 10 01:19:58 2016 +0200 +++ b/kallithea/lib/utils.py Tue Oct 18 23:29:23 2016 +0200 @@ -206,7 +206,7 @@ sa.commit() -def get_filesystem_repos(path, recursive=False, skip_removed_repos=True): +def get_filesystem_repos(path): """ Scans given path for repos and return (name,(type,path)) tuple @@ -216,40 +216,50 @@ # remove ending slash for better results path = safe_str(path.rstrip(os.sep)) - log.debug('now scanning in %s location recursive:%s...', path, recursive) + log.debug('now scanning in %s', path) + + def isdir(*n): + return os.path.isdir(os.path.join(*n)) - def _get_repos(p): - if not os.access(p, os.R_OK) or not os.access(p, os.X_OK): - log.warning('ignoring repo path without access: %s', p) - return - if not os.access(p, os.W_OK): - log.warning('repo path without write access: %s', p) - for dirpath in os.listdir(p): - if os.path.isfile(os.path.join(p, dirpath)): - continue - cur_path = os.path.join(p, dirpath) - + for root, dirs, _files in os.walk(path): + recurse_dirs = [] + for subdir in dirs: # skip removed repos - if skip_removed_repos and REMOVED_REPO_PAT.match(dirpath): + if REMOVED_REPO_PAT.match(subdir): continue #skip . dirs TODO: rly? then we should prevent creating them ... - if dirpath.startswith('.'): + if subdir.startswith('.'): continue - try: - scm_info = get_scm(cur_path) - yield scm_info[1].split(path, 1)[-1].lstrip(os.sep), scm_info - except VCSError: - if not recursive: + cur_path = os.path.join(root, subdir) + if (isdir(cur_path, '.hg') or + isdir(cur_path, '.git') or + isdir(cur_path, '.svn') or + isdir(cur_path, 'objects') and (isdir(cur_path, 'refs') or + os.path.isfile(os.path.join(cur_path, 'packed-refs')))): + + if not os.access(cur_path, os.R_OK) or not os.access(cur_path, os.X_OK): + log.warning('ignoring repo path without access: %s', cur_path) continue - #check if this dir containts other repos for recursive scan - rec_path = os.path.join(p, dirpath) - if not os.path.islink(rec_path) and os.path.isdir(rec_path): - for inner_scm in _get_repos(rec_path): - yield inner_scm + + if not os.access(cur_path, os.W_OK): + log.warning('repo path without write access: %s', cur_path) - return _get_repos(path) + try: + scm_info = get_scm(cur_path) + assert cur_path.startswith(path) + repo_path = cur_path[len(path) + 1:] + yield repo_path, scm_info + continue # no recursion + except VCSError: + # We should perhaps ignore such broken repos, but especially + # the bare git detection is unreliable so we dive into it + pass + + recurse_dirs.append(subdir) + + dirs[:] = recurse_dirs def is_valid_repo(repo_name, base_path, scm=None): diff -r 1013af35fa60 -r 5cc6a3308a8f kallithea/model/scm.py --- a/kallithea/model/scm.py Fri Jun 10 01:19:58 2016 +0200 +++ b/kallithea/model/scm.py Tue Oct 18 23:29:23 2016 +0200 @@ -276,7 +276,7 @@ baseui = make_ui('db') repos = {} - for name, path in get_filesystem_repos(repos_path, recursive=True): + for name, path in get_filesystem_repos(repos_path): # name need to be decomposed and put back together using the / # since this is internal storage separator for kallithea name = Repository.normalize_repo_name(name)