Mercurial > kallithea
view rhodecode/tests/scripts/test_crawler.py @ 4116:ffd45b185016 rhodecode-2.2.5-gpl
Imported some of the GPLv3'd changes from RhodeCode v2.2.5.
This imports changes between changesets 21af6c4eab3d and 6177597791c2 in
RhodeCode's original repository, including only changes to Python files and HTML.
RhodeCode clearly licensed its changes to these files under GPLv3
in their /LICENSE file, which states the following:
The Python code and integrated HTML are licensed under the GPLv3 license.
(See:
https://code.rhodecode.com/rhodecode/files/v2.2.5/LICENSE
or
http://web.archive.org/web/20140512193334/https://code.rhodecode.com/rhodecode/files/f3b123159901f15426d18e3dc395e8369f70ebe0/LICENSE
for an online copy of that LICENSE file)
Conservancy reviewed these changes and confirmed that they can be licensed as
a whole to the Kallithea project under GPLv3-only.
While some of the contents committed herein are clearly licensed
GPLv3-or-later, on the whole we must assume they are GPLv3-only, since the
statement above from RhodeCode indicates that they intend GPLv3-only as their
license, per GPLv3§14 and other relevant sections of GPLv3.
author | Bradley M. Kuhn <bkuhn@sfconservancy.org> |
---|---|
date | Wed, 02 Jul 2014 19:03:13 -0400 |
parents | 32471bd1f4ee |
children | 7e5f8c12a3fc |
line wrap: on
line source
# -*- coding: utf-8 -*-
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
rhodecode.tests.scripts.test_crawler
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Test for crawling a project for memory usage.

This should be run as a regular script, together with a watch script
that will show memory usage::

    watch -n1 ./rhodecode/tests/mem_watch

An optional single command line argument overrides the base URI of the
instance being crawled.

:created_on: Apr 21, 2010
:author: marcink
:copyright: (c) 2013 RhodeCode GmbH.
:license: GPLv3, see LICENSE for more details.
"""

import cookielib
import urllib
import urllib2
import time
import os
import sys
from os.path import join as jn
from os.path import dirname as dn

# Make the project root (three directories above this file) importable so
# the ``rhodecode`` imports below resolve when run as a plain script.
__here__ = os.path.abspath(__file__)
__root__ = dn(dn(dn(__here__)))
sys.path.append(__root__)

from rhodecode.lib import vcs
from rhodecode.lib.compat import OrderedSet
from rhodecode.lib.vcs.exceptions import RepositoryError

PASES = 3
HOST = 'http://127.0.0.1'
PORT = 5000
BASE_URI = '%s:%s/' % (HOST, PORT)

# A single command line argument replaces the default base URI.
if len(sys.argv) == 2:
    BASE_URI = sys.argv[1]

if not BASE_URI.endswith('/'):
    BASE_URI += '/'

print('Crawling @ %s' % BASE_URI)
# From here on BASE_URI is a format string: ``BASE_URI % path``.
BASE_URI += '%s'
PROJECT_PATH = jn('/', 'home', 'marcink', 'repos')
PROJECTS = [
    #'linux-magx-pbranch',
    'CPython',
    'rhodecode_tip',
]


cj = cookielib.FileCookieJar('/tmp/rc_test_cookie.txt')
o = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
o.addheaders = [
    ('User-agent', 'rhodecode-crawler'),
    ('Accept-Language', 'en - us, en;q = 0.5')
]

urllib2.install_opener(o)


def _get_repo(proj):
    """
    Normalize ``proj`` into a ``(repo, name)`` pair.

    :param proj: either a project name (a string, resolved below
        ``PROJECT_PATH`` with ``vcs.get_repo``) or an already opened
        repository object exposing a ``name`` attribute.
    :returns: tuple of ``(repository object, project name)``
    """
    if isinstance(proj, basestring):
        return vcs.get_repo(jn(PROJECT_PATH, proj)), proj
    return proj, proj.name


def _timed_fetch(uri):
    """
    Open ``uri`` with the module opener, read the whole body and time it.

    The response is always closed, also when reading fails.

    :param uri: absolute URI to request
    :returns: tuple of ``(url reported by the response, body size,
        elapsed time)``; the elapsed time comes from ``time.time()`` and
        is therefore expressed in seconds.
    """
    started = time.time()
    response = o.open(uri)
    try:
        final_url = response.url
        size = len(response.read())
        elapsed = time.time() - started
    finally:
        response.close()
    return final_url, size, elapsed


def _print_summary(total_time, requests):
    """
    Print the total time and the average time per performed request.

    :param total_time: accumulated request time
    :param requests: number of requests that were actually made; when it
        is zero the average is reported as ``0.0`` instead of dividing by
        zero.
    """
    average = total_time / float(requests) if requests else 0.0
    print('total_time %s' % total_time)
    print('average on req %s' % average)


def test_changelog_walk(proj, pages=100):
    """
    Request changelog pages ``1 .. pages - 1`` of a project and time them.

    :param proj: project name or repository object, see ``_get_repo``
    :param pages: exclusive upper bound of the page numbers to request
    :raises AssertionError: when the URL reported by the response differs
        from the requested one
    """
    repo, proj = _get_repo(proj)
    page = '/'.join((proj, 'changelog',))

    total_time = 0
    visited = 0
    for page_no in range(1, pages):
        full_uri = (BASE_URI % page) + '?' + urllib.urlencode({'page': page_no})
        final_url, size, elapsed = _timed_fetch(full_uri)

        assert final_url == full_uri, 'URL:%s does not match %s' % (final_url, full_uri)

        total_time += elapsed
        visited += 1
        # NOTE: the value is in seconds; the historical 'ms' label is kept
        # so the output format stays unchanged.
        print('visited %s size:%s req:%s ms' % (full_uri, size, elapsed))

    # Average over the requests really made (pages - 1), not over ``pages``.
    _print_summary(total_time, visited)


def test_changeset_walk(proj, limit=None):
    """
    Request the changeset page of each changeset in a project and time it.

    :param proj: project name or repository object, see ``_get_repo``
    :param limit: when truthy, iteration stops on reaching the
        ``limit``-th changeset, so at most ``limit - 1`` pages are
        requested; ``None`` walks the whole repository.
    """
    repo, proj = _get_repo(proj)

    print('processing %s' % jn(PROJECT_PATH, proj))
    total_time = 0
    visited = 0

    for cnt, cs in enumerate(repo, 1):
        if limit and limit == cnt:
            break

        raw_cs = '/'.join((proj, 'changeset', cs.raw_id))
        full_uri = (BASE_URI % raw_cs)
        print('%s visiting %s\\%s' % (cnt, full_uri, cs))
        _url, size, elapsed = _timed_fetch(full_uri)
        total_time += elapsed
        visited += 1
        # NOTE: seconds, despite the historical 'ms' label.
        print('%s visited %s\\%s size:%s req:%s ms' % (cnt, full_uri, cs, size, elapsed))

    # Only count requests that were made; also safe for an empty repository.
    _print_summary(total_time, visited)


def test_files_walk(proj, limit=100):
    """
    Request the file browser page of paths found in the tip changeset.

    The repository root (``''``) is always part of the walked paths.

    :param proj: project name or repository object, see ``_get_repo``
    :param limit: when truthy, iteration stops on reaching the
        ``limit``-th path, so at most ``limit - 1`` pages are requested.
    """
    repo, proj = _get_repo(proj)

    print('processing %s' % jn(PROJECT_PATH, proj))
    total_time = 0
    visited = 0

    paths_ = OrderedSet([''])
    try:
        tip = repo.get_changeset('tip')
        for _topnode, dirs, files in tip.walk('/'):
            for dir_node in dirs:
                paths_.add(dir_node.path)
                for node in dir_node:
                    paths_.add(node.path)

            for file_node in files:
                paths_.add(file_node.path)
    except RepositoryError as e:
        # Best effort: keep crawling whatever was collected so far, but
        # do not hide the failure.
        print('could not walk tip of %s: %s' % (proj, e))

    for cnt, path in enumerate(paths_, 1):
        if limit and limit == cnt:
            break

        file_path = '/'.join((proj, 'files', 'tip', path))
        full_uri = (BASE_URI % file_path)
        print('%s visiting %s' % (cnt, full_uri))
        _url, size, elapsed = _timed_fetch(full_uri)
        total_time += elapsed
        visited += 1
        # NOTE: seconds, despite the historical 'ms' label.
        print('%s visited OK size:%s req:%s ms' % (cnt, size, elapsed))

    # Only count requests that were made.
    _print_summary(total_time, visited)


if __name__ == '__main__':
    for project in PROJECTS:
        project_repo = vcs.get_repo(jn(PROJECT_PATH, project))
        for pass_no in range(PASES):
            print('PASS %s/%s' % (pass_no, PASES))
            test_changelog_walk(project_repo, pages=80)
            test_changeset_walk(project_repo, limit=100)
            test_files_walk(project_repo, limit=100)