Mercurial > kallithea
annotate rhodecode/tests/scripts/test_crawler.py @ 3169:a649599ad8b7 beta
added my gist script for another detailed installation instructions
author | Marcin Kuzminski <marcin@python-works.com> |
---|---|
date | Fri, 18 Jan 2013 01:11:28 +0100 |
parents | 32471bd1f4ee |
children | ffd45b185016 |
rev | line source |
---|---|
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
1 # -*- coding: utf-8 -*- |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
2 """ |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
3 rhodecode.tests.test_crawler |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
5 |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
6 Test for crawling a project for memory usage |
1371
0ad5769fa78d
Fixed desc on test_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
1341
diff
changeset
|
7 This should be run as a regular script, together |
0ad5769fa78d
Fixed desc on test_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
1341
diff
changeset
|
8 with a watch script that will show memory usage. |
1818
cf51bbfb120e
auto white-space removal
Marcin Kuzminski <marcin@python-works.com>
parents:
1514
diff
changeset
|
9 |
1334
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
10 watch -n1 ./rhodecode/tests/mem_watch |
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
11 |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
12 :created_on: Apr 21, 2010 |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
13 :author: marcink |
1824
89efedac4e6c
2012 copyrights
Marcin Kuzminski <marcin@python-works.com>
parents:
1818
diff
changeset
|
14 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com> |
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
15 :license: GPLv3, see COPYING for more details. |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
16 """ |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
17 # This program is free software: you can redistribute it and/or modify |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
18 # it under the terms of the GNU General Public License as published by |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
19 # the Free Software Foundation, either version 3 of the License, or |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
20 # (at your option) any later version. |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
21 # |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
22 # This program is distributed in the hope that it will be useful, |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
23 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
24 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
25 # GNU General Public License for more details. |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
26 # |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
27 # You should have received a copy of the GNU General Public License |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
28 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
29 |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
30 |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
31 import cookielib |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
32 import urllib |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
33 import urllib2 |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
34 import time |
2211
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
35 import os |
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
36 import sys |
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
37 from os.path import join as jn |
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
38 from os.path import dirname as dn |
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
39 |
2211
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
40 __here__ = os.path.abspath(__file__) |
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
41 __root__ = dn(dn(dn(__here__))) |
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
42 sys.path.append(__root__) |
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
43 |
2007
324ac367a4da
Added VCS into rhodecode core for faster and easier deployments of new versions
Marcin Kuzminski <marcin@python-works.com>
parents:
1977
diff
changeset
|
44 from rhodecode.lib import vcs |
2226
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
45 from rhodecode.lib.compat import OrderedSet |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
46 from rhodecode.lib.vcs.exceptions import RepositoryError |
1334
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
47 |
2226
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
48 PASES = 3 |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
49 HOST = 'http://127.0.0.1' |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
50 PORT = 5000 |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
51 BASE_URI = '%s:%s/' % (HOST, PORT) |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
52 |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
53 if len(sys.argv) == 2: |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
54 BASE_URI = sys.argv[1] |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
55 |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
56 if not BASE_URI.endswith('/'): |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
57 BASE_URI += '/' |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
58 |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
59 print 'Crawling @ %s' % BASE_URI |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
60 BASE_URI += '%s' |
2995
32471bd1f4ee
Implemented generation of changesets based
Marcin Kuzminski <marcin@python-works.com>
parents:
2527
diff
changeset
|
61 PROJECT_PATH = jn('/', 'home', 'marcink', 'repos') |
2213
884fbc541d8d
added more repos to crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2211
diff
changeset
|
62 PROJECTS = [ |
2995
32471bd1f4ee
Implemented generation of changesets based
Marcin Kuzminski <marcin@python-works.com>
parents:
2527
diff
changeset
|
63 #'linux-magx-pbranch', |
2213
884fbc541d8d
added more repos to crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2211
diff
changeset
|
64 'CPython', |
884fbc541d8d
added more repos to crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2211
diff
changeset
|
65 'rhodecode_tip', |
884fbc541d8d
added more repos to crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2211
diff
changeset
|
66 ] |
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
67 |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
68 |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
69 cj = cookielib.FileCookieJar('/tmp/rc_test_cookie.txt') |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
70 o = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
71 o.addheaders = [ |
2211
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
72 ('User-agent', 'rhodecode-crawler'), |
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
73 ('Accept-Language', 'en - us, en;q = 0.5') |
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
74 ] |
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
75 |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
76 urllib2.install_opener(o) |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
77 |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
78 |
2226
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
79 def _get_repo(proj): |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
80 if isinstance(proj, basestring): |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
81 repo = vcs.get_repo(jn(PROJECT_PATH, proj)) |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
82 proj = proj |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
83 else: |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
84 repo = proj |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
85 proj = repo.name |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
86 |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
87 return repo, proj |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
88 |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
89 |
2211
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
90 def test_changelog_walk(proj, pages=100): |
2226
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
91 repo, proj = _get_repo(proj) |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
92 |
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
93 total_time = 0 |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
94 for i in range(1, pages): |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
95 |
2211
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
96 page = '/'.join((proj, 'changelog',)) |
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
97 |
2995
32471bd1f4ee
Implemented generation of changesets based
Marcin Kuzminski <marcin@python-works.com>
parents:
2527
diff
changeset
|
98 full_uri = (BASE_URI % page) + '?' + urllib.urlencode({'page': i}) |
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
99 s = time.time() |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
100 f = o.open(full_uri) |
2995
32471bd1f4ee
Implemented generation of changesets based
Marcin Kuzminski <marcin@python-works.com>
parents:
2527
diff
changeset
|
101 |
32471bd1f4ee
Implemented generation of changesets based
Marcin Kuzminski <marcin@python-works.com>
parents:
2527
diff
changeset
|
102 assert f.url == full_uri, 'URL:%s does not match %s' % (f.url, full_uri) |
32471bd1f4ee
Implemented generation of changesets based
Marcin Kuzminski <marcin@python-works.com>
parents:
2527
diff
changeset
|
103 |
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
104 size = len(f.read()) |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
105 e = time.time() - s |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
106 total_time += e |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
107 print 'visited %s size:%s req:%s ms' % (full_uri, size, e) |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
108 |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
109 print 'total_time', total_time |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
110 print 'average on req', total_time / float(pages) |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
111 |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
112 |
2211
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
113 def test_changeset_walk(proj, limit=None): |
2226
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
114 repo, proj = _get_repo(proj) |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
115 |
2211
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
116 print 'processing', jn(PROJECT_PATH, proj) |
1334
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
117 total_time = 0 |
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
118 |
1338
bbfc3f305c6b
Updated test_crawler to scan paths in more sensible order using ordered tuple.
Marcin Kuzminski <marcin@python-works.com>
parents:
1334
diff
changeset
|
119 cnt = 0 |
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
120 for i in repo: |
1338
bbfc3f305c6b
Updated test_crawler to scan paths in more sensible order using ordered tuple.
Marcin Kuzminski <marcin@python-works.com>
parents:
1334
diff
changeset
|
121 cnt += 1 |
2211
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
122 raw_cs = '/'.join((proj, 'changeset', i.raw_id)) |
1338
bbfc3f305c6b
Updated test_crawler to scan paths in more sensible order using ordered tuple.
Marcin Kuzminski <marcin@python-works.com>
parents:
1334
diff
changeset
|
123 if limit and limit == cnt: |
bbfc3f305c6b
Updated test_crawler to scan paths in more sensible order using ordered tuple.
Marcin Kuzminski <marcin@python-works.com>
parents:
1334
diff
changeset
|
124 break |
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
125 |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
126 full_uri = (BASE_URI % raw_cs) |
2211
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
127 print '%s visiting %s\%s' % (cnt, full_uri, i) |
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
128 s = time.time() |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
129 f = o.open(full_uri) |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
130 size = len(f.read()) |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
131 e = time.time() - s |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
132 total_time += e |
1341
1881b808a71d
small fixes for test crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
1338
diff
changeset
|
133 print '%s visited %s\%s size:%s req:%s ms' % (cnt, full_uri, i, size, e) |
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
134 |
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
135 print 'total_time', total_time |
1338
bbfc3f305c6b
Updated test_crawler to scan paths in more sensible order using ordered tuple.
Marcin Kuzminski <marcin@python-works.com>
parents:
1334
diff
changeset
|
136 print 'average on req', total_time / float(cnt) |
bbfc3f305c6b
Updated test_crawler to scan paths in more sensible order using ordered tuple.
Marcin Kuzminski <marcin@python-works.com>
parents:
1334
diff
changeset
|
137 |
1332
3fdfecc52c32
added test for crawling and memory usage
Marcin Kuzminski <marcin@python-works.com>
parents:
diff
changeset
|
138 |
2211
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
139 def test_files_walk(proj, limit=100): |
2226
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
140 repo, proj = _get_repo(proj) |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
141 |
2211
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
142 print 'processing', jn(PROJECT_PATH, proj) |
1334
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
143 total_time = 0 |
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
144 |
1338
bbfc3f305c6b
Updated test_crawler to scan paths in more sensible order using ordered tuple.
Marcin Kuzminski <marcin@python-works.com>
parents:
1334
diff
changeset
|
145 paths_ = OrderedSet(['']) |
1334
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
146 try: |
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
147 tip = repo.get_changeset('tip') |
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
148 for topnode, dirs, files in tip.walk('/'): |
1338
bbfc3f305c6b
Updated test_crawler to scan paths in more sensible order using ordered tuple.
Marcin Kuzminski <marcin@python-works.com>
parents:
1334
diff
changeset
|
149 |
bbfc3f305c6b
Updated test_crawler to scan paths in more sensible order using ordered tuple.
Marcin Kuzminski <marcin@python-works.com>
parents:
1334
diff
changeset
|
150 for dir in dirs: |
bbfc3f305c6b
Updated test_crawler to scan paths in more sensible order using ordered tuple.
Marcin Kuzminski <marcin@python-works.com>
parents:
1334
diff
changeset
|
151 paths_.add(dir.path) |
bbfc3f305c6b
Updated test_crawler to scan paths in more sensible order using ordered tuple.
Marcin Kuzminski <marcin@python-works.com>
parents:
1334
diff
changeset
|
152 for f in dir: |
bbfc3f305c6b
Updated test_crawler to scan paths in more sensible order using ordered tuple.
Marcin Kuzminski <marcin@python-works.com>
parents:
1334
diff
changeset
|
153 paths_.add(f.path) |
bbfc3f305c6b
Updated test_crawler to scan paths in more sensible order using ordered tuple.
Marcin Kuzminski <marcin@python-works.com>
parents:
1334
diff
changeset
|
154 |
1334
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
155 for f in files: |
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
156 paths_.add(f.path) |
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
157 |
1977
3b0255d936c8
fixed exception in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
1824
diff
changeset
|
158 except RepositoryError, e: |
1334
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
159 pass |
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
160 |
1341
1881b808a71d
small fixes for test crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
1338
diff
changeset
|
161 cnt = 0 |
1334
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
162 for f in paths_: |
1341
1881b808a71d
small fixes for test crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
1338
diff
changeset
|
163 cnt += 1 |
1881b808a71d
small fixes for test crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
1338
diff
changeset
|
164 if limit and limit == cnt: |
1881b808a71d
small fixes for test crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
1338
diff
changeset
|
165 break |
1881b808a71d
small fixes for test crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
1338
diff
changeset
|
166 |
2211
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
167 file_path = '/'.join((proj, 'files', 'tip', f)) |
1334
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
168 full_uri = (BASE_URI % file_path) |
2211
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
169 print '%s visiting %s' % (cnt, full_uri) |
1334
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
170 s = time.time() |
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
171 f = o.open(full_uri) |
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
172 size = len(f.read()) |
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
173 e = time.time() - s |
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
174 total_time += e |
2211
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
175 print '%s visited OK size:%s req:%s ms' % (cnt, size, e) |
1334
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
176 |
08cd02374883
Added mem_watch script. Test can also walk on file tree. Fixed some path issues
Marcin Kuzminski <marcin@python-works.com>
parents:
1332
diff
changeset
|
177 print 'total_time', total_time |
1341
1881b808a71d
small fixes for test crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
1338
diff
changeset
|
178 print 'average on req', total_time / float(cnt) |
1881b808a71d
small fixes for test crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
1338
diff
changeset
|
179 |
2211
c00ab8b2893e
small improvements in rhodecode_crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2007
diff
changeset
|
180 if __name__ == '__main__': |
2226
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
181 for path in PROJECTS: |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
182 repo = vcs.get_repo(jn(PROJECT_PATH, path)) |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
183 for i in range(PASES): |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
184 print 'PASS %s/%s' % (i, PASES) |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
185 test_changelog_walk(repo, pages=80) |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
186 test_changeset_walk(repo, limit=100) |
ce04e6ef80c0
improvements for rhodecode crawler
Marcin Kuzminski <marcin@python-works.com>
parents:
2213
diff
changeset
|
187 test_files_walk(repo, limit=100) |