changeset 1401:b7563ad4e7ee beta

Unicode fixes: added a safe_str function for global str() operations, plus better test sandboxing
author Marcin Kuzminski <marcin@python-works.com>
date Thu, 30 Jun 2011 01:25:37 +0200
parents 0d7b56b97953
children edfbf3ee9965
files rhodecode/config/environment.py rhodecode/controllers/files.py rhodecode/lib/__init__.py rhodecode/lib/celerylib/tasks.py rhodecode/lib/helpers.py rhodecode/lib/middleware/simplegit.py rhodecode/lib/middleware/simplehg.py rhodecode/lib/utils.py rhodecode/model/db.py rhodecode/model/repo.py rhodecode/model/scm.py rhodecode/tests/functional/test_search.py
diffstat 12 files changed, 93 insertions(+), 61 deletions(-) [+]
line wrap: on
line diff
--- a/rhodecode/config/environment.py	Wed Jun 29 01:49:53 2011 +0200
+++ b/rhodecode/config/environment.py	Thu Jun 30 01:25:37 2011 +0200
@@ -61,7 +61,7 @@
         from rhodecode.lib.utils import create_test_env, create_test_index
         from rhodecode.tests import  TESTS_TMP_PATH
         create_test_env(TESTS_TMP_PATH, config)
-        create_test_index(TESTS_TMP_PATH, True)
+        create_test_index(TESTS_TMP_PATH, config, True)
 
     #MULTIPLE DB configs
     # Setup the SQLAlchemy database engine
--- a/rhodecode/controllers/files.py	Wed Jun 29 01:49:53 2011 +0200
+++ b/rhodecode/controllers/files.py	Thu Jun 30 01:25:37 2011 +0200
@@ -38,7 +38,7 @@
 from vcs.nodes import FileNode, NodeKind
 from vcs.utils import diffs as differ
 
-from rhodecode.lib import convert_line_endings, detect_mode
+from rhodecode.lib import convert_line_endings, detect_mode, safe_str
 from rhodecode.lib.auth import LoginRequired, HasRepoPermissionAnyDecorator
 from rhodecode.lib.base import BaseRepoController, render
 from rhodecode.lib.utils import EmptyChangeset
@@ -153,7 +153,7 @@
         file_node = self.__get_filenode_or_redirect(repo_name, cs, f_path)
 
         response.content_disposition = 'attachment; filename=%s' % \
-            f_path.split(os.sep)[-1].encode('utf8', 'replace')
+            safe_str(f_path.split(os.sep)[-1])
 
         response.content_type = file_node.mimetype
         return file_node.content
@@ -198,7 +198,7 @@
 
         if dispo == 'attachment':
             dispo = 'attachment; filename=%s' % \
-                        f_path.split(os.sep)[-1].encode('utf8', 'replace')
+                        safe_str(f_path.split(os.sep)[-1])
 
         response.content_disposition = dispo
         response.content_type = mimetype
--- a/rhodecode/lib/__init__.py	Wed Jun 29 01:49:53 2011 +0200
+++ b/rhodecode/lib/__init__.py	Thu Jun 30 01:25:37 2011 +0200
@@ -160,7 +160,7 @@
 def safe_unicode(_str, from_encoding='utf8'):
     """
     safe unicode function. In case of UnicodeDecode error we try to return
-    unicode with errors replace
+    unicode with errors replaced
 
     :param _str: string to decode
     :rtype: unicode
@@ -178,6 +178,28 @@
     return u_str
 
 
+def safe_str(_unicode, to_encoding='utf8'):
+    """
+    safe str function. In case of UnicodeEncode error we try to return
+    str with errors replaced
+
+    :param _unicode: unicode to encode
+    :rtype: str
+    :returns: str object
+    """
+
+    if isinstance(_unicode, str):
+        return _unicode
+
+    try:
+        safe_str = str(_unicode)
+    except UnicodeEncodeError:
+        safe_str = _unicode.encode(to_encoding, 'replace')
+
+    return safe_str
+
+
+
 def engine_from_config(configuration, prefix='sqlalchemy.', **kwargs):
     """
     Custom engine_from_config functions that makes sure we use NullPool for
--- a/rhodecode/lib/celerylib/tasks.py	Wed Jun 29 01:49:53 2011 +0200
+++ b/rhodecode/lib/celerylib/tasks.py	Thu Jun 30 01:25:37 2011 +0200
@@ -37,7 +37,7 @@
 from pylons import config
 from pylons.i18n.translation import _
 
-from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
+from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP, safe_str
 from rhodecode.lib.celerylib import run_task, locked_task, str2bool, \
     __get_lockkey, LockHeld, DaemonLock
 from rhodecode.lib.helpers import person
@@ -66,7 +66,6 @@
 CELERY_ON = str2bool(config['app_conf'].get('use_celery'))
 
 
-
 def get_session():
     if CELERY_ON:
         engine = engine_from_config(config, 'sqlalchemy.db1.')
@@ -112,8 +111,7 @@
         co_day_auth_aggr = {}
         commits_by_day_aggregate = {}
         repos_path = get_repos_path()
-        p = os.path.join(repos_path, repo_name)
-        repo = get_repo(p)
+        repo = get_repo(safe_str(os.path.join(repos_path, repo_name)))
         repo_size = len(repo.revisions)
         #return if repo have no revisions
         if repo_size < 1:
@@ -358,8 +356,7 @@
 
 def __get_codes_stats(repo_name):
     repos_path = get_repos_path()
-    p = os.path.join(repos_path, repo_name)
-    repo = get_repo(p)
+    repo = get_repo(safe_str(os.path.join(repos_path, repo_name)))
     tip = repo.get_changeset()
     code_stats = {}
 
--- a/rhodecode/lib/helpers.py	Wed Jun 29 01:49:53 2011 +0200
+++ b/rhodecode/lib/helpers.py	Thu Jun 30 01:25:37 2011 +0200
@@ -36,7 +36,7 @@
 
 from vcs.utils.annotate import annotate_highlight
 from rhodecode.lib.utils import repo_name_slug
-from rhodecode.lib import str2bool, safe_unicode
+from rhodecode.lib import str2bool, safe_unicode, safe_str
 
 def _reset(name, value=None, id=NotGiven, type="reset", **attrs):
     """
@@ -502,7 +502,7 @@
 
     if isinstance(email_address, unicode):
         #hashlib crashes on unicode items
-        email_address = email_address.encode('utf8', 'replace')
+        email_address = safe_str(email_address)
     # construct the url
     gravatar_url = baseurl + hashlib.md5(email_address.lower()).hexdigest() + "?"
     gravatar_url += urllib.urlencode({'d':default, 's':str(size)})
--- a/rhodecode/lib/middleware/simplegit.py	Wed Jun 29 01:49:53 2011 +0200
+++ b/rhodecode/lib/middleware/simplegit.py	Thu Jun 30 01:25:37 2011 +0200
@@ -69,6 +69,7 @@
 from paste.auth.basic import AuthBasicAuthenticator
 from paste.httpheaders import REMOTE_USER, AUTH_TYPE
 
+from rhodecode.lib import safe_str
 from rhodecode.lib.auth import authfunc, HasPermissionAnyMiddleware
 from rhodecode.lib.utils import invalidate_cache, check_repo_fast
 from rhodecode.model.user import UserModel
@@ -147,8 +148,8 @@
                 #==============================================================
 
                 if not REMOTE_USER(environ):
-                    self.authenticate.realm = self.config['rhodecode_realm'].\
-                        encode('utf8', 'replace')
+                    self.authenticate.realm = \
+                        safe_str(self.config['rhodecode_realm'])
                     result = self.authenticate(environ)
                     if isinstance(result, str):
                         AUTH_TYPE.update(environ, 'basic')
--- a/rhodecode/lib/middleware/simplehg.py	Wed Jun 29 01:49:53 2011 +0200
+++ b/rhodecode/lib/middleware/simplehg.py	Thu Jun 30 01:25:37 2011 +0200
@@ -35,6 +35,7 @@
 from paste.auth.basic import AuthBasicAuthenticator
 from paste.httpheaders import REMOTE_USER, AUTH_TYPE
 
+from rhodecode.lib import safe_str
 from rhodecode.lib.auth import authfunc, HasPermissionAnyMiddleware
 from rhodecode.lib.utils import make_ui, invalidate_cache, \
     check_repo_fast, ui_sections
@@ -112,8 +113,8 @@
                 #==============================================================
 
                 if not REMOTE_USER(environ):
-                    self.authenticate.realm = self.config['rhodecode_realm'].\
-                        encode('utf8', 'replace')
+                    self.authenticate.realm = \
+                        safe_str(self.config['rhodecode_realm'])
                     result = self.authenticate(environ)
                     if isinstance(result, str):
                         AUTH_TYPE.update(environ, 'basic')
--- a/rhodecode/lib/utils.py	Wed Jun 29 01:49:53 2011 +0200
+++ b/rhodecode/lib/utils.py	Thu Jun 30 01:25:37 2011 +0200
@@ -458,18 +458,22 @@
 #==============================================================================
 # TEST FUNCTIONS AND CREATORS
 #==============================================================================
-def create_test_index(repo_location, full_index):
-    """Makes default test index
-    :param repo_location:
+def create_test_index(repo_location, config, full_index):
+    """
+    Makes default test index
+    
+    :param config: test config
     :param full_index:
     """
+
     from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
     from rhodecode.lib.pidlock import DaemonLock, LockHeld
-    import shutil
+
+    repo_location = repo_location
 
-    index_location = os.path.join(repo_location, 'index')
-    if os.path.exists(index_location):
-        shutil.rmtree(index_location)
+    index_location = os.path.join(config['app_conf']['index_dir'], 'index')
+    if not os.path.exists(index_location):
+        os.makedirs(index_location)
 
     try:
         l = DaemonLock(file=jn(dn(index_location), 'make_index.lock'))
--- a/rhodecode/model/db.py	Wed Jun 29 01:49:53 2011 +0200
+++ b/rhodecode/model/db.py	Thu Jun 30 01:25:37 2011 +0200
@@ -42,7 +42,7 @@
 from vcs.utils.lazy import LazyProperty
 from vcs.nodes import FileNode
 
-from rhodecode.lib import str2bool, json
+from rhodecode.lib import str2bool, json, safe_str
 from rhodecode.model.meta import Base, Session
 from rhodecode.model.caching_query import FromCache
 
@@ -479,7 +479,11 @@
             Session.add(inv)
             Session.commit()
 
-        return _c(self.repo_name)
+        # TODO: remove this trick once beaker 1.6 is released
+        # with a fix for this issue
+        rn = safe_str(self.repo_name)
+
+        return _c(rn)
 
     def __get_instance(self):
 
@@ -497,7 +501,8 @@
             return
 
         if alias == 'hg':
-            repo = backend(repo_full_path, create=False,
+
+            repo = backend(safe_str(repo_full_path), create=False,
                            baseui=self._ui)
             #skip hidden web repository
             if repo._get_hidden():
--- a/rhodecode/model/repo.py	Wed Jun 29 01:49:53 2011 +0200
+++ b/rhodecode/model/repo.py	Thu Jun 30 01:25:37 2011 +0200
@@ -33,6 +33,8 @@
 from vcs.utils.lazy import LazyProperty
 from vcs.backends import get_backend
 
+from rhodecode.lib import safe_str
+
 from rhodecode.model import BaseModel
 from rhodecode.model.caching_query import FromCache
 from rhodecode.model.db import Repository, RepoToPerm, User, Permission, \
@@ -171,13 +173,12 @@
 
         try:
             if fork:
-                #force str since hg doesn't go with unicode
-                repo_name = str(form_data['fork_name'])
-                org_name = str(form_data['repo_name'])
-                org_full_name = org_name#str(form_data['fork_name_full'])
+                repo_name = form_data['fork_name']
+                org_name = form_data['repo_name']
+                org_full_name = org_name
 
             else:
-                org_name = repo_name = str(form_data['repo_name'])
+                org_name = repo_name = form_data['repo_name']
                 repo_name_full = form_data['repo_name_full']
 
             new_repo = Repository()
@@ -302,21 +303,23 @@
         """
         from rhodecode.lib.utils import check_repo
 
-
         if new_parent_id:
             paths = Group.get(new_parent_id).full_path.split(Group.url_sep())
             new_parent_path = os.sep.join(paths)
         else:
             new_parent_path = ''
 
-        repo_path = os.path.join(self.repos_path, new_parent_path, repo_name)
+        repo_path = os.path.join(*map(lambda x:safe_str(x),
+                                [self.repos_path, new_parent_path, repo_name]))
 
-        if check_repo(repo_name, self.repos_path):
+        if check_repo(repo_path, self.repos_path):
             log.info('creating repo %s in %s @ %s', repo_name, repo_path,
-                    clone_uri)
+                     clone_uri)
             backend = get_backend(alias)
+
             backend(repo_path, create=True, src_url=clone_uri)
 
+
     def __rename_repo(self, old, new):
         """
         renames repository on filesystem
--- a/rhodecode/model/scm.py	Wed Jun 29 01:49:53 2011 +0200
+++ b/rhodecode/model/scm.py	Thu Jun 30 01:25:37 2011 +0200
@@ -27,12 +27,7 @@
 import traceback
 import logging
 
-from mercurial import ui
-
 from sqlalchemy.exc import DatabaseError
-from sqlalchemy.orm import make_transient
-
-from beaker.cache import cache_region, region_invalidate
 
 from vcs import get_backend
 from vcs.utils.helpers import get_scm
@@ -42,15 +37,14 @@
 
 from rhodecode import BACKENDS
 from rhodecode.lib import helpers as h
+from rhodecode.lib import safe_str
 from rhodecode.lib.auth import HasRepoPermissionAny
 from rhodecode.lib.utils import get_repos as get_filesystem_repos, make_ui, \
     action_logger
 from rhodecode.model import BaseModel
 from rhodecode.model.user import UserModel
-from rhodecode.model.repo import RepoModel
 from rhodecode.model.db import Repository, RhodeCodeUi, CacheInvalidation, \
     UserFollowing, UserLog
-from rhodecode.model.caching_query import FromCache
 
 log = logging.getLogger(__name__)
 
@@ -182,7 +176,10 @@
                     klass = get_backend(path[0])
 
                     if path[0] == 'hg' and path[0] in BACKENDS.keys():
-                        repos_list[name] = klass(path[1], baseui=baseui)
+
+                        # for mercurial we need to have an str path
+                        repos_list[name] = klass(safe_str(path[1]),
+                                                 baseui=baseui)
 
                     if path[0] == 'git' and path[0] in BACKENDS.keys():
                         repos_list[name] = klass(path[1])
@@ -364,10 +361,10 @@
 
         # encoding here ensures we hand plain str (byte) values to the backend;
         # note: safe_str replaces unencodable characters instead of raising
-        content = content.encode('utf8')
-        message = message.encode('utf8')
-        path = f_path.encode('utf8')
-        author = author.encode('utf8')
+        content = safe_str(content)
+        message = safe_str(message)
+        path = safe_str(f_path)
+        author = safe_str(author)
         m = IMC(repo)
         m.change(FileNode(path, content))
         tip = m.commit(message=message,
--- a/rhodecode/tests/functional/test_search.py	Wed Jun 29 01:49:53 2011 +0200
+++ b/rhodecode/tests/functional/test_search.py	Thu Jun 30 01:25:37 2011 +0200
@@ -7,8 +7,9 @@
     def test_index(self):
         self.log_user()
         response = self.app.get(url(controller='search', action='index'))
-        print response.body
-        assert 'class="small" id="q" name="q" type="text"' in response.body, 'Search box content error'
+
+        self.assertTrue('class="small" id="q" name="q" type="text"' in
+                        response.body)
         # Test response...
 
     def test_empty_search(self):
@@ -16,20 +17,21 @@
             raise SkipTest('skipped due to existing index')
         else:
             self.log_user()
-            response = self.app.get(url(controller='search', action='index'), {'q':HG_REPO})
-            assert 'There is no index to search in. Please run whoosh indexer' in response.body, 'No error message about empty index'
+            response = self.app.get(url(controller='search', action='index'),
+                                    {'q':HG_REPO})
+            self.assertTrue('There is no index to search in. '
+                            'Please run whoosh indexer' in response.body)
 
     def test_normal_search(self):
         self.log_user()
-        response = self.app.get(url(controller='search', action='index'), {'q':'def repo'})
-        print response.body
-        assert '10 results' in response.body, 'no message about proper search results'
-        assert 'Permission denied' not in response.body, 'Wrong permissions settings for that repo and user'
-
+        response = self.app.get(url(controller='search', action='index'),
+                                {'q':'def repo'})
+        self.assertTrue('10 results' in response.body)
+        self.assertTrue('Permission denied' not in response.body)
 
     def test_repo_search(self):
         self.log_user()
-        response = self.app.get(url(controller='search', action='index'), {'q':'repository:%s def test' % HG_REPO})
-        print response.body
-        assert '4 results' in response.body, 'no message about proper search results'
-        assert 'Permission denied' not in response.body, 'Wrong permissions settings for that repo and user'
+        response = self.app.get(url(controller='search', action='index'),
+                                {'q':'repository:%s def test' % HG_REPO})
+        self.assertTrue('4 results' in response.body)
+        self.assertTrue('Permission denied' not in response.body)