changeset 4699:94f25a680aad

helpers: tweak URL matching patterns - don't include trailing punctuation
author Mads Kiilerich <madski@unity3d.com>
date Mon, 15 Dec 2014 13:47:36 +0100
parents 0b14eb8c690b
children 6a0964373a30
files kallithea/lib/helpers.py kallithea/lib/markup_renderer.py
diffstat 2 files changed, 9 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/lib/helpers.py	Mon Dec 15 13:47:36 2014 +0100
+++ b/kallithea/lib/helpers.py	Mon Dec 15 13:47:36 2014 +0100
@@ -57,7 +57,7 @@
 from kallithea.lib.utils2 import str2bool, safe_unicode, safe_str, \
     get_changeset_safe, datetime_to_time, time_to_datetime, AttributeDict,\
     safe_int
-from kallithea.lib.markup_renderer import MarkupRenderer
+from kallithea.lib.markup_renderer import MarkupRenderer, url_re
 from kallithea.lib.vcs.exceptions import ChangesetDoesNotExistError
 from kallithea.lib.vcs.backends.base import BaseChangeset, EmptyChangeset
 from kallithea.config.conf import DATE_FORMAT, DATETIME_FORMAT
@@ -1256,13 +1256,10 @@
     :param text_:
     """
 
-    url_pat = re.compile(r'''(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]'''
-                         '''|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)''')
-
     def url_func(match_obj):
         url_full = match_obj.groups()[0]
         return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
-    _newtext = url_pat.sub(url_func, text_)
+    _newtext = url_re.sub(url_func, text_)
     if safe:
         return literal(_newtext)
     return _newtext
--- a/kallithea/lib/markup_renderer.py	Mon Dec 15 13:47:36 2014 +0100
+++ b/kallithea/lib/markup_renderer.py	Mon Dec 15 13:47:36 2014 +0100
@@ -35,6 +35,9 @@
 log = logging.getLogger(__name__)
 
 
+url_re = re.compile(r'''(\bhttps?://(?:[\da-zA-Z0-9@:.-]+)'''
+                    r'''(?:[/a-zA-Z0-9_=@#~&+%.,:?!*()-]*[/a-zA-Z0-9_=@#~])?)''')
+
 class MarkupRenderer(object):
     RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']
 
@@ -127,17 +130,11 @@
         if universal_newline:
             newline = '\n'
             source = newline.join(source.splitlines())
-        def urlify_text(text):
-            url_pat = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]'
-                                 '|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')
 
-            def url_func(match_obj):
-                url_full = match_obj.groups()[0]
-                return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
-
-            return url_pat.sub(url_func, text)
-
-        source = urlify_text(source)
+        def url_func(match_obj):
+            url_full = match_obj.groups()[0]
+            return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
+        source = url_re.sub(url_func, source)
         return '<br />' + source.replace("\n", '<br />')
 
     @classmethod