changeset 5715:1fd82c81118d

vcs: better handling of invalid email addresses: don't consider them email addresses. 13da89053853 was in principle right in always returning email addresses as string ... but unfortunately the function also returned invalid email addresses that didn't fit into strings. To fix this, the function is refactored to always use regexp matching of valid email addresses ... and to be simpler. The behaviour should be the same as before for all valid email addresses.
author Mads Kiilerich <madski@unity3d.com>
date Mon, 15 Feb 2016 19:29:26 +0100
parents b6c702202f82
children bbd307ce70c0
files kallithea/lib/vcs/utils/__init__.py kallithea/tests/vcs/test_git.py kallithea/tests/vcs/test_utils.py
diffstat 3 files changed, 23 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/lib/vcs/utils/__init__.py	Mon Feb 15 19:29:26 2016 +0100
+++ b/kallithea/lib/vcs/utils/__init__.py	Mon Feb 15 19:29:26 2016 +0100
@@ -5,6 +5,7 @@
 
 import time
 import datetime
+import re
 
 
 def makedate():
@@ -150,30 +151,33 @@
         return unicode_.encode(to_encoding[0], 'replace')
 
 
+# Regex taken from http://www.regular-expressions.info/email.html
+email_re = re.compile(
+    r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@"""
+    r"""(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?""",
+    re.IGNORECASE)
+
 def author_email(author):
     """
-    returns email address of given author.
-    If any of <,> sign are found, it fallbacks to regex findall()
-    and returns first found result or empty string
+    Returns email address of given author string.
+    If author contains <> brackets, only look inside that.
+    If any RFC valid email address is found, return that.
+    Else, return empty string.
 
-    Regex taken from http://www.regular-expressions.info/email.html
     """
     if not author:
         return ''
-    import re
-    r = author.find('>')
-    l = author.find('<')
 
-    if l == -1 or r == -1:
-        # fallback to regex match of email out of a string
-        email_re = re.compile(r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!"""
-                              r"""#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z"""
-                              r"""0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]"""
-                              r"""*[a-z0-9])?""", re.IGNORECASE)
-        m = re.findall(email_re, author)
-        return safe_str(m[0]) if m else ''
+    l = author.find('<') + 1
+    if l != 0:
+        r = author.find('>', l)
+        if r != -1:
+            author = author[l:r]
 
-    return safe_str(author[l + 1:r].strip())
+    m = email_re.search(author)
+    if m is None:
+        return ''
+    return safe_str(m.group(0))
 
 
 def author_name(author):
--- a/kallithea/tests/vcs/test_git.py	Mon Feb 15 19:29:26 2016 +0100
+++ b/kallithea/tests/vcs/test_git.py	Mon Feb 15 19:29:26 2016 +0100
@@ -604,7 +604,7 @@
         self.assertEqual('lukasz.balcerzak@python-center.pl',
           self.repo.get_changeset('ff7ca51e58c505fec0dd2491de52c622bb7a806b') \
           .author_email)
-        self.assertEqual('none@none',
+        self.assertEqual('',
           self.repo.get_changeset('8430a588b43b5d6da365400117c89400326e7992') \
           .author_email)
 
@@ -615,7 +615,7 @@
         self.assertEqual('Lukasz Balcerzak',
           self.repo.get_changeset('ff7ca51e58c505fec0dd2491de52c622bb7a806b') \
           .author_name)
-        self.assertEqual('marcink',
+        self.assertEqual('marcink none@none',
           self.repo.get_changeset('8430a588b43b5d6da365400117c89400326e7992') \
           .author_name)
 
--- a/kallithea/tests/vcs/test_utils.py	Mon Feb 15 19:29:26 2016 +0100
+++ b/kallithea/tests/vcs/test_utils.py	Mon Feb 15 19:29:26 2016 +0100
@@ -206,7 +206,7 @@
                   ('Mr Double Name withemail@example.com ',
                    ('Mr Double Name', 'withemail@example.com')),
                   (u'John Doe <джондо à éẋàṁṗłê.ç°ḿ>',
-                   (u'John Doe <\u0434\u0436\u043e\u043d\u0434\u043e \xe0 \xe9\u1e8b\xe0\u1e41\u1e57\u0142\xea.\xe7\xb0\u1e3f>', '\xd0\xb4\xd0\xb6\xd0\xbe\xd0\xbd\xd0\xb4\xd0\xbe \xc3\xa0 \xc3\xa9\xe1\xba\x8b\xc3\xa0\xe1\xb9\x81\xe1\xb9\x97\xc5\x82\xc3\xaa.\xc3\xa7\xc2\xb0\xe1\xb8\xbf')),
+                   (u'John Doe <\u0434\u0436\u043e\u043d\u0434\u043e \xe0 \xe9\u1e8b\xe0\u1e41\u1e57\u0142\xea.\xe7\xb0\u1e3f>', '')),
                   ]
 
     def test_author_email(self):