Mercurial > kallithea
changeset 5715:1fd82c81118d
vcs: better handling of invalid email addresses: don't consider them email addresses
13da89053853 was in principle right in always returning email addresses as
string ... but unfortunately the function also returned invalid email addresses
that didn't fit into strings.
To fix this, the function is refactored to always use regexp matching of valid
email addresses ... and to be simpler. The behaviour should be the same as
before for all valid email addresses.
author | Mads Kiilerich <madski@unity3d.com> |
---|---|
date | Mon, 15 Feb 2016 19:29:26 +0100 |
parents | b6c702202f82 |
children | bbd307ce70c0 |
files | kallithea/lib/vcs/utils/__init__.py kallithea/tests/vcs/test_git.py kallithea/tests/vcs/test_utils.py |
diffstat | 3 files changed, 23 insertions(+), 19 deletions(-) [+] |
line wrap: on
line diff
--- a/kallithea/lib/vcs/utils/__init__.py Mon Feb 15 19:29:26 2016 +0100 +++ b/kallithea/lib/vcs/utils/__init__.py Mon Feb 15 19:29:26 2016 +0100 @@ -5,6 +5,7 @@ import time import datetime +import re def makedate(): @@ -150,30 +151,33 @@ return unicode_.encode(to_encoding[0], 'replace') +# Regex taken from http://www.regular-expressions.info/email.html +email_re = re.compile( + r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@""" + r"""(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?""", + re.IGNORECASE) + def author_email(author): """ - returns email address of given author. - If any of <,> sign are found, it fallbacks to regex findall() - and returns first found result or empty string + Returns email address of given author string. + If author contains <> brackets, only look inside that. + If any RFC valid email address is found, return that. + Else, return empty string. - Regex taken from http://www.regular-expressions.info/email.html """ if not author: return '' - import re - r = author.find('>') - l = author.find('<') - if l == -1 or r == -1: - # fallback to regex match of email out of a string - email_re = re.compile(r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!""" - r"""#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z""" - r"""0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]""" - r"""*[a-z0-9])?""", re.IGNORECASE) - m = re.findall(email_re, author) - return safe_str(m[0]) if m else '' + l = author.find('<') + 1 + if l != 0: + r = author.find('>', l) + if r != -1: + author = author[l:r] - return safe_str(author[l + 1:r].strip()) + m = email_re.search(author) + if m is None: + return '' + return safe_str(m.group(0)) def author_name(author):
--- a/kallithea/tests/vcs/test_git.py Mon Feb 15 19:29:26 2016 +0100 +++ b/kallithea/tests/vcs/test_git.py Mon Feb 15 19:29:26 2016 +0100 @@ -604,7 +604,7 @@ self.assertEqual('lukasz.balcerzak@python-center.pl', self.repo.get_changeset('ff7ca51e58c505fec0dd2491de52c622bb7a806b') \ .author_email) - self.assertEqual('none@none', + self.assertEqual('', self.repo.get_changeset('8430a588b43b5d6da365400117c89400326e7992') \ .author_email) @@ -615,7 +615,7 @@ self.assertEqual('Lukasz Balcerzak', self.repo.get_changeset('ff7ca51e58c505fec0dd2491de52c622bb7a806b') \ .author_name) - self.assertEqual('marcink', + self.assertEqual('marcink none@none', self.repo.get_changeset('8430a588b43b5d6da365400117c89400326e7992') \ .author_name)
--- a/kallithea/tests/vcs/test_utils.py Mon Feb 15 19:29:26 2016 +0100 +++ b/kallithea/tests/vcs/test_utils.py Mon Feb 15 19:29:26 2016 +0100 @@ -206,7 +206,7 @@ ('Mr Double Name withemail@example.com ', ('Mr Double Name', 'withemail@example.com')), (u'John Doe <джондо à éẋàṁṗłê.ç°ḿ>', - (u'John Doe <\u0434\u0436\u043e\u043d\u0434\u043e \xe0 \xe9\u1e8b\xe0\u1e41\u1e57\u0142\xea.\xe7\xb0\u1e3f>', '\xd0\xb4\xd0\xb6\xd0\xbe\xd0\xbd\xd0\xb4\xd0\xbe \xc3\xa0 \xc3\xa9\xe1\xba\x8b\xc3\xa0\xe1\xb9\x81\xe1\xb9\x97\xc5\x82\xc3\xaa.\xc3\xa7\xc2\xb0\xe1\xb8\xbf')), + (u'John Doe <\u0434\u0436\u043e\u043d\u0434\u043e \xe0 \xe9\u1e8b\xe0\u1e41\u1e57\u0142\xea.\xe7\xb0\u1e3f>', '')), ] def test_author_email(self):