Created
June 3, 2011 21:03
-
-
Save lukaszb/1007169 to your computer and use it in GitHub Desktop.
username & email extractor for VCS & RhodeCode
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def author_email(author):
    """
    Return the email address contained in the given author string.

    If the string holds a ``<...>`` pair, the whitespace-stripped text between
    the first ``<`` and the first ``>`` that follows it is returned as-is
    (it is not validated). If there is no such pair - either sign is missing,
    or no ``>`` comes after the ``<`` - it falls back to a regex ``findall()``
    and returns the first match, or an empty string when nothing matches.

    Regex taken from http://www.regular-expressions.info/email.html
    """
    import re

    start = author.find('<')
    # Only a '>' located *after* the '<' can close the bracket pair; a stray
    # '>' earlier in the string must not produce an empty slice.
    end = author.find('>', start + 1) if start != -1 else -1
    if start == -1 or end == -1:
        # fallback to regex match of email out of a string
        email_re = re.compile(r"""[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!"""
                              r"""#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z"""
                              r"""0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]"""
                              r"""*[a-z0-9])?""", re.IGNORECASE)
        found = email_re.findall(author)
        return found[0] if found else ''
    return author[start + 1:end].strip()
def author_name(author):
    """
    Return the name part of an author string, or else the username.

    A string without ``@`` is handed back untouched. Otherwise the email
    reported by ``author_email()`` is cut out of the string, every ``<`` and
    ``>`` sign is dropped, and surrounding whitespace is stripped.
    """
    if '@' in author:
        remainder = author.replace(author_email(author), '')
        for bracket in ('<', '>'):
            remainder = remainder.replace(bracket, '')
        return remainder.strip()
    return author
#==============================================================================
# TESTS
#==============================================================================

# Each entry pairs a raw author string with the (name, email) tuple that
# author_name() and author_email() are expected to extract from it.
test_usernames = [
    (
        'Marcin Kuzminski <marcin@python-works.com>',
        ('Marcin Kuzminski', 'marcin@python-works.com'),
    ),
    (
        'Marcin Kuzminski Spaces < marcin@python-works.com >',
        ('Marcin Kuzminski Spaces', 'marcin@python-works.com'),
    ),
    (
        'Marcin Kuzminski <marcin.kuzminski@python-works.com>',
        ('Marcin Kuzminski', 'marcin.kuzminski@python-works.com'),
    ),
    (
        'mrf RFC_SPEC <marcin+kuzminski@python-works.com>',
        ('mrf RFC_SPEC', 'marcin+kuzminski@python-works.com'),
    ),
    (
        'username <user@email.com>',
        ('username', 'user@email.com'),
    ),
    (
        'username <user@email.com',
        ('username', 'user@email.com'),
    ),
    (
        'broken missing@email.com',
        ('broken', 'missing@email.com'),
    ),
    (
        '<justemail@mail.com>',
        ('', 'justemail@mail.com'),
    ),
    (
        'justname',
        ('justname', ''),
    ),
    (
        'Mr Double Name withemail@email.com ',
        ('Mr Double Name', 'withemail@email.com'),
    ),
]
def test_author_email():
    """Check author_email() against the expected email of every sample."""
    for raw_author, expected in test_usernames:
        wanted_email = expected[1]
        assert wanted_email == author_email(raw_author)
def test_author_name():
    """Check author_name() against the expected name of every sample."""
    for raw_author, expected in test_usernames:
        wanted_name = expected[0]
        assert wanted_name == author_name(raw_author)
# Run both test functions when the file is executed as a script.
if __name__ == '__main__':
    for run_test in (test_author_email, test_author_name):
        run_test()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment