# HG changeset patch # User Thomas De Schampheleire # Date 1520715719 -3600 # Node ID d24051ce961c03f9c302debb38f11aa2bb9fe219 # Parent e5a7f8f413703f4328e744d689613a537f9b5b24 issues: support generic regex replacements in issue_url and issue_prefix Issue reference linking is pretty limited: - the issue_url is a literal with only three special tokens {id}, {repo} and {repo_name}. There is no way to let the URL be dependent on other elements of the input issue reference. - The value for {id} is somewhat oddly determined by the concatenation of all parenthesized groups in the issue_pat regular expression - the link text of the resulting link is limited to the contents of the literal issue_prefix with the determined {id}. It is not possible to retain the input issue reference verbatim, nor to let the link text be dependent on other elements of the input issue reference. This commit makes the issue reference linking more flexible: - issue_prefix is replaced by the more generic issue_sub(stitution), which is a string that may contain backreferences to regex groups specified in issue_pat. This string, with backreferences resolved, is used as the link text of urlified issue references. - if issue_sub is empty, the entire text matched by issue_pat is used as the link text. - like issue_sub, also issue_url can contain backreferences to regex groups. - {id} is no longer treated as a special token, as it can be solved by generic backreferences ('\g' assuming issue pattern contains something like '(P\d+)'. {repo} and {repo_name} are still supported, because their value is provided externally and not normally part of the issue pattern. Documentation and ini file template is updated as well. diff -r e5a7f8f41370 -r d24051ce961c development.ini --- a/development.ini Fri Feb 16 22:30:51 2018 +0100 +++ b/development.ini Sat Mar 10 22:01:59 2018 +0100 @@ -168,34 +168,44 @@ ## issue tracker for Kallithea (leave blank to disable, absent for default) #bugtracker = https://bitbucket.org/conservancy/kallithea/issues -## issue tracking mapping for commits messages -## comment out issue_pat, issue_server, issue_prefix to enable +## issue tracking mapping for commit messages, comments, PR descriptions, ... +## Refer to the documentation ("Integration with issue trackers") for more details. -## pattern to get the issues from commit messages -## default one used here is # with a regex passive group for `#` -## {id} will be all groups matched from this pattern +## regular expression to match issue references +## This pattern may/should contain parenthesized groups, that can +## be referred to in issue_server_link or issue_sub using Python backreferences +## (e.g. \1, \2, ...). You can also create named groups with '(?P)'. +## To require mandatory whitespace before the issue pattern, use: +## (?:^|(?<=\s)) before the actual pattern, and for mandatory whitespace +## behind the issue pattern, use (?:$|(?=\s)) after the actual pattern. issue_pat = #(\d+) -## server url to the issue, each {id} will be replaced with match -## fetched from the regex and {repo} is replaced with full repository name -## including groups {repo_name} is replaced with just name of repo - -issue_server_link = https://issues.example.com/{repo}/issue/{id} +## server url to the issue +## This pattern may/should contain backreferences to parenthesized groups in issue_pat. +## A backreference can be \1, \2, ... or \g if you specified a named group +## called 'groupname' in issue_pat. +## The special token {repo} is replaced with the full repository name +## including repository groups, while {repo_name} is replaced with just +## the name of the repository. -## prefix to add to link to indicate it's an url -## #314 will be replaced by +issue_server_link = https://issues.example.com/{repo}/issue/\1 -issue_prefix = # +## substitution pattern to use as the link text +## If issue_sub is empty, the text matched by issue_pat is retained verbatim +## for the link text. Otherwise, the link text is that of issue_sub, with any +## backreferences to groups in issue_pat replaced. -## issue_pat, issue_server_link, issue_prefix can have suffixes to specify +issue_sub = + +## issue_pat, issue_server_link and issue_sub can have suffixes to specify ## multiple patterns, to other issues server, wiki or others ## below an example how to create a wiki pattern # wiki-some-id -> https://wiki.example.com/some-id -#issue_pat_wiki = (?:wiki-)(.+) -#issue_server_link_wiki = https://wiki.example.com/{id} -#issue_prefix_wiki = WIKI- +#issue_pat_wiki = wiki-(\S+) +#issue_server_link_wiki = https://wiki.example.com/\1 +#issue_sub_wiki = WIKI-\1 ## alternative return HTTP header for failed authentication. Default HTTP ## response is 401 HTTPUnauthorized. Currently Mercurial clients have trouble with diff -r e5a7f8f41370 -r d24051ce961c docs/setup.rst --- a/docs/setup.rst Fri Feb 16 22:30:51 2018 +0100 +++ b/docs/setup.rst Sat Mar 10 22:01:59 2018 +0100 @@ -535,36 +535,69 @@ Kallithea provides a simple integration with issue trackers. It's possible to define a regular expression that will match an issue ID in commit messages, -and have that replaced with a URL to the issue. To enable this simply -uncomment the following variables in the ini file:: +and have that replaced with a URL to the issue. + +This is achieved with following three variables in the ini file:: - issue_pat = (?:^#|\s#)(\w+) - issue_server_link = https://issues.example.com/{repo}/issue/{id} - issue_prefix = # + issue_pat = #(\d+) + issue_server_link = https://issues.example.com/{repo}/issue/\1 + issue_sub = ``issue_pat`` is the regular expression describing which strings in -commit messages will be treated as issue references. A match group in -parentheses should be used to specify the actual issue id. +commit messages will be treated as issue references. The expression can/should +have one or more parenthesized groups that can later be referred to in +``issue_server_link`` and ``issue_sub`` (see below). If you prefer, named groups +can be used instead of simple parenthesized groups. -The default expression matches issues in the format ``#``, e.g., ``#300``. +If the pattern should only match if it is preceded by whitespace, add the +following string before the actual pattern: ``(?:^|(?<=\s))``. +If the pattern should only match if it is followed by whitespace, add the +following string after the actual pattern: ``(?:$|(?=\s))``. +These expressions use lookbehind and lookahead assertions of the Python regular +expression module to avoid the whitespace to be part of the actual pattern, +otherwise the link text will also contain that whitespace. Matched issue references are replaced with the link specified in -``issue_server_link``. ``{id}`` is replaced with the issue ID, and -``{repo}`` with the repository name. Since the # is stripped away, -``issue_prefix`` is prepended to the link text. ``issue_prefix`` doesn't -necessarily need to be ``#``: if you set issue prefix to ``ISSUE-`` this will -generate a URL in the format: +``issue_server_link``, in which any backreferences are resolved. Backreferences +can be ``\1``, ``\2``, ... or for named groups ``\g``. +The special token ``{repo}`` is replaced with the full repository path +(including repository groups), while token ``{repo_name}`` is replaced with the +repository name (without repository groups). + +The link text is determined by ``issue_sub``, which can be a string containing +backreferences to the groups specified in ``issue_pat``. If ``issue_sub`` is +empty, then the text matched by ``issue_pat`` is used verbatim. + +The example settings shown above match issues in the format ``#``. +This will cause the text ``#300`` to be transformed into a link: .. code-block:: html - ISSUE-300 + #300 + +The following example transforms a text starting with either of 'pullrequest', +'pull request' or 'PR', followed by an optional space, then a pound character +(#) and one or more digits, into a link with the text 'PR #' followed by the +digits:: + + issue_pat = (pullrequest|pull request|PR) ?#(\d+) + issue_server_link = https://issues.example.com/\2 + issue_sub = PR #\2 + +The following example demonstrates how to require whitespace before the issue +reference in order for it to be recognized, such that the text ``issue#123`` will +not cause a match, but ``issue #123`` will:: + + issue_pat = (?:^|(?<=\s))#(\d+) + issue_server_link = https://issues.example.com/\1 + issue_sub = If needed, more than one pattern can be specified by appending a unique suffix to -the variables. For example:: +the variables. For example, also demonstrating the use of named groups:: - issue_pat_wiki = (?:wiki-)(.+) - issue_server_link_wiki = https://wiki.example.com/{id} - issue_prefix_wiki = WIKI- + issue_pat_wiki = wiki-(?P\S+) + issue_server_link_wiki = https://wiki.example.com/\g + issue_sub_wiki = WIKI-\g With these settings, wiki pages can be referenced as wiki-some-id, and every such reference will be transformed into: @@ -573,6 +606,9 @@ WIKI-some-id +Refer to the `Python regular expression documentation`_ for more details about +the supported syntax in ``issue_pat``, ``issue_server_link`` and ``issue_sub``. + Hook management --------------- @@ -901,6 +937,7 @@ .. _virtualenv: http://pypi.python.org/pypi/virtualenv .. _python: http://www.python.org/ +.. _Python regular expression documentation: https://docs.python.org/2/library/re.html .. _Mercurial: https://www.mercurial-scm.org/ .. _Celery: http://celeryproject.org/ .. _Celery documentation: http://docs.celeryproject.org/en/latest/getting-started/index.html diff -r e5a7f8f41370 -r d24051ce961c kallithea/lib/helpers.py --- a/kallithea/lib/helpers.py Fri Feb 16 22:30:51 2018 +0100 +++ b/kallithea/lib/helpers.py Sat Mar 10 22:01:59 2018 +0100 @@ -1140,35 +1140,46 @@ suffix = m.group(1) issue_pat = CONFIG.get(k) issue_server_link = CONFIG.get('issue_server_link%s' % suffix) - issue_prefix = CONFIG.get('issue_prefix%s' % suffix) - if not issue_pat or not issue_server_link or issue_prefix is None: # issue_prefix can be empty but should be present - log.error('skipping incomplete issue pattern %r: %r -> %r %r', suffix, issue_pat, issue_server_link, issue_prefix) + issue_sub = CONFIG.get('issue_sub%s' % suffix) + if not issue_pat or not issue_server_link or issue_sub is None: # issue_sub can be empty but should be present + log.error('skipping incomplete issue pattern %r: %r -> %r %r', suffix, issue_pat, issue_server_link, issue_sub) continue # Wrap tmp_urlify_issues_f with substitution of this pattern, while making sure all loop variables (and compiled regexpes) are bound try: issue_re = re.compile(issue_pat) except re.error as e: - log.error('skipping invalid issue pattern %r: %r -> %r %r. Error: %s', suffix, issue_pat, issue_server_link, issue_prefix, str(e)) + log.error('skipping invalid issue pattern %r: %r -> %r %r. Error: %s', suffix, issue_pat, issue_server_link, issue_sub, str(e)) continue - log.debug('issue pattern %r: %r -> %r %r', suffix, issue_pat, issue_server_link, issue_prefix) + log.debug('issue pattern %r: %r -> %r %r', suffix, issue_pat, issue_server_link, issue_sub) def issues_replace(match_obj, - issue_server_link=issue_server_link, issue_prefix=issue_prefix): - issue_id = ''.join(match_obj.groups()) - issue_url = issue_server_link.replace('{id}', issue_id) + issue_server_link=issue_server_link, issue_sub=issue_sub): + try: + issue_url = match_obj.expand(issue_server_link) + except (IndexError, re.error) as e: + log.error('invalid issue_url setting %r -> %r %r. Error: %s', issue_pat, issue_server_link, issue_sub, str(e)) + issue_url = issue_server_link issue_url = issue_url.replace('{repo}', repo_name) issue_url = issue_url.replace('{repo_name}', repo_name.split(URL_SEP)[-1]) + # if issue_sub is empty use the matched issue reference verbatim + if not issue_sub: + issue_text = match_obj.group() + else: + try: + issue_text = match_obj.expand(issue_sub) + except (IndexError, re.error) as e: + log.error('invalid issue_sub setting %r -> %r %r. Error: %s', issue_pat, issue_server_link, issue_sub, str(e)) + issue_text = match_obj.group() + return ( '' - '%(issue-prefix)s%(id-repr)s' + '%(text)s' '' ) % { 'url': issue_url, - 'id-repr': issue_id, - 'issue-prefix': issue_prefix, - 'serv': issue_server_link, + 'text': issue_text, } tmp_urlify_issues_f = (lambda s, issue_re=issue_re, issues_replace=issues_replace, chain_f=tmp_urlify_issues_f: diff -r e5a7f8f41370 -r d24051ce961c kallithea/lib/paster_commands/template.ini.mako --- a/kallithea/lib/paster_commands/template.ini.mako Fri Feb 16 22:30:51 2018 +0100 +++ b/kallithea/lib/paster_commands/template.ini.mako Sat Mar 10 22:01:59 2018 +0100 @@ -261,37 +261,44 @@ <%text>## issue tracker for Kallithea (leave blank to disable, absent for default) #bugtracker = https://bitbucket.org/conservancy/kallithea/issues -<%text>## issue tracking mapping for commits messages -<%text>## comment out issue_pat, issue_server, issue_prefix to enable +<%text>## issue tracking mapping for commit messages, comments, PR descriptions, ... +<%text>## Refer to the documentation ("Integration with issue trackers") for more details. -<%text>## pattern to get the issues from commit messages -<%text>## default one used here is # with a regex passive group for `#` -<%text>## {id} will be all groups matched from this pattern +<%text>## regular expression to match issue references +<%text>## This pattern may/should contain parenthesized groups, that can +<%text>## be referred to in issue_server_link or issue_sub using Python backreferences +<%text>## (e.g. \1, \2, ...). You can also create named groups with '(?P)'. <%text>## To require mandatory whitespace before the issue pattern, use: <%text>## (?:^|(?<=\s)) before the actual pattern, and for mandatory whitespace -<%text>## behind the issue pattern, use (?:$|(?=\s)) after the actual pattern +<%text>## behind the issue pattern, use (?:$|(?=\s)) after the actual pattern. issue_pat = #(\d+) -<%text>## server url to the issue, each {id} will be replaced with match -<%text>## fetched from the regex and {repo} is replaced with full repository name -<%text>## including groups {repo_name} is replaced with just name of repo - -issue_server_link = https://issues.example.com/{repo}/issue/{id} +<%text>## server url to the issue +<%text>## This pattern may/should contain backreferences to parenthesized groups in issue_pat. +<%text>## A backreference can be \1, \2, ... or \g if you specified a named group +<%text>## called 'groupname' in issue_pat. +<%text>## The special token {repo} is replaced with the full repository name +<%text>## including repository groups, while {repo_name} is replaced with just +<%text>## the name of the repository. -<%text>## prefix to add to link to indicate it's an url -<%text>## #314 will be replaced by +issue_server_link = https://issues.example.com/{repo}/issue/\1 -issue_prefix = # +<%text>## substitution pattern to use as the link text +<%text>## If issue_sub is empty, the text matched by issue_pat is retained verbatim +<%text>## for the link text. Otherwise, the link text is that of issue_sub, with any +<%text>## backreferences to groups in issue_pat replaced. -<%text>## issue_pat, issue_server_link, issue_prefix can have suffixes to specify +issue_sub = + +<%text>## issue_pat, issue_server_link and issue_sub can have suffixes to specify <%text>## multiple patterns, to other issues server, wiki or others <%text>## below an example how to create a wiki pattern # wiki-some-id -> https://wiki.example.com/some-id -#issue_pat_wiki = (?:wiki-)(.+) -#issue_server_link_wiki = https://wiki.example.com/{id} -#issue_prefix_wiki = WIKI- +#issue_pat_wiki = wiki-(\S+) +#issue_server_link_wiki = https://wiki.example.com/\1 +#issue_sub_wiki = WIKI-\1 <%text>## alternative return HTTP header for failed authentication. Default HTTP <%text>## response is 401 HTTPUnauthorized. Currently Mercurial clients have trouble with diff -r e5a7f8f41370 -r d24051ce961c kallithea/tests/other/test_libs.py --- a/kallithea/tests/other/test_libs.py Fri Feb 16 22:30:51 2018 +0100 +++ b/kallithea/tests/other/test_libs.py Sat Mar 10 22:01:59 2018 +0100 @@ -406,84 +406,91 @@ from kallithea.lib.helpers import urlify_text assert urlify_text(sample, 'repo_name', link_='#the-link') == expected - @parametrize('issue_pat,issue_server,issue_prefix,sample,expected', [ - (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#', + @parametrize('issue_pat,issue_server,issue_sub,sample,expected', [ + (r'#(\d+)', 'http://foo/{repo}/issue/\\1', '#\\1', 'issue #123 and issue#456', """issue #123 and """ """issue#456"""), - # following test case shows the result of a backward incompatible change that was made: the - # space between 'issue' and '#123' is removed, because the space is part of the pattern. - (r'(?:\s*#)(\d+)', 'http://foo/{repo}/issue/{id}', '#', + (r'(?:\s*#)(\d+)', 'http://foo/{repo}/issue/\\1', '#\\1', 'issue #123 and issue#456', """issue#123 and """ """issue#456"""), # to require whitespace before the issue reference, one may be tempted to use \b... - (r'\bPR(\d+)', 'http://foo/{repo}/issue/{id}', '#', + (r'\bPR(\d+)', 'http://foo/{repo}/issue/\\1', '#\\1', 'issue PR123 and issuePR456', """issue #123 and """ """issuePR456"""), # ... but it turns out that \b does not work well in combination with '#': the expectations # are reversed from what is actually happening. - (r'\b#(\d+)', 'http://foo/{repo}/issue/{id}', '#', + (r'\b#(\d+)', 'http://foo/{repo}/issue/\\1', '#\\1', 'issue #123 and issue#456', """issue #123 and """ """issue#456"""), # ... so maybe try to be explicit? Unfortunately the whitespace before the issue # reference is not retained, again, because it is part of the pattern. - (r'(?:^|\s)#(\d+)', 'http://foo/{repo}/issue/{id}', '#', + (r'(?:^|\s)#(\d+)', 'http://foo/{repo}/issue/\\1', '#\\1', '#15 and issue #123 and issue#456', """#15 and """ """issue#123 and """ """issue#456"""), # ... instead, use lookbehind assertions. - (r'(?:^|(?<=\s))#(\d+)', 'http://foo/{repo}/issue/{id}', '#', + (r'(?:^|(?<=\s))#(\d+)', 'http://foo/{repo}/issue/\\1', '#\\1', '#15 and issue #123 and issue#456', """#15 and """ """issue #123 and """ """issue#456"""), - (r'(?:pullrequest|pull request|PR|pr) ?#?(\d+)', 'http://foo/{repo}/issue/{id}', 'PR#', + (r'(?:pullrequest|pull request|PR|pr) ?#?(\d+)', 'http://foo/{repo}/issue/\\1', 'PR#\\1', 'fixed with pullrequest #1, pull request#2, PR 3, pr4', """fixed with PR#1, """ """PR#2, """ """PR#3, """ """PR#4"""), - (r'#(\d+)', 'http://foo/{repo}/issue/{id}', 'PR', + (r'#(\d+)', 'http://foo/{repo}/issue/\\1', 'PR\\1', 'interesting issue #123', """interesting issue PR123"""), - (r'BUG\d{5}', 'https://bar/{repo}/{id}', 'BUG', - 'silly me, I did not parenthesize the {id}, BUG12345.', - """silly me, I did not parenthesize the {id}, BUG."""), - (r'BUG(\d{5})', 'https://bar/{repo}/', 'BUG', - 'silly me, the URL does not contain {id}, BUG12345.', - """silly me, the URL does not contain {id}, BUG12345."""), - (r'(PR-\d+)', 'http://foo/{repo}/issue/{id}', '', + (r'BUG\d{5}', 'https://bar/{repo}/\\1', '\\1', + 'silly me, I did not parenthesize the id, BUG12345.', + """silly me, I did not parenthesize the id, BUG12345."""), + (r'BUG(\d{5})', 'https://bar/{repo}/', 'BUG\\1', + 'silly me, the URL does not contain id, BUG12345.', + """silly me, the URL does not contain id, BUG12345."""), + (r'(PR-\d+)', 'http://foo/{repo}/issue/\\1', '', 'interesting issue #123, err PR-56', """interesting issue #123, err PR-56"""), - (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#', + (r'#(\d+)', 'http://foo/{repo}/issue/\\1', '#\\1', "some 'standard' text with apostrophes", """some 'standard' text with apostrophes"""), - (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#', + (r'#(\d+)', 'http://foo/{repo}/issue/\\1', '#\\1', "some 'standard' issue #123", """some 'standard' issue #123"""), - (r'#(\d+)', 'http://foo/{repo}/issue/{id}', '#', + (r'#(\d+)', 'http://foo/{repo}/issue/\\1', '#\\1', 'an issue #123 with extra whitespace', """an issue #123 with extra whitespace"""), - # Note: whitespace is squashed - (r'(?:\s*#)(\d+)', 'http://foo/{repo}/issue/{id}', '#', + (r'(?:\s*#)(\d+)', 'http://foo/{repo}/issue/\\1', '#\\1', 'an issue #123 with extra whitespace', - """an issue #123 with extra whitespace"""), + """an issue#123 with extra whitespace"""), # invalid issue pattern (r'(PR\d+', 'http://foo/{repo}/issue/{id}', '', 'PR135', """PR135"""), + # other character than # + (r'(?:^|(?<=\s))\$(\d+)', 'http://foo/{repo}/issue/\\1', '', + 'empty issue_sub $123 and issue$456', + """empty issue_sub $123 and """ + """issue$456"""), + # named groups + (r'(PR|pullrequest|pull request) ?(?PBRU|CPH|BER)-(?P\d+)', 'http://foo/\g/pullrequest/\g/', 'PR-\g-\g', + 'pullrequest CPH-789 is similar to PRBRU-747', + """PR-CPH-789 is similar to """ + """PR-BRU-747"""), ]) - def test_urlify_issues(self, issue_pat, issue_server, issue_prefix, sample, expected): + def test_urlify_issues(self, issue_pat, issue_server, issue_sub, sample, expected): from kallithea.lib.helpers import urlify_text config_stub = { 'sqlalchemy.url': 'foo', 'issue_pat': issue_pat, 'issue_server_link': issue_server, - 'issue_prefix': issue_prefix, + 'issue_sub': issue_sub, } # force recreation of lazy function with mock.patch('kallithea.lib.helpers._urlify_issues_f', None): @@ -496,7 +503,7 @@ ('pull request7 #', 'PR#7 #'), ('look PR9 and pr #11', 'look PR#9 and PR#11'), ('pullrequest#10 solves issue 9', 'PR#10 solves bug#9'), - ('issue FAIL67', 'issue 67'), + ('issue FAIL67', 'issue FAIL67'), ('issue FAILMORE89', 'issue FAILMORE89'), # no match because absent prefix ]) def test_urlify_issues_multiple_issue_patterns(self, sample, expected): @@ -504,19 +511,19 @@ config_stub = { 'sqlalchemy.url': 'foo', 'issue_pat': 'X(\d+)', - 'issue_server_link': 'http://main/{repo}/main/{id}/', - 'issue_prefix': '#', + 'issue_server_link': 'http://main/{repo}/main/\\1/', + 'issue_sub': '#\\1', 'issue_pat_pr': '(?:pullrequest|pull request|PR|pr) ?#?(\d+)', - 'issue_server_link_pr': 'http://pr/{repo}/pr/{id}', - 'issue_prefix_pr': 'PR#', + 'issue_server_link_pr': 'http://pr/{repo}/pr/\\1', + 'issue_sub_pr': 'PR#\\1', 'issue_pat_bug': '(?:BUG|bug|issue) ?#?(\d+)', - 'issue_server_link_bug': 'http://bug/{repo}/bug/{id}', - 'issue_prefix_bug': 'bug#', + 'issue_server_link_bug': 'http://bug/{repo}/bug/\\1', + 'issue_sub_bug': 'bug#\\1', 'issue_pat_empty_prefix': 'FAIL(\d+)', - 'issue_server_link_empty_prefix': 'http://fail/{repo}/{id}', - 'issue_prefix_empty_prefix': '', + 'issue_server_link_empty_prefix': 'http://fail/{repo}/\\1', + 'issue_sub_empty_prefix': '', 'issue_pat_absent_prefix': 'FAILMORE(\d+)', - 'issue_server_link_absent_prefix': 'http://failmore/{repo}/{id}', + 'issue_server_link_absent_prefix': 'http://failmore/{repo}/\\1', } # force recreation of lazy function with mock.patch('kallithea.lib.helpers._urlify_issues_f', None):