changeset 7544:2ac4499b25eb

lib: sanitize HTML for all types of README rendering, not only Markdown

The repository summary page displays a rendered version of the repository 'readme', with the renderer chosen based on the file extension. In commit 5746cc3b3fa5, the rendered output was already sanitized when the input was Markdown. However, readmes written in other formats, like reStructuredText (RST) or plain text, could also have content that we want sanitized. Therefore, move the sanitizing one level up so that it covers all renderers, now and in the future. This fixes an XSS issue where JavaScript code contained in a repository readme would be executed when a user visits the repository summary page. Reported by Bob Hogg <wombat@rwhogg.site> (thanks!).
author Thomas De Schampheleire <thomas.de_schampheleire@nokia.com>
date Sat, 26 Jan 2019 20:27:50 +0100
parents c9159e6fda04
children 109b068ba6e5
files kallithea/lib/markup_renderer.py
diffstat 1 files changed, 17 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/lib/markup_renderer.py	Sat Jan 26 20:00:14 2019 +0100
+++ b/kallithea/lib/markup_renderer.py	Sat Jan 26 20:27:50 2019 +0100
@@ -124,7 +124,19 @@
 
         renderer = self._detect_renderer(source, filename)
         readme_data = renderer(source)
-        return readme_data
+        # Allow most HTML, while preventing XSS issues:
+        # no <script> tags, no onclick attributes, no javascript
+        # "protocol", and also limit styling to prevent defacing.
+        return bleach.clean(readme_data,
+            tags=['a', 'abbr', 'b', 'blockquote', 'br', 'code', 'dd',
+                  'div', 'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5',
+                  'h6', 'hr', 'i', 'img', 'li', 'ol', 'p', 'pre', 'span',
+                  'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'th',
+                  'thead', 'tr', 'ul'],
+            attributes=['class', 'id', 'style', 'label', 'title', 'alt', 'href', 'src'],
+            styles=['color'],
+            protocols=['http', 'https', 'mailto'],
+            )
 
     @classmethod
     def plain(cls, source, universal_newline=True):
@@ -162,22 +174,10 @@
         try:
             if flavored:
                 source = cls._flavored_markdown(source)
-            markdown_html = markdown_mod.markdown(source,
-                                       extensions=['codehilite', 'extra'],
-                                       extension_configs={'codehilite': {'css_class': 'code-highlight'}})
-            # Allow most HTML, while preventing XSS issues:
-            # no <script> tags, no onclick attributes, no javascript
-            # "protocol", and also limit styling to prevent defacing.
-            return bleach.clean(markdown_html,
-                tags=['a', 'abbr', 'b', 'blockquote', 'br', 'code', 'dd',
-                      'div', 'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5',
-                      'h6', 'hr', 'i', 'img', 'li', 'ol', 'p', 'pre', 'span',
-                      'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'th',
-                      'thead', 'tr', 'ul'],
-                attributes=['class', 'id', 'style', 'label', 'title', 'alt', 'href', 'src'],
-                styles=['color'],
-                protocols=['http', 'https', 'mailto'],
-                )
+            return markdown_mod.markdown(
+                source,
+                extensions=['codehilite', 'extra'],
+                extension_configs={'codehilite': {'css_class': 'code-highlight'}})
         except Exception:
             log.error(traceback.format_exc())
             if safe: