changeset 6871:26235543d64d

pygments: reimplement get_lem under the name get_extension_descriptions. The old implementation was cryptic and over-engineered, and reduce() is not a builtin in Python 3. This function works on static input, and it was verified that this implementation returns exactly the same as the old one. It is also simpler and slightly more readable, in my opinion. Inspired by a change by Lars Kruse.
author Mads Kiilerich <mads@kiilerich.com>
date Fri, 08 Sep 2017 04:12:51 +0200
parents 20f32aebc974
children dd1c939c33b7
files kallithea/config/conf.py kallithea/lib/pygmentsutils.py
diffstat 2 files changed, 19 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/kallithea/config/conf.py	Fri Aug 25 14:36:01 2017 +0200
+++ b/kallithea/config/conf.py	Fri Sep 08 04:12:51 2017 +0200
@@ -30,7 +30,7 @@
 
 # language map is also used by whoosh indexer, which for those specified
 # extensions will index it's content
-LANGUAGES_EXTENSIONS_MAP = pygmentsutils.get_lem()
+LANGUAGES_EXTENSIONS_MAP = pygmentsutils.get_extension_descriptions()
 
 # Whoosh index targets
 
--- a/kallithea/lib/pygmentsutils.py	Fri Aug 25 14:36:01 2017 +0200
+++ b/kallithea/lib/pygmentsutils.py	Fri Sep 08 04:12:51 2017 +0200
@@ -27,40 +27,32 @@
 
 from collections import defaultdict
 from itertools import ifilter
-from string import lower
 
 from pygments import lexers
 
 
-def get_lem():
-    """
-    Get language extension map based on what's inside pygments lexers
+def get_extension_descriptions():
     """
-    d = defaultdict(lambda: [])
-
-    def __clean(s):
-        s = s.lstrip('*')
-        s = s.lstrip('.')
-
-        if s.find('[') != -1:
-            exts = []
-            start, stop = s.find('['), s.find(']')
-
-            for suffix in s[start + 1:stop]:
-                exts.append(s[:s.find('[')] + suffix)
-            return map(lower, exts)
-        else:
-            return map(lower, [s])
+    Based on what's inside pygments lexers, return a mapping from lowercase
+    extensions to lists of very brief descriptions.
+    """
+    ext_descs = defaultdict(list)
 
     for lx, t in sorted(lexers.LEXERS.items()):
-        m = map(__clean, t[-2])
-        if m:
-            m = reduce(lambda x, y: x + y, m)
-            for ext in m:
-                desc = lx.replace('Lexer', '')
-                d[ext].append(desc)
+        desc = lx.replace('Lexer', '')
+        for glob in t[-2]:
+            s = glob.lstrip('*').lstrip('.').lower()
+            start = s.find('[')
+            if start > -1 and s.endswith(']'):
+                # expand trailing [] range
+                prefix = s[:start]
+                for char in s[start + 1:-1]:
+                    ext_descs[prefix + char].append(desc)
+            else:
+                # use stripped glob as extension
+                ext_descs[s].append(desc)
 
-    return dict(d)
+    return dict(ext_descs)
 
 
 def get_index_filenames():