From b39ff46c32d0fa52f3f6c64efd2fb1d6e62fa512 Mon Sep 17 00:00:00 2001
From: "J.-S. Caux" <J.S.Caux@uva.nl>
Date: Mon, 10 Jun 2019 21:27:32 +0200
Subject: [PATCH] Partial work on language recognition and rendering

---
 markup/forms.py                       | 22 +++++++++++++++-------
 markup/templatetags/process_markup.py | 12 ++++++++----
 markup/utils.py                       |  6 +++++-
 3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/markup/forms.py b/markup/forms.py
index 8dcc56976..3dc10533d 100644
--- a/markup/forms.py
+++ b/markup/forms.py
@@ -7,6 +7,8 @@ from docutils.core import publish_parts
 import markdown
 import re
 
+from mdx_math import MathExtension
+
 from django import forms
 from django.utils.encoding import force_text
 from django.utils.safestring import mark_safe
@@ -24,7 +26,6 @@ class MarkupTextForm(forms.Form):
         # Detect text format
         markup_detector = detect_markup_language(text)
         language = markup_detector['language']
-        print('language: %s' % language)
 
         if markup_detector['errors']:
             return markup_detector
@@ -43,7 +44,8 @@ class MarkupTextForm(forms.Form):
                         'doctitle_xform': False,
                         'raw_enabled': False,
                         'file_insertion_enabled': False,
-                        'warning_stream': warnStream})
+                        'warning_stream': warnStream
+                    })
                 return {
                     'language': language,
                     'processed_markup': mark_safe(force_text(parts['html_body'])),
@@ -58,11 +60,17 @@ class MarkupTextForm(forms.Form):
         elif language == 'Markdown':
             return {
                 'language': language,
-                #'processed_markup': markdown.markdown(escape(text), output_format='html5')
-                'processed_markup': bleach.clean(
-                    markdown.markdown(text, output_format='html5'),
-                    tags=BLEACH_ALLOWED_TAGS)
-                }
+                'processed_markup': mark_safe(
+                    markdown.markdown(
+                        bleach.clean(
+                            text,
+                            tags=BLEACH_ALLOWED_TAGS
+                        ).replace('&amp;', '&').replace(' &lt; ', ' < '),
+                        output_format='html5',
+                        extensions=[MathExtension(enable_dollar_delimiter=True)]
+                    )
+                )
+            }
 
         # at this point, language is assumed to be plain text
         from django.template.defaultfilters import linebreaksbr
diff --git a/markup/templatetags/process_markup.py b/markup/templatetags/process_markup.py
index 0e0afb26e..4cb03de2d 100644
--- a/markup/templatetags/process_markup.py
+++ b/markup/templatetags/process_markup.py
@@ -6,6 +6,7 @@ import bleach
 from docutils.core import publish_parts
 from io import StringIO
 import markdown
+import re
 
 from mdx_math import MathExtension
 
@@ -27,13 +28,11 @@ def process_markup(text, language_forced=None):
         return ''
 
     markup_detector = detect_markup_language(text)
-    print('language detected: %s' % markup_detector['language'])
+    language = language_forced if language_forced else markup_detector['language']
 
     if markup_detector['errors']:
         return markup_detector['errors']
 
-    language = language_forced if language_forced else markup_detector['language']
-
     if language == 'reStructuredText':
         warnStream = StringIO()
         try:
@@ -64,7 +63,12 @@ def process_markup(text, language_forced=None):
                 bleach.clean(
                     text,
                     tags=BLEACH_ALLOWED_TAGS
-                ).replace('&amp;', '&').replace(' &lt; ', ' < '),
+                ).replace('&amp;', '&'       # to preserve math separator for MathJax
+                ).replace(' &lt; ', ' < '    # to preserve < for MathJax
+                ).replace(' &gt; ', ' > '    # to preserve > for MathJax
+                ).replace('&gt;&gt;', '>>'   # to preserve nested Markdown blockquotes
+                ).replace('&gt; ', '> '      # to preserve > for Markdown blockquotes
+                ),
                 output_format='html5',
                 extensions=[MathExtension(enable_dollar_delimiter=True)]
             )
diff --git a/markup/utils.py b/markup/utils.py
index e3ecc7650..7312cb0f8 100644
--- a/markup/utils.py
+++ b/markup/utils.py
@@ -40,7 +40,7 @@ def match_md_header(text, level=None):
         raise TypeError('level must be an int')
     if level < 1 or level > 6:
         raise ValueError('level must be an integer from 1 to 6')
-    return re.search(r'^#{' + level + ',}[ ].+$', text)
+    return re.search(r'^#{' + str(level) + ',}[ ].+$', text)
 
 def match_md_blockquote(text):
     """Return first match of regex search for Markdown blockquote."""
@@ -166,6 +166,7 @@ def detect_markup_language(text):
     md_blockquote = match_md_blockquote(text)
 
     if rst_math_role or rst_math_directive:
+        # reStructuredText presumed; check for errors
         if inline_math:
             detector['errors'] = (
                 'You have mixed inline maths ($ ... $ or \( ... \) ) with '
@@ -187,6 +188,9 @@ def detect_markup_language(text):
         else:
             detector['language'] = 'reStructuredText'
 
+    elif md_header or md_blockquote:
+        detector['language'] = 'Markdown'
+
     return detector
 
 
-- 
GitLab