From b39ff46c32d0fa52f3f6c64efd2fb1d6e62fa512 Mon Sep 17 00:00:00 2001 From: "J.-S. Caux" <J.S.Caux@uva.nl> Date: Mon, 10 Jun 2019 21:27:32 +0200 Subject: [PATCH] Partial work on language recognition, and rendering --- markup/forms.py | 22 +++++++++++++++------- markup/templatetags/process_markup.py | 12 ++++++++---- markup/utils.py | 6 +++++- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/markup/forms.py b/markup/forms.py index 8dcc56976..3dc10533d 100644 --- a/markup/forms.py +++ b/markup/forms.py @@ -7,6 +7,8 @@ from docutils.core import publish_parts import markdown import re +from mdx_math import MathExtension + from django import forms from django.utils.encoding import force_text from django.utils.safestring import mark_safe @@ -24,7 +26,6 @@ class MarkupTextForm(forms.Form): # Detect text format markup_detector = detect_markup_language(text) language = markup_detector['language'] - print('language: %s' % language) if markup_detector['errors']: return markup_detector @@ -43,7 +44,8 @@ class MarkupTextForm(forms.Form): 'doctitle_xform': False, 'raw_enabled': False, 'file_insertion_enabled': False, - 'warning_stream': warnStream}) + 'warning_stream': warnStream + }) return { 'language': language, 'processed_markup': mark_safe(force_text(parts['html_body'])), @@ -58,11 +60,17 @@ class MarkupTextForm(forms.Form): elif language == 'Markdown': return { 'language': language, - #'processed_markup': markdown.markdown(escape(text), output_format='html5') - 'processed_markup': bleach.clean( - markdown.markdown(text, output_format='html5'), - tags=BLEACH_ALLOWED_TAGS) - } + 'processed_markup': mark_safe( + markdown.markdown( + bleach.clean( + text, + tags=BLEACH_ALLOWED_TAGS + ).replace('&amp;', '&').replace(' &lt; ', ' < '), + output_format='html5', + extensions=[MathExtension(enable_dollar_delimiter=True)] + ) + ) + } # at this point, language is assumed to be plain text from django.template.defaultfilters import linebreaksbr diff --git 
a/markup/templatetags/process_markup.py b/markup/templatetags/process_markup.py index 0e0afb26e..4cb03de2d 100644 --- a/markup/templatetags/process_markup.py +++ b/markup/templatetags/process_markup.py @@ -6,6 +6,7 @@ import bleach from docutils.core import publish_parts from io import StringIO import markdown +import re from mdx_math import MathExtension @@ -27,13 +28,11 @@ def process_markup(text, language_forced=None): return '' markup_detector = detect_markup_language(text) - print('language detected: %s' % markup_detector['language']) + language = language_forced if language_forced else markup_detector['language'] if markup_detector['errors']: return markup_detector['errors'] - language = language_forced if language_forced else markup_detector['language'] - if language == 'reStructuredText': warnStream = StringIO() try: @@ -64,7 +63,12 @@ def process_markup(text, language_forced=None): bleach.clean( text, tags=BLEACH_ALLOWED_TAGS - ).replace('&amp;', '&').replace(' &lt; ', ' < '), + ).replace('&amp;', '&' # to preserve math separator for MathJax + ).replace(' &lt; ', ' < ' # to preserve < for MathJax + ).replace(' &gt; ', ' > ' # to preserve > for MathJax + ).replace('&gt;&gt;', '>>' # to preserve nested Markdown blockquotes + ).replace('&gt; ', '> ' # to preserve > for Markdown blockquotes + ), output_format='html5', extensions=[MathExtension(enable_dollar_delimiter=True)] ) diff --git a/markup/utils.py b/markup/utils.py index e3ecc7650..7312cb0f8 100644 --- a/markup/utils.py +++ b/markup/utils.py @@ -40,7 +40,7 @@ def match_md_header(text, level=None): raise TypeError('level must be an int') if level < 1 or level > 6: raise ValueError('level must be an integer from 1 to 6') - return re.search(r'^#{' + level + ',}[ ].+$', text) + return re.search(r'^#{' + str(level) + ',}[ ].+$', text) def match_md_blockquote(text): """Return first match of regex search for Markdown blockquote.""" @@ -166,6 +166,7 @@ def detect_markup_language(text): md_blockquote = match_md_blockquote(text) if 
rst_math_role or rst_math_directive: + # reStructuredText presumed; check for errors if inline_math: detector['errors'] = ( 'You have mixed inline maths ($ ... $ or \( ... \) ) with ' @@ -187,6 +188,9 @@ def detect_markup_language(text): else: detector['language'] = 'reStructuredText' + elif md_header or md_blockquote: + detector['language'] = 'Markdown' + return detector -- GitLab