Work on markup autodetection

f3d06ffa · Jean-Sébastien Caux · 9075ae0d · f3d06ffa · f3d06ffa · f3d06ffa
Commit f3d06ffa authored 5 years ago by Jean-Sébastien Caux
--- a/common/forms.py
+++ b/common/forms.py
@@ -132,9 +132,15 @@ class MarkupTextForm(forms.Form):
    def get_processed_markup(self):
        text = self.cleaned_data['markup_text']
        # Detect text format
-        language = detect_markup_language(text)
+        markup_detector = detect_markup_language(text)
+        language = markup_detector['language']
        print('language: %s' % language)
+        if markup_detector['errors']:
+            return markup_detector
        if language == 'reStructuredText':
            # This performs the same actions as the restructuredtext filter of app scipost
            from io import StringIO
@@ -160,6 +166,7 @@ class MarkupTextForm(forms.Form):
                'language': language,
                'errors': warnStream.getvalue()
            }
+        # at this point, language is assumed to be plain text
        from django.template.defaultfilters import linebreaksbr
        return {
            'language': language,

--- a/common/utils.py
+++ b/common/utils.py
@@ -3,6 +3,7 @@ __license__ = "AGPL v3"
 from datetime import timedelta
+import re
 from django.core.mail import EmailMultiAlternatives
 from django.db.models import Q
@@ -140,13 +141,65 @@ def detect_markup_language(text):
    """
    Detect which markup language is being used.
-    Possible return values:
+    This method returns a dictionary containing:
-    * plain
-    * reStructuredText
+    * language
+    * errors
+    Language can be one of: plain, reStructuredText
+    The criteria used are:
+    * if the ``math`` role or directive is found together with $...$ or online maths, return error
+    * if the ``math`` role or directive is found, return ReST
+    Assumptions:
+    * MathJax is set up with $...$ for inline and \[...\] for display ("online") equations.
    """
-    rst_headers = ["####", "****", "====", "----", "^^^^", "\"\"\"\"",]
+    # Inline maths
+    inline_math = re.search("\$[^$]+\$", text)
+    if inline_math:
+        print('inline math: %s' % inline_math.group(0))
+    # Online maths is of the form \[ ... \]
+    # re.DOTALL makes '.' match newline characters too, so multi-line equations are captured.
+    online_math = re.search(r'[\\][[].+[\\][\]]', text, re.DOTALL)
+    if online_math:
+        print('online math: %s' % online_math.group(0))
+    rst_math = '.. math::' in text or ':math:`' in text
+    # Inline ($...$) or online (\[...\]) maths cannot be combined with ReST math.
+    # If both are detected, language is set to plain and an error is reported.
+    # Otherwise, if ReST math is present without inline/online maths, assume ReST.
+    if rst_math:
+        if inline_math:
+            return {
+                'language': 'plain',
+                'errors': ('Cannot determine whether this is plain text or reStructuredText.\n'
+                           'You have mixed inline maths ($...$) with reStructuredText markup.'
+                           '\n\nPlease use one or the other, but not both!')
+            }
+        elif online_math:
+            return {
+                'language': 'plain',
+                'errors': ('Cannot determine whether this is plain text or reStructuredText.\n'
+                           'You have mixed online maths (\[...\]) with reStructuredText markup.'
+                           '\n\nPlease use one or the other, but not both!')
+            }
+        else: # assume ReST
+            return {
+                'language': 'reStructuredText',
+                'errors': None
+            }
+    # reStructuredText header patterns
+    rst_header_patterns = [
+        "^#{2,}$", "^\*{2,}$", "^={2,}$", "^-{2,}$", "^\^{2,}$", "^\"{2,}$",]
    # See list of reStructuredText directives at
    # http://docutils.sourceforge.net/0.4/docs/ref/rst/directives.html
+    # We don't include the math one here since we covered it above.
    rst_directives = [
        "attention", "caution", "danger", "error", "hint", "important", "note", "tip",
        "warning", "admonition",
@@ -156,28 +209,50 @@ def detect_markup_language(text):
        "contents", "sectnum", "section-autonumbering", "header", "footer",
        "target-notes",
        "replace", "unicode", "date", "class", "role", "default-role",
-        "math",]
+    ]
    # See list at http://docutils.sourceforge.net/0.4/docs/ref/rst/roles.html
    rst_roles = [
        "emphasis", "literal", "pep-reference", "rfc-reference",
        "strong", "subscript", "superscript", "title-reference",
-        "math",]
+    ]
-    nr_rst_roles = 0
    nr_rst_headers = 0
-    for header in rst_headers:
+    for header_pattern in rst_header_patterns:
-        if header in text:
+        matches = re.findall(header_pattern, text, re.MULTILINE)
-            nr_rst_headers += 1
+        print ('%s matched %d times' % (header_pattern, len(matches)))
+        nr_rst_headers += len(matches)
    nr_rst_directives = 0
    for directive in rst_directives:
        if ('.. %s::' % directive) in text:
            nr_rst_directives += 1
+    nr_rst_roles = 0
    for role in rst_roles:
        if (':%s:`' % role) in text:
            nr_rst_roles += 1
    if (nr_rst_headers > 0 or nr_rst_directives > 0 or nr_rst_roles > 0):
-        return 'reStructuredText'
+        if inline_math:
-    return 'plain'
+            return {
+                'language': 'plain',
+                'errors': ('Cannot determine whether this is plain text or reStructuredText.\n'
+                           'You have mixed inline maths ($...$) with reStructuredText markup.'
+                           '\n\nPlease use one or the other, but not both!')
+            }
+        elif online_math:
+            return {
+                'language': 'plain',
+                'errors': ('Cannot determine whether this is plain text or reStructuredText.\n'
+                           'You have mixed online maths (\[...\]) with reStructuredText markup.'
+                           '\n\nPlease use one or the other, but not both!')
+            }
+        else:
+            return {
+                'language': 'reStructuredText',
+                'errors': None
+            }
+    return {
+        'language': 'plain',
+        'errors': None
+    }
--- a/scipost/static/scipost/ticket-preview.js
+++ b/scipost/static/scipost/ticket-preview.js
@@ -31,14 +31,14 @@ $('#runPreviewButton').on('click', function(){
 		$('#preview-description').css('background', '#feebce');
 		$('#submitButton').hide();
 		$('#runPreviewButton').show();
-		alert("An error has occurred while processing the ReStructuredText:\n\n" + data.errors);
+		alert("An error has occurred while processing the text:\n\n" + data.errors);
 	    }
    	    $('#preview-description').html(data.processed_markup);
 	    let preview = document.getElementById('preview-description');
    	    MathJax.Hub.Queue(["Typeset",MathJax.Hub, preview]);
    	},
 	error: function(data) {
-	    alert("An error has occurred while processing the ReStructuredText.");
+	    alert("An error has occurred while processing the text.");
 	}
    });
    $('#runPreviewButton').hide();