From 6a1952e44cef4e3ec40d3b4509f20d8844cfb1f2 Mon Sep 17 00:00:00 2001
From: "J.-S. Caux" <J.S.Caux@uva.nl>
Date: Thu, 13 Jun 2019 09:19:50 +0200
Subject: [PATCH] Improve markup language detection

---
 markup/utils.py | 122 ++++++++++++++++++++++++++++--------------------
 markup/views.py |   1 +
 2 files changed, 72 insertions(+), 51 deletions(-)

diff --git a/markup/utils.py b/markup/utils.py
index 3e0487510..d19851c0e 100644
--- a/markup/utils.py
+++ b/markup/utils.py
@@ -106,12 +106,48 @@ def match_rst_header(text, symbol=None):
         return None
     if symbol not in ReST_HEADER_REGEX_DICT.keys():
         raise ValueError('symbol is not a ReST header symbol')
-    print('Looking for %s in rst: %s' % (
-        symbol,
-        re.search(ReST_HEADER_REGEX_DICT[symbol], text, re.MULTILINE)))
     return re.search(ReST_HEADER_REGEX_DICT[symbol], text, re.MULTILINE)
 
 
+def check_markers(markers):
+    """
+    Check a markers dictionary for consistency and return a detector.
+
+    ``markers`` maps 'plain_or_md', 'md' and 'rst' to dicts of matches found.
+    The result is a dict with keys 'language' and 'errors'; mixing rst
+    markers with md or plain_or_md markers gives 'plain' plus an error text.
+    The ``markers`` dictionary passed in is left unmodified.
+    """
+    def last_item(found):
+        # Same entry popitem() would report, without mutating the input.
+        return list(found.items())[-1]
+
+    if markers['rst']:
+        if markers['md']:
+            return {
+                'language': 'plain',
+                'errors': ('Inconsistency: Markdown and reStructuredText syntaxes are mixed:\n\n'
+                           'Markdown: %s\n\nreStructuredText: %s' % (
+                               last_item(markers['md']),
+                               last_item(markers['rst'])))
+            }
+        if markers['plain_or_md']:
+            return {
+                'language': 'plain',
+                'errors': ('Inconsistency: plain/Markdown and reStructuredText '
+                           'syntaxes are mixed:\n\n'
+                           'Markdown: %s\n\nreStructuredText: %s' % (
+                               last_item(markers['plain_or_md']),
+                               last_item(markers['rst'])))
+            }
+        return {'language': 'reStructuredText', 'errors': None}
+
+    if markers['md']:
+        return {'language': 'Markdown', 'errors': None}
+
+    return {'language': 'plain', 'errors': None}
+
+
 def detect_markup_language(text):
     """
     Detect whether text is plain text, Markdown or reStructuredText.
@@ -163,68 +199,48 @@ def detect_markup_language(text):
     * if the ``math`` role or directive is found together with inline/displayed maths
     """
 
-    # Start from the default assumption
-    detector = {
-        'language': 'plain',
-        'errors': None
+    markers = {
+        'plain_or_md': {},
+        'md': {},
+        'rst': {},
     }
 
     # Step 1: check maths
 
     # Inline maths is of the form $ ... $ or \( ... \)
-    inline_math = match_inline_math(text)
+    match = match_inline_math(text)
+    if match:
+        markers['plain_or_md']['inline_math'] = match
 
     # Displayed maths is of the form \[ ... \] or $$ ... $$
-    displayed_math = match_displayed_math(text)
-
-    rst_math_role = match_rst_role(text, 'math')
-    rst_math_directive = match_rst_directive(text, 'math')
-
-    if rst_math_role or rst_math_directive:
-        # reStructuredText presumed; check for errors
-        if inline_math:
-            detector['errors'] = (
-                'You have mixed inline maths ($ ... $ or \( ... \) ) with '
-                'reStructuredText markup.\n\nPlease use one or the other, but not both!')
-            return detector
-        elif displayed_math:
-            detector['errors'] = (
-                'You have mixed displayed maths ($$ ... $$ or \[ ... \]) with '
-                'reStructuredText markup.\n\nPlease use one or the other, but not both!')
-            return detector
-        else:
-            detector['language'] = 'reStructuredText'
-            return detector
+    match = match_displayed_math(text)
+    if match:
+        markers['plain_or_md']['displayed_math'] = match
 
-    # no rst math from here onwards
+    match = match_rst_role(text, 'math')
+    if match:
+        markers['rst']['math_role'] = match
+    match = match_rst_directive(text, 'math')
+    if match:
+        markers['rst']['math_directive'] = match
 
     # Step 2: check headers and blockquotes
 
-    md_header = match_md_header(text)
-    print('md_header: %s' % md_header)
-    md_blockquote = match_md_blockquote(text)
-
-    rst_header = match_rst_header(text)
-    print('rst_header: %s' % rst_header)
-
-    if md_header or md_blockquote:
-        if rst_math_role or rst_math_directive:
-            if md_header:
-                detector['errors'] = (
-                    'You have mixed Markdown headers with reStructuredText math '
-                    'roles/directives.\n\nPlease use one language only.')
-            elif md_blockquote:
-                detector['errors'] = (
-                    'You have mixed Markdown blockquotes with reStructuredText math '
-                    'roles/directives.\n\nPlease use one language only.')
-        detector['language'] = 'Markdown'
+    match = match_md_header(text)
+    if match:
+        markers['md']['header'] = match
+    match = match_md_blockquote(text)
+    if match:
+        markers['md']['blockquote'] = match
 
-    elif md_header or md_blockquote:
-        detector['language'] = 'Markdown'
+    match = match_rst_header(text)
+    if match:
+        markers['rst']['header'] = match
 
-    elif rst_header:
-        detector['language'] = 'reStructuredText'
+    print('markers: \n%s' % markers)
 
+    detector = check_markers(markers)
+    print('detector: \n%s' % detector)
     return detector
 
 
@@ -397,6 +413,10 @@ def process_markup(text, language_forced=None):
 
     language = language_forced if language_forced else markup_detector['language']
     markup['language'] = language
+    markup['errors'] = markup_detector['errors']
+
+    if markup['errors']:
+        return markup
 
     if language == 'reStructuredText':
         warnStream = StringIO()
diff --git a/markup/views.py b/markup/views.py
index b5e254e44..b6f1f5d67 100644
--- a/markup/views.py
+++ b/markup/views.py
@@ -22,6 +22,7 @@ def process(request):
     """
     form = MarkupTextForm(request.POST or None)
     if form.is_valid():
+        print('response: \n%s' % form.get_processed_markup())
         return JsonResponse(form.get_processed_markup())
     return JsonResponse({})
 
-- 
GitLab