SciPost Code Repository

Skip to content
Snippets Groups Projects
Commit f3d06ffa authored by Jean-Sébastien Caux
Browse files

Work on markup autodetection

parent 9075ae0d
No related branches found
No related tags found
No related merge requests found
...@@ -132,9 +132,15 @@ class MarkupTextForm(forms.Form): ...@@ -132,9 +132,15 @@ class MarkupTextForm(forms.Form):
def get_processed_markup(self): def get_processed_markup(self):
text = self.cleaned_data['markup_text'] text = self.cleaned_data['markup_text']
# Detect text format # Detect text format
language = detect_markup_language(text) markup_detector = detect_markup_language(text)
language = markup_detector['language']
print('language: %s' % language) print('language: %s' % language)
if markup_detector['errors']:
return markup_detector
if language == 'reStructuredText': if language == 'reStructuredText':
# This performs the same actions as the restructuredtext filter of app scipost # This performs the same actions as the restructuredtext filter of app scipost
from io import StringIO from io import StringIO
...@@ -160,6 +166,7 @@ class MarkupTextForm(forms.Form): ...@@ -160,6 +166,7 @@ class MarkupTextForm(forms.Form):
'language': language, 'language': language,
'errors': warnStream.getvalue() 'errors': warnStream.getvalue()
} }
# at this point, language is assumed to be plain text
from django.template.defaultfilters import linebreaksbr from django.template.defaultfilters import linebreaksbr
return { return {
'language': language, 'language': language,
......
...@@ -3,6 +3,7 @@ __license__ = "AGPL v3" ...@@ -3,6 +3,7 @@ __license__ = "AGPL v3"
from datetime import timedelta from datetime import timedelta
import re
from django.core.mail import EmailMultiAlternatives from django.core.mail import EmailMultiAlternatives
from django.db.models import Q from django.db.models import Q
...@@ -140,13 +141,65 @@ def detect_markup_language(text): ...@@ -140,13 +141,65 @@ def detect_markup_language(text):
""" """
Detect which markup language is being used. Detect which markup language is being used.
Possible return values: This method returns a dictionary containing:
* plain
* reStructuredText * language
* errors
Language can be one of: plain, reStructuredText
The criteria used are:
* if the ``math`` role or directive is found together with $...$, return error
* if the ``math`` role or directive is found, return ReST
Assumptions:
* MathJax is set up with $...$ for inline, \[...\] for online equations.
""" """
rst_headers = ["####", "****", "====", "----", "^^^^", "\"\"\"\"",]
# Inline maths
inline_math = re.search("\$[^$]+\$", text)
if inline_math:
print('inline math: %s' % inline_math.group(0))
# Online maths is of the form \[ ... \]
# The re.DOTALL is to also capture newline chars with the . (any single character)
online_math = re.search(r'[\\][[].+[\\][\]]', text, re.DOTALL)
if online_math:
print('online math: %s' % online_math.group(0))
rst_math = '.. math::' in text or ':math:`' in text
# Normal inline/online maths cannot be used simultaneously with ReST math.
# If this is detected, language is set to plain, and errors are reported.
# Otherwise if math present in ReST but not in/online math, assume ReST.
if rst_math:
if inline_math:
return {
'language': 'plain',
'errors': ('Cannot determine whether this is plain text or reStructuredText.\n'
'You have mixed inline maths ($...$) with reStructuredText markup.'
'\n\nPlease use one or the other, but not both!')
}
elif online_math:
return {
'language': 'plain',
'errors': ('Cannot determine whether this is plain text or reStructuredText.\n'
'You have mixed online maths (\[...\]) with reStructuredText markup.'
'\n\nPlease use one or the other, but not both!')
}
else: # assume ReST
return {
'language': 'reStructuredText',
'errors': None
}
# reStructuredText header patterns
rst_header_patterns = [
"^#{2,}$", "^\*{2,}$", "^={2,}$", "^-{2,}$", "^\^{2,}$", "^\"{2,}$",]
# See list of reStructuredText directives at # See list of reStructuredText directives at
# http://docutils.sourceforge.net/0.4/docs/ref/rst/directives.html # http://docutils.sourceforge.net/0.4/docs/ref/rst/directives.html
# We don't include the math one here since we covered it above.
rst_directives = [ rst_directives = [
"attention", "caution", "danger", "error", "hint", "important", "note", "tip", "attention", "caution", "danger", "error", "hint", "important", "note", "tip",
"warning", "admonition", "warning", "admonition",
...@@ -156,28 +209,50 @@ def detect_markup_language(text): ...@@ -156,28 +209,50 @@ def detect_markup_language(text):
"contents", "sectnum", "section-autonumbering", "header", "footer", "contents", "sectnum", "section-autonumbering", "header", "footer",
"target-notes", "target-notes",
"replace", "unicode", "date", "class", "role", "default-role", "replace", "unicode", "date", "class", "role", "default-role",
"math",] ]
# See list at http://docutils.sourceforge.net/0.4/docs/ref/rst/roles.html # See list at http://docutils.sourceforge.net/0.4/docs/ref/rst/roles.html
rst_roles = [ rst_roles = [
"emphasis", "literal", "pep-reference", "rfc-reference", "emphasis", "literal", "pep-reference", "rfc-reference",
"strong", "subscript", "superscript", "title-reference", "strong", "subscript", "superscript", "title-reference",
"math",] ]
nr_rst_roles = 0
nr_rst_headers = 0 nr_rst_headers = 0
for header in rst_headers: for header_pattern in rst_header_patterns:
if header in text: matches = re.findall(header_pattern, text, re.MULTILINE)
nr_rst_headers += 1 print ('%s matched %d times' % (header_pattern, len(matches)))
nr_rst_headers += len(matches)
nr_rst_directives = 0 nr_rst_directives = 0
for directive in rst_directives: for directive in rst_directives:
if ('.. %s::' % directive) in text: if ('.. %s::' % directive) in text:
nr_rst_directives += 1 nr_rst_directives += 1
nr_rst_roles = 0
for role in rst_roles: for role in rst_roles:
if (':%s:`' % role) in text: if (':%s:`' % role) in text:
nr_rst_roles += 1 nr_rst_roles += 1
if (nr_rst_headers > 0 or nr_rst_directives > 0 or nr_rst_roles > 0): if (nr_rst_headers > 0 or nr_rst_directives > 0 or nr_rst_roles > 0):
return 'reStructuredText' if inline_math:
return 'plain' return {
'language': 'plain',
'errors': ('Cannot determine whether this is plain text or reStructuredText.\n'
'You have mixed inline maths ($...$) with reStructuredText markup.'
'\n\nPlease use one or the other, but not both!')
}
elif online_math:
return {
'language': 'plain',
'errors': ('Cannot determine whether this is plain text or reStructuredText.\n'
'You have mixed online maths (\[...\]) with reStructuredText markup.'
'\n\nPlease use one or the other, but not both!')
}
else:
return {
'language': 'reStructuredText',
'errors': None
}
return {
'language': 'plain',
'errors': None
}
...@@ -31,14 +31,14 @@ $('#runPreviewButton').on('click', function(){ ...@@ -31,14 +31,14 @@ $('#runPreviewButton').on('click', function(){
$('#preview-description').css('background', '#feebce'); $('#preview-description').css('background', '#feebce');
$('#submitButton').hide(); $('#submitButton').hide();
$('#runPreviewButton').show(); $('#runPreviewButton').show();
alert("An error has occurred while processing the ReStructuredText:\n\n" + data.errors); alert("An error has occurred while processing the text:\n\n" + data.errors);
} }
$('#preview-description').html(data.processed_markup); $('#preview-description').html(data.processed_markup);
let preview = document.getElementById('preview-description'); let preview = document.getElementById('preview-description');
MathJax.Hub.Queue(["Typeset",MathJax.Hub, preview]); MathJax.Hub.Queue(["Typeset",MathJax.Hub, preview]);
}, },
error: function(data) { error: function(data) {
alert("An error has occurred while processing the ReStructuredText."); alert("An error has occurred while processing the text.");
} }
}); });
$('#runPreviewButton').hide(); $('#runPreviewButton').hide();
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment