SciPost Code Repository

Skip to content
Snippets Groups Projects
Commit b39ff46c authored by Jean-Sébastien Caux
Browse files

Partial work on language recognition, and rendering

parent 761078bc
No related branches found
No related tags found
No related merge requests found
......@@ -7,6 +7,8 @@ from docutils.core import publish_parts
import markdown
import re
from mdx_math import MathExtension
from django import forms
from django.utils.encoding import force_text
from django.utils.safestring import mark_safe
......@@ -24,7 +26,6 @@ class MarkupTextForm(forms.Form):
# Detect text format
markup_detector = detect_markup_language(text)
language = markup_detector['language']
print('language: %s' % language)
if markup_detector['errors']:
return markup_detector
......@@ -43,7 +44,8 @@ class MarkupTextForm(forms.Form):
'doctitle_xform': False,
'raw_enabled': False,
'file_insertion_enabled': False,
'warning_stream': warnStream})
'warning_stream': warnStream
})
return {
'language': language,
'processed_markup': mark_safe(force_text(parts['html_body'])),
......@@ -58,11 +60,17 @@ class MarkupTextForm(forms.Form):
elif language == 'Markdown':
return {
'language': language,
#'processed_markup': markdown.markdown(escape(text), output_format='html5')
'processed_markup': bleach.clean(
markdown.markdown(text, output_format='html5'),
tags=BLEACH_ALLOWED_TAGS)
}
'processed_markup': mark_safe(
markdown.markdown(
bleach.clean(
text,
tags=BLEACH_ALLOWED_TAGS
).replace('&amp;', '&').replace(' &lt; ', ' < '),
output_format='html5',
extensions=[MathExtension(enable_dollar_delimiter=True)]
)
)
}
# at this point, language is assumed to be plain text
from django.template.defaultfilters import linebreaksbr
......
......@@ -6,6 +6,7 @@ import bleach
from docutils.core import publish_parts
from io import StringIO
import markdown
import re
from mdx_math import MathExtension
......@@ -27,13 +28,11 @@ def process_markup(text, language_forced=None):
return ''
markup_detector = detect_markup_language(text)
print('language detected: %s' % markup_detector['language'])
language = language_forced if language_forced else markup_detector['language']
if markup_detector['errors']:
return markup_detector['errors']
language = language_forced if language_forced else markup_detector['language']
if language == 'reStructuredText':
warnStream = StringIO()
try:
......@@ -64,7 +63,12 @@ def process_markup(text, language_forced=None):
bleach.clean(
text,
tags=BLEACH_ALLOWED_TAGS
).replace('&amp;', '&').replace(' &lt; ', ' < '),
).replace('&amp;', '&' # to preserve math separator for MathJax
).replace(' &lt; ', ' < ' # to preserve < for MathJax
).replace(' &gt; ', ' > ' # to preserve > for MathJax
).replace('&gt;&gt;', '>>' # to preserve nested Markdown blockquotes
).replace('&gt; ', '> ' # to preserve > for Markdown blockquotes
),
output_format='html5',
extensions=[MathExtension(enable_dollar_delimiter=True)]
)
......
......@@ -40,7 +40,7 @@ def match_md_header(text, level=None):
raise TypeError('level must be an int')
if level < 1 or level > 6:
raise ValueError('level must be an integer from 1 to 6')
return re.search(r'^#{' + level + ',}[ ].+$', text)
return re.search(r'^#{' + str(level) + ',}[ ].+$', text)
def match_md_blockquote(text):
"""Return first match of regex search for Markdown blockquote."""
......@@ -166,6 +166,7 @@ def detect_markup_language(text):
md_blockquote = match_md_blockquote(text)
if rst_math_role or rst_math_directive:
# reStructuredText presumed; check for errors
if inline_math:
detector['errors'] = (
'You have mixed inline maths ($ ... $ or \( ... \) ) with '
......@@ -187,6 +188,9 @@ def detect_markup_language(text):
else:
detector['language'] = 'reStructuredText'
elif md_header or md_blockquote:
detector['language'] = 'Markdown'
return detector
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment