diff --git a/scipost_django/SciPost_v1/settings/base.py b/scipost_django/SciPost_v1/settings/base.py index 52855255f9638eb287be166d3fb051a4be47e432..5865cea0b12e117f137d7aca6a1dab5f23a32215 100644 --- a/scipost_django/SciPost_v1/settings/base.py +++ b/scipost_django/SciPost_v1/settings/base.py @@ -489,6 +489,12 @@ LOGGING = { "filename": "/path/to/logs/arxiv.log", "formatter": "verbose", }, + "scipost_file_chemrxiv": { + "level": "INFO", + "class": "logging.FileHandler", + "filename": "/path/to/logs/chemrxiv.log", + "formatter": "verbose", + }, "scipost_file_doi": { "level": "INFO", "class": "logging.FileHandler", @@ -521,6 +527,12 @@ LOGGING = { "propagate": True, "formatter": "simple", }, + "scipost.services.chemrxiv": { + "handlers": ["scipost_file_chemrxiv"], + "level": "INFO", + "propagate": True, + "formatter": "simple", + }, "scipost.services.doi": { "handlers": ["scipost_file_doi"], "level": "INFO", diff --git a/scipost_django/SciPost_v1/settings/production_do1.py b/scipost_django/SciPost_v1/settings/production_do1.py index 03dbe9f227971d435476ccbe97fc0c4e4fabab25..043e790d71c6bde1cd8b7ee84249bb4be3c6a8a9 100644 --- a/scipost_django/SciPost_v1/settings/production_do1.py +++ b/scipost_django/SciPost_v1/settings/production_do1.py @@ -63,6 +63,9 @@ ITHENTICATE_PASSWORD = get_secret("ITHENTICATE_PASSWORD") LOGGING["handlers"]["scipost_file_arxiv"][ "filename" ] = "/home/scipost/SciPost_logs/arxiv.log" +LOGGING["handlers"]["scipost_file_chemrxiv"][ + "filename" +] = "/home/scipost/SciPost_logs/chemrxiv.log" LOGGING["handlers"]["scipost_file_doi"][ "filename" ] = "/home/scipost/SciPost_logs/doi.log" diff --git a/scipost_django/scipost/services.py b/scipost_django/scipost/services.py index 519940b117ddb1e53f87e24a7dcf8c8389f3f50d..090276d9bd6bb41176870a13d200dbc805e7e95f 100644 --- a/scipost_django/scipost/services.py +++ b/scipost_django/scipost/services.py @@ -3,6 +3,8 @@ __license__ = "AGPL v3" # Module for making external api calls as needed in the submissions cycle +import re +from django.db.models import query import feedparser import requests import datetime @@ -13,8 +15,10 @@ from common.utils import remove_extra_spacing from submissions.constants import FIGSHARE_PREPRINT_SERVERS from submissions.models import PreprintServer +from submissions.regexes import CHEMRXIV_DOI_PATTERN arxiv_logger = logging.getLogger("scipost.services.arxiv") +chemrxiv_logger = logging.getLogger("scipost.services.chemrxiv") doi_logger = logging.getLogger("scipost.services.doi") figshare_logger = logging.getLogger("scipost.services.figshare") osfpreprints_logger = logging.getLogger("scipost.services.osfpreprints") @@ -188,6 +192,69 @@ class ArxivCaller: return False +class ChemRxivCaller: + """ChemRxiv Caller will help retrieve Submission data from ChemRxiv API.""" + + query_base_url = "https://chemrxiv.org/engage/chemrxiv/public-api/v1/items/" + doi_pattern = r"(https://doi.org/)?" + f"({CHEMRXIV_DOI_PATTERN})" + url_id_pattern = ( + r"(https://chemrxiv.org/engage/chemrxiv/article-details/)?([a-z0-9]{24})" + ) + valid_patterns = f"(({doi_pattern})|({url_id_pattern}))" + + def __init__(self, identifier): + self.identifier = identifier + chemrxiv_logger.info("New ChemRxiv call for identifier %s" % identifier) + + self.metadata = self._call_chemrxiv() + self.data = self._format_data(self.metadata) if self.is_valid else None + + def _call_chemrxiv(self): + if m := re.match(self.doi_pattern, self.identifier): + doi = m.group(2) + url = self.query_base_url + "doi/" + doi + elif m := re.match(self.url_id_pattern, self.identifier): + item_id = m.group(2) + url = self.query_base_url + item_id + else: + self.is_valid = False + return + + request = requests.get(url) + try: + response_content = request.json() + except: + self.is_valid = False + return + + chemrxiv_logger.info( + "GET [{chemrxiv}] [request] | {url}".format( + chemrxiv=self.identifier, url=url + ) + ) + + self.url = url + self.is_valid = True + return response_content + + @staticmethod + def _format_data(data): + formatted_data = { + "title": data.get("title", ""), + "author_list": ", ".join( + [a["firstName"] + " " + a["lastName"] for a in data.get("authors", [])] + ), + "abstract": data.get("abstract", ""), + "pub_date": datetime.datetime.fromisoformat(pub_date) + if (pub_date := data.get("publishedDate")) + else None, + "identifier_w_vn_nr": data.get("doi", ""), + "preprint_link": "https://doi.org/" + data.get("doi", ""), + } + + return formatted_data + + class FigshareCaller: """ Figshare caller to get data from api.figshare.com. diff --git a/scipost_django/submissions/forms/__init__.py b/scipost_django/submissions/forms/__init__.py index 8bb3534cae68f083ac220b5f06e9054746015885..af0949cd349636332c1fdace1f3ed8fea69821f7 100644 --- a/scipost_django/submissions/forms/__init__.py +++ b/scipost_django/submissions/forms/__init__.py @@ -80,7 +80,13 @@ from preprints.helpers import get_new_scipost_identifier from preprints.models import Preprint from proceedings.models import Proceedings from profiles.models import Profile -from scipost.services import DOICaller, ArxivCaller, FigshareCaller, OSFPreprintsCaller +from scipost.services import ( + ChemRxivCaller, + DOICaller, + ArxivCaller, + FigshareCaller, + OSFPreprintsCaller, +) from scipost.models import Contributor, Remark from series.models import Collection import strings @@ -721,45 +727,18 @@ def check_arxiv_identifier_w_vn_nr(identifier): def check_chemrxiv_doi(doi): """ Call Crossref to get ChemRxiv preprint data. + `doi` is the DOI of the preprint, but can also be a link to the submission, or its database ID. """ - caller = DOICaller(doi) - if caller.is_valid: - data = caller.data - metadata = caller.data["crossref_data"] - else: - error_message = "A preprint associated to this DOI does not exist." - raise forms.ValidationError(error_message) - - # Check if the type of this resource is indeed a preprint - if "subtype" in metadata: - if metadata["subtype"] != "preprint": - error_message = ( - "This does not seem to be a preprint: the type " - "returned by Crossref on behalf of " - "%(preprint_server) is %(subtype). " - "Please contact techsupport." - ) - raise forms.ValidationError( - error_message, - code="wrong_subtype", - params={ - "preprint_server": preprint_server.name, - "subtype": metadata["subtype"], - }, - ) - else: - raise forms.ValidationError( - "Crossref failed to return a subtype. Please contact techsupport.", - code="wrong_subtype", + caller = ChemRxivCaller(doi) + if not caller.is_valid or not (data := caller.data): + error_message = ( + "The preprint could not be found. Please check the provided identifier." ) + raise forms.ValidationError(error_message) # Explicitly add ChemRxiv as the preprint server: data["preprint_server"] = PreprintServer.objects.get(name="ChemRxiv") - data["preprint_link"] = "https://doi.org/%s" % doi - # Build the identifier by stripping the DOI prefix: - identifier = doi - data["identifier_w_vn_nr"] = identifier - return data, metadata, identifier + return data, caller.metadata, data.get("identifier_w_vn_nr") def check_figshare_identifier_w_vn_nr(preprint_server, figshare_identifier_w_vn_nr): @@ -1054,14 +1033,14 @@ class ChemRxivPrefillForm(SubmissionPrefillForm): chemrxiv_doi = forms.RegexField( label="", - regex=CHEMRXIV_DOI_PATTERN, + regex=ChemRxivCaller.valid_patterns, strip=True, error_messages={"invalid": "Invalid ChemRxiv DOI"}, widget=forms.TextInput(), ) def __init__(self, *args, **kwargs): - self.crossref_data = {} + self.data = {} self.metadata = {} super().__init__(*args, **kwargs) @@ -1070,7 +1049,7 @@ class ChemRxivPrefillForm(SubmissionPrefillForm): identifier = self.cleaned_data.get("chemrxiv_doi", None).partition("/")[2] check_identifier_is_unused(identifier) - self.crossref_data, self.metadata, identifier = check_chemrxiv_doi( + self.data, self.metadata, identifier = check_chemrxiv_doi( self.cleaned_data["chemrxiv_doi"] ) return identifier @@ -1080,7 +1059,21 @@ class ChemRxivPrefillForm(SubmissionPrefillForm): Return dictionary to prefill `SubmissionForm`. """ form_data = super().get_prefill_data() - form_data.update(self.crossref_data) + form_data.update(self.data) + + # check metadata for specialties + category_titles = ( + [c["name"].title() for c in self.metadata["categories"]] + if self.metadata + else [] + ) + form_data["specialties"] = Specialty.objects.filter(name__in=category_titles) + + # check keywords for topics + keyword_titles = ( + [k.title() for k in self.metadata["keywords"]] if self.metadata else [] + ) + form_data["topics"] = Topic.objects.filter(name__in=keyword_titles) if self.is_resubmission(): form_data.update( @@ -1619,10 +1612,10 @@ class SubmissionForm(forms.ModelForm): def clean_title(self): return remove_extra_spacing(self.cleaned_data["title"]) - + def clean_abstract(self): return remove_extra_spacing(self.cleaned_data["abstract"]) - + @transaction.atomic def save(self): """ diff --git a/scipost_django/submissions/templates/submissions/submit_choose_preprint_server.html b/scipost_django/submissions/templates/submissions/submit_choose_preprint_server.html index 23cd5b34999788f652e8cce17320c0690b1e7d0c..ac9ef02482808354a613bf7c01ef3f23dd09c750 100644 --- a/scipost_django/submissions/templates/submissions/submit_choose_preprint_server.html +++ b/scipost_django/submissions/templates/submissions/submit_choose_preprint_server.html @@ -4,7 +4,9 @@ {% load bootstrap %} {% load journals_extras %} -{% block pagetitle %}: submit manuscript{% endblock pagetitle %} +{% block pagetitle %} + : submit manuscript +{% endblock pagetitle %} {% block breadcrumb_items %} {{ block.super }} @@ -14,7 +16,16 @@ {% block content %} <div class="row"> <div class="col-12"> - <h1 class="highlight">{% if thread_hash %}Resubmit a{% else %}Submit a new{% endif %} manuscript to {{ journal }}</h1> + <h1 class="highlight"> + + {% if thread_hash %} + Resubmit a + {% else %} + Submit a new + {% endif %} + + manuscript to {{ journal }} + </h1> </div> </div> @@ -22,106 +33,159 @@ <div class="row"> <div class="col-md-6"> - <h2 class=highlight>Step 3</h2> - {% if preprint_server_list|length > 1 %} - <h3>Which preprint server do you wish to use for your {% if thread_hash %}re{% endif %}submission?</h3> - <p>SciPost's own preprint server is always available. - We however strongly encourage you to use your field's customary - preprint server, to ensure maximal dissemination of your preprint.</p> - <p>If you use an external preprint server, make sure your submission - is publicly available there before proceeding here (our machines will query theirs - in order to automatically prefill our submission form for you).</p> - {% else %} - <h3>Please proceed with the SciPost preprint server (others are not available)</h3> - {% endif %} - </div> - <div class="col-md-6"> - {% include 'submissions/submit_steps.html' with step=3 thread_hash=thread_hash %} + <h2 class=highlight>Step 3</h2> + + {% if preprint_server_list|length > 1 %} + <h3> + Which preprint server do you wish to use for your + + {% if thread_hash %}re{% endif %} + + submission? + </h3> + <p> + SciPost's own preprint server is always available. + We however strongly encourage you to use your field's customary + preprint server, to ensure maximal dissemination of your preprint. + </p> + <p> + If you use an external preprint server, make sure your submission + is publicly available there before proceeding here (our machines will query theirs + in order to automatically prefill our submission form for you). + </p> + {% else %} + <h3>Please proceed with the SciPost preprint server (others are not available)</h3> + {% endif %} + </div> + <div class="col-md-6">{% include 'submissions/submit_steps.html' with step=3 thread_hash=thread_hash %}</div> </div> - <br> + <br /> <div class="container"> <div class="row row-cols-1 row-cols-lg-2 row-cols-xl-3"> - {% for preprint_server in preprint_server_list %} - <div class="col col-sm-12 col-lg-6 mb-2"> - <div class="card mb-4"> - <div class="card-header bg-dark text-white"> - <h3 class="p-2 m-0"><em>{% if thread_hash %}Resubmit{% else %}Submit{% endif %} via</em> <strong class="text-warning">{{ preprint_server.server }}</strong></h3> - </div> - <div class="card-body text-center"> - {% if preprint_server.server.name == 'SciPost' %} - <form action="{% url 'submissions:submit_manuscript_scipost' journal_doi_label=journal.doi_label %}" method="get"> - {{ preprint_server.prefill_form }} - {% if thread_hash %} - <input type="hidden" name="thread_hash" value="{{ thread_hash }}"/> - {% endif %} - <input type="submit" class="btn btn-primary text-white" value="Go to the SciPost submission form"/> - </form> - {% elif preprint_server.server.name == 'arXiv' %} - <h3>Please provide the arXiv identifier for your Submission</h3> - <p><em>without the website prefix but with version number, e.g. - <br> ####.####(#)v#(#)</em></p> - <form action="{% url 'submissions:submit_manuscript_arxiv' journal_doi_label=journal.doi_label %}" method="get"> - {{ preprint_server.prefill_form }} - {% if thread_hash %} - <input type="hidden" name="thread_hash" value="{{ thread_hash }}"/> - {% endif %} - {% include 'bi/arrow-right.html' %} - <input type="submit" class="btn btn-primary text-white" value="Query arXiv"/> - </form> - {% elif preprint_server.server.name == 'ChemRxiv' %} - <h3>Please provide the ChemRxiv DOI for your Submission</h3> - <p class="mb-0">Pattern: either - <ul> - <li>old style: <em>10.#####/chemrxiv.#######(.v#)</em></li> - <li>new style: <em>10.#####/chemrxiv-*****(-v#)</em></li> - </ul> - </p> - <form action="{% url 'submissions:submit_manuscript_chemrxiv' journal_doi_label=journal.doi_label %}" method="get"> - {{ preprint_server.prefill_form }} - {% if thread_hash %} - <input type="hidden" name="thread_hash" value="{{ thread_hash }}"/> - {% endif %} - {% include 'bi/arrow-right.html' %} - <input type="submit" class="btn btn-primary text-white" value="Query ChemRxiv"/> - </form> - {% elif preprint_server.server.served_by and preprint_server.server.served_by.name == 'OSFPreprints' %} - <h3>Please provide the {{ preprint_server.server.name }} identifier for your Submission</h3> - <p><em>just a few lowercase alphanumeric characters, e.g. #####</em></p> - <p><em>Hint: on the submission's {{ preprint_server.server.name }} page, look for the last few characters in the DOI to see the identifier.</em></p> - <p><em>Example: if the DOI is <strong>10.31235/osf.io/xxzbx</strong>, - then you need <strong>xxzbx</strong></em></p> - <form action="{% url 'submissions:submit_manuscript_osfpreprints' journal_doi_label=journal.doi_label %}" method="get"> - {{ preprint_server.prefill_form }} - {% if thread_hash %} - <input type="hidden" name="thread_hash" value="{{ thread_hash }}"/> - {% endif %} - {% include 'bi/arrow-right.html' %} - <input type="submit" class="btn btn-primary text-white" value="Query {{ preprint_server.server.name }}"/> - </form> - {% elif preprint_server.server.served_by and preprint_server.server.served_by.name == 'Figshare' %} - <h3>Please provide the {{ preprint_server.server.name }} identifier for your Submission</h3> - <p><em>without the url prefix but with version number, e.g. - <br> ########.v#</em></p> - <p><em>Hint: on the submission's {{ preprint_server.server.name }} page, click on the "Cite" button to see the identifier.</em></p> - <form action="{% url 'submissions:submit_manuscript_figshare' journal_doi_label=journal.doi_label %}" method="get"> - {{ preprint_server.prefill_form }} - {% if thread_hash %} - <input type="hidden" name="thread_hash" value="{{ thread_hash }}"/> - {% endif %} - {% include 'bi/arrow-right.html' %} - <input type="submit" class="btn btn-primary text-white" value="Query {{ preprint_server.server.name }}"/> - </form> - {% endif %} - </div> - </div> - </div> - {% endfor %} + + {% for preprint_server in preprint_server_list %} + <div class="col col-sm-12 col-lg-6 mb-2"> + <div class="card mb-4"> + <div class="card-header bg-dark text-white"> + <h3 class="p-2 m-0"> + <em> + + {% if thread_hash %} + Resubmit + {% else %} + Submit + {% endif %} + + via</em> <strong class="text-warning">{{ preprint_server.server }}</strong> + </h3> + </div> + <div class="card-body"> + + {% if preprint_server.server.name == 'SciPost' %} + <form action="{% url 'submissions:submit_manuscript_scipost' journal_doi_label=journal.doi_label %}" + method="get"> + {{ preprint_server.prefill_form }} + + {% if thread_hash %}<input type="hidden" name="thread_hash" value="{{ thread_hash }}" />{% endif %} + + <input type="submit" + class="btn btn-primary text-white" + value="Go to the SciPost submission form" /> + </form> + {% elif preprint_server.server.name == 'arXiv' %} + <h3>Please provide the arXiv identifier for your Submission</h3> + <p> + <em>without the website prefix but with version number, e.g. + <br /> + ####.####(#)v#(#)</em> + </p> + <form action="{% url 'submissions:submit_manuscript_arxiv' journal_doi_label=journal.doi_label %}" + method="get"> + {{ preprint_server.prefill_form }} + + {% if thread_hash %}<input type="hidden" name="thread_hash" value="{{ thread_hash }}" />{% endif %} + + {% include 'bi/arrow-right.html' %} + <input type="submit" class="btn btn-primary text-white" value="Query arXiv" /> + </form> + {% elif preprint_server.server.name == 'ChemRxiv' %} + <h3>Please provide the ChemRxiv DOI/URL for your Submission</h3> + <p class="mb-0"> + Patterns: + <ul> + <li>10.#####/chemrxiv-*****(-v#)</li> + <li>https://chemrxiv.org/****/article-details/****</li> + </ul> + </p> + <form action="{% url 'submissions:submit_manuscript_chemrxiv' journal_doi_label=journal.doi_label %}" + method="get"> + {{ preprint_server.prefill_form }} + + {% if thread_hash %}<input type="hidden" name="thread_hash" value="{{ thread_hash }}" />{% endif %} + + {% include 'bi/arrow-right.html' %} + <input type="submit" + class="btn btn-primary text-white" + value="Query ChemRxiv" /> + </form> + {% elif preprint_server.server.served_by and preprint_server.server.served_by.name == 'OSFPreprints' %} + <h3>Please provide the {{ preprint_server.server.name }} identifier for your Submission</h3> + <p> + <em>just a few lowercase alphanumeric characters, e.g. #####</em> + </p> + <p> + <em>Hint: on the submission's {{ preprint_server.server.name }} page, look for the last few characters in the DOI to see the identifier.</em> + </p> + <p> + <em>Example: if the DOI is <strong>10.31235/osf.io/xxzbx</strong>, + then you need <strong>xxzbx</strong></em> + </p> + <form action="{% url 'submissions:submit_manuscript_osfpreprints' journal_doi_label=journal.doi_label %}" + method="get"> + {{ preprint_server.prefill_form }} + + {% if thread_hash %}<input type="hidden" name="thread_hash" value="{{ thread_hash }}" />{% endif %} + + {% include 'bi/arrow-right.html' %} + <input type="submit" + class="btn btn-primary text-white" + value="Query {{ preprint_server.server.name }}" /> + </form> + {% elif preprint_server.server.served_by and preprint_server.server.served_by.name == 'Figshare' %} + <h3>Please provide the {{ preprint_server.server.name }} identifier for your Submission</h3> + <p> + <em>without the url prefix but with version number, e.g. + <br /> + ########.v#</em> + </p> + <p> + <em>Hint: on the submission's {{ preprint_server.server.name }} page, click on the "Cite" button to see the identifier.</em> + </p> + <form action="{% url 'submissions:submit_manuscript_figshare' journal_doi_label=journal.doi_label %}" + method="get"> + {{ preprint_server.prefill_form }} + + {% if thread_hash %}<input type="hidden" name="thread_hash" value="{{ thread_hash }}" />{% endif %} + + {% include 'bi/arrow-right.html' %} + <input type="submit" + class="btn btn-primary text-white" + value="Query {{ preprint_server.server.name }}" /> + </form> + {% endif %} + + </div> + </div> + </div> + {% endfor %} + + </div> </div> - </div> - {% else %} - <h3>You are currently not allowed to submit a manuscript.</h3> - {% endif %} -{% endblock content %} + {% else %} + <h3>You are currently not allowed to submit a manuscript.</h3> + {% endif %} + + {% endblock content %}