From 1d3919651eba0cc66cb0f3be311a5e208d92a118 Mon Sep 17 00:00:00 2001
From: George Katsikas <giorgakis.katsikas@gmail.com>
Date: Thu, 21 Dec 2023 17:12:31 +0200
Subject: [PATCH] add chemrxiv preprint caller and logger

---
 scipost_django/SciPost_v1/settings/base.py    |  12 +
 .../SciPost_v1/settings/production_do1.py     |   3 +
 scipost_django/scipost/services.py            |  67 +++++
 scipost_django/submissions/forms/__init__.py  |  75 +++--
 .../submit_choose_preprint_server.html        | 260 +++++++++++-------
 5 files changed, 278 insertions(+), 139 deletions(-)

diff --git a/scipost_django/SciPost_v1/settings/base.py b/scipost_django/SciPost_v1/settings/base.py
index 52855255f..5865cea0b 100644
--- a/scipost_django/SciPost_v1/settings/base.py
+++ b/scipost_django/SciPost_v1/settings/base.py
@@ -489,6 +489,12 @@ LOGGING = {
             "filename": "/path/to/logs/arxiv.log",
             "formatter": "verbose",
         },
+        "scipost_file_chemrxiv": {
+            "level": "INFO",
+            "class": "logging.FileHandler",
+            "filename": "/path/to/logs/chemrxiv.log",
+            "formatter": "verbose",
+        },
         "scipost_file_doi": {
             "level": "INFO",
             "class": "logging.FileHandler",
@@ -521,6 +527,12 @@ LOGGING = {
             "propagate": True,
             "formatter": "simple",
         },
+        "scipost.services.chemrxiv": {
+            "handlers": ["scipost_file_chemrxiv"],
+            "level": "INFO",
+            "propagate": True,
+            "formatter": "simple",
+        },
         "scipost.services.doi": {
             "handlers": ["scipost_file_doi"],
             "level": "INFO",
diff --git a/scipost_django/SciPost_v1/settings/production_do1.py b/scipost_django/SciPost_v1/settings/production_do1.py
index 03dbe9f22..043e790d7 100644
--- a/scipost_django/SciPost_v1/settings/production_do1.py
+++ b/scipost_django/SciPost_v1/settings/production_do1.py
@@ -63,6 +63,9 @@ ITHENTICATE_PASSWORD = get_secret("ITHENTICATE_PASSWORD")
 LOGGING["handlers"]["scipost_file_arxiv"][
     "filename"
 ] = "/home/scipost/SciPost_logs/arxiv.log"
+LOGGING["handlers"]["scipost_file_chemrxiv"][
+    "filename"
+] = "/home/scipost/SciPost_logs/chemrxiv.log"
 LOGGING["handlers"]["scipost_file_doi"][
     "filename"
 ] = "/home/scipost/SciPost_logs/doi.log"
diff --git a/scipost_django/scipost/services.py b/scipost_django/scipost/services.py
index 519940b11..090276d9b 100644
--- a/scipost_django/scipost/services.py
+++ b/scipost_django/scipost/services.py
@@ -3,6 +3,8 @@ __license__ = "AGPL v3"
 
 
 # Module for making external api calls as needed in the submissions cycle
+import re
+from django.db.models import query
 import feedparser
 import requests
 import datetime
@@ -13,8 +15,10 @@ from common.utils import remove_extra_spacing
 
 from submissions.constants import FIGSHARE_PREPRINT_SERVERS
 from submissions.models import PreprintServer
+from submissions.regexes import CHEMRXIV_DOI_PATTERN
 
 arxiv_logger = logging.getLogger("scipost.services.arxiv")
+chemrxiv_logger = logging.getLogger("scipost.services.chemrxiv")
 doi_logger = logging.getLogger("scipost.services.doi")
 figshare_logger = logging.getLogger("scipost.services.figshare")
 osfpreprints_logger = logging.getLogger("scipost.services.osfpreprints")
@@ -188,6 +192,93 @@ class ArxivCaller:
         return False
 
 
+class ChemRxivCaller:
+    """
+    Retrieve Submission data from the ChemRxiv public API.
+
+    The identifier may be a ChemRxiv DOI or a 24-character item ID, either bare
+    or embedded in its full URL (see `doi_pattern` and `url_id_pattern`).
+
+    After construction, `is_valid` tells whether a record was retrieved,
+    `metadata` holds the decoded JSON response (None on failure), `data` the
+    fields used to prefill a Submission (None on failure) and `url` the API
+    endpoint that was queried (None on failure).
+    """
+
+    query_base_url = "https://chemrxiv.org/engage/chemrxiv/public-api/v1/items/"
+    doi_pattern = r"(https://doi.org/)?" + f"({CHEMRXIV_DOI_PATTERN})"
+    url_id_pattern = (
+        r"(https://chemrxiv.org/engage/chemrxiv/article-details/)?([a-z0-9]{24})"
+    )
+    valid_patterns = f"(({doi_pattern})|({url_id_pattern}))"
+    # Seconds to wait for the API, so a stalled server cannot hang the caller.
+    request_timeout = 30
+
+    def __init__(self, identifier):
+        self.identifier = identifier
+        self.is_valid = False
+        self.url = None
+        chemrxiv_logger.info("New ChemRxiv call for identifier %s", identifier)
+
+        self.metadata = self._call_chemrxiv()
+        self.data = self._format_data(self.metadata) if self.is_valid else None
+
+    def _call_chemrxiv(self):
+        """
+        Query the API for `self.identifier`.
+
+        Returns the decoded JSON object and sets `is_valid`/`url` on success;
+        returns None and leaves `is_valid` False otherwise.
+        """
+        if m := re.match(self.doi_pattern, self.identifier):
+            url = self.query_base_url + "doi/" + m.group(2)
+        elif m := re.match(self.url_id_pattern, self.identifier):
+            url = self.query_base_url + m.group(2)
+        else:
+            return None
+
+        chemrxiv_logger.info("GET [%s] [request] | %s", self.identifier, url)
+        try:
+            response = requests.get(url, timeout=self.request_timeout)
+            # Error responses may carry a JSON body too; never treat it as a record.
+            response.raise_for_status()
+            response_content = response.json()
+        except (requests.RequestException, ValueError) as error:
+            chemrxiv_logger.warning("GET [%s] [failed] | %s", self.identifier, error)
+            return None
+
+        if not isinstance(response_content, dict):
+            return None
+
+        self.url = url
+        self.is_valid = True
+        return response_content
+
+    @staticmethod
+    def _format_data(data):
+        """Map a ChemRxiv API item (dict) onto the Submission prefill fields."""
+        authors = [
+            " ".join(filter(None, (a.get("firstName"), a.get("lastName"))))
+            for a in data.get("authors") or []
+        ]
+        published = data.get("publishedDate")
+        if published:
+            # fromisoformat() only accepts a trailing "Z" from Python 3.11 on.
+            published = published.replace("Z", "+00:00")
+        doi = data.get("doi") or ""
+
+        return {
+            "title": data.get("title", ""),
+            "author_list": ", ".join(authors),
+            "abstract": data.get("abstract", ""),
+            "pub_date": datetime.datetime.fromisoformat(published)
+            if published
+            else None,
+            "identifier_w_vn_nr": doi,
+            "preprint_link": "https://doi.org/" + doi,
+        }
+
+
 class FigshareCaller:
     """
     Figshare caller to get data from api.figshare.com.
diff --git a/scipost_django/submissions/forms/__init__.py b/scipost_django/submissions/forms/__init__.py
index 8bb3534ca..af0949cd3 100644
--- a/scipost_django/submissions/forms/__init__.py
+++ b/scipost_django/submissions/forms/__init__.py
@@ -80,7 +80,13 @@ from preprints.helpers import get_new_scipost_identifier
 from preprints.models import Preprint
 from proceedings.models import Proceedings
 from profiles.models import Profile
-from scipost.services import DOICaller, ArxivCaller, FigshareCaller, OSFPreprintsCaller
+from scipost.services import (
+    ChemRxivCaller,
+    DOICaller,
+    ArxivCaller,
+    FigshareCaller,
+    OSFPreprintsCaller,
+)
 from scipost.models import Contributor, Remark
 from series.models import Collection
 import strings
@@ -721,45 +727,24 @@ def check_arxiv_identifier_w_vn_nr(identifier):
 def check_chemrxiv_doi(doi):
     """
-    Call Crossref to get ChemRxiv preprint data.
+    Query the ChemRxiv API to get preprint data.
+
+    `doi` is the DOI of the preprint, but can also be a link to the submission,
+    or its database ID.
+
+    Returns a `(data, metadata, identifier)` tuple: the prefill data (with the
+    ChemRxiv preprint server added), the raw API response, and the DOI reported
+    by the API. Raises `forms.ValidationError` if no preprint could be retrieved.
     """
-    caller = DOICaller(doi)
-    if caller.is_valid:
-        data = caller.data
-        metadata = caller.data["crossref_data"]
-    else:
-        error_message = "A preprint associated to this DOI does not exist."
-        raise forms.ValidationError(error_message)
-
-    # Check if the type of this resource is indeed a preprint
-    if "subtype" in metadata:
-        if metadata["subtype"] != "preprint":
-            error_message = (
-                "This does not seem to be a preprint: the type "
-                "returned by Crossref on behalf of "
-                "%(preprint_server) is %(subtype). "
-                "Please contact techsupport."
-            )
-            raise forms.ValidationError(
-                error_message,
-                code="wrong_subtype",
-                params={
-                    "preprint_server": preprint_server.name,
-                    "subtype": metadata["subtype"],
-                },
-            )
-    else:
-        raise forms.ValidationError(
-            "Crossref failed to return a subtype. Please contact techsupport.",
-            code="wrong_subtype",
+    caller = ChemRxivCaller(doi)
+    if not caller.is_valid or not (data := caller.data):
+        error_message = (
+            "The preprint could not be found. Please check the provided identifier."
         )
+        raise forms.ValidationError(error_message)
 
     # Explicitly add ChemRxiv as the preprint server:
     data["preprint_server"] = PreprintServer.objects.get(name="ChemRxiv")
-    data["preprint_link"] = "https://doi.org/%s" % doi
-    # Build the identifier by stripping the DOI prefix:
-    identifier = doi
-    data["identifier_w_vn_nr"] = identifier
-    return data, metadata, identifier
+    return data, caller.metadata, data.get("identifier_w_vn_nr")
 
 
 def check_figshare_identifier_w_vn_nr(preprint_server, figshare_identifier_w_vn_nr):
@@ -1054,14 +1033,14 @@ class ChemRxivPrefillForm(SubmissionPrefillForm):
 
     chemrxiv_doi = forms.RegexField(
         label="",
-        regex=CHEMRXIV_DOI_PATTERN,
+        regex=ChemRxivCaller.valid_patterns,
         strip=True,
         error_messages={"invalid": "Invalid ChemRxiv DOI"},
         widget=forms.TextInput(),
     )
 
     def __init__(self, *args, **kwargs):
-        self.crossref_data = {}
+        self.data = {}
         self.metadata = {}
         super().__init__(*args, **kwargs)
 
@@ -1070,7 +1049,7 @@ class ChemRxivPrefillForm(SubmissionPrefillForm):
         identifier = self.cleaned_data.get("chemrxiv_doi", None).partition("/")[2]
 
         check_identifier_is_unused(identifier)
-        self.crossref_data, self.metadata, identifier = check_chemrxiv_doi(
+        self.data, self.metadata, identifier = check_chemrxiv_doi(
             self.cleaned_data["chemrxiv_doi"]
         )
         return identifier
@@ -1080,7 +1059,21 @@ class ChemRxivPrefillForm(SubmissionPrefillForm):
         Return dictionary to prefill `SubmissionForm`.
         """
         form_data = super().get_prefill_data()
-        form_data.update(self.crossref_data)
+        form_data.update(self.data)
+
+        # check metadata for specialties
+        category_titles = (
+            [c["name"].title() for c in self.metadata["categories"]]
+            if self.metadata
+            else []
+        )
+        form_data["specialties"] = Specialty.objects.filter(name__in=category_titles)
+
+        # check keywords for topics
+        keyword_titles = (
+            [k.title() for k in self.metadata["keywords"]] if self.metadata else []
+        )
+        form_data["topics"] = Topic.objects.filter(name__in=keyword_titles)
 
         if self.is_resubmission():
             form_data.update(
@@ -1619,10 +1612,10 @@ class SubmissionForm(forms.ModelForm):
 
     def clean_title(self):
         return remove_extra_spacing(self.cleaned_data["title"])
-    
+
     def clean_abstract(self):
         return remove_extra_spacing(self.cleaned_data["abstract"])
-    
+
     @transaction.atomic
     def save(self):
         """
diff --git a/scipost_django/submissions/templates/submissions/submit_choose_preprint_server.html b/scipost_django/submissions/templates/submissions/submit_choose_preprint_server.html
index 23cd5b349..ac9ef0248 100644
--- a/scipost_django/submissions/templates/submissions/submit_choose_preprint_server.html
+++ b/scipost_django/submissions/templates/submissions/submit_choose_preprint_server.html
@@ -4,7 +4,9 @@
 {% load bootstrap %}
 {% load journals_extras %}
 
-{% block pagetitle %}: submit manuscript{% endblock pagetitle %}
+{% block pagetitle %}
+  : submit manuscript
+{% endblock pagetitle %}
 
 {% block breadcrumb_items %}
   {{ block.super }}
@@ -14,7 +16,16 @@
 {% block content %}
   <div class="row">
     <div class="col-12">
-      <h1 class="highlight">{% if thread_hash %}Resubmit a{% else %}Submit a new{% endif %} manuscript to {{ journal }}</h1>
+      <h1 class="highlight">
+
+        {% if thread_hash %}
+          Resubmit a
+        {% else %}
+          Submit a new
+        {% endif %}
+
+        manuscript to {{ journal }}
+      </h1>
     </div>
   </div>
 
@@ -22,106 +33,159 @@
 
     <div class="row">
       <div class="col-md-6">
-	<h2 class=highlight>Step 3</h2>
-	{% if preprint_server_list|length > 1 %}
-	  <h3>Which preprint server do you wish to use for your {% if thread_hash %}re{% endif %}submission?</h3>
-	  <p>SciPost's own preprint server is always available.
-	    We however strongly encourage you to use your field's customary
-	    preprint server, to ensure maximal dissemination of your preprint.</p>
-	  <p>If you use an external preprint server, make sure your submission
-	    is publicly available there before proceeding here (our machines will query theirs
-	    in order to automatically prefill our submission form for you).</p>
-	{% else %}
-	  <h3>Please proceed with the SciPost preprint server (others are not available)</h3>
-	{% endif %}
-      </div>
-      <div class="col-md-6">
-	{% include 'submissions/submit_steps.html' with step=3 thread_hash=thread_hash %}
+        <h2 class=highlight>Step 3</h2>
+
+        {% if preprint_server_list|length > 1 %}
+          <h3>
+            Which preprint server do you wish to use for your
+
+            {% if thread_hash %}re{% endif %}
+
+            submission?
+          </h3>
+          <p>
+            SciPost's own preprint server is always available.
+            We however strongly encourage you to use your field's customary
+            preprint server, to ensure maximal dissemination of your preprint.
+          </p>
+          <p>
+            If you use an external preprint server, make sure your submission
+            is publicly available there before proceeding here (our machines will query theirs
+            in order to automatically prefill our submission form for you).
+          </p>
+        {% else %}
+          <h3>Please proceed with the SciPost preprint server (others are not available)</h3>
+        {% endif %}
+
       </div>
+      <div class="col-md-6">{% include 'submissions/submit_steps.html' with step=3 thread_hash=thread_hash %}</div>
     </div>
 
-    <br>
+    <br />
     <div class="container">
       <div class="row row-cols-1 row-cols-lg-2 row-cols-xl-3">
-	{% for preprint_server in preprint_server_list %}
-	  <div class="col col-sm-12 col-lg-6 mb-2">
-	    <div class="card mb-4">
-	      <div class="card-header bg-dark text-white">
-		<h3 class="p-2 m-0"><em>{% if thread_hash %}Resubmit{% else %}Submit{% endif %} via</em>&emsp;<strong class="text-warning">{{ preprint_server.server }}</strong></h3>
-	      </div>
-	      <div class="card-body text-center">
-		{% if preprint_server.server.name == 'SciPost' %}
-		  <form action="{% url 'submissions:submit_manuscript_scipost' journal_doi_label=journal.doi_label %}" method="get">
-		    {{ preprint_server.prefill_form }}
-		    {% if thread_hash %}
-		      <input type="hidden" name="thread_hash" value="{{ thread_hash }}"/>
-		    {% endif %}
-		    <input type="submit" class="btn btn-primary text-white" value="Go to the SciPost submission form"/>
-		  </form>
-		{% elif preprint_server.server.name == 'arXiv' %}
-		  <h3>Please provide the arXiv identifier for your Submission</h3>
-		  <p><em>without the website prefix but with version number, e.g.
-		    <br> ####.####(#)v#(#)</em></p>
-		  <form action="{% url 'submissions:submit_manuscript_arxiv' journal_doi_label=journal.doi_label %}" method="get">
-		    {{ preprint_server.prefill_form }}
-		    {% if thread_hash %}
-		      <input type="hidden" name="thread_hash" value="{{ thread_hash }}"/>
-		    {% endif %}
-	            {% include 'bi/arrow-right.html' %}
-		    <input type="submit" class="btn btn-primary text-white" value="Query arXiv"/>
-		  </form>
-		{% elif preprint_server.server.name == 'ChemRxiv' %}
-		  <h3>Please provide the ChemRxiv DOI for your Submission</h3>
-		  <p class="mb-0">Pattern: either
-		    <ul>
-		      <li>old style: <em>10.#####/chemrxiv.#######(.v#)</em></li>
-		      <li>new style: <em>10.#####/chemrxiv-*****(-v#)</em></li>
-		    </ul>
-		  </p>
-		  <form action="{% url 'submissions:submit_manuscript_chemrxiv' journal_doi_label=journal.doi_label %}" method="get">
-		    {{ preprint_server.prefill_form }}
-		    {% if thread_hash %}
-		      <input type="hidden" name="thread_hash" value="{{ thread_hash }}"/>
-		    {% endif %}
-	            {% include 'bi/arrow-right.html' %}
-		    <input type="submit" class="btn btn-primary text-white" value="Query ChemRxiv"/>
-		  </form>
-		{% elif preprint_server.server.served_by and preprint_server.server.served_by.name == 'OSFPreprints' %}
-		  <h3>Please provide the {{ preprint_server.server.name }} identifier for your Submission</h3>
-		  <p><em>just a few lowercase alphanumeric characters, e.g. #####</em></p>
-		  <p><em>Hint: on the submission's {{ preprint_server.server.name }} page, look for the last few characters in the DOI to see the identifier.</em></p>
-		  <p><em>Example: if the DOI is <strong>10.31235/osf.io/xxzbx</strong>,
-		    then you need <strong>xxzbx</strong></em></p>
-		  <form action="{% url 'submissions:submit_manuscript_osfpreprints' journal_doi_label=journal.doi_label %}" method="get">
-		    {{ preprint_server.prefill_form }}
-		    {% if thread_hash %}
-		      <input type="hidden" name="thread_hash" value="{{ thread_hash }}"/>
-		    {% endif %}
-	            {% include 'bi/arrow-right.html' %}
-		    <input type="submit" class="btn btn-primary text-white" value="Query {{ preprint_server.server.name }}"/>
-		  </form>
-		{% elif preprint_server.server.served_by and preprint_server.server.served_by.name == 'Figshare' %}
-		  <h3>Please provide the {{ preprint_server.server.name }} identifier for your Submission</h3>
-		  <p><em>without the url prefix but with version number, e.g.
-		    <br> ########.v#</em></p>
-		  <p><em>Hint: on the submission's {{ preprint_server.server.name }} page, click on the "Cite" button to see the identifier.</em></p>
-		  <form action="{% url 'submissions:submit_manuscript_figshare' journal_doi_label=journal.doi_label %}" method="get">
-		    {{ preprint_server.prefill_form }}
-		    {% if thread_hash %}
-		      <input type="hidden" name="thread_hash" value="{{ thread_hash }}"/>
-		    {% endif %}
-	            {% include 'bi/arrow-right.html' %}
-		    <input type="submit" class="btn btn-primary text-white" value="Query {{ preprint_server.server.name }}"/>
-		  </form>
-		{% endif %}
-	      </div>
-	    </div>
-	  </div>
-	{% endfor %}
+
+        {% for preprint_server in preprint_server_list %}
+          <div class="col col-sm-12 col-lg-6 mb-2">
+            <div class="card mb-4">
+              <div class="card-header bg-dark text-white">
+                <h3 class="p-2 m-0">
+                  <em>
+
+                    {% if thread_hash %}
+                      Resubmit
+                    {% else %}
+                      Submit
+                    {% endif %}
+
+                    via</em>&emsp;<strong class="text-warning">{{ preprint_server.server }}</strong>
+                  </h3>
+                </div>
+                <div class="card-body">
+
+                  {% if preprint_server.server.name == 'SciPost' %}
+                    <form action="{% url 'submissions:submit_manuscript_scipost' journal_doi_label=journal.doi_label %}"
+                          method="get">
+                      {{ preprint_server.prefill_form }}
+
+                      {% if thread_hash %}<input type="hidden" name="thread_hash" value="{{ thread_hash }}" />{% endif %}
+
+                      <input type="submit"
+                             class="btn btn-primary text-white"
+                             value="Go to the SciPost submission form" />
+                    </form>
+                  {% elif preprint_server.server.name == 'arXiv' %}
+                    <h3>Please provide the arXiv identifier for your Submission</h3>
+                    <p>
+                      <em>without the website prefix but with version number, e.g.
+                        <br />
+                      ####.####(#)v#(#)</em>
+                    </p>
+                    <form action="{% url 'submissions:submit_manuscript_arxiv' journal_doi_label=journal.doi_label %}"
+                          method="get">
+                      {{ preprint_server.prefill_form }}
+
+                      {% if thread_hash %}<input type="hidden" name="thread_hash" value="{{ thread_hash }}" />{% endif %}
+
+                      {% include 'bi/arrow-right.html' %}
+                      <input type="submit" class="btn btn-primary text-white" value="Query arXiv" />
+                    </form>
+                  {% elif preprint_server.server.name == 'ChemRxiv' %}
+                    <h3>Please provide the ChemRxiv DOI/URL for your Submission</h3>
+                    <p class="mb-0">
+                      Patterns:
+                      <ul>
+                        <li>10.#####/chemrxiv-*****(-v#)</li>
+                        <li>https://chemrxiv.org/****/article-details/****</li>
+                      </ul>
+                    </p>
+                    <form action="{% url 'submissions:submit_manuscript_chemrxiv' journal_doi_label=journal.doi_label %}"
+                          method="get">
+                      {{ preprint_server.prefill_form }}
+
+                      {% if thread_hash %}<input type="hidden" name="thread_hash" value="{{ thread_hash }}" />{% endif %}
+
+                      {% include 'bi/arrow-right.html' %}
+                      <input type="submit"
+                             class="btn btn-primary text-white"
+                             value="Query ChemRxiv" />
+                    </form>
+                  {% elif preprint_server.server.served_by and preprint_server.server.served_by.name == 'OSFPreprints' %}
+                    <h3>Please provide the {{ preprint_server.server.name }} identifier for your Submission</h3>
+                    <p>
+                      <em>just a few lowercase alphanumeric characters, e.g. #####</em>
+                    </p>
+                    <p>
+                      <em>Hint: on the submission's {{ preprint_server.server.name }} page, look for the last few characters in the DOI to see the identifier.</em>
+                    </p>
+                    <p>
+                      <em>Example: if the DOI is <strong>10.31235/osf.io/xxzbx</strong>,
+                      then you need <strong>xxzbx</strong></em>
+                    </p>
+                    <form action="{% url 'submissions:submit_manuscript_osfpreprints' journal_doi_label=journal.doi_label %}"
+                          method="get">
+                      {{ preprint_server.prefill_form }}
+
+                      {% if thread_hash %}<input type="hidden" name="thread_hash" value="{{ thread_hash }}" />{% endif %}
+
+                      {% include 'bi/arrow-right.html' %}
+                      <input type="submit"
+                             class="btn btn-primary text-white"
+                             value="Query {{ preprint_server.server.name }}" />
+                    </form>
+                  {% elif preprint_server.server.served_by and preprint_server.server.served_by.name == 'Figshare' %}
+                    <h3>Please provide the {{ preprint_server.server.name }} identifier for your Submission</h3>
+                    <p>
+                      <em>without the url prefix but with version number, e.g.
+                        <br />
+                      ########.v#</em>
+                    </p>
+                    <p>
+                      <em>Hint: on the submission's {{ preprint_server.server.name }} page, click on the "Cite" button to see the identifier.</em>
+                    </p>
+                    <form action="{% url 'submissions:submit_manuscript_figshare' journal_doi_label=journal.doi_label %}"
+                          method="get">
+                      {{ preprint_server.prefill_form }}
+
+                      {% if thread_hash %}<input type="hidden" name="thread_hash" value="{{ thread_hash }}" />{% endif %}
+
+                      {% include 'bi/arrow-right.html' %}
+                      <input type="submit"
+                             class="btn btn-primary text-white"
+                             value="Query {{ preprint_server.server.name }}" />
+                    </form>
+                  {% endif %}
+
+                </div>
+              </div>
+            </div>
+          {% endfor %}
+
+        </div>
       </div>
-    </div>
 
-  {% else %}
-    <h3>You are currently not allowed to submit a manuscript.</h3>
-  {% endif %}
-{% endblock content %}
+    {% else %}
+      <h3>You are currently not allowed to submit a manuscript.</h3>
+    {% endif %}
+
+  {% endblock content %}
-- 
GitLab