From 8bec49b7571bf1beb7310e03b15edae5f47b6ef1 Mon Sep 17 00:00:00 2001
From: "J.-S. Caux" <J.S.Caux@uva.nl>
Date: Fri, 30 Apr 2021 05:51:22 +0200
Subject: [PATCH] Basic working version of ChemRxiv integration

---
 preprints/models.py                           |  10 +-
 scipost/services.py                           |  77 +++++++++++++
 submissions/constants.py                      |  20 +++-
 submissions/forms.py                          | 104 ++++++++++++++++--
 .../submit_choose_preprint_server.html        |  12 ++
 submissions/urls.py                           |   5 +
 submissions/views.py                          |  41 +++++--
 7 files changed, 249 insertions(+), 20 deletions(-)

diff --git a/preprints/models.py b/preprints/models.py
index 846566100..eb1d8fc41 100644
--- a/preprints/models.py
+++ b/preprints/models.py
@@ -47,7 +47,10 @@ class Preprint(models.Model):
         """Return the absolute URL of the pdf for the meta tag for Google Scholar."""
         if self._file:  # means this is a SciPost-hosted preprint
             return "https://scipost.org%s" % self.get_absolute_url()
-        return self.get_absolute_url().replace("/abs/", "/pdf/")
+        if self.is_arXiv:
+            return self.get_absolute_url().replace("/abs/", "/pdf/")
+        # ChemRxiv (or any other external server): never fall through and return None
+        return self.get_absolute_url()
 
     @property
     def is_SciPost(self):
@@ -58,3 +61,8 @@ class Preprint(models.Model):
     def is_arXiv(self):
         """Return True if this preprint is hosted on arXiv."""
         return 'arxiv.org' in self.url
+
+    @property
+    def is_ChemRxiv(self):
+        """Return True if this preprint is hosted on ChemRxiv."""
+        return 'chemrxiv' in self.url
diff --git a/scipost/services.py b/scipost/services.py
index 63278cdff..3308bf2e3 100644
--- a/scipost/services.py
+++ b/scipost/services.py
@@ -11,6 +11,7 @@ import logging
 
 arxiv_logger = logging.getLogger('scipost.services.arxiv')
 doi_logger = logging.getLogger('scipost.services.doi')
+figshare_logger = logging.getLogger('scipost.services.figshare')
 
 
 class DOICaller:
@@ -159,3 +160,79 @@ class ArxivCaller:
         if len(data.get('entries', [])) > 0:
             return 'title' in data['entries'][0]
         return False
+
+
+class FigshareCaller:
+    """
+    Figshare caller to get data from api.figshare.com.
+
+    The identifier must have the form `<article id>.v<version nr>`. After
+    instantiation, `is_valid` tells whether that article version was found;
+    if so, `metadata` holds the raw API response and `data` the formatted fields.
+    """
+
+    query_base_url = 'https://api.figshare.com/v2/articles/%s/versions/%s'
+
+    def __init__(self, identifier_w_vn_nr):
+        self.identifier_w_vn_nr = identifier_w_vn_nr
+        # partition never raises, even if the '.v<nr>' suffix is missing
+        self.identifier, __, self.version = identifier_w_vn_nr.partition('.v')
+        figshare_logger.info(
+            'New figshare API call for identifier %s.v%s' % (self.identifier, self.version))
+        self._call_figshare()
+        if self.is_valid:
+            self._format_data()
+
+    def _call_figshare(self):
+        """Query the API and set `is_valid` (plus the raw data if found)."""
+        url = self.query_base_url % (self.identifier, self.version)
+        request = requests.get(url)
+        try:
+            response_content = request.json()
+        except ValueError:  # body is not JSON (e.g. a gateway error page)
+            response_content = {}
+        figshare_logger.info('GET [{identifier_w_vn_nr}] [request] | {url}'.format(
+            identifier_w_vn_nr=self.identifier_w_vn_nr,
+            url=url,
+        ))
+        if self._result_present(response_content):
+            self.is_valid = True
+            self._figshare_data = response_content
+            self.metadata = response_content
+        else:
+            self.is_valid = False
+
+        figshare_logger.info('GET [{identifier}] [response {valid}] | {response}'.format(
+            identifier=self.identifier,
+            valid='VALID' if self.is_valid else 'INVALID',
+            response=response_content,
+        ))
+
+    def _format_data(self):
+        """Extract from the raw response the fields used to prefill forms."""
+        title = self._figshare_data['title']
+        author_list = [author['full_name'] for author in self._figshare_data.get('authors', [])]
+        # author_list is given as a comma separated list of names on the relevant models (Commentary, Submission)
+        author_list = ", ".join(author_list)
+        chemrxiv_identifier_w_vn_nr = self._figshare_data['doi'].partition('chemrxiv.')[2]
+        abstract = self._figshare_data['description']
+        pub_date = self._figshare_data['published_date']
+        self.data = {
+            'title': title,
+            'author_list': author_list,
+            'chemrxiv_identifier_w_vn_nr': chemrxiv_identifier_w_vn_nr,
+            'pub_abstract': abstract,
+            'abstract': abstract,  # Duplicate for Commentary/Submission cross-compatibility
+            'pub_date': pub_date,
+        }
+        figshare_logger.info('GET [{identifier}] [formatted data] | {data}'.format(
+            identifier=self.identifier,
+            data=self.data,
+        ))
+
+    def _result_present(self, data):
+        """Return True if `data` describes the requested article."""
+        # A response without an 'id' key (e.g. an error body) must not raise here.
+        if not isinstance(data, dict) or not self.identifier.isdigit():
+            return False
+        return data.get('id') == int(self.identifier)
diff --git a/submissions/constants.py b/submissions/constants.py
index c1578141f..bacf8cd21 100644
--- a/submissions/constants.py
+++ b/submissions/constants.py
@@ -266,11 +266,19 @@ PLAGIARISM_STATUSES = (
 )
 
 # Define regexes
-arxiv_regex_wo_vn = '[0-9]{4,}.[0-9]{4,}'
-arxiv_regex_w_vn = '[0-9]{4,}.[0-9]{4,}v[0-9]{1,2}'
 scipost_regex_wo_vn = 'scipost_[0-9]{4,}_[0-9]{4,}'
 scipost_regex_w_vn = 'scipost_[0-9]{4,}_[0-9]{4,}v[0-9]{1,2}'
-SUBMISSIONS_WO_VN_REGEX = '(?P<identifier_wo_vn_nr>(%s|%s))' % (
-    arxiv_regex_wo_vn, scipost_regex_wo_vn)
-SUBMISSIONS_COMPLETE_REGEX = '(?P<identifier_w_vn_nr>(%s|%s))' % (
-    arxiv_regex_w_vn, scipost_regex_w_vn)
+arxiv_regex_wo_vn = '[0-9]{4,}.[0-9]{4,}'
+arxiv_regex_w_vn = '[0-9]{4,}.[0-9]{4,}v[0-9]{1,2}'
+chemrxiv_regex_wo_vn = 'chemrxiv_[0-9]+'
+chemrxiv_regex_w_vn = r'chemrxiv_[0-9]+\.v[0-9]{1,2}'
+SUBMISSIONS_WO_VN_REGEX = '(?P<identifier_wo_vn_nr>(%s|%s|%s))' % (
+    scipost_regex_wo_vn,
+    arxiv_regex_wo_vn,
+    chemrxiv_regex_wo_vn
+)
+SUBMISSIONS_COMPLETE_REGEX = '(?P<identifier_w_vn_nr>(%s|%s|%s))' % (
+    scipost_regex_w_vn,
+    arxiv_regex_w_vn,
+    chemrxiv_regex_w_vn
+)
diff --git a/submissions/forms.py b/submissions/forms.py
index 1c603d1d0..996cd0d05 100644
--- a/submissions/forms.py
+++ b/submissions/forms.py
@@ -45,13 +45,15 @@ from ontology.models import AcademicField, Specialty
 from preprints.helpers import get_new_scipost_identifier
 from preprints.models import Preprint
 from profiles.models import Profile
-from scipost.services import ArxivCaller
+from scipost.services import ArxivCaller, FigshareCaller
 from scipost.models import Contributor, Remark
 import strings
 
 import iThenticate
 
 ARXIV_IDENTIFIER_PATTERN_NEW = r'^[0-9]{4,}\.[0-9]{4,5}v[0-9]{1,2}$'
+CHEMRXIV_DOI_PATTERN = r'^https://doi\.org/10\.26434/chemrxiv\.[0-9]+\.v[0-9]{1,2}$'
+CHEMRXIV_IDENTIFIER_PATTERN = r'^[0-9]+\.v[0-9]{1,2}$'
 
 
 class SubmissionSearchForm(forms.Form):
@@ -163,12 +165,36 @@ def check_arxiv_identifier_w_vn_nr(identifier):
 
     if published_id:
         error_message = ('This paper has been published under DOI %(published_id)s. '
-                         'It cannot be submitted again.'),
+                         'It cannot be submitted again.')
         raise forms.ValidationError(error_message, code='published',
                                     params={'published_id': published_id})
     return arxiv_data, metadata, identifier
 
 
+def check_chemrxiv_identifier_w_vn_nr(chemrxiv_identifier_w_vn_nr):
+    """Validate a ChemRxiv identifier via Figshare; return (data, metadata, identifier)."""
+    caller = FigshareCaller(chemrxiv_identifier_w_vn_nr)
+    if caller.is_valid:
+        chemrxiv_data = caller.data
+        metadata = caller.metadata
+    else:
+        error_message = 'A preprint associated to this identifier does not exist.'
+        raise forms.ValidationError(error_message)
+
+    # Check if this article has already been published (according to Figshare)
+    published_id = None
+    if 'resource_doi' in metadata:
+        published_id = metadata['resource_doi']
+
+    if published_id:
+        error_message = ('This paper has been published under DOI %(published_id)s. '
+                         'It cannot be submitted again.')
+        raise forms.ValidationError(error_message, code='published',
+                                    params={'published_id': published_id})
+    identifier = 'chemrxiv_' + chemrxiv_identifier_w_vn_nr
+    return chemrxiv_data, metadata, identifier
+
+
 class SubmissionPrefillForm(forms.Form):
     """
     Base class for all SubmissionPrefillForms (one per integrated preprint server).
@@ -297,6 +323,57 @@ class ArXivPrefillForm(SubmissionPrefillForm):
         return form_data
 
 
+class ChemRxivPrefillForm(SubmissionPrefillForm):
+    """
+    Provide initial data for SubmissionForm (ChemRxiv preprint server route).
+
+    This adds the `chemrxiv_identifier_w_vn_nr` kwarg to those
+    from the `SubmissionPrefillForm` base class.
+    """
+    chemrxiv_identifier_w_vn_nr = forms.RegexField(
+        label='',
+        regex=CHEMRXIV_IDENTIFIER_PATTERN, strip=True,
+        error_messages={'invalid': 'Invalid ChemRxiv identifier'},
+        widget=forms.TextInput()
+    )
+
+    def __init__(self, *args, **kwargs):
+        self.preprint_server = 'ChemRxiv'
+        self.chemrxiv_data = {}
+        self.metadata = {}
+        self.identifier = None
+        super().__init__(*args, **kwargs)
+
+    def clean_chemrxiv_identifier_w_vn_nr(self):
+        """
+        Do basic prechecks based on the ChemRxiv identifier.
+        """
+        self.chemrxiv_data, self.metadata, self.identifier = \
+            check_chemrxiv_identifier_w_vn_nr(self.cleaned_data['chemrxiv_identifier_w_vn_nr'])
+        check_identifier_is_unused(self.identifier)
+        return self.cleaned_data['chemrxiv_identifier_w_vn_nr']
+
+    def get_prefill_data(self):
+        """
+        Return dictionary to prefill `SubmissionForm`.
+        """
+        form_data = super().get_prefill_data()
+        form_data.update(self.chemrxiv_data)
+        form_data['identifier_w_vn_nr'] = self.identifier
+        form_data['chemrxiv_doi'] = ('https://doi.org/10.26434/chemrxiv.'
+                                     + self.cleaned_data['chemrxiv_identifier_w_vn_nr'])
+
+        if self.is_resubmission():
+            form_data.update({
+                'approaches': self.latest_submission.approaches,
+                'referees_flagged': self.latest_submission.referees_flagged,
+                'referees_suggested': self.latest_submission.referees_suggested,
+                'acad_field': self.latest_submission.acad_field.id,
+                'specialties': [s.id for s in self.latest_submission.specialties.all()]
+            })
+        return form_data
+
+
 ##################
 #
 # Submission form
@@ -328,6 +405,7 @@ class SubmissionForm(forms.ModelForm):
         help_text=('Please submit the processed .pdf (not the source files; '
                    'these will only be required at the post-acceptance proofs stage)'))
     arxiv_link = forms.URLField(label='arXiv link (including version nr)')
+    chemrxiv_doi = forms.URLField(label='ChemRxiv DOI (including version nr)')
 
     class Meta:
         model = Submission
@@ -355,6 +433,8 @@ class SubmissionForm(forms.ModelForm):
             'thread_hash': forms.HiddenInput(),
             'arxiv_link': forms.TextInput(
                 attrs={'placeholder': 'Full URL, ex.: https://arxiv.org/abs/1234.56789v1'}),
+            'chemrxiv_doi': forms.TextInput(
+                attrs={'placeholder': 'Full URL, ex.: https://doi.org/10.26434/chemrxiv.1234567.v1'}),
             'code_repository_url': forms.TextInput(
                 attrs={'placeholder': 'If applicable; please give the full URL'}),
             'data_repository_url': forms.TextInput(
@@ -379,11 +459,11 @@ class SubmissionForm(forms.ModelForm):
     def __init__(self, *args, **kwargs):
         self.requested_by = kwargs.pop('requested_by')
         self.preprint_server = kwargs.pop('preprint_server')
-        data = data = args[0] if len(args) > 1 else kwargs.get('data', {})
+        data = args[0] if args else kwargs.get('data', {})
         self.thread_hash = kwargs['initial'].get('thread_hash', None) or data.get('thread_hash')
         self.is_resubmission_of = kwargs['initial'].get(
             'is_resubmission_of', None) or data.get('is_resubmission_of')
-        self.arxiv_data = {}
+        self.preprint_data = {}
         self.metadata = {}  # container for possible external server-provided metadata
 
         super().__init__(*args, **kwargs)
@@ -398,6 +478,8 @@ class SubmissionForm(forms.ModelForm):
         if not self.preprint_server == 'arXiv':
             # No arXiv-specific data required.
             del self.fields['arxiv_link']
+        if not self.preprint_server == 'ChemRxiv':
+            del self.fields['chemrxiv_doi']
 
         if not self.is_resubmission():
             del self.fields['is_resubmission_of']
@@ -495,7 +577,10 @@ class SubmissionForm(forms.ModelForm):
         check_identifier_is_unused(identifier)
 
         if self.preprint_server == 'arXiv':
-            self.arxiv_data, self.metadata, identifier = check_arxiv_identifier_w_vn_nr(identifier)
+            self.preprint_data, self.metadata, identifier = check_arxiv_identifier_w_vn_nr(identifier)
+        elif self.preprint_server == 'ChemRxiv':
+            self.preprint_data, self.metadata, identifier = check_chemrxiv_identifier_w_vn_nr(
+                identifier.split('chemrxiv_', 1)[-1])
         return identifier
 
     @transaction.atomic
@@ -507,12 +592,17 @@ class SubmissionForm(forms.ModelForm):
         submission.submitted_by = self.requested_by.contributor
 
         # Save identifiers
+        url = ''  # SciPost-hosted preprints have no external url
+        if self.preprint_server == 'arXiv':
+            url = self.cleaned_data.get('arxiv_link', '')
+        elif self.preprint_server == 'ChemRxiv':
+            url = self.cleaned_data.get('chemrxiv_doi', '')
         preprint, __ = Preprint.objects.get_or_create(
             identifier_w_vn_nr=self.cleaned_data['identifier_w_vn_nr'],
-            url=self.cleaned_data.get('arxiv_link', ''),
+            url=url,
             _file=self.cleaned_data.get('preprint_file', None),
         )
-        # Save metadata directly from ArXiv call without possible user interception
+        # Save metadata directly from preprint server call without possible user interception
         submission.metadata = self.metadata
         submission.preprint = preprint
 
diff --git a/submissions/templates/submissions/submit_choose_preprint_server.html b/submissions/templates/submissions/submit_choose_preprint_server.html
index 8d8726261..7d525641c 100644
--- a/submissions/templates/submissions/submit_choose_preprint_server.html
+++ b/submissions/templates/submissions/submit_choose_preprint_server.html
@@ -70,6 +70,18 @@
 		{% include 'bi/arrow-right.html' %}
 		<input type="submit" class="btn btn-primary text-white" value="Query arXiv"/>
 	      </form>
+	      {% elif server.name == 'ChemRxiv' %}
+	      <h3>Please provide the ChemRxiv identifier for your Submission</h3>
+	      <p><em>without the url prefix but with version number, e.g.
+		<br> #######.v#</em></p>
+	      <form action="{% url 'submissions:submit_manuscript_chemrxiv' journal_doi_label=journal.doi_label %}" method="get">
+		{{ chemrxiv_prefill_form }}
+		{% if thread_hash %}
+		<input type="hidden" name="thread_hash" value="{{ thread_hash }}"/>
+		{% endif %}
+		{% include 'bi/arrow-right.html' %}
+		<input type="submit" class="btn btn-primary text-white" value="Query ChemRxiv"/>
+	      </form>
 	      {% endif %}
 	    </div>
 	  </div>
diff --git a/submissions/urls.py b/submissions/urls.py
index e13a35eb2..e9ddab7f8 100644
--- a/submissions/urls.py
+++ b/submissions/urls.py
@@ -153,6 +153,11 @@ urlpatterns = [
         views.RequestSubmissionUsingArXivView.as_view(),
         name='submit_manuscript_arxiv'
     ),
+    path( # Submit using ChemRxiv (thread_hash as GET param if resubmission)
+        'submit_manuscript/<journal_doi_label:journal_doi_label>/chemrxiv',
+        views.RequestSubmissionUsingChemRxivView.as_view(),
+        name='submit_manuscript_chemrxiv'
+    ),
 
     url(
         r'^withdraw_manuscript/{regex}/$'.format(regex=SUBMISSIONS_COMPLETE_REGEX),
diff --git a/submissions/views.py b/submissions/views.py
index a754173b8..db2364fa9 100644
--- a/submissions/views.py
+++ b/submissions/views.py
@@ -40,7 +40,7 @@ from .models import (
     EditorialAssignment, RefereeInvitation, Report, SubmissionEvent)
 from .mixins import SubmissionMixin, SubmissionAdminViewMixin
 from .forms import (
-    ArXivPrefillForm, SciPostPrefillForm,
+    SciPostPrefillForm, ArXivPrefillForm, ChemRxivPrefillForm,
     SubmissionForm, SubmissionSearchForm, RecommendationVoteForm,
     ConsiderAssignmentForm, InviteEditorialAssignmentForm, EditorialAssignmentForm,
     VetReportForm, SetRefereeingDeadlineForm, RefereeSearchForm,
@@ -163,15 +163,24 @@ def submit_choose_preprint_server(request, journal_doi_label):
     thread_hash = request.GET.get('thread_hash') or None
     # Each integrated preprint server has a prefill form:
     scipost_prefill_form = SciPostPrefillForm(
-        requested_by=request.user, journal_doi_label=journal_doi_label, thread_hash=thread_hash)
+        requested_by=request.user,
+        journal_doi_label=journal_doi_label,
+        thread_hash=thread_hash)
     arxiv_prefill_form = ArXivPrefillForm(
-        requested_by=request.user, journal_doi_label=journal_doi_label, thread_hash=thread_hash)
+        requested_by=request.user,
+        journal_doi_label=journal_doi_label,
+        thread_hash=thread_hash)
+    chemrxiv_prefill_form = ChemRxivPrefillForm(
+        requested_by=request.user,
+        journal_doi_label=journal_doi_label,
+        thread_hash=thread_hash)
     context = {
         'journal': journal,
         'thread_hash': thread_hash,
         'preprint_servers': preprint_servers,
         'scipost_prefill_form': scipost_prefill_form,
         'arxiv_prefill_form': arxiv_prefill_form,
+        'chemrxiv_prefill_form': chemrxiv_prefill_form,
     }
     return render(request, 'submissions/submit_choose_preprint_server.html', context)
 
@@ -191,7 +200,7 @@ class RequestSubmissionView(LoginRequiredMixin, PermissionRequiredMixin, CreateV
 
     def get(self, request, journal_doi_label):
         """
-        Redirect to `submit_choose_preprint_server` if arXiv identifier is not known.
+        Redirect to `submit_choose_preprint_server` if preprint identifier is not known.
         """
         if self.prefill_form.is_valid():
             if self.prefill_form.is_resubmission():
@@ -201,13 +210,13 @@ class RequestSubmissionView(LoginRequiredMixin, PermissionRequiredMixin, CreateV
                                 'Please check everything carefully!')
                 messages.success(request, resubmessage, fail_silently=True)
             else:
-                if self.prefill_form.preprint_server == 'arXiv':
+                if self.prefill_form.preprint_server in ['arXiv', 'ChemRxiv']:
                     readymessage = ('We have pre-filled the form where possible. '
                                     'Please check everything carefully!')
                 else:
                     readymessage = 'Your submission form is now ready to be filled in.'
                 messages.success(request, readymessage, fail_silently=True)
-            # Gather data from ArXiv API if prefill form is valid
+            # Gather data from preprint server API if prefill form is valid
             self.initial_data = self.prefill_form.get_prefill_data()
             return super().get(request)
         else:
@@ -300,6 +309,25 @@ class RequestSubmissionUsingArXivView(RequestSubmissionView):
         return kwargs
 
 
+class RequestSubmissionUsingChemRxivView(RequestSubmissionView):
+    """Formview to submit a new Submission using ChemRxiv."""
+
+    def get(self, request, journal_doi_label):
+        """
+        Redirect to `submit_choose_preprint_server` if ChemRxiv identifier is not known.
+        """
+        self.prefill_form = ChemRxivPrefillForm(
+            request.GET or None,
+            requested_by=self.request.user,
+            journal_doi_label=journal_doi_label,
+            thread_hash=request.GET.get('thread_hash'))
+        return super().get(request, journal_doi_label)
+
+    def get_form_kwargs(self):
+        """Form requires extra kwargs."""
+        kwargs = super().get_form_kwargs()
+        kwargs['preprint_server'] = 'ChemRxiv'
+        return kwargs
 
 
 @login_required
@@ -417,6 +445,7 @@ def submission_detail_wo_vn_nr(request, identifier_wo_vn_nr):
 
 def submission_detail(request, identifier_w_vn_nr):
     """Public detail page of Submission."""
+
     submission = get_object_or_404(Submission, preprint__identifier_w_vn_nr=identifier_w_vn_nr)
     context = {
         'can_read_editorial_information': False,
-- 
GitLab