SciPost Code Repository

Skip to content
Snippets Groups Projects
Commit 7a28ceef authored by Jorran de Wit
Browse files

Add mvp for plagiarism, needs testing on paid account

parent 51a11bcb
No related branches found
No related tags found
No related merge requests found
......@@ -12,3 +12,7 @@ class CycleUpdateDeadlineError(BaseCustomException):
class InvalidReportVettingValue(BaseCustomException):
    """Signal that the action chosen while vetting a Report is not a known option."""
class ArxivPDFNotFound(Exception):
    """Signal that no pdf could be retrieved from arXiv."""
from django import forms
from django.conf import settings
from django.contrib.auth.models import Group
from django.db import transaction
from django.utils import timezone
......@@ -10,17 +11,18 @@ from .constants import ASSIGNMENT_BOOL, ASSIGNMENT_REFUSAL_REASONS, STATUS_RESUB
STATUS_REJECTED, STATUS_REJECTED_VISIBLE, STATUS_RESUBMISSION_INCOMING,\
STATUS_DRAFT, STATUS_UNVETTED, REPORT_ACTION_ACCEPT, REPORT_ACTION_REFUSE,\
STATUS_VETTED
from .exceptions import InvalidReportVettingValue
from .models import Submission, RefereeInvitation, Report, EICRecommendation, EditorialAssignment
from . import exceptions, helpers
from .models import Submission, RefereeInvitation, Report, EICRecommendation, EditorialAssignment,\
iThenticateReport
from scipost.constants import SCIPOST_SUBJECT_AREAS
from scipost.services import ArxivCaller
from scipost.models import Contributor
import strings
from crispy_forms.helper import FormHelper
from crispy_forms.layout import Layout, Div, Field, HTML, Submit
import strings
import iThenticate
class SubmissionSearchForm(forms.Form):
......@@ -554,7 +556,7 @@ class VetReportForm(forms.Form):
# The report is rejected
report.status = self.cleaned_data['refusal_reason']
else:
raise InvalidReportVettingValue(self.cleaned_data['action_option'])
raise exceptions.InvalidReportVettingValue(self.cleaned_data['action_option'])
report.save()
return report
......@@ -643,3 +645,114 @@ class SubmissionCycleChoiceForm(forms.ModelForm):
other_submission = self.instance.other_versions.first()
if other_submission:
self.fields['referees_reinvite'].queryset = other_submission.referee_invitations.all()
class iThenticateReportForm(forms.ModelForm):
    """
    Upload the pdf of a Submission to iThenticate, or refresh the status of an earlier upload.

    The form exposes no model fields. Everything that is stored on the report comes from
    the iThenticate response collected in `clean()` and written in `save()`.
    """

    class Meta:
        model = iThenticateReport
        fields = []

    def __init__(self, submission, *args, **kwargs):
        """
        :submission: Submission the plagiarism report is about
        """
        self.submission = submission
        # Always defined, so later steps can test them instead of hitting a missing attribute.
        self.client = None
        self.response = None
        super().__init__(*args, **kwargs)

        # `files` may be absent, but may also be passed explicitly as None.
        if (kwargs.get('files') or {}).get('file'):
            # Add file field if file data is coming in!
            self.fields['file'] = forms.FileField()

    def clean(self):
        """
        Find out what to send to iThenticate and perform the call.

        Without a known `doc_id` the pdf is taken from the uploaded file, or downloaded from
        arXiv if no file field is present. With a known `doc_id` only the status is refreshed.
        Every failure is added to the form as a non-field error.
        """
        cleaned_data = super().clean()
        doc_id = self.instance.doc_id
        self.response = None

        if doc_id:
            self.document_id = doc_id
        elif 'file' not in self.fields:
            try:
                cleaned_data['document'] = helpers.retrieve_pdf_from_arxiv(
                    self.submission.arxiv_identifier_w_vn_nr)
            except exceptions.ArxivPDFNotFound:
                self.add_error(None, ('The pdf could not be found at arXiv.'
                                      ' Please upload the pdf manually.'))
                self.fields['file'] = forms.FileField()
        elif cleaned_data.get('file'):
            cleaned_data['document'] = cleaned_data['file']

        # Login client to append login-check to form
        self.client = self.get_client()
        if self.client is None:
            # Login failed and the error is already on the form; nothing can be sent.
            return cleaned_data

        if cleaned_data.get('document'):
            self.document = cleaned_data['document']
            self.response = self.call_ithenticate()
        elif hasattr(self, 'document_id'):
            self.response = self.call_ithenticate()

        if not self.response and not self.errors:
            # Never let a form without iThenticate data pass as valid: save() needs it.
            self.add_error(None, "iThenticate didn't return any data.")

        # Django ignores a None return value and keeps `self.cleaned_data`, so returning
        # the dict in every case is equivalent and keeps the method's contract simple.
        return cleaned_data

    def save(self, *args, **kwargs):
        """
        Store the data returned by iThenticate on the report and return the report.

        Only call this after the form validated successfully.
        """
        # `self.response` already is the `data` part of the iThenticate response.
        # NOTE(review): the keys used below are assumed to sit at the top level of that
        # payload -- confirm against a real iThenticate response.
        data = self.response

        if self.instance.doc_id:
            report = self.instance
        else:
            # A ModelForm always has an instance, so a missing doc_id is what marks a
            # new upload. get_or_create() returns an (object, created) tuple.
            report, _created = iThenticateReport.objects.get_or_create(doc_id=data['id'])

        # NOTE(review): get_absolute_url() on the model reads `to_submission`; verify that
        # `submission` is the attribute that actually persists the link.
        report.submission = self.submission
        report.uploaded_time = data['uploaded_time']
        report.processed_time = data['processed_time']
        report.percent_match = data['percent_match']
        report.save()
        return report

    def call_ithenticate(self):
        """Refresh the status of a known document, or else upload the new document."""
        if hasattr(self, 'document_id'):
            # Update iThenticate status
            return self.update_status()
        elif hasattr(self, 'document'):
            # Upload iThenticate document first time
            return self.upload_document()
        return None

    def get_client(self):
        """Return a logged-in iThenticate client, or None (with a form error) on failure."""
        client = iThenticate.API.Client(settings.ITHENTICATE_USERNAME,
                                        settings.ITHENTICATE_PASSWORD)
        if client.login():
            return client
        self.add_error(None, "Failed to login to iThenticate.")
        return None

    def _add_client_errors(self, message):
        """Add `message` and, if available, the first message reported by the client."""
        self.add_error(None, message)
        client_messages = getattr(self.client, 'messages', None)
        if client_messages:
            self.add_error(None, client_messages[0])

    def update_status(self):
        """Fetch the current data of the known document; return None on failure."""
        response = self.client.documents.get(self.document_id)
        if response['status'] == 200:
            return response['data']
        self._add_client_errors("Updating failed. iThenticate didn't return valid data [1]")
        return None

    def upload_document(self):
        """Upload the document to the first available folder; return None on failure."""
        client = self.client

        # Get first folder available
        # TODO: make the target folder configurable instead of taking the first one.
        folders = client.folders.all()
        if folders['status'] != 200 or not folders['data']:
            self._add_client_errors("Uploading failed. iThenticate didn't return valid data [2]")
            return None
        folder_id = folders['data'][0]['id']

        author = self.submission.authors.first()
        if author is None:
            self.add_error(None, "Uploading failed. The Submission has no authors.")
            return None

        # The arXiv download is a bytes object, a manual upload is a file-like object.
        document = self.document
        content = document.read() if hasattr(document, 'read') else document

        # Finally, upload the file
        response = client.documents.add(
            content,
            folder_id,
            author.user.first_name,
            author.user.last_name,
            self.submission.title,
        )
        if response['status'] == 200:
            return response['data']
        self._add_client_errors("Uploading failed. iThenticate didn't return valid data [3]")
        return None
import requests
from .exceptions import ArxivPDFNotFound
def retrieve_pdf_from_arxiv(arxiv_id, timeout=30):
    """
    Try to download the pdf as bytes object from arXiv for a certain arXiv Identifier.
    Raise ArxivPDFNotFound instead.

    :arxiv_id: Arxiv Identifier with or without (takes latest version instead) version number
    :timeout: Number of seconds to wait for arXiv before giving up
    """
    path_to_pdf = 'https://arxiv.org/pdf/{arxiv_id}.pdf'.format(arxiv_id=arxiv_id)
    try:
        # Without a timeout an unresponsive server would block the request forever.
        response = requests.get(path_to_pdf, timeout=timeout)
    except requests.RequestException as error:
        # Connection problems mean "no pdf" to the caller just as well as a bad status.
        raise ArxivPDFNotFound('No pdf found on arXiv.') from error
    if response.status_code != 200:
        raise ArxivPDFNotFound('No pdf found on arXiv.')
    return response.content
from django.urls import NoReverseMatch
from django.contrib.auth.mixins import PermissionRequiredMixin
from django.views.generic.list import ListView
from .models import Submission
......@@ -20,7 +22,17 @@ class FriendlyPermissionMixin(PermissionRequiredMixin):
return super().dispatch(request, *args, **kwargs)
class SubmissionAdminViewMixin(FriendlyPermissionMixin):
class SubmissionFormViewMixin:
    """Hand the Submission of the view to its form."""

    def get_form_kwargs(self):
        """
        Extend the form keyword arguments with `submission`.

        Ideally every ModelForm on a Submission-related object takes `submission` as a
        required argument.
        """
        form_kwargs = super().get_form_kwargs()
        form_kwargs.update(submission=self._original_submission)
        return form_kwargs
class SubmissionAdminViewMixin(FriendlyPermissionMixin, SubmissionFormViewMixin):
"""
This mixin will provide all basic methods and checks required for Submission
administrational actions regarding Submissions.
......@@ -38,7 +50,46 @@ class SubmissionAdminViewMixin(FriendlyPermissionMixin):
return not self.editorial_page
def get_queryset(self):
    """
    Restrict the Submissions to those the current user may see.

    Depending on `self.pool`, either the pool or the editorial-page selection for the
    user is returned. All Class-Based-Views rely on this filter, so when overriding
    - get_object()
    - get_queryset()
    always call super().method_name() to keep the filter in place!
    """
    queryset = super().get_queryset()
    if not self.pool:
        return queryset.filter_editorial_page(self.request.user)
    return queryset.get_pool(self.request.user)
def get_object(self):
    """
    Fetch the object and remember it on the view as `self.submission`.

    Keeping the instance avoids fetching it again, e.g. in get_context_data().
    """
    self.submission = super().get_object()
    return self.submission
def get_context_data(self, *args, **kwargs):
    """
    Make sure the Submission instance is available in the context.

    When the main object of a DetailView is not a Submission, the Submission would
    otherwise be missing from the context data.
    """
    context = super().get_context_data(*args, **kwargs)
    if context.get('submission') or isinstance(self, ListView):
        # Already present, or a list view without a single Submission.
        return context

    # Explicitly store the Submission related to the view's main object.
    context['submission'] = self._original_submission
    return context
@property
def _original_submission(self):
    """
    Return the Submission of this view, or None if the parent object is no Submission.

    The instance stored on the view is preferred over asking the parent get_object().
    """
    if not hasattr(self, 'submission'):
        candidate = super().get_object()
        return candidate if isinstance(candidate, Submission) else None
    return self.submission
......@@ -506,6 +506,11 @@ class iThenticateReport(TimeStampedModel):
doc_id = models.IntegerField(primary_key=True)
percent_match = models.IntegerField(null=True, blank=True)
def get_absolute_url(self):
    """Return the url of the plagiarism page, built from the arXiv identifier of `to_submission`."""
    identifier = self.to_submission.arxiv_identifier_w_vn_nr
    return reverse('submissions:plagiarism',
                   kwargs={'arxiv_identifier_w_vn_nr': identifier})
@property
def score(self):
    """Return the match percentage of the report (alias of `percent_match`)."""
    percentage = self.percent_match
    return percentage
{% extends 'scipost/_personal_page_base.html' %}
{% load bootstrap %}
{% block pagetitle %}: plagiarism report ({{ submission.arxiv_identifier_w_vn_nr }}){% endblock pagetitle %}
{% block breadcrumb_items %}
......@@ -18,8 +20,9 @@
No Plagiarism Report found.
{% endif %}
<form method="post" class="mt-3">
<form method="post" class="mt-3" enctype="multipart/form-data">
{% csrf_token %}
{{form|bootstrap}}
<input type="submit" class="btn btn-primary" value="{% if submission.plagiarism_report %}Update report status{% else %}Submit submission for plagiarism check{% endif %}">
</form>
......
import datetime
import feedparser
from django.conf import settings
from django.contrib import messages
from django.contrib.auth.decorators import login_required, permission_required
from django.contrib.auth.models import Group
......@@ -13,12 +12,11 @@ from django.template import Template, Context
from django.utils import timezone
from django.utils.decorators import method_decorator
from django.views.generic.detail import DetailView
from django.views.generic.edit import CreateView
from django.views.generic.edit import CreateView, UpdateView
from django.views.generic.list import ListView
from guardian.decorators import permission_required_or_403
from guardian.shortcuts import assign_perm, get_objects_for_user
import iThenticate
from .constants import SUBMISSION_STATUS_VOTING_DEPRECATED, STATUS_VETTED, STATUS_EIC_ASSIGNED,\
SUBMISSION_STATUS_PUBLICLY_INVISIBLE, SUBMISSION_STATUS, ED_COMM_CHOICES,\
......@@ -31,7 +29,8 @@ from .forms import SubmissionIdentifierForm, RequestSubmissionForm, SubmissionSe
SetRefereeingDeadlineForm, RefereeSelectForm, RefereeRecruitmentForm,\
ConsiderRefereeInvitationForm, EditorialCommunicationForm,\
EICRecommendationForm, ReportForm, VetReportForm, VotingEligibilityForm,\
SubmissionCycleChoiceForm, ReportPDFForm, SubmissionReportsForm
SubmissionCycleChoiceForm, ReportPDFForm, SubmissionReportsForm,\
iThenticateReportForm
from .utils import SubmissionUtils
from scipost.forms import ModifyPersonalMessageForm, RemarkForm
......@@ -90,6 +89,7 @@ class RequestSubmission(CreateView):
messages.warning(self.request, *error_messages)
return super().form_invalid(form)
@login_required
@permission_required('scipost.can_submit_manuscript', raise_exception=True)
def prefill_using_arxiv_identifier(request):
......@@ -1472,23 +1472,20 @@ class EditorialSummaryView(SubmissionAdminViewMixin, ListView):
.get(arxiv_identifier_w_vn_nr=arxiv_id))
except (AssertionError, Submission.DoesNotExist):
context['submission'] = None
context['latest_events'] = SubmissionEvent.objects.for_eic()#.last_hours()
context['latest_events'] = SubmissionEvent.objects.for_eic().last_hours()
return context
class PlagiarismView(SubmissionAdminViewMixin, UpdateView):
    """
    Show the plagiarism report of a Submission and process `iThenticateReportForm` on POST.
    """
    permission_required = 'scipost.can_do_plagiarism_checks'
    template_name = 'submissions/admin/plagiarism_report.html'
    editorial_page = True
    form_class = iThenticateReportForm

    def get_object(self):
        """Return the plagiarism report of the Submission as the object to be edited."""
        submission = super().get_object()
        return submission.plagiarism_report

    def get_success_url(self):
        """
        Redirect to the page the form was posted from.

        The 'submissions:plagiarism' route is reversed with an arXiv identifier
        elsewhere, so a class-level `reverse_lazy('submissions:plagiarism')` without
        that argument cannot be resolved. The form posts to the current url, which is
        exactly the page to return to.
        """
        return self.request.path
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment