diff --git a/SciPost_v1/settings/base.py b/SciPost_v1/settings/base.py index 2763ae2b8cdd04877322eec621a898a344170d0f..608d0164338eccbe66fe238579c2b4db558ad3fb 100644 --- a/SciPost_v1/settings/base.py +++ b/SciPost_v1/settings/base.py @@ -146,7 +146,7 @@ HAYSTACK_CONNECTIONS = { # Brute force automatically re-index Haystack using post_save signals on all models. # When write-traffic increases, a custom processor is preferred which only connects # signals to eg. `vet-accepted` signals possibly using cron jobs instead of realtime updates. -HAYSTACK_SIGNAL_PROCESSOR = 'SciPost_v1.signalprocessors.AutoSearchIndexingProcessor' +HAYSTACK_SIGNAL_PROCESSOR = 'SciPost_v1.signalprocessors.SearchIndexingProcessor' SPHINXDOC_BASE_TEMPLATE = 'scipost/base.html' diff --git a/SciPost_v1/signalprocessors.py b/SciPost_v1/signalprocessors.py index 65fbd4313b1b93b28bb88f3896d0f7e7774ab4e8..b65bbc1d413178049f87a693e7a1d319ae8bee73 100644 --- a/SciPost_v1/signalprocessors.py +++ b/SciPost_v1/signalprocessors.py @@ -4,18 +4,13 @@ __license__ = "AGPL v3" from django.contrib.contenttypes.models import ContentType -from celery import shared_task from haystack import connection_router, connections, signals from haystack.exceptions import NotHandled -from commentaries.models import Commentary -from comments.models import Comment -from journals.models import Publication -from submissions.models import Submission, Report +from notifications.models import Notification +from submissions.models import Submission - -@shared_task(name='signalprocessors.remove_object_indexes') def remove_objects_indexes(sender_type_id, object_type_id, object_id): """ Given a set of `objects` model instances, remove them from the index as preparation @@ -23,10 +18,7 @@ def remove_objects_indexes(sender_type_id, object_type_id, object_id): """ sender = ContentType.objects.get_for_id(sender_type_id) object_type = ContentType.objects.get_for_id(object_type_id) - try: - instance = object_type.get_object_for_this_type(pk=object_id) - except object_type.model_class().DoesNotExist: - return None + instance = object_type.get_object_for_this_type(pk=object_id) if isinstance(instance, Submission): # Submission have complex status handling, so a status change should lead to @@ -42,7 +34,7 @@ def remove_objects_indexes(sender_type_id, object_type_id, object_id): try: using_backends = connection_router.for_write(instance=objects[0]) except IndexError: - # No objects given, stop processing here + # No submissions given, stop processing here return None for instance in objects: @@ -55,17 +47,13 @@ def remove_objects_indexes(sender_type_id, object_type_id, object_id): pass -@shared_task(name='signalprocessors.update_instance_indexes') def update_instance_indexes(sender_type_id, object_type_id, object_id): """ Given an individual model instance, update its entire indexes. """ sender = ContentType.objects.get_for_id(sender_type_id) object_type = ContentType.objects.get_for_id(object_type_id) - try: - instance = object_type.get_object_for_this_type(pk=object_id) - except object_type.model_class().DoesNotExist: - return None + instance = object_type.get_object_for_this_type(pk=object_id) try: using_backends = connection_router.for_write(instance=instance) @@ -82,25 +70,12 @@ def update_instance_indexes(sender_type_id, object_type_id, object_id): pass -class AutoSearchIndexingProcessor(signals.RealtimeSignalProcessor): - +class SearchIndexingProcessor(signals.BaseSignalProcessor): def handle_save(self, sender, instance, **kwargs): - """ - Explicitly perform haystack reindex when saving objects of class: - Submission, Report, Comment, Publication. - """ - try: - if (isinstance(instance, Submission) or - isinstance(instance, Report) or - isinstance(instance, Comment) or - isinstance(instance, Publication) or - isinstance(instance, Commentary)): - sender_type_id = ContentType.objects.get_for_model(sender).id - instance_type_id = ContentType.objects.get_for_model(instance).id - chain = ( - remove_objects_indexes.s(sender_type_id, instance_type_id, instance.id) - # 2nd method uses an immutable signature, to ignore return value of 1st method - | update_instance_indexes.si(sender_type_id, instance_type_id, instance.id)) - chain() - except AttributeError: - pass + if not isinstance(instance, Notification): + sender_type_id = ContentType.objects.get_for_model(sender).id + instance_type_id = ContentType.objects.get_for_model(instance).id + chain = ( + remove_objects_indexes.s(sender_type_id, instance_type_id, instance.id) + | update_instance_indexes.s(sender_type_id, instance_type_id, instance.id)) + chain() diff --git a/commentaries/search_indexes.py b/commentaries/search_indexes.py index c695f0d07461adfd38f6e7de12400a5a446ca259..3c96661dbedd720b1ad116144e0d2ede92d5d77a 100644 --- a/commentaries/search_indexes.py +++ b/commentaries/search_indexes.py @@ -15,6 +15,9 @@ class CommentaryIndex(indexes.SearchIndex, indexes.Indexable): date = indexes.DateTimeField(model_attr='pub_date', null=True) abstract = indexes.CharField(model_attr='pub_abstract') + def get_updated_field(self): + return 'latest_activity' + def get_model(self): return Commentary diff --git a/comments/search_indexes.py b/comments/search_indexes.py index 69df86d8bc3818d53940c64664a9d8c2ac0dd467..eda2ecd57bedfaa8aba00d6f72cde53d534f3cfa 100644 --- a/comments/search_indexes.py +++ b/comments/search_indexes.py @@ -14,6 +14,9 @@ class CommentIndex(indexes.SearchIndex, indexes.Indexable): authors = indexes.CharField(model_attr='author') date = indexes.DateTimeField(model_attr='date_submitted') + def get_updated_field(self): + return 'latest_activity' + def get_model(self): return Comment diff --git a/journals/migrations/0057_auto_20190404_1738.py b/journals/migrations/0057_auto_20190404_1738.py new file mode 100644 index 0000000000000000000000000000000000000000..b414d823295c762c8321dc71e8f140c893b2314a --- /dev/null +++ b/journals/migrations/0057_auto_20190404_1738.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.4 on 2019-04-04 15:38 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('journals', '0056_publicationauthorstable_profile'), + ] + + operations = [ + migrations.AlterField( + model_name='publication', + name='latest_activity', + field=models.DateTimeField(auto_now=True), + ), + ] diff --git a/journals/models.py b/journals/models.py index 7fd04b4fc60808856a5f83d2b6c281224668c00a..59139e906ef1d591c61b7e6676b7f38a92342491 100644 --- a/journals/models.py +++ b/journals/models.py @@ -478,7 +478,7 @@ class Publication(models.Model): publication_date = models.DateField(verbose_name='publication date') latest_citedby_update = models.DateTimeField(null=True, blank=True) latest_metadata_update = models.DateTimeField(blank=True, null=True) - latest_activity = models.DateTimeField(default=timezone.now) + latest_activity = models.DateTimeField(auto_now=True) # Needs `auto_now` as its not explicity updated anywhere? objects = PublicationQuerySet.as_manager() diff --git a/journals/search_indexes.py b/journals/search_indexes.py index 5dbdf4ec793a99ddc72b95e2298209c550adb005..b3e328b358a423e2a252b4fefc63bf701d18f06e 100644 --- a/journals/search_indexes.py +++ b/journals/search_indexes.py @@ -15,6 +15,9 @@ class PublicationIndex(indexes.SearchIndex, indexes.Indexable): doi_label = indexes.CharField(model_attr='doi_label') institutions = indexes.MultiValueField() + def get_updated_field(self): + return 'latest_activity' + def prepare_institutions(self, obj): return [inst.name for inst in obj.institutions.all()] diff --git a/submissions/forms.py b/submissions/forms.py index 9feb39325c0ea6b8ed5bb7093e06b12cfdeadd5b..92f5814d51dd526d4805527b3888bd9cf87234d6 100644 --- a/submissions/forms.py +++ b/submissions/forms.py @@ -948,8 +948,7 @@ class RefereeSearchForm(forms.Form): 'placeholder': 'Search for a referee in the SciPost Profiles database'})) def search(self): - return Profile.objects.filter( - last_name__icontains=self.cleaned_data['last_name']) + return Profile.objects.filter(last_name__icontains=self.cleaned_data['last_name']) # return Profile.objects.annotate( # similarity=TrigramSimilarity('last_name', self.cleaned_data['last_name']), # ).filter(similarity__gt=0.3).order_by('-similarity') diff --git a/submissions/management/commands/update_search_indices.py b/submissions/management/commands/update_search_indices.py new file mode 100644 index 0000000000000000000000000000000000000000..31a56d13f512b845da7f8a3811221c638e1aae22 --- /dev/null +++ b/submissions/management/commands/update_search_indices.py @@ -0,0 +1,38 @@ +__copyright__ = "Copyright © Stichting SciPost (SciPost Foundation)" +__license__ = "AGPL v3" + + +from django.core.management import BaseCommand + + + +class Command(BaseCommand): + """Update/Reindex all Haystack's search indices.""" + + help = 'Update/Reindex all Haystack's search indices' + + def handle(self, *args, **kwargs): + + count = 0 + + for fellow in fellows: + nr_potfels_to_vote_on = PotentialFellowship.objects.to_vote_on(fellow).count() + recs_to_vote_on = EICRecommendation.objects.user_must_vote_on(fellow.user) + assignments_ongoing = fellow.editorial_assignments.ongoing() + assignments_to_consider = fellow.editorial_assignments.invited() + assignments_upcoming_deadline = assignments_ongoing.refereeing_deadline_within(days=7) + if recs_to_vote_on or assignments_ongoing or assignments_to_consider or assignments_upcoming_deadline: + mail_sender = DirectMailUtil( + 'fellows/email_fellow_tasklist', + # Render immediately, because m2m/querysets cannot be saved for later rendering: + delayed_processing=False, + object=fellow, + fellow=fellow, + nr_potfels_to_vote_on=nr_potfels_to_vote_on, + recs_to_vote_on=recs_to_vote_on, + assignments_ongoing=assignments_ongoing, + assignments_to_consider=assignments_to_consider, + assignments_upcoming_deadline=assignments_upcoming_deadline) + mail_sender.send_mail() + count += 1 + self.stdout.write(self.style.SUCCESS('Emailed {} fellows.'.format(count))) diff --git a/submissions/migrations/0054_submission_update_search_index.py b/submissions/migrations/0054_submission_update_search_index.py new file mode 100644 index 0000000000000000000000000000000000000000..adfc84425162c0afebbd528dd748b4a59b58b29d --- /dev/null +++ b/submissions/migrations/0054_submission_update_search_index.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.4 on 2019-04-04 15:38 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('submissions', '0053_merge_20190303_1304'), + ] + + operations = [ + migrations.AddField( + model_name='submission', + name='update_search_index', + field=models.BooleanField(default=True), + ), + ] diff --git a/submissions/models.py b/submissions/models.py index 3776d3c619bb82a15cef02e251aed8fbcd5497c6..9b6cd67fd4c7ddb65018b6f000418e81b6df4f24 100644 --- a/submissions/models.py +++ b/submissions/models.py @@ -132,6 +132,7 @@ class Submission(models.Model): submission_date = models.DateField(verbose_name='submission date', default=datetime.date.today) acceptance_date = models.DateField(verbose_name='acceptance date', null=True, blank=True) latest_activity = models.DateTimeField(auto_now=True) + update_search_index = models.BooleanField(default=True) # Topics for semantic linking topics = models.ManyToManyField('ontology.Topic', blank=True) diff --git a/submissions/search_indexes.py b/submissions/search_indexes.py index 4b4b7c0804847789df2bbe06e4c01283172fb311..1d031f6d66a4b87610f4002d81f1023a647d46a8 100644 --- a/submissions/search_indexes.py +++ b/submissions/search_indexes.py @@ -15,6 +15,9 @@ class SubmissionIndex(indexes.SearchIndex, indexes.Indexable): date = indexes.DateTimeField(model_attr='submission_date') abstract = indexes.CharField(model_attr='abstract') + def get_updated_field(self): + return 'latest_activity' + def get_model(self): return Submission diff --git a/submissions/services.py b/submissions/services.py index 5fe3f68c04a028f76c465eaa9171fc0a5b6dabc9..d8df46655e11e3ff5da6a07ab46f8d2d0fd2983f 100644 --- a/submissions/services.py +++ b/submissions/services.py @@ -1,10 +1,10 @@ __copyright__ = "Copyright 2016-2018, Stichting SciPost (SciPost Foundation)" __license__ = "AGPL v3" +from django import forms from django.conf import settings from .helpers import retrieve_pdf_from_arxiv -from .models import iThenticateReport from .plagiarism import iThenticate @@ -24,7 +24,6 @@ class iThenticateCaller: def update_status(self): if not self.document_id: return False - # ... def upload_document(self, document=None): @@ -40,7 +39,7 @@ class iThenticateCaller: if not client: return None - try: + try: plagiarism = iThenticate() data = plagiarism.upload_submission(document, self.submission) @@ -50,133 +49,15 @@ class iThenticateCaller: for msg in plagiarism.get_messages(): self.add_error(None, msg) # To do: wrong. return None - except AttributeError: - # To do: all wrong... - if not self.fields.get('file'): - # The document is invalid. - self.add_error(None, ('A valid pdf could not be found at arXiv.' - ' Please upload the pdf manually.')) - else: - self.add_error(None, ('The uploaded file is not valid.' - ' Please upload a valid pdf.')) + except AttributeError: + # To do: all wrong... + if not self.fields.get('file'): + # The document is invalid. + self.add_error(None, ('A valid pdf could not be found at arXiv.' + ' Please upload the pdf manually.')) + else: + self.add_error(None, ('The uploaded file is not valid.' + ' Please upload a valid pdf.')) self.fields['file'] = forms.FileField() return data - -# class iThenticateReportForm(forms.ModelForm): -# class Meta: -# model = iThenticateReport -# fields = [] -# -# def __init__(self, submission, *args, **kwargs): -# self.submission = submission -# super().__init__(*args, **kwargs) -# -# if kwargs.get('files', {}).get('file'): -# # Add file field if file data is coming in! -# self.fields['file'] = forms.FileField() -# -# def clean(self): -# cleaned_data = super().clean() -# doc_id = self.instance.doc_id -# if not doc_id and not self.fields.get('file'): -# try: -# cleaned_data['document'] = helpers.retrieve_pdf_from_arxiv( -# self.submission.preprint.identifier_w_vn_nr) -# except exceptions.ArxivPDFNotFound: -# self.add_error( -# None, 'The pdf could not be found at arXiv. Please upload the pdf manually.') -# self.fields['file'] = forms.FileField() -# elif not doc_id and cleaned_data.get('file'): -# cleaned_data['document'] = cleaned_data['file'].read() -# elif doc_id: -# self.document_id = doc_id -# -# # Login client to append login-check to form -# self.client = self.get_client() -# -# if not self.client: -# return None -# -# # Document (id) is found -# if cleaned_data.get('document'): -# self.document = cleaned_data['document'] -# try: -# self.response = self.call_ithenticate() -# except AttributeError: -# if not self.fields.get('file'): -# # The document is invalid. -# self.add_error(None, ('A valid pdf could not be found at arXiv.' -# ' Please upload the pdf manually.')) -# else: -# self.add_error(None, ('The uploaded file is not valid.' -# ' Please upload a valid pdf.')) -# self.fields['file'] = forms.FileField() -# elif hasattr(self, 'document_id'): -# self.response = self.call_ithenticate() -# -# if hasattr(self, 'response') and self.response: -# return cleaned_data -# -# # Don't return anything as someone submitted invalid data for the form at this point! -# return None -# -# def save(self, *args, **kwargs): -# data = self.response -# -# report, created = iThenticateReport.objects.get_or_create(doc_id=data['id']) -# -# if not created: -# try: -# iThenticateReport.objects.filter(doc_id=data['id']).update( -# uploaded_time=data['uploaded_time'], -# processed_time=data['processed_time'], -# percent_match=data['percent_match'], -# part_id=data.get('parts', [{}])[0].get('id') -# ) -# except KeyError: -# pass -# else: -# report.save() -# Submission.objects.filter(id=self.submission.id).update(plagiarism_report=report) -# return report -# -# def call_ithenticate(self): -# if hasattr(self, 'document_id'): -# # Update iThenticate status -# return self.update_status() -# elif hasattr(self, 'document'): -# # Upload iThenticate document first time -# return self.upload_document() -# -# def get_client(self): -# client = iThenticate.API.Client(settings.ITHENTICATE_USERNAME, -# settings.ITHENTICATE_PASSWORD) -# if client.login(): -# return client -# self.add_error(None, "Failed to login to iThenticate.") -# return None -# -# def update_status(self): -# client = self.client -# response = client.documents.get(self.document_id) -# if response['status'] == 200: -# return response.get('data')[0].get('documents')[0] -# self.add_error(None, "Updating failed. iThenticate didn't return valid data [1]") -# -# for msg in client.messages: -# self.add_error(None, msg) -# return None -# -# def upload_document(self): -# from .plagiarism import iThenticate -# plagiarism = iThenticate() -# data = plagiarism.upload_submission(self.document, self.submission) -# -# # Give feedback to the user -# if not data: -# self.add_error(None, "Updating failed. iThenticate didn't return valid data [3]") -# for msg in plagiarism.get_messages(): -# self.add_error(None, msg) -# return None -# return data diff --git a/theses/search_indexes.py b/theses/search_indexes.py index 7b701cab90c856c3ad9371f9253973fc0d499b19..9ae78b66fd37b32e0cc42af07c9e9f377d362fbf 100644 --- a/theses/search_indexes.py +++ b/theses/search_indexes.py @@ -16,6 +16,9 @@ class ThesisIndex(indexes.SearchIndex, indexes.Indexable): date = indexes.DateTimeField(model_attr='defense_date') abstract = indexes.CharField(model_attr='abstract') + def get_updated_field(self): + return 'latest_activity' + def get_model(self): return ThesisLink