diff --git a/SciPost_v1/settings/base.py b/SciPost_v1/settings/base.py index 3a2a26e166f5db576aa70102b2ff4ec399b82d76..4f0157685f48ec28497db7b987d607f7be168781 100644 --- a/SciPost_v1/settings/base.py +++ b/SciPost_v1/settings/base.py @@ -98,7 +98,6 @@ INSTALLED_APPS = ( 'journals', 'mailing_lists', 'mails', - 'metacore', 'news', 'notifications', 'partners', diff --git a/SciPost_v1/urls.py b/SciPost_v1/urls.py index 3d3df39a80fbf5e3f26f4d06658795b15fd6991c..2b83a6d4b3f2810e2f3179e47727b39c1ac08cf4 100644 --- a/SciPost_v1/urls.py +++ b/SciPost_v1/urls.py @@ -52,7 +52,6 @@ urlpatterns = [ url(r'^invitations/', include('invitations.urls', namespace="invitations")), url(r'^journals/', include('journals.urls.general', namespace="journals")), url(r'^mailing_list/', include('mailing_lists.urls', namespace="mailing_lists")), - url(r'^metacore/', include('metacore.urls', namespace="metacore")), url(r'^submissions/', include('submissions.urls', namespace="submissions")), url(r'^submission/', include('submissions.urls', namespace="_submissions")), url(r'^theses/', include('theses.urls', namespace="theses")), diff --git a/metacore/__init__.py b/metacore/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/metacore/admin.py b/metacore/admin.py deleted file mode 100644 index 1f67f7cc455474178c094ee0f09e44301e440dd7..0000000000000000000000000000000000000000 --- a/metacore/admin.py +++ /dev/null @@ -1,130 +0,0 @@ -__copyright__ = "Copyright 2016-2018, Stichting SciPost (SciPost Foundation)" -__license__ = "AGPL v3" - - -import json - -from django.contrib import admin -from django.contrib import messages - -from celery.result import AsyncResult -from django_celery_beat.models import PeriodicTask, IntervalSchedule - -from .models import Citable, Journal -from .services import ( - import_journal_full, import_journal_incremental, get_crossref_work_count, - add_journal_to_existing) - - -class 
JournalAdmin(admin.ModelAdmin): - fields = ('name', 'ISSN_digital', 'last_full_sync') - list_display = ( - 'name', 'ISSN_digital', 'last_full_sync', 'count_metacore', - 'count_crossref', 'last_update', 'task_status') - actions = [ - 'import_full', 'scheduled_import_incremental', 'import_incremental', - 'update_counts', 'add_journal_to_items', 'delete_all_citables'] - - def import_full(self, request, queryset): - """ Starts background task to import all works by this journal """ - - for journal in queryset: - # Celery Async version - task = import_journal_full.delay(journal.ISSN_digital) - journal.last_task_id = task.id - journal.save() - - messages.add_message( - request, messages.INFO, 'Import task for journal {} added.'.format(journal.name)) - - - def import_incremental(self, request, queryset): - """Starts background task to import all works by this journal.""" - - for journal in queryset: - if journal.last_full_sync: - task = import_journal_incremental.delay( - journal.ISSN_digital, journal.last_full_sync.strftime('%Y-%m-%d')) - journal.last_task_id = task.id - journal.save() - messages.add_message( - request, messages.INFO, - 'Import task for journal {} added.'.format(journal.name)) - else: - messages.add_message( - request, messages.WARNING, - ('Incremental import task for journal {} could not be started, ' - 'since date of last full sync is not set.'.format(journal.name))) - - def scheduled_import_incremental(self, request, queryset): - """Starts background task to import all works by this journal and repeats every day.""" - # TODO: make sure the from_date gets updated! - - schedule, __ = IntervalSchedule.objects.get_or_create( - every=1, period=IntervalSchedule.DAYS) - - for journal in queryset: - last_sync = '' - if journal.last_full_sync: - last_sync = journal.last_full_sync.strftime('%Y-%m-%d') - - task, created = PeriodicTask.objects.get_or_create( - interval=schedule, - name='Inc. 
import {}'.format(journal.name), - task='metacore.services.import_journal_incremental', - args=json.dumps([journal.ISSN_digital, last_sync])) - PeriodicTask.objects.filter(id=task.id).update(enabled=True) - - #TODO: figure out a way to put the individual task id in the journal - # everytime the scheduled task fires - journal.last_task_id = '' - journal.save() - - if created: - txt = 'Repeating import task for journal {} added.'.format(journal.name) - elif task.enabled: - txt = 'Repeating import task for journal {} already exists.'.format(journal.name) - else: - txt = 'Repeating import task for journal {} activated.'.format(journal.name) - txt += ' Go to Periodic Tasks in admin to view.'.format(journal.name) - - messages.add_message(request, messages.INFO, txt) - - def update_counts(self, request, queryset): - for journal in queryset: - journal.count_metacore = Citable.objects(metadata__ISSN=journal.ISSN_digital).count() - journal.count_crossref = get_crossref_work_count(journal.ISSN_digital) - journal.save() - - messages.add_message(request, messages.INFO, 'Counts updated.') - - def add_journal_to_items(self, request, queryset): - for journal in queryset: - add_journal_to_existing(journal.ISSN_digital) - messages.add_message( - request, messages.INFO, - ('"Add journal" task for journal {} added. 
Go to Background' - ' Tasks -> Tasks in admin to view'.format(journal.name))) - - - def delete_all_citables(self, request, queryset): - for journal in queryset: - journal.purge_citables() - messages.add_message( - request, messages.INFO, - 'All citables from journal "{}" deleted.'.format(journal.name)) - - def get_actions(self, request): - actions = super().get_actions(request) - if 'delete_selected' in actions: - del actions['delete_selected'] - return actions - - def task_status(self, journal): - if journal.last_task_id: - task = AsyncResult(journal.last_task_id) - if task: - return task.result - return '' - -admin.site.register(Journal, JournalAdmin) diff --git a/metacore/apps.py b/metacore/apps.py deleted file mode 100644 index 70c22b4ece40e82b9892cde751266d687b811d7e..0000000000000000000000000000000000000000 --- a/metacore/apps.py +++ /dev/null @@ -1,9 +0,0 @@ -__copyright__ = "Copyright 2016-2018, Stichting SciPost (SciPost Foundation)" -__license__ = "AGPL v3" - - -from django.apps import AppConfig - - -class MetacoreConfig(AppConfig): - name = 'metacore' diff --git a/metacore/forms.py b/metacore/forms.py deleted file mode 100644 index 76695920a9bf2b753c278026b50bf1f4f728cda5..0000000000000000000000000000000000000000 --- a/metacore/forms.py +++ /dev/null @@ -1,75 +0,0 @@ -__copyright__ = "Copyright 2016-2018, Stichting SciPost (SciPost Foundation)" -__license__ = "AGPL v3" - - -from django import forms - -import logging -import re - -from .models import Citable - -logger = logging.getLogger(__name__) - -# Move to application-wide constant if used more -# Taken from https://www.crossref.org/blog/dois-and-matching-regular-expressions -doi_regex = re.compile(r'^10.\d{4,9}\/[-._;()/:A-Z0-9]+$', re.IGNORECASE) - - -class CitableSearchForm(forms.Form): - omni = forms.CharField( - max_length=100, required=False, label="Author, title, journal - matches only full words") - author = forms.CharField(max_length=100, required=False, label="Author(s)") - title = 
forms.CharField(max_length=100, required=False) - publisher = forms.CharField(max_length=100, required=False) - journal = forms.CharField(max_length=100, required=False) - doi = forms.CharField(max_length=100, required=False) - - def search_results(self): - """Return all Citable objects according to search""" - query_params = { - 'title__icontains': self.cleaned_data.get('title', ''), - 'authors__icontains': self.cleaned_data.get('author', ''), - 'publisher__icontains': self.cleaned_data.get('publisher', ''), - 'metadata__container-title__icontains': self.cleaned_data.get('journal', ''), - } - - # DOI's are always lower case in the metacore app - doi_query = self.cleaned_data.get('doi', '').lower() - if doi_regex.match(doi_query): - # Use index (fast) - print('Using doi index') - query_params['doi'] = doi_query - else: - # Partial match (can't use index) - print('Not using doi index') - query_params['doi__icontains'] = doi_query - - if self.cleaned_data.get('omni', False): - """If a text index is present, search using the authors/title box is enables""" - return Citable.objects.simple().filter(**query_params).omni_search( - self.cleaned_data.get('omni'), 'and') - else: - if self.is_empty(): - return None - - return Citable.objects.simple().filter(**query_params) - - def is_empty(self): - form_empty = True - for field_value in self.cleaned_data.values(): - if field_value is not None and field_value != '': - form_empty = False - break - - if form_empty: - return None - - def is_bound_advanced_search(self): - if not self.is_bound: - return False - - return ( - (self.cleaned_data['author'] + self.cleaned_data['title'] - + self.cleaned_data['publisher'] + self.cleaned_data['journal'] - + self.cleaned_data['doi']) != '') diff --git a/metacore/managers.py b/metacore/managers.py deleted file mode 100644 index 39e6705225543b8f48c4f503493559548826da06..0000000000000000000000000000000000000000 --- a/metacore/managers.py +++ /dev/null @@ -1,39 +0,0 @@ -__copyright__ = 
"Copyright 2016-2018, Stichting SciPost (SciPost Foundation)" -__license__ = "AGPL v3" - - -from mongoengine import QuerySet - - -class CitableQuerySet(QuerySet): - - def cited_by(self, dois): - if isinstance(dois, list): - return self.only('references').filter(references__in=dois) - else: - return self.only('references').filter(references=dois) - - def simple(self): - return self.only( - 'doi', 'title', 'authors', 'metadata.is-referenced-by-count', 'publication_date', - 'publisher', 'metadata.container-title', 'journal') - - def prl(self): - return self.filter(metadata__ISSN='0031-9007') - - def omni_search(self, query, mode='and'): - if mode == 'and': - query_list = query.split(' ') - - # Treat words that start with '-' (exclude) differently - query_list_without_excludes = [q for q in query_list if not q[0] == '-'] - query_with_quotes = '"{0}"'.format('" "'.join(query_list_without_excludes)) - - query_list_excludes = [q for q in query_list if q not in query_list_without_excludes] - query_with_quotes = query_with_quotes + ' ' + ' '.join(query_list_excludes) - - return self.search_text(query_with_quotes) - elif mode == 'or': - return self.search_text(query) - else: - raise ValueError('Invalid mode used in omni_search') diff --git a/metacore/migrations/0001_initial.py b/metacore/migrations/0001_initial.py deleted file mode 100644 index ca1e4f9e630682d779d3186ffed7f859461d246e..0000000000000000000000000000000000000000 --- a/metacore/migrations/0001_initial.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by Django 1.11.4 on 2018-04-17 06:46 -from __future__ import unicode_literals - -import django.core.validators -from django.db import migrations, models - - -class Migration(migrations.Migration): - - initial = True - - dependencies = [ - ] - - operations = [ - migrations.CreateModel( - name='Journal', - fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('name', 
models.CharField(max_length=250)), - ('ISSN_digital', models.CharField(max_length=9, validators=[django.core.validators.RegexValidator('^[0-9]{4}-[0-9]{3}[0-9X]$')])), - ('ISSN_print', models.CharField(blank=True, max_length=9, null=True, validators=[django.core.validators.RegexValidator('^[0-9]{4}-[0-9]{3}[0-9X]$')])), - ('last_full_sync', models.DateTimeField(blank=True, null=True)), - ('last_cursor', models.CharField(blank=True, max_length=250, null=True)), - ('last_errors', models.TextField(blank=True, null=True)), - ], - ), - ] diff --git a/metacore/migrations/0002_auto_20180417_1036.py b/metacore/migrations/0002_auto_20180417_1036.py deleted file mode 100644 index b6197fb09671b9f79766c3dfa631dde18714d329..0000000000000000000000000000000000000000 --- a/metacore/migrations/0002_auto_20180417_1036.py +++ /dev/null @@ -1,25 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by Django 1.11.4 on 2018-04-17 08:36 -from __future__ import unicode_literals - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('metacore', '0001_initial'), - ] - - operations = [ - migrations.AddField( - model_name='journal', - name='count_crossref', - field=models.IntegerField(blank=True, null=True), - ), - migrations.AddField( - model_name='journal', - name='count_metacore', - field=models.IntegerField(blank=True, null=True), - ), - ] diff --git a/metacore/migrations/0003_auto_20180508_0911.py b/metacore/migrations/0003_auto_20180508_0911.py deleted file mode 100644 index 530a9a2ec6de7be8b7444b7a4a80ae049a3dd534..0000000000000000000000000000000000000000 --- a/metacore/migrations/0003_auto_20180508_0911.py +++ /dev/null @@ -1,36 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by Django 1.11.4 on 2018-05-08 07:11 -from __future__ import unicode_literals - -import django.core.validators -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('metacore', '0002_auto_20180417_1036'), - ] - 
- operations = [ - migrations.AddField( - model_name='journal', - name='count_running', - field=models.IntegerField(blank=True, null=True), - ), - migrations.AddField( - model_name='journal', - name='last_update', - field=models.DateTimeField(blank=True, null=True), - ), - migrations.AlterField( - model_name='journal', - name='ISSN_digital', - field=models.CharField(max_length=9, unique=True, validators=[django.core.validators.RegexValidator('^[0-9]{4}-[0-9]{3}[0-9X]$')]), - ), - migrations.AlterField( - model_name='journal', - name='ISSN_print', - field=models.CharField(blank=True, max_length=9, null=True, unique=True, validators=[django.core.validators.RegexValidator('^[0-9]{4}-[0-9]{3}[0-9X]$')]), - ), - ] diff --git a/metacore/migrations/0004_auto_20180508_0916.py b/metacore/migrations/0004_auto_20180508_0916.py deleted file mode 100644 index 4261c3727d56e70c227ce1f81e8c1ec13569e574..0000000000000000000000000000000000000000 --- a/metacore/migrations/0004_auto_20180508_0916.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by Django 1.11.4 on 2018-05-08 07:16 -from __future__ import unicode_literals - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('metacore', '0003_auto_20180508_0911'), - ] - - operations = [ - migrations.AlterField( - model_name='journal', - name='last_update', - field=models.DateTimeField(auto_now=True, null=True), - ), - ] diff --git a/metacore/migrations/0005_journal_last_task_id.py b/metacore/migrations/0005_journal_last_task_id.py deleted file mode 100644 index ffbe641755c59b5fcfcac3ac3b69e5f24fa5625f..0000000000000000000000000000000000000000 --- a/metacore/migrations/0005_journal_last_task_id.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by Django 1.11.4 on 2018-06-12 12:10 -from __future__ import unicode_literals - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - 
('metacore', '0004_auto_20180508_0916'), - ] - - operations = [ - migrations.AddField( - model_name='journal', - name='last_task_id', - field=models.IntegerField(blank=True, null=True), - ), - ] diff --git a/metacore/migrations/0006_auto_20180612_1419.py b/metacore/migrations/0006_auto_20180612_1419.py deleted file mode 100644 index deb7f3a43d9c762cebc9e7d3accd077c33338a78..0000000000000000000000000000000000000000 --- a/metacore/migrations/0006_auto_20180612_1419.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by Django 1.11.4 on 2018-06-12 12:19 -from __future__ import unicode_literals - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('metacore', '0005_journal_last_task_id'), - ] - - operations = [ - migrations.AlterField( - model_name='journal', - name='last_task_id', - field=models.CharField(blank=True, max_length=250, null=True), - ), - ] diff --git a/metacore/migrations/0007_auto_20181028_1902.py b/metacore/migrations/0007_auto_20181028_1902.py deleted file mode 100644 index 0b3f674cc1e47151955a688aa031b8dcc1e99d9c..0000000000000000000000000000000000000000 --- a/metacore/migrations/0007_auto_20181028_1902.py +++ /dev/null @@ -1,40 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by Django 1.11.4 on 2018-10-28 18:02 -from __future__ import unicode_literals - -import django.core.validators -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('metacore', '0006_auto_20180612_1419'), - ] - - operations = [ - migrations.AlterField( - model_name='journal', - name='ISSN_print', - field=models.CharField(blank=True, default='', max_length=9, validators=[django.core.validators.RegexValidator('^[0-9]{4}-[0-9]{3}[0-9X]$')]), - preserve_default=False, - ), - migrations.AlterField( - model_name='journal', - name='last_cursor', - field=models.CharField(blank=True, default='', max_length=250), - preserve_default=False, - ), - 
migrations.AlterField( - model_name='journal', - name='last_errors', - field=models.TextField(blank=True, default=''), - preserve_default=False, - ), - migrations.AlterField( - model_name='journal', - name='last_task_id', - field=models.CharField(blank=True, default='', max_length=250), - preserve_default=False, - ), - ] diff --git a/metacore/migrations/__init__.py b/metacore/migrations/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/metacore/models.py b/metacore/models.py deleted file mode 100644 index cdb46bfa3c5b870d6b1938e1f4aceb53402dd172..0000000000000000000000000000000000000000 --- a/metacore/models.py +++ /dev/null @@ -1,134 +0,0 @@ -__copyright__ = "Copyright 2016-2018, Stichting SciPost (SciPost Foundation)" -__license__ = "AGPL v3" - - -import requests - -from mongoengine import ( - connect, DynamicDocument, ListField, StringField, DynamicField, URLField, DateTimeField) - -from django.db import models -from django.conf import settings -from django.core.validators import RegexValidator - -from .managers import CitableQuerySet - -# Make the connection to MongoDB - this could be put in settings.py as well -# It uses default settings for the mongo server -connect( - settings.MONGO_DATABASE['database'], host=settings.MONGO_DATABASE['host'], - username=settings.MONGO_DATABASE['user'], password=settings.MONGO_DATABASE['password'], - port=settings.MONGO_DATABASE['port'], authSource='admin') - - -class Citable(DynamicDocument): - """ - Citable is a generic object in the metacore database - either a version of records - (with DOI) or preprint of an published/unpublished document. - - NOTE: extra text index for authors/title is defined through mongo shell! 
- This should be in the readme, but I'll temporarily add it here for ease of use: - For the text index, execute this in the mongo shell: - use scipost - db.citable.createIndex({authors: "text", title: "text", journal: "text"}) - """ - - # Fields that are extracted from the source metadata in order to normalize - # some of the data for searching / metrics - references = ListField(StringField()) - authors = ListField(StringField()) - title = StringField() - publisher = StringField() - license = URLField() - publication_date = DateTimeField() - journal = StringField() - - # Dump all the raw source metadata here - metadata = DynamicField() - - # Settings for mongoengine - meta = { - 'queryset_class': CitableQuerySet, # use the custom queryset - 'indexes': ['doi', 'title', 'publication_date', 'publisher', 'references', 'journal'], - 'allow_inheritance': True - } - - def times_cited(self): - return [] - - def author_list(self, max_n=None): - if max_n and max_n < len(self.authors): - return '; '.join(self.authors[:max_n]) + ' et al.' - else: - return '; '.join(self.authors) - - def crossref_ref_count(self): - return self.metadata['is-referenced-by-count'] - - -class CitableWithDOI(Citable): - """ - CitableWithDOI is the subclass of Citable meant for documents that have a DOI, - which enables the times_cited metric. 
- """ - doi = StringField(require=True, unique=True) - - def times_cited(self): - return CitableWithDOI.objects.cited_by(self.doi).count() - - -class Journal(models.Model): - """Provides interface for importing citables of a journal into Metacore.""" - - name = models.CharField(max_length=250, blank=False) - ISSN_digital = models.CharField(max_length=9, unique=True, - validators=[RegexValidator(r'^[0-9]{4}-[0-9]{3}[0-9X]$')]) - # Print ISSN not used right now, but there for future use - ISSN_print = models.CharField( - max_length=9, blank=True, - validators=[RegexValidator(r'^[0-9]{4}-[0-9]{3}[0-9X]$')]) - last_full_sync = models.DateTimeField(blank=True, null=True) - last_cursor = models.CharField(max_length=250, blank=True) - last_errors = models.TextField(blank=True) - count_metacore = models.IntegerField(blank=True, null=True) - count_crossref = models.IntegerField(blank=True, null=True) - count_running = models.IntegerField(blank=True, null=True) # Tracks progress during import tasks - last_update = models.DateTimeField(blank=True, null=True, auto_now=True) # Set during import tasks - last_task_id = models.CharField(max_length=250, blank=True) # Set after task related to journal is started - - def __str__(self): - return self.name - - def update_count_metacore(self): - count = Citable.objects(metadata__ISSN=self.ISSN_digital).count() - self.count_metacore = count - - def update_count_crossref(self): - """ - Returns the total number of citables that are present in CR for a given ISSN. 
- - Needs to be merged with .services but need to work out imports first (circular) - """ - - # Formulate the CR query - url = 'https://api.crossref.org/journals/{}/works'.format(self.ISSN_digital) - - # If the loop is allowed to complete, it fetches (rows * batches) records - rows = 0 - - params = {'rows': rows, 'mailto': 'b.g.t.ponsioen@uva.nl'} - r = requests.get(url, params=params) - r_json = r.json() - - result = r_json['message'] - - if 'total-results' in result: - self.count_metacore = result['total-results'] - - - def purge_citables(self): - """ - This will delete all citables with their issn set to this journal's issn! - """ - - Citable.objects(metadata__ISSN=self.ISSN_digital).delete() diff --git a/metacore/services.py b/metacore/services.py deleted file mode 100644 index 37e2adea42e36da17093707b1b2e85a2c9e3ac1d..0000000000000000000000000000000000000000 --- a/metacore/services.py +++ /dev/null @@ -1,283 +0,0 @@ -from __future__ import absolute_import, unicode_literals - -import logging -import requests - -from celery import shared_task, current_task -from mongoengine.python_support import pymongo -from rest_framework import serializers - -from django.utils import timezone - -from .models import Citable, CitableWithDOI, Journal - -logger = logging.getLogger(__name__) - - -@shared_task -def import_journal_full(issn, cursor='*'): - """ - Task to query CrossRef for all works of a journal with given ISSN - and store them in the Metacore mongo database - """ - return import_journal(issn=issn, cursor=cursor, from_index_date=None) - - -@shared_task -def import_journal_incremental(issn, from_index_date, cursor='*'): - """ - Task to query CrossRef for all works of a journal with given ISSN - from a given date onward and store them in the Metacore mongo database - """ - - # Get from date from the journal itself (necessary for periodic tasks) - # TODO: make periodic tasks call this function without the date - journal = Journal.objects.get(ISSN_digital=issn) - 
from_index_date = journal.last_full_sync.strftime('%Y-%m-%d') - - import_journal(issn=issn, cursor=cursor, from_index_date=from_index_date) - - -def import_journal(issn, cursor='*', from_index_date=None): - # Get journal to track progress - - # Formulate the CR query - url = 'https://api.crossref.org/journals/{}/works'.format(issn) - - # If the loop is allowed to complete, it fetches (rows * batches) records - rows = 500 - batches = 2000 - last_cursor = cursor - total_processed = 0 - error_count = 0 - total_upserted = 0 - total_modified = 0 - - validation_errors = [] - - for i in range(batches): - logger.info("-------------------------------") - logger.info("Batch %s" % (i, )) - logger.info("Last cursor: {}".format(last_cursor)) - logger.info("Current cursor: {}".format(cursor)) - - params = {'cursor': cursor, 'rows': rows, 'mailto': 'jorrandewit@scipost.org'} - if from_index_date: - params['filter'] = 'from-index-date:{}'.format(from_index_date) - - last_cursor = cursor - r_json = requests.get(url, params=params).json() - - citables_json = r_json['message']['items'] - cursor = r_json['message']['next-cursor'] - number_of_results = len(r_json['message']['items']) - - citables = [] - serialized_objects = [] - for cit in citables_json: - serialized_object = CitableCrossrefSerializer(data=cit) - if serialized_object.is_valid(): - citables.append(CitableWithDOI(**serialized_object.validated_data)) - serialized_objects.append(serialized_object) - else: - # TODO: insert the actual validation errors instead - citables.append(False) - logger.info("Error at {}".format(cit)) - validation_errors.append(serialized_object.errors) - - # Parser returns False if there's an error - errors = [not i for i in citables if i == False] - error_count = error_count + len(errors) - citables = [citable for citable in citables if citable] - - # Mass insert in database (will fail on encountering existing documents - # with same DOI - if citables: - operations = [obj.to_UpdateOne() for obj in 
serialized_objects] - col = Citable._get_collection() - bulk_res = col.bulk_write(operations, ordered=False) - - current_task.update_state(state='PROGRESS', meta={ - 'current': total_processed, - 'errors': error_count, - 'last_upserted': bulk_res.upserted_count, - 'last_matched_count': bulk_res.matched_count, - 'last_inserted': bulk_res.inserted_count - }) - - total_upserted += bulk_res.upserted_count - total_modified += bulk_res.modified_count - - # Save current count so progress can be tracked in the admin page - total_processed += number_of_results - Journal.objects.filter(ISSN_digital=issn).update(count_running=total_processed) - current_task.update_state(state='PROGRESS', - meta={'current': total_processed, 'errors': error_count}) - - # For debugging purposes - logger.info(current_task) - if citables: - logger.info("Upserted: {}".format(bulk_res.upserted_count)) - logger.info("Modified: {}".format(bulk_res.modified_count)) - - logger.info("Errors: {}".format(error_count)) - logger.info(validation_errors) - - - if number_of_results < rows: - logger.info(number_of_results) - logger.info('End reached.') - break - - count_crossref = get_crossref_work_count(issn) - Journal.objects.filter(ISSN_digital=issn).update( - count_metacore=Citable.objects(metadata__ISSN=issn).count(), - count_crossref=count_crossref, - last_task_id=current_task.id - ) - Journal.objects.filter(ISSN_digital=issn, count_metacore=count_crossref).update( - last_full_sync=timezone.now()) - - # Pack stuff for result - return { - 'total processed': total_processed, - 'total inserted': total_upserted, - 'total modified': total_modified, - 'validation errors': len(validation_errors) - } - - -def get_crossref_work_count(issn): - """ - Returns the total number of citables that are present in CR for a given ISSN - """ - - # Formulate the CR query - url = 'https://api.crossref.org/journals/{}/works'.format(issn) - - # If the loop is allowed to complete, it fetches (rows * batches) records - rows = 0 - - 
params = {'rows': rows, 'mailto': 'jorrandewit@scipost.org'} - r = requests.get(url, params=params) - r_json = r.json() - - result = r_json['message'] - - if 'total-results' in result: - return result['total-results'] - - -def convert_doi_to_lower_case(): - # If you accidentally import 100.000+ records that have random uppercase characters - # in their reference DOI list - i = 0 - cits = Citable.objects(__raw__={'references': {'$regex': '([A-Z])\w+'}}) - for cit in cits.only('references'): - i = i + 1 - refs = [ref.lower() for ref in cit.references] - cit.modify(references=refs) - - if i % 1000 == 0: - print(i) - - -def add_journal_to_existing(journal_issn=None): - # Take journal from metadata ('container-title') and put it in top-level 'journal' field - # for all existing citables - i = 0 - errors = 0 - if journal_issn: - print('Using given journal ISSN ', journal_issn) - cits = Citable.objects(metadata__ISSN=journal_issn, journal__exists=False) - else: - cits = Citable.objects(journal__exists=False) - - for cit in cits.only('metadata', 'journal'): - i = i + 1 - if 'container-title' in cit.metadata: - journal = cit.metadata['container-title'][0] - cit.modify(journal=journal) - else: - errors = errors + 1 - - if i % 1000 == 0: - print(i) - print(errors, ' errors') - print('-------') - - -class CitableCrossrefSerializer(serializers.BaseSerializer): - """ - Class for deserializing a JSON object into the correct form to create a CitableWithDOI out of. - Specifically for Crossref REST API format - - Usage: - json_data = { ... 
} - serialized_object = CitableCrossrefSerializer(data=json_data) - serialized_object.is_valid() - # Validated/parsed data: serialized_object.validated_data - CitableWithDOI.create(**serialized_object.validated_data) - """ - - def to_internal_value(self, data): - authors_raw = data.get('author') - references_raw = data.get('reference') - - doi = data.get('DOI') - publisher = data.get('publisher') - # {'issued': {'date-parts': ['...']}} - publication_date_raw = data.get('issued', {}).get('date-parts', [''])[0] - # {'title': ['...']} - title = data.get('title', [''])[0] - # {'container-title': ['...']} - journal = data.get('container-title', [''])[0] - # {'license': [{'url': '...'}]} - license = data.get('license', [{}])[0].get('URL') - - # Validation errors - if not doi: - raise serializers.ValidationError({'DOI': 'DOI not given.'}) - if not authors_raw: - raise serializers.ValidationError({'authors': 'Author list is empty.'}) - if not title: - raise serializers.ValidationError({'title': 'Title is not present.'}) - if not publication_date_raw: - raise serializers.ValidationError({'publication_date': 'Publication date is missing.'}) - - # More complex parsing logic - publication_date = '-'.join([str(date_part) for date_part in publication_date_raw]) - - authors = [] - for author_names in authors_raw: - author = [] - if 'given' in author_names: - author.append(author_names['given']) - if 'family' in author_names: - author.append(author_names['family']) - authors.append(' '.join(author)) - - if references_raw: - references_with_doi = [ref for ref in references_raw if 'DOI' in ref] - references = [ref['DOI'].lower() for ref in references_with_doi] - else: - references = [] - - return { - '_cls': CitableWithDOI._class_name, - 'authors': authors, - 'doi': doi.lower(), - 'references': references, - 'publisher': publisher, - 'publication-date': publication_date, - 'title': title, - 'journal': journal, - 'license': license, - 'metadata': data, - } - - def to_UpdateOne(self): 
- filters = {'doi': self.validated_data.pop('doi')} - mods = {'$set': self.validated_data} - - return pymongo.UpdateOne(filters, mods, upsert=True) diff --git a/metacore/tasks.py b/metacore/tasks.py deleted file mode 100644 index 6d07721e4cfff302bf566a82a6c04ff9e27035fc..0000000000000000000000000000000000000000 --- a/metacore/tasks.py +++ /dev/null @@ -1,4 +0,0 @@ -from __future__ import absolute_import, unicode_literals - - -# Add tasks here... diff --git a/metacore/templates/citable_list.html b/metacore/templates/citable_list.html deleted file mode 100644 index 7482eaaf5f0b5c57bfe5e72b8cfeaabc27ac48bc..0000000000000000000000000000000000000000 --- a/metacore/templates/citable_list.html +++ /dev/null @@ -1,96 +0,0 @@ -{% extends 'scipost/base.html' %} - -{% load bootstrap %} -{% load humanize %} -{% load request_filters %} - -{% block pagetitle %}: Metacore{% endblock pagetitle %} - -{% block content %} -<h1 class="highlight">Metacore</h1> - -<div class="row"> - <div class="col-12"> - <form action="{% url 'metacore:citable-list' %}" method="get" class="form-xinline"> - <div class="form-group"> - {{ form.omni.errors }} - <label for="{{ form.omni.auto_id }}" class="my-1 mr-2">{{ form.omni.label }}</label> - <div class="input-group"> - <input type="text" name="{{ form.omni.name }}" class="form-control form-control-lg" id="{{ form.omni.auto_id }}" aria-describedby="search_help" placeholder="Search term" value="{{ form.omni.value|default:'' }}" required="required"> - <div class="input-group-append"> - <input type="submit" class="btn btn-primary px-4" value="Search"> - </div> - </div> - </div> - <a href="javascript:;" data-toggle="toggle" data-target="#advanced-search" class="float-right">Use advanced search</a> - <br> - - <div id="advanced-search"{% if not form.is_bound_advanced_search %} style="display: none"{% endif %} class="pb-3"> - {% for field in form %} - <div class="form-group"> - {% if field.id_for_label != 'id_omni' %} - {{ field.errors }} - {{ field|bootstrap 
}} - {% if field.help_text %} - <p class="help">{{ field.help_text|safe }}</p> - {% endif %} - {% endif %} - </div> - {% endfor %} - </div> - - <div class="text-muted"> - Found {{ object_list.count|intcomma }} results - {% if object_list %} - · - Order by: - <a href="?{% url_replace orderby='citations' page='' %}" class="d-inline-block mb-1 ml-2 active-bold {% active_get_request 'orderby' 'citations' %}{% if not request.GET.orderby %} active{% endif %}">Citations</a> - <a href="?{% url_replace orderby='journal' page='' %}" class="d-inline-block mb-1 ml-2 active-bold {% active_get_request 'orderby' 'journal' %}">Journal</a> - <a href="?{% url_replace orderby='name' page='' %}" class="d-inline-block mb-1 ml-2 active-bold {% active_get_request 'orderby' 'name' %}">Name</a> - - · - Show - <select name="results" class="form-control d-inline-block w-auto" onchange="this.form.submit()"> - <option value="10"{% if not request.GET.results or request.GET.results == '10' %} selected{% endif %}>10</option> - <option value="20"{% if request.GET.results == '20' %} selected{% endif %}>20</option> - <option value="50"{% if request.GET.results == '50' %} selected{% endif %}>50</option> - <option value="100"{% if request.GET.results == '100' %} selected{% endif %}>100</option> - </select> - results - {% endif %} - </div> - - </form> - </div> - - <div class="col-12"> - <hr> - - {% if is_paginated %} - <div class="pb-3"> - {% include 'partials/pagination.html' with page_obj=page_obj %} - </div> - {% endif %} - - <ul class="list-group list-group-flush"> - {% for citable in object_list %} - <li class="list-group-item"> - <div class="card-body px-0"> - {% include 'partials/citable_card_content.html' with citable=citable %} - </div> - </li> - {% empty %} - <p>No match found for your search query.</p> - {% endfor %} - </ul> - - {% if is_paginated %} - <div class="p-3"> - {% include 'partials/pagination.html' with page_obj=page_obj %} - </div> - {% endif %} - </div> - -</div> - -{% endblock 
content %} diff --git a/metacore/templates/partials/citable_card_content.html b/metacore/templates/partials/citable_card_content.html deleted file mode 100644 index 05c2e4e857e54e3e86f8014396ac92410a83e0a9..0000000000000000000000000000000000000000 --- a/metacore/templates/partials/citable_card_content.html +++ /dev/null @@ -1,24 +0,0 @@ -{% load humanize %} -{% load metacore_extras %} - -<div class="submission_title"> - <h3 class="card-title mb-0 submisssion_title">{{ citable.title }}</h3> - <div class="author_list mb-0">by {{ citable.authors|join_authors_list:16 }} - {% if citable.authors|length > 16 %} - · <a href="javascript:;" data-toggle="toggle" data-target="#authors-{{ citable.id }}">See all authors</a> - <div style="display: none;" id="authors-{{ citable.id }}" class="py-2">{{ citable.authors|join_authors_list }}</div> - {% endif %} - </div> -</div> - -{% block card_footer %} - <p class="text-muted mb-0 mt-3"> - Cited {{ citable.crossref_ref_count|intcomma }} times (CrossRef) / {{ citable.times_cited|intcomma }} times (SciPost Meta) - · doi: <a href="//doi.org/{{ citable.doi }}" target="_blank">{{ citable.doi }}</a> - <br> - Published {{ citable.publication_date|date:"d-m-Y" }} by <b>{{ citable.publisher }}</b> - {% if citable.journal %} - in <b>{{ citable.journal }}</b> - {% endif %} - </p> -{% endblock %} diff --git a/metacore/templatetags/__init__.py b/metacore/templatetags/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/metacore/templatetags/metacore_extras.py b/metacore/templatetags/metacore_extras.py deleted file mode 100644 index 56b45b0b472989c110ded9f95633bda28d653292..0000000000000000000000000000000000000000 --- a/metacore/templatetags/metacore_extras.py +++ /dev/null @@ -1,17 +0,0 @@ -__copyright__ = "Copyright 2016-2018, Stichting SciPost (SciPost Foundation)" -__license__ = "AGPL v3" - - -from django import template - -register = template.Library() - - 
@register.filter
def join_authors_list(authors, max_n=None):
    """Return the list of author names joined into one display string.

    Args:
        authors: Sequence of author name strings. May be empty or ``None``.
        max_n: Optional cut-off. When it is truthy and smaller than the
            number of authors, the output is shortened to the first
            ``max_n - 1`` names, followed by ``' ... '`` and the last name.

    Returns:
        ``''`` when there are no authors; the single name when there is
        exactly one; otherwise the names separated by ``', '`` with the last
        one introduced by ``' and '`` (or by ``' ... '`` when truncated).
    """
    if not authors:
        # Previously this fell through to ``authors[0]`` (IndexError) or
        # ``len(None)`` (TypeError), breaking template rendering for a
        # record without authors.
        return ''

    if max_n and max_n < len(authors):
        # Truncated form: keep the leading names and always show the last one.
        return ', '.join(authors[:max_n - 1]) + ' ... ' + authors[-1]

    if len(authors) > 1:
        return ', '.join(authors[:-1]) + ' and ' + authors[-1]

    return authors[0]
class CitableListView(ListView):
    """List view that shows ``Citable`` objects, filtered by a search form.

    The search form is bound to the request's GET parameters. The ``orderby``
    and ``results`` GET parameters select the ordering and the page size.
    """

    model = Citable
    template_name = 'citable_list.html'
    # Form class; replaced on the instance by the bound form in get_queryset().
    form = CitableSearchForm

    def get_queryset(self):
        """Return the ordered search results, or a short default listing.

        Binds the search form to ``request.GET`` (unbound when there are no
        GET parameters). When the form is valid its ``search_results()`` are
        used; otherwise the listing is ``Citable.objects.simple().limit(10)``.
        """
        self.form = self.form(self.request.GET or None)

        if self.form.is_valid():
            qs = self.form.search_results()
        else:
            qs = Citable.objects.simple().limit(10)
        return qs.order_by(self.get_ordering())

    def get_context_data(self, **kwargs):
        """Extend the default list context with the search form."""
        context = super().get_context_data(**kwargs)
        # Set by get_queryset(): the bound form instance in the normal flow.
        context['form'] = self.form
        return context

    def get_paginate_by(self, queryset):
        """Return the page size requested via ``?results=``, within 1..100.

        Non-numeric and non-positive values fall back to the default of 10.
        A value of 0 must not be returned: a falsy page size makes the list
        view skip pagination and render the complete result set.
        """
        try:
            requested = int(self.request.GET.get('results', 10))
        except ValueError:
            return 10
        if requested < 1:
            return 10
        return min(requested, 100)

    def get_ordering(self):
        """Translate the ``orderby`` GET parameter into an ordering key.

        ``name`` and ``journal`` map to ``-title`` and ``-journal``. A missing
        or any other value (including ``citations``) gives the default
        ``-metadata.is-referenced-by-count``.
        """
        orderings = {
            'name': '-title',
            'journal': '-journal',
        }
        return orderings.get(
            self.request.GET.get('orderby'),
            '-metadata.is-referenced-by-count')