diff --git a/metacore/admin.py b/metacore/admin.py
index f92ab10262e9e6bc42cecf2f24cec54eacab1214..2cefe742154728c4c727d612844235e7fd1d9dfe 100644
--- a/metacore/admin.py
+++ b/metacore/admin.py
@@ -6,8 +6,8 @@ from .services import get_crossref_test, import_journal_full, get_crossref_work_
 # Register your models here.
 class JournalAdmin(admin.ModelAdmin):
     fields = ('name', 'ISSN_digital', 'last_full_sync')
-    list_display = ('name', 'ISSN_digital', 'last_full_sync', 'count_metacore', 'count_crossref')
-    actions = ['import_full', 'update_counts', 'add_journal_to_items']
+    list_display = ('name', 'ISSN_digital', 'last_full_sync', 'count_metacore', 'count_crossref', 'last_update')
+    actions = ['import_full', 'update_counts', 'add_journal_to_items', 'delete_all_citables']
 
     def import_full(self, request, queryset):
         """ Starts background task to import all works by this journal """
@@ -32,6 +32,12 @@ class JournalAdmin(admin.ModelAdmin):
         messages.add_message(request, messages.WARNING,
             'Make sure that "./manage.py process_tasks" is running (otherwise start it).')
 
+    def delete_all_citables(self, request, queryset):
+        """ Purges the citables of every selected journal and reports each purge to the admin user """
+        for journal in queryset:
+            journal.purge_citables()
+            messages.add_message(request, messages.INFO, 'All citables from journal "{}" deleted.'.format(journal.name))
+
     def get_actions(self, request):
         actions = super().get_actions(request)
         if 'delete_selected' in actions:
diff --git a/metacore/migrations/0003_auto_20180508_0911.py b/metacore/migrations/0003_auto_20180508_0911.py
new file mode 100644
index 0000000000000000000000000000000000000000..530a9a2ec6de7be8b7444b7a4a80ae049a3dd534
--- /dev/null
+++ b/metacore/migrations/0003_auto_20180508_0911.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.4 on 2018-05-08 07:11
+from __future__ import unicode_literals
+
+import django.core.validators
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('metacore', '0002_auto_20180417_1036'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='journal',
+            name='count_running',
+            field=models.IntegerField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name='journal',
+            name='last_update',
+            field=models.DateTimeField(blank=True, null=True),
+        ),
+        migrations.AlterField(
+            model_name='journal',
+            name='ISSN_digital',
+            field=models.CharField(max_length=9, unique=True, validators=[django.core.validators.RegexValidator('^[0-9]{4}-[0-9]{3}[0-9X]$')]),
+        ),
+        migrations.AlterField(
+            model_name='journal',
+            name='ISSN_print',
+            field=models.CharField(blank=True, max_length=9, null=True, unique=True, validators=[django.core.validators.RegexValidator('^[0-9]{4}-[0-9]{3}[0-9X]$')]),
+        ),
+    ]
diff --git a/metacore/migrations/0004_auto_20180508_0916.py b/metacore/migrations/0004_auto_20180508_0916.py
new file mode 100644
index 0000000000000000000000000000000000000000..4261c3727d56e70c227ce1f81e8c1ec13569e574
--- /dev/null
+++ b/metacore/migrations/0004_auto_20180508_0916.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.4 on 2018-05-08 07:16
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('metacore', '0003_auto_20180508_0911'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='journal',
+            name='last_update',
+            field=models.DateTimeField(auto_now=True, null=True),
+        ),
+    ]
diff --git a/metacore/models.py b/metacore/models.py
index f251ce049a10313740383280f368aded42b231c9..fd8962ba432efab07d5acca289c96dd751f29ad8 100644
--- a/metacore/models.py
+++ b/metacore/models.py
@@ -80,11 +80,12 @@ class Journal(models.Model):
     ISSN_digital = models.CharField(
         max_length=9,
         validators=[RegexValidator(r'^[0-9]{4}-[0-9]{3}[0-9X]$')],
-        blank=False)
+        blank=False, unique=True)
+    # Print ISSN not used right now, but there for future use
     ISSN_print = models.CharField(
         max_length=9,
         validators=[RegexValidator(r'^[0-9]{4}-[0-9]{3}[0-9X]$')],
-        blank=True, null=True)
+        blank=True, null=True, unique=True)
     last_full_sync = models.DateTimeField(blank=True, null=True)
     last_cursor = models.CharField(
         max_length=250,
@@ -93,6 +94,9 @@ class Journal(models.Model):
         blank=True, null=True)
     count_metacore = models.IntegerField(blank=True, null=True)
     count_crossref = models.IntegerField(blank=True, null=True)
+    count_running = models.IntegerField(blank=True, null=True)  # Tracks progress during import tasks
+    last_update = models.DateTimeField(blank=True, null=True, auto_now=True)  # auto_now: refreshed on every save()
+
 
 
     def update_count_metacore(self):
@@ -121,3 +125,11 @@ class Journal(models.Model):
         if 'total-results' in result:
             self.count_metacore = result['total-results']
 
+
+    def purge_citables(self):
+        """
+        This will delete all citables with their issn set to this journal's issn!
+        """
+
+        Citable.objects(metadata__ISSN=self.ISSN_digital).delete()
+
diff --git a/metacore/services.py b/metacore/services.py
index 0d0462df0a86a615723f7776127b294abb53dc47..df5006f88dbced1baed8223d648ac16b5bfa199f 100644
--- a/metacore/services.py
+++ b/metacore/services.py
@@ -1,6 +1,7 @@
 import requests
-from .models import Citable, CitableWithDOI
+from .models import Citable, CitableWithDOI, Journal
 from background_task import background
+from django.utils import timezone
 import logging
 
 logger = logging.getLogger(__name__)
@@ -12,6 +13,8 @@ def import_journal_full(issn, cursor='*'):
     and store them in the Metacore mongo database
     """
 
+    # NOTE: per-batch progress tracking on the Journal is not active yet (see the TODO in the loop below)
+
     # Formulate the CR query
     url = 'https://api.crossref.org/journals/{}/works'.format(issn)
 
@@ -19,6 +22,7 @@ def import_journal_full(issn, cursor='*'):
     rows = 500
     batches = 2000
     last_cursor = cursor
+    total_processed = 0
 
     for i in range(0,batches):
         # print("-------------------------------")
@@ -52,6 +56,14 @@ def import_journal_full(issn, cursor='*'):
 
         citable = []
 
+        # Save current count so progress can be tracked in the admin page
+        # TODO: make this work (currently only executed after the whole
+        # import task is completed!)
+        # total_processed += number_of_results
+        # Journal.objects.filter(ISSN_digital=issn).update(count_running = total_processed)
+        # logger.info('Journal count updated')
+        # print('Journal count updated to {}.'.format(Journal.objects.get(ISSN_digital=issn).count_running))
+
         if number_of_results < rows:
             # print(number_of_results)
             # print('End reached.')
@@ -59,6 +71,19 @@ def import_journal_full(issn, cursor='*'):
             logger.info('End reached.')
             break
 
+    # Get a full count when done (Crossref is queried once and the result reused below)
+    current_count = get_crossref_work_count(issn)
+
+    journal = Journal.objects.get(ISSN_digital=issn)
+    journal.count_metacore = Citable.objects(metadata__ISSN=issn).count()
+    journal.count_crossref = current_count
+
+    # Only record a full sync when the local count matches the Crossref count
+    if journal.count_metacore == journal.count_crossref:
+        journal.last_full_sync = timezone.now()
+
+    journal.save()
+
 
 def get_crossref_work_count(issn):
     """ Returns the total number of citables that are present in CR for a given ISSN