From 4d279aca5e764e8d3b991d82d5fdbca303cb56a6 Mon Sep 17 00:00:00 2001 From: "J.-S. Caux" <J.S.Caux@uva.nl> Date: Tue, 11 Jul 2017 09:27:55 +0200 Subject: [PATCH] General metadata improvements (Crossref, CLOCKSS) --- journals/admin.py | 6 +- journals/migrations/0031_clockssmetadata.py | 24 ++++++ journals/models.py | 13 ++++ .../templates/journals/manage_metadata.html | 7 +- .../journals/publication_metadata_jats.xml | 22 ++++++ journals/urls/general.py | 3 + journals/views.py | 74 ++++++++++++++----- 7 files changed, 128 insertions(+), 21 deletions(-) create mode 100644 journals/migrations/0031_clockssmetadata.py create mode 100644 journals/templates/journals/publication_metadata_jats.xml diff --git a/journals/admin.py b/journals/admin.py index 167b0f62e..59f964b20 100644 --- a/journals/admin.py +++ b/journals/admin.py @@ -1,7 +1,8 @@ from django.contrib import admin, messages from django import forms -from journals.models import UnregisteredAuthor, Journal, Volume, Issue, Publication, Deposit +from journals.models import UnregisteredAuthor, Journal, Volume, Issue, Publication, \ + Deposit, CLOCKSSmetadata from scipost.models import Contributor from submissions.models import Submission @@ -79,3 +80,6 @@ class DepositAdmin(admin.ModelAdmin): admin.site.register(Deposit, DepositAdmin) + + +admin.site.register(CLOCKSSmetadata) diff --git a/journals/migrations/0031_clockssmetadata.py b/journals/migrations/0031_clockssmetadata.py new file mode 100644 index 000000000..e9f252ab1 --- /dev/null +++ b/journals/migrations/0031_clockssmetadata.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.10.3 on 2017-07-11 03:34 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('journals', '0030_auto_20170710_1051'), + ] + + operations = [ + migrations.CreateModel( + name='CLOCKSSmetadata', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('metadata_xml_file_CLOCKSS', models.FileField(blank=True, max_length=512, null=True, upload_to='')), + ('publication', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='journals.Publication')), + ], + ), + ] diff --git a/journals/models.py b/journals/models.py index 3f5454a8f..e79ea87f0 100644 --- a/journals/models.py +++ b/journals/models.py @@ -236,3 +236,16 @@ class Deposit(models.Model): def __str__(self): return (self.deposition_date.strftime('%Y-%m-%D') + ' for 10.21468/' + self.publication.doi_label) + +class CLOCKSSmetadata(models.Model): + """ + For the CLOCKSS archive, JATS formatted XML is produced. + """ + publication = models.ForeignKey(Publication, on_delete=models.CASCADE) + metadata_xml_file_CLOCKSS = models.FileField(blank=True, null=True, max_length=512) + + class Meta: + verbose_name = 'CLOCKSS metadata' + + def __str__(self): + return ('CLOCKSS metadata for 10.21468/' + self.publication.doi_label) diff --git a/journals/templates/journals/manage_metadata.html b/journals/templates/journals/manage_metadata.html index 2d5ec8c24..839694a80 100644 --- a/journals/templates/journals/manage_metadata.html +++ b/journals/templates/journals/manage_metadata.html @@ -35,6 +35,7 @@ event: "focusin" <th>Publication date</th> <th>Latest metadata update</th> <th>Latest successful Crossref deposit</th> + <th>CLOCKSS file exists?</th> </tr> </thead> @@ -51,9 +52,10 @@ event: "focusin" <td>No info available</td> {% endif %} <td>{{ publication|latest_successful_crossref_deposit }}</td> + <td>{% if publication.clockssmetadata_set.all.exists %}Yes{% else %}No{% endif %}</td> </tr> <tr id="collapse{{ publication.id }}" class="collapse" role="tabpanel" aria-labelledby="heading{{ publication.id }}" style="background-color: #fff;"> - <td colspan="4"> + <td colspan="5"> <h3 class="ml-3">Actions</h3> <ul> <li>Mark the first author (currently: {% if publication.first_author %}{{ publication.first_author }} {% elif publication.first_author_unregistered %}{{ publication.first_author_unregistered }} (unregistered){% endif %}) @@ -87,8 +89,9 @@ event: "focusin" <li><a href="{% url 'journals:create_metadata_xml' publication.doi_label %}">(re)create metadata</a></li> <li><a href="{% url 'journals:metadata_xml_deposit' publication.doi_label 'test' %}">Test metadata deposit (via Crossref test server)</a></li> <li><a href="{% url 'journals:metadata_xml_deposit' publication.doi_label 'deposit' %}">Deposit the metadata to Crossref</a></li> + <li><a href="{% url 'journals:produce_CLOCKSS_metadata_file' doi_label=publication.doi_label %}">Produce CLOCKSS metadata file</a></li> </ul> - <h3 class="ml-3">Deposits</h3> + <h3 class="ml-3">Crossref Deposits</h3> <table class="ml-5"> <thead class="thead-default"> <th>Timestamp</th> diff --git a/journals/templates/journals/publication_metadata_jats.xml b/journals/templates/journals/publication_metadata_jats.xml new file mode 100644 index 000000000..6f97e3fb7 --- /dev/null +++ b/journals/templates/journals/publication_metadata_jats.xml @@ -0,0 +1,22 @@ +<article dtd-version="1.1d3"> +<front> +<journal-meta> +<publisher> +<publisher-name>SciPost</publisher-name> +</publisher> +<issn>{{ publication.in_issue.in_volume.in_journal.issn }}</issn> +</journal-meta> +<article-meta> +<title-group> +<article-title>{{ publication.title }}</article-title> +</title-group> +<article-id pub-id-type="doi">{{ publication.doi_string }}</article-id> +<volume>{{ publication.in_issue.in_volume.number }}</volume> +<issue>{{ publication.in_issue.number }}</issue> +<pub-date publication-format="epub" date-type="pub" iso-8601-date="{{ publication.publication_date|date:'Y-m-d' }}"> +<day>{{ publication.publication_date|date:'d' }}</day> +<month>{{ publication.publication_date|date:'m' }}</month> +<year>{{ publication.publication_date|date:'Y' }}</year> +</pub-date></article-meta> +</front> +</article> diff --git a/journals/urls/general.py b/journals/urls/general.py index 3fd938879..b9ca2cef6 100644 --- a/journals/urls/general.py +++ b/journals/urls/general.py @@ -55,6 +55,9 @@ urlpatterns = [ url(r'^mark_deposit_success/(?P<deposit_id>[0-9]+)/(?P<success>[0-1])$', journals_views.mark_deposit_success, name='mark_deposit_success'), + url(r'^produce_CLOCKSS_metadata_file/(?P<doi_label>[a-zA-Z]+.[0-9]+.[0-9]+.[0-9]{3,})$', + journals_views.produce_CLOCKSS_metadata_file, + name='produce_CLOCKSS_metadata_file'), url(r'^harvest_citedby_list/$', journals_views.harvest_citedby_list, name='harvest_citedby_list'), diff --git a/journals/views.py b/journals/views.py index 4eb5a6810..b4c25fbbf 100644 --- a/journals/views.py +++ b/journals/views.py @@ -11,6 +11,8 @@ from django.conf import settings from django.contrib import messages from django.utils import timezone from django.shortcuts import get_object_or_404, render, redirect +from django.template import Template, Context +from django.template.loader import get_template from django.db import transaction from django.http import HttpResponse @@ -21,7 +23,7 @@ from .forms import FundingInfoForm, InitiatePublicationForm, ValidatePublication UnregisteredAuthorForm, CreateMetadataXMLForm, CitationListBibitemsForm from .utils import JournalUtils -from journals.models import Publication, Deposit +from journals.models import Publication, Deposit, CLOCKSSmetadata from submissions.models import Submission from scipost.models import Contributor @@ -475,7 +477,7 @@ def create_metadata_xml(request, doi_label): '<abbrev_title>' + publication.in_issue.in_volume.in_journal.get_abbreviation_citation() + '</abbrev_title>\n' - '<issn>' + publication.in_issue.in_volume.in_journal.issn + '</issn>\n' + '<issn media_type=\'electronic\'>' + publication.in_issue.in_volume.in_journal.issn + '</issn>\n' '<doi_data>\n' '<doi>' + publication.in_issue.in_volume.in_journal.doi_string + '</doi>\n' '<resource>https://scipost.org/' @@ -548,6 +550,7 @@ def create_metadata_xml(request, doi_label): + publication.doi_string + '/pdf</resource>\n' '</item></collection>\n' '</doi_data>\n' + '<pages><first_page>' + publication.paper_nr + '</first_page></pages>\n' ) try: if publication.metadata['citation_list']: @@ -608,22 +611,30 @@ def metadata_xml_deposit(request, doi_label, option='test'): response_headers = r.headers response_text = r.text - # Then, if deposit, create the associated Deposit object (saving the metadata to a file) - content = ContentFile(publication.metadata_xml) - timestamp = (publication.metadata_xml.partition( - '<timestamp>'))[2].partition('</timestamp>')[0] - doi_batch_id = (publication.metadata_xml.partition( - '<doi_batch_id>'))[2].partition('</doi_batch_id>')[0] - path = (settings.MEDIA_ROOT + publication.in_issue.path + '/' - + publication.get_paper_nr() + '/' + publication.doi_label.replace('.', '_') - + '_' + timestamp + '.xml') - deposit = Deposit(publication=publication, timestamp=timestamp, doi_batch_id=doi_batch_id, - metadata_xml=publication.metadata_xml, deposition_date=timezone.now()) - deposit.metadata_xml_file.save(path, content) - deposit.response_text = r.text - deposit.save() - publication.latest_crossref_deposit = timezone.now() - publication.save() + # Then create the associated Deposit object (saving the metadata to a file) + if option == 'deposit': + content = ContentFile(publication.metadata_xml) + timestamp = (publication.metadata_xml.partition( + '<timestamp>'))[2].partition('</timestamp>')[0] + doi_batch_id = (publication.metadata_xml.partition( + '<doi_batch_id>'))[2].partition('</doi_batch_id>')[0] + path = (settings.MEDIA_ROOT + publication.in_issue.path + '/' + + publication.get_paper_nr() + '/' + publication.doi_label.replace('.', '_') + + '_' + timestamp + '.xml') + deposit = Deposit(publication=publication, timestamp=timestamp, doi_batch_id=doi_batch_id, + metadata_xml=publication.metadata_xml, deposition_date=timezone.now()) + deposit.metadata_xml_file.save(path, content) + deposit.response_text = r.text + deposit.save() + publication.latest_crossref_deposit = timezone.now() + publication.save() + # Save a copy to the filename without timestamp + path1 = (settings.MEDIA_ROOT + publication.in_issue.path + '/' + + publication.get_paper_nr() + '/' + publication.doi_label.replace('.', '_') + + '.xml') + f = open(path1, 'w') + f.write(xml) + f.close() context = { 'option': option, @@ -645,6 +656,33 @@ def mark_deposit_success(request, deposit_id, success): return redirect(reverse('journals:manage_metadata')) +@permission_required('scipost.can_publish_accepted_submission', return_403=True) +def produce_CLOCKSS_metadata_file(request, doi_label): + publication = get_object_or_404(Publication, doi_label=doi_label) + context = Context({'publication': publication,}) + xml = get_template('journals/publication_metadata_jats.xml').render(context) + content = ContentFile(xml) + timestamp = (publication.metadata_xml.partition( + '<timestamp>'))[2].partition('</timestamp>')[0] + path = (settings.MEDIA_ROOT + publication.in_issue.path + '/' + + publication.get_paper_nr() + '/' + publication.doi_label.replace('.', '_') + + '_CLOCKSS_' + timestamp + '.xml') + if os.path.isfile(path): + errormessage = 'The CLOCKSS metadata file for this metadata timestamp already exists' + return render(request, 'scipost/error.html', context={'errormessage': errormessage}) + clockssmeta = CLOCKSSmetadata(publication=publication) + clockssmeta.metadata_xml_file_CLOCKSS.save(path, content) + clockssmeta.save() + # Save a copy to the filename without timestamp + path1 = (settings.MEDIA_ROOT + publication.in_issue.path + '/' + + publication.get_paper_nr() + '/' + publication.doi_label.replace('.', '_') + + '_CLOCKSS.xml') + f = open(path1, 'w') + f.write(xml) + f.close() + return redirect(reverse('journals:manage_metadata')) + + @permission_required('scipost.can_publish_accepted_submission', return_403=True) def harvest_citedby_list(request): publications = Publication.objects.order_by('-publication_date') -- GitLab