SciPost Code Repository

Skip to content
Snippets Groups Projects
Commit 4e987d3e authored by Boris Ponsioen's avatar Boris Ponsioen
Browse files

Adds background tasks for importing into citables, as well as admin panel for metacore journals

parent 95dfb5c7
No related branches found
No related tags found
No related merge requests found
......@@ -107,6 +107,7 @@ INSTALLED_APPS = (
'petitions',
'webpack_loader',
'metacore',
'background_task',
)
......
from django.contrib import admin
from .models import Citable, CitableWithDOI
from .services import get_crossref_test
from django.contrib import messages
from .models import Citable, CitableWithDOI, Journal
from .services import get_crossref_test, import_journal_full, get_crossref_work_count
# Register your models here.
# def import_data_from_crossref(modeladmin, request, queryset):
# get_crossref_test()
class JournalAdmin(admin.ModelAdmin):
    """
    Admin panel for Metacore journals.

    Provides two bulk actions (full import from CrossRef as a background
    task, and refreshing the stored work counts) and hides the default
    'delete_selected' action.
    """

    fields = ('name', 'ISSN_digital', 'last_full_sync')
    list_display = ('name', 'ISSN_digital', 'last_full_sync', 'count_metacore', 'count_crossref')
    actions = ['import_full', 'update_counts']

    def import_full(self, request, queryset):
        """ Adds one background import task per selected journal """
        notice = 'Import task for journal {} added. Go to Background Tasks -> Tasks in admin to view them'
        for journal in queryset:
            import_journal_full(journal.ISSN_digital)
            messages.info(request, notice.format(journal.name))

        # The tasks are only queued here; a separate worker has to pick them up
        messages.warning(request, 'Make sure to start the tasks by running ./manage.py process_tasks')

    def update_counts(self, request, queryset):
        """ Refreshes and saves the Metacore and CrossRef work counts of the selected journals """
        for journal in queryset:
            issn = journal.ISSN_digital
            journal.count_metacore = Citable.objects(metadata__ISSN=issn).count()
            journal.count_crossref = get_crossref_work_count(issn)
            journal.save()

        messages.info(request, 'Counts updated.')

    def get_actions(self, request):
        """ Returns the available admin actions without 'delete_selected' """
        available = super().get_actions(request)
        available.pop('delete_selected', None)
        return available
admin.site.register(Journal, JournalAdmin)
......@@ -8,6 +8,7 @@ class CitableSearchForm(forms.Form):
author = forms.CharField(max_length=100, required=False, label="Author(s)")
title = forms.CharField(max_length=100, required=False)
publisher = forms.CharField(max_length=100, required=False)
journal = forms.CharField(max_length=100, required=False)
def search_results(self):
"""Return all Citable objects according to search"""
......@@ -16,6 +17,7 @@ class CitableSearchForm(forms.Form):
title__icontains=self.cleaned_data.get('title', ''),
authors__icontains=self.cleaned_data.get('author', ''),
publisher__icontains=self.cleaned_data.get('publisher', ''),
**{'metadata__container-title__icontains': self.cleaned_data.get('journal', '')},
)
else:
"""If a text index is present, search using the authors/title box is enables"""
......@@ -23,5 +25,6 @@ class CitableSearchForm(forms.Form):
title__icontains=self.cleaned_data.get('title', ''),
authors__icontains=self.cleaned_data.get('author', ''),
publisher__icontains=self.cleaned_data.get('publisher', ''),
**{'metadata__container-title__icontains': self.cleaned_data.get('journal', '')},
).omni_search(self.cleaned_data.get('omni'), 'and')
......@@ -10,7 +10,7 @@ class CitableQuerySet(QuerySet):
return self.only('references').filter(references=dois)
def simple(self):
    """
    Limit the queryset to the fields used for compact result listings.

    Includes 'metadata.container-title' so the journal name is loaded with
    the other summary fields. A stale earlier return statement without that
    field used to shadow this one.
    """
    return self.only(
        'doi',
        'title',
        'authors',
        'metadata.is-referenced-by-count',
        'publication_date',
        'publisher',
        'metadata.container-title',
    )
def prl(self):
    """
    Filter on documents whose metadata ISSN equals '0031-9007'
    (presumably Physical Review Letters, going by the method name).
    """
    issn_filter = {'metadata__ISSN': '0031-9007'}
    return self.filter(**issn_filter)
......
# -*- coding: utf-8 -*-
# Generated by Django 1.11.4 on 2018-04-17 06:46
from __future__ import unicode_literals
import django.core.validators
from django.db import migrations, models
# Initial metacore migration: creates the table behind the Journal model.
class Migration(migrations.Migration):
# Marked as the initial migration of the app; it depends on nothing else.
initial = True
dependencies = [
]
operations = [
migrations.CreateModel(
name='Journal',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=250)),
# Both ISSN fields must match four digits, a dash, three digits and a final digit or 'X'.
('ISSN_digital', models.CharField(max_length=9, validators=[django.core.validators.RegexValidator('^[0-9]{4}-[0-9]{3}[0-9X]$')])),
('ISSN_print', models.CharField(blank=True, max_length=9, null=True, validators=[django.core.validators.RegexValidator('^[0-9]{4}-[0-9]{3}[0-9X]$')])),
# Optional bookkeeping fields; all three may be left empty.
('last_full_sync', models.DateTimeField(blank=True, null=True)),
('last_cursor', models.CharField(blank=True, max_length=250, null=True)),
('last_errors', models.TextField(blank=True, null=True)),
],
),
]
# -*- coding: utf-8 -*-
# Generated by Django 1.11.4 on 2018-04-17 08:36
from __future__ import unicode_literals
from django.db import migrations, models
# Second metacore migration: adds the two optional work-count columns to Journal.
class Migration(migrations.Migration):
# Must run after the migration that creates the Journal table.
dependencies = [
('metacore', '0001_initial'),
]
operations = [
# Both counters are nullable integers, so existing rows need no default.
migrations.AddField(
model_name='journal',
name='count_crossref',
field=models.IntegerField(blank=True, null=True),
),
migrations.AddField(
model_name='journal',
name='count_metacore',
field=models.IntegerField(blank=True, null=True),
),
]
from django.db import models
from django.conf import settings
from django.core.validators import RegexValidator
import requests
from mongoengine import connect, DynamicDocument, ListField, StringField,\
DynamicField, URLField, DateTimeField
from .managers import CitableQuerySet
# Make the connection to MongoDB - this could be put in settings.py as well
# It uses default settings for the mongo server
connect(settings.MONGO_DATABASE['database'],
......@@ -31,6 +32,7 @@ class Citable(DynamicDocument):
publisher = StringField()
license = URLField()
publication_date = DateTimeField()
journal = StringField()
# Dump all the raw source metadata here
metadata = DynamicField()
......@@ -38,7 +40,7 @@ class Citable(DynamicDocument):
# Settings for mongoengine
meta = {
'queryset_class': CitableQuerySet, # use the custom queryset
'indexes': ['doi', 'title', 'publication_date', 'publisher', 'references'], # define indices on database
'indexes': ['doi', 'title', 'publication_date', 'publisher', 'references', 'journal'], # define indices on database
'allow_inheritance': True
}
"""
......@@ -46,7 +48,7 @@ class Citable(DynamicDocument):
This should be in the readme, but I'll temporarily add it here for ease of use:
For the text index, execute this in the mongo shell:
use scipost
db.citable.createIndex({authors: "text", title: "text"})
db.citable.createIndex({authors: "text", title: "text", journal: "text"})
"""
def times_cited(self):
......@@ -68,3 +70,54 @@ class CitableWithDOI(Citable):
def times_cited(self):
return CitableWithDOI.objects.cited_by(self.doi).count()
class Journal(models.Model):
    """ Provides interface for importing citables of a journal into Metacore """

    name = models.CharField(
        max_length=250,
        blank=False)
    # ISSN format: four digits, a dash, three digits and a final digit or 'X'
    ISSN_digital = models.CharField(
        max_length=9,
        validators=[RegexValidator(r'^[0-9]{4}-[0-9]{3}[0-9X]$')],
        blank=False)
    ISSN_print = models.CharField(
        max_length=9,
        validators=[RegexValidator(r'^[0-9]{4}-[0-9]{3}[0-9X]$')],
        blank=True, null=True)
    last_full_sync = models.DateTimeField(blank=True, null=True)
    last_cursor = models.CharField(
        max_length=250,
        blank=True, null=True)
    last_errors = models.TextField(
        blank=True, null=True)
    # Number of works stored in Metacore / reported by CrossRef for this journal
    count_metacore = models.IntegerField(blank=True, null=True)
    count_crossref = models.IntegerField(blank=True, null=True)

    def update_count_metacore(self):
        """
        Sets count_metacore to the number of citables in Metacore whose
        metadata ISSN matches the digital ISSN of this journal.
        Does not save the model instance.
        """
        self.count_metacore = Citable.objects(metadata__ISSN=self.ISSN_digital).count()

    def update_count_crossref(self):
        """
        Sets count_crossref to the total number of citables that are present
        in CR for the digital ISSN of this journal. The value is left
        untouched if the response has no 'total-results'.
        Does not save the model instance.

        Needs to be merged with .services but need to work out imports first (circular)
        """
        # Formulate the CR query
        url = 'https://api.crossref.org/journals/{}/works'.format(self.ISSN_digital)

        # Zero rows: only the result summary is needed, not the works themselves
        params = {'rows': 0, 'mailto': 'b.g.t.ponsioen@uva.nl'}
        response = requests.get(url, params=params)
        result = response.json()['message']

        if 'total-results' in result:
            # This is the CrossRef count, so it must not overwrite count_metacore
            self.count_crossref = result['total-results']
import requests
from .models import Citable, CitableWithDOI
from background_task import background
import logging
logger = logging.getLogger(__name__)
@background()
def import_journal_full(issn, cursor='*'):
    """
    Task to query CrossRef for all works of a journal with given ISSN
    and store them in the Metacore mongo database

    issn -- ISSN used in the CrossRef /journals/<issn>/works URL
    cursor -- cursor value sent with the first request (default '*')

    Works are fetched in batches of `rows` items, following the 'next-cursor'
    of each response, until a batch comes back with fewer than `rows` items
    or `batches` requests have been made.
    """
    # Formulate the CR query
    url = 'https://api.crossref.org/journals/{}/works'.format(issn)

    # If the loop is allowed to complete, it fetches (rows * batches) records
    rows = 500
    batches = 2000

    last_cursor = cursor
    for batch in range(batches):
        # Arguments are passed with explicit %s placeholders; without them the
        # logging module reports a formatting error and the message is lost.
        logger.info("-------------------------------")
        logger.info("Batch %s", batch)
        logger.info("Last cursor: %s", last_cursor)
        logger.info("Current cursor: %s", cursor)

        params = {'cursor': cursor, 'rows': rows, 'mailto': 'b.g.t.ponsioen@uva.nl'}
        last_cursor = cursor
        # NOTE(review): no timeout is set, so a stalled connection blocks the task
        response = requests.get(url, params=params)
        message = response.json()['message']

        citables_json = message['items']
        cursor = message['next-cursor']
        number_of_results = len(citables_json)

        # Parser returns False if there's an error; keep only the successes
        parsed = [parse_crossref_citable(item) for item in citables_json]
        citables = [citable for citable in parsed if citable]

        failed = len(parsed) - len(citables)
        if failed:
            logger.warning("%s of %s items in this batch could not be parsed", failed, len(parsed))

        # Mass insert in database (will fail on encountering existing documents
        # with same DOI)
        if citables:
            Citable.objects.insert(citables)

        # A short batch means there is nothing left to fetch
        if number_of_results < rows:
            logger.info(number_of_results)
            logger.info('End reached.')
            break
def get_crossref_work_count(issn):
    """
    Returns the total number of citables that are present in CR for a given ISSN,
    or None if the response message has no 'total-results' entry
    """
    response = requests.get(
        'https://api.crossref.org/journals/{}/works'.format(issn),
        # Zero rows: only the summary of the result set is wanted
        params={'rows': 0, 'mailto': 'b.g.t.ponsioen@uva.nl'},
    )
    message = response.json()['message']

    if 'total-results' not in message:
        return None

    return message['total-results']
def get_crossref_test(cursor='*'):
"""
......@@ -23,10 +100,14 @@ def get_crossref_test(cursor='*'):
last_cursor = cursor
for i in range(0,batches):
print("-------------------------------")
print("Batch %s" % (i, ))
print("Last cursor: ", last_cursor)
print("Current cursor: ", cursor)
# print("-------------------------------")
# print("Batch %s" % (i, ))
# print("Last cursor: ", last_cursor)
# print("Current cursor: ", cursor)
logger.info("-------------------------------")
logger.info("Batch %s" % (i, ))
logger.info("Last cursor: ", last_cursor)
logger.info("Current cursor: ", cursor)
params = {'cursor': cursor, 'rows': rows, 'mailto': 'b.g.t.ponsioen@uva.nl'}
last_cursor = cursor
......@@ -51,8 +132,10 @@ def get_crossref_test(cursor='*'):
citable = []
if number_of_results < rows:
print(number_of_results)
print('End reached.')
# print(number_of_results)
# print('End reached.')
logger.info(number_of_results)
logger.info('End reached.')
break
def convert_doi_to_lower_case():
......@@ -107,6 +190,9 @@ def parse_crossref_citable(citable_item):
else:
license = ''
if 'container-title' in citable_item:
journal = citable_item['container-title'][0]
return CitableWithDOI(doi=doi, references=references, authors=authors, publisher=publisher, title=title,
publication_date=publication_date, license=license, metadata=citable_item)
......@@ -115,6 +201,11 @@ def parse_crossref_citable(citable_item):
# print(e)
# # raise
except Exception as e:
print("Error: ", e)
print(citable_item['DOI'])
# print("Error: ", e)
# print(citable_item['DOI'])
# print(citable_item.keys())
logger.error("Error: ", e)
logger.error(citable_item['DOI'])
logger.error(citable_item.keys())
return False
......@@ -44,7 +44,7 @@
<div class="fieldWrapper">
{{ form.omni.errors }}
<label for="{{ form.subject.id_for_label }}">Author and/or title (matches full words)</label>
<label for="{{ form.subject.id_for_label }}">Author, title, journal (matches full words)</label>
{{ form.omni }}
</div>
<br/>
......
......@@ -12,5 +12,9 @@
| DOI <a href='https://doi.org/{{ citable.doi }}'> {{ citable.doi }} </a>
<br>
Published {{ citable.publication_date|date:"d-m-Y" }} by {{ citable.publisher }}
{% if citable.journal %}
<br>
in {{ citable.journal }}
{% endif %}
</p>
{% endblock %}
......@@ -52,7 +52,7 @@ html2text
# Mongo (Metacore)
mongoengine==0.15.0
django-background-tasks==1.1.13
# Possibly dead (most probably not used anymore and possibly not up-to-date packages) -- JdW (August 15th, 2017)
imagesize==0.7.1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment