diff --git a/README.md b/README.md index cf1bf6acfba45c96c6b4826965dcdecc43b2893e..405aa98b76940c1be82eccc9f29c84811f730edf 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,41 @@ higher. Python dependencies are listed in `requirements.txt`. Frontend dependenc Make sure that PostgreSQL is installed and running and that a database with user is set up. A good guide how to do this can be found [here](https://djangogirls.gitbooks.io/django-girls-tutorial-extensions/content/optional_postgresql_installation/) (NOTE: stop before the 'Update settings' part). +#### MongoDB +The metacore part of the project runs on a [MongoDB database](https://docs.mongodb.com/manual/installation/). Make sure to install a MongoDB as well. Eg. on MacOS: + +```shell +$ brew update +$ brew install mongodb +``` + +After installing, create the database: +```shell +$ mongo +> use <database> +switched to db <database> +``` + +To create a user for the database: +```shell +> db.createUser({ +user: "<name>", +pwd: "<cleartext password>", +roles: [{ role: "readWrite", db: "<database>" }] +}) +``` + +The following MongoDB configuration is set by default, you may overwrite it in your local settings: +```python +MONGO_DATABASE = { + 'database': 'scipost', + 'host': 'localhost', + 'user': '', + 'password': '', + 'port': '27017', +} +``` + ### Python version Make sure you're using Python 3.5. You are strongly encouraged to use a [virtual environment](https://docs.python.org/3.5/library/venv.html). @@ -314,3 +349,50 @@ SHELL_PLUS_POST_IMPORTS = ( ('comments.factories', ('CommentFactory')), ) ``` + +## Scheduled tasks +The tasks that involve large requests from CR are supposed to run in the background. For this to work, Celery is required. The following commands assume that you are in the `scipost_v1` main folder, inside the right virtual environment. + +Celery depends on a broker, for which we use RabbitMQ. 
On MacOS one may simply install this by executing: + +```shell +$ brew update +$ brew install rabbitmq +``` + +To start the RabbitMQ broker: +```bash +nohup nice rabbitmq-server > ../logs/rabbitmq.log 2>&1 & +``` + +Then the Celery worker itself: +```bash +nohup nice celery -A SciPost_v1 worker --loglevel=info -E > ../logs/celery_worker.log 2>&1 & +``` + +And finally `beat`, which enables setting up periodic tasks: +```bash +nohup nice celery -A SciPost_v1 beat --loglevel=info --scheduler django_celery_beat.schedulers:DatabaseScheduler > ../logs/celery_beat.log 2>&1 & +``` + +Note: on the staging server, these commands are contained in two shell scripts in the `scipoststg` home folder. Just run +```bash +./start_celery.sh +``` + +## Metacore (still in development) +The Metacore app for citables, sourced - for now only - from Crossref, is available at /metacore. +In order to get it running on the server (right now implemented on staging), the following things need to be running: + +First of all the Mongo daemon (staging server): +```bash +/home/scipoststg/webapps/mongo/mongodb-linux-x86_64-amazon-3.6.3/bin/mongod --auth --dbpath /home/scipoststg/webapps/mongo/data --port 21145 --logpath /home/scipoststg/webapps/scipost/logs/mongod.log --fork +``` + +### Indexing +The search methods use the mongo text index for authors/title. They are defined through +the mongo shell. 
Execute the following in the mongo shell: +```bash +use scipost +db.citable.createIndex({authors: "text", title: "text", journal: "text"}) +``` diff --git a/SciPost_v1/settings/base.py b/SciPost_v1/settings/base.py index 966b7dd417e41ac276a9faeb7dc81ff4049792d4..013e1aebd949946a97e5e7ecbaddc2b27d4902e3 100644 --- a/SciPost_v1/settings/base.py +++ b/SciPost_v1/settings/base.py @@ -74,20 +74,17 @@ INSTALLED_APPS = ( 'django.contrib.admindocs', 'django.contrib.auth', 'django.contrib.contenttypes', + 'django.contrib.humanize', 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', 'django.contrib.sites', 'django_countries', 'django_extensions', - 'django_mathjax', + # 'django_mathjax', + 'affiliations', 'ajax_select', 'captcha', - 'guardian', - 'haystack', - 'rest_framework', - 'sphinxdoc', - 'affiliations', 'colleges', 'commentaries', 'comments', @@ -96,27 +93,32 @@ INSTALLED_APPS = ( 'django_celery_beat', 'finances', 'guides', + 'guardian', + # 'haystack', 'invitations', 'journals', - 'mails', 'mailing_lists', + 'mails', + 'metacore', 'news', 'notifications', + 'partners', + 'petitions', + 'proceedings', + 'production', + 'rest_framework', 'scipost', + 'sphinxdoc', + 'stats', 'submissions', 'theses', 'virtualmeetings', 'organizations', - 'proceedings', - 'production', 'profiles', # TODO: partners to be deprecated in favour of sponsors - 'partners', 'sponsors', 'preprints', 'funders', - 'stats', - 'petitions', 'sitesserved', 'webpack_loader', 'maintenancemode', @@ -167,13 +169,13 @@ SHELL_PLUS_POST_IMPORTS = ( ('scipost.factories', ('ContributorFactory')), ) -MATHJAX_ENABLED = True -MATHJAX_CONFIG_DATA = { - "tex2jax": { - "inlineMath": [['$', '$'], ['\\(', '\\)']], - "processEscapes": True - } - } +# MATHJAX_ENABLED = True +# MATHJAX_CONFIG_DATA = { +# "tex2jax": { +# "inlineMath": [['$', '$'], ['\\(', '\\)']], +# "processEscapes": True +# } +# } MIDDLEWARE = ( # 'django.middleware.http.ConditionalGetMiddleware', @@ -234,7 +236,13 @@ 
DATABASES = { 'PORT': '5432' } } - +MONGO_DATABASE = { + 'database': 'scipost', + 'host': 'localhost', + 'user': '', + 'password': '', + 'port': '27017', +} # Internationalization # https://docs.djangoproject.com/en/1.8/topics/i18n/ @@ -384,7 +392,7 @@ LOGGING = { # Celery scheduled tasks CELERY_RESULT_BACKEND = 'django-db' CELERY_BROKER_URL = 'amqp://localhost' -CELERY_IMPORTS = ('submissions.tasks', ) +CELERY_IMPORTS = ('submissions.tasks', 'metacore.tasks') # Automation. diff --git a/SciPost_v1/settings/local_boris.py b/SciPost_v1/settings/local_boris.py new file mode 100644 index 0000000000000000000000000000000000000000..1456e28912d3931620851ecb8b1f21cc54edb1b5 --- /dev/null +++ b/SciPost_v1/settings/local_boris.py @@ -0,0 +1,42 @@ +from .base import * + +# THE MAIN THING HERE +DEBUG = True + +# Debug toolbar settings +INSTALLED_APPS += ( + 'debug_toolbar', +) +MIDDLEWARE += ( + 'debug_toolbar.middleware.DebugToolbarMiddleware', +) +INTERNAL_IPS = ['127.0.0.1', '::1'] + +# Static and media +STATIC_ROOT = '/Users/boris/Documents/Websites/SciPost/scipost_v1/local_files/static/' +MEDIA_ROOT = '/Users/boris/Documents/Websites/SciPost/scipost_v1/local_files/media/' +WEBPACK_LOADER['DEFAULT']['BUNDLE_DIR_NAME'] =\ + '/Users/boris/Documents/Websites/SciPost/scipost_v1/local_files/static/bundles/' + +# MAILCHIMP_API_USER = get_secret("MAILCHIMP_API_USER") +# MAILCHIMP_API_KEY = get_secret("MAILCHIMP_API_KEY") + +DATABASES['default']['PORT'] = '5432' + +# iThenticate +# ITHENTICATE_USERNAME = get_secret('ITHENTICATE_USERNAME') +# ITHENTICATE_PASSWORD = get_secret('ITHENTICATE_PASSWORD') + +# Logging +LOGGING['handlers']['scipost_file_arxiv']['filename'] = '/Users/boris/Documents/Websites/SciPost/scipost_v1/logs/arxiv.log' +LOGGING['handlers']['scipost_file_doi']['filename'] = '/Users/boris/Documents/Websites/SciPost/scipost_v1/logs/doi.log' + +# Other +# CROSSREF_LOGIN_ID = get_secret("CROSSREF_LOGIN_ID") +# CROSSREF_LOGIN_PASSWORD = 
get_secret("CROSSREF_LOGIN_PASSWORD") +# CROSSREF_DEPOSIT_EMAIL = 'borisponsioen@scipost.org' + +# Mongo +MONGO_DATABASE['user'] = get_secret('MONGO_DB_USER') +MONGO_DATABASE['password'] = get_secret('MONGO_DB_PASSWORD') +MONGO_DATABASE['port'] = get_secret('MONGO_DB_PORT') diff --git a/SciPost_v1/settings/local_jorran.py b/SciPost_v1/settings/local_jorran.py index 6db72e8b3e4cc6d3d5f0393f45fc99ab32049d97..456c6e115c354cf25e0a8e825af2e34338643788 100644 --- a/SciPost_v1/settings/local_jorran.py +++ b/SciPost_v1/settings/local_jorran.py @@ -38,3 +38,8 @@ CROSSREF_DEPOSIT_EMAIL = 'jorrandewit@scipost.org' # Customized mailbackend EMAIL_BACKEND = 'mails.backends.filebased.ModelEmailBackend' EMAIL_BACKEND_ORIGINAL = 'mails.backends.filebased.EmailBackend' + +# Mongo +MONGO_DATABASE['user'] = get_secret('MONGO_DB_USER') +MONGO_DATABASE['password'] = get_secret('MONGO_DB_PASSWORD') +MONGO_DATABASE['port'] = get_secret('MONGO_DB_PORT') diff --git a/SciPost_v1/settings/staging.py b/SciPost_v1/settings/staging.py index 62c0b2932720cb11fc62559d50d1bb798f20b1af..dc98e95d9e95144eef78a14654c48a8655c64671 100644 --- a/SciPost_v1/settings/staging.py +++ b/SciPost_v1/settings/staging.py @@ -29,3 +29,8 @@ CSRF_COOKIE_SECURE = True # Email EMAIL_BACKEND = 'mails.backends.filebased.ModelEmailBackend' EMAIL_BACKEND_ORIGINAL = 'django.core.mail.backends.dummy.EmailBackend' # Disable real processing + +# Mongo +MONGO_DATABASE['user'] = get_secret('MONGO_DB_USER') +MONGO_DATABASE['password'] = get_secret('MONGO_DB_PASSWORD') +MONGO_DATABASE['port'] = get_secret('MONGO_DB_PORT') diff --git a/SciPost_v1/settings/staging_release.py b/SciPost_v1/settings/staging_release.py new file mode 100644 index 0000000000000000000000000000000000000000..28c689878b7aa3fc0969ea1d0e6b47a711eb9915 --- /dev/null +++ b/SciPost_v1/settings/staging_release.py @@ -0,0 +1,29 @@ +from .base import * + +# This file is meant for the server used for the release branches +# + +# THE MAIN THING HERE +DEBUG = False 
+ALLOWED_HOSTS = ['jdewit.webfactional.com'] + +# Recaptcha +RECAPTCHA_PUBLIC_KEY = get_secret("GOOGLE_RECAPTCHA_PUBLIC_KEY") +RECAPTCHA_PRIVATE_KEY = get_secret("GOOGLE_RECAPTCHA_PRIVATE_KEY") + +# Static and media +STATIC_URL = '/static/' +STATIC_ROOT = '/home/jdewit/webapps/scipost_static/' +MEDIA_URL = '/media/' +MEDIA_ROOT = '/home/jdewit/webapps/scipost_media/' + +WEBPACK_LOADER['DEFAULT']['CACHE'] = True +WEBPACK_LOADER['DEFAULT']['BUNDLE_DIR_NAME'] = '/home/jdewit/webapps/scipost_static/bundles/' + +# Logging +LOGGING['handlers']['scipost_file_arxiv']['filename'] = '/home/jdewit/webapps/scipost/logs/arxiv.log' +LOGGING['handlers']['scipost_file_doi']['filename'] = '/home/jdewit/webapps/scipost/logs/doi.log' + +MONGO_DATABASE['user'] = get_secret('MONGO_DB_USER') +MONGO_DATABASE['password'] = get_secret('MONGO_DB_PASSWORD') +MONGO_DATABASE['port'] = get_secret("MONGO_DB_PORT") diff --git a/SciPost_v1/urls.py b/SciPost_v1/urls.py index d8fe905e9bed3cda2711ff977c984c5f37aa3af2..8a231d8c6d0a2ed0e017db8b40b4fabb88fb241c 100644 --- a/SciPost_v1/urls.py +++ b/SciPost_v1/urls.py @@ -50,6 +50,7 @@ urlpatterns = [ url(r'^invitations/', include('invitations.urls', namespace="invitations")), url(r'^journals/', include('journals.urls.general', namespace="journals")), url(r'^mailing_list/', include('mailing_lists.urls', namespace="mailing_lists")), + url(r'^metacore/', include('metacore.urls', namespace="metacore")), url(r'^submissions/', include('submissions.urls', namespace="submissions")), url(r'^submission/', include('submissions.urls', namespace="_submissions")), url(r'^theses/', include('theses.urls', namespace="theses")), diff --git a/celerybeat.pid b/celerybeat.pid new file mode 100644 index 0000000000000000000000000000000000000000..887a0ee2e965376d0a117e5a25d0856364dfa292 --- /dev/null +++ b/celerybeat.pid @@ -0,0 +1 @@ +30193 diff --git a/metacore/__init__.py b/metacore/__init__.py new file mode 100644 index 
class JournalAdmin(admin.ModelAdmin):
    """Admin for Metacore ``Journal`` rows, exposing the Crossref import actions.

    Every action runs over the journals selected in the change list and
    reports its outcome through Django's messages framework.
    """

    fields = ('name', 'ISSN_digital', 'last_full_sync')
    list_display = (
        'name', 'ISSN_digital', 'last_full_sync', 'count_metacore',
        'count_crossref', 'last_update', 'task_status')
    actions = [
        'import_full', 'scheduled_import_incremental', 'import_incremental',
        'update_counts', 'add_journal_to_items', 'delete_all_citables']

    def import_full(self, request, queryset):
        """Queue a background task importing all works of each selected journal."""
        for journal in queryset:
            # Celery async version; the task id is stored so that task_status
            # can look the task up later.
            task = import_journal_full.delay(journal.ISSN_digital)
            journal.last_task_id = task.id
            journal.save()

            messages.add_message(
                request, messages.INFO, 'Import task for journal {} added.'.format(journal.name))

    def import_incremental(self, request, queryset):
        """Queue a background task importing works since the last full sync.

        Journals whose ``last_full_sync`` is not set are skipped with a warning.
        """
        for journal in queryset:
            if journal.last_full_sync:
                task = import_journal_incremental.delay(
                    journal.ISSN_digital, journal.last_full_sync.strftime('%Y-%m-%d'))
                journal.last_task_id = task.id
                journal.save()
                messages.add_message(
                    request, messages.INFO,
                    'Import task for journal {} added.'.format(journal.name))
            else:
                messages.add_message(
                    request, messages.WARNING,
                    ('Incremental import task for journal {} could not be started, '
                     'since date of last full sync is not set.'.format(journal.name)))

    def scheduled_import_incremental(self, request, queryset):
        """Create (or re-enable) a daily periodic incremental import per journal."""
        # TODO: make sure the from_date gets updated!

        schedule, __ = IntervalSchedule.objects.get_or_create(
            every=1, period=IntervalSchedule.DAYS)

        for journal in queryset:
            last_sync = ''
            if journal.last_full_sync:
                last_sync = journal.last_full_sync.strftime('%Y-%m-%d')
            task_args = json.dumps([journal.ISSN_digital, last_sync])

            # Look the task up by its name only. Using interval/task/args as
            # lookup fields as well makes get_or_create miss the existing row
            # as soon as last_sync changes, and the subsequent create then
            # collides with the already-used task name.
            task, created = PeriodicTask.objects.get_or_create(
                name='Inc. import {}'.format(journal.name),
                defaults={
                    'interval': schedule,
                    'task': 'metacore.services.import_journal_incremental',
                    'args': task_args,
                })

            # State before (re)activation; decides which message is shown below.
            was_enabled = task.enabled

            # Use save() instead of QuerySet.update() so that the model's save
            # signals run for this change.
            task.args = task_args
            task.enabled = True
            task.save()

            # TODO: figure out a way to put the individual task id in the journal
            # every time the scheduled task fires
            journal.last_task_id = ''
            journal.save()

            if created:
                txt = 'Repeating import task for journal {} added.'.format(journal.name)
            elif was_enabled:
                txt = 'Repeating import task for journal {} already exists.'.format(journal.name)
            else:
                txt = 'Repeating import task for journal {} activated.'.format(journal.name)
            txt += ' Go to Periodic Tasks in admin to view.'

            messages.add_message(request, messages.INFO, txt)

    def update_counts(self, request, queryset):
        """Refresh the stored Metacore and Crossref work counts of each journal."""
        for journal in queryset:
            journal.count_metacore = Citable.objects(metadata__ISSN=journal.ISSN_digital).count()
            journal.count_crossref = get_crossref_work_count(journal.ISSN_digital)
            journal.save()

        messages.add_message(request, messages.INFO, 'Counts updated.')

    def add_journal_to_items(self, request, queryset):
        """Call ``add_journal_to_existing`` for the ISSN of each selected journal."""
        for journal in queryset:
            add_journal_to_existing(journal.ISSN_digital)
            messages.add_message(
                request, messages.INFO,
                ('"Add journal" task for journal {} added. Go to Background'
                 ' Tasks -> Tasks in admin to view'.format(journal.name)))

    def delete_all_citables(self, request, queryset):
        """Purge all citables belonging to each selected journal."""
        for journal in queryset:
            journal.purge_citables()
            messages.add_message(
                request, messages.INFO,
                'All citables from journal "{}" deleted.'.format(journal.name))

    def get_actions(self, request):
        """Return the available actions without Django's ``delete_selected``."""
        actions = super().get_actions(request)
        if 'delete_selected' in actions:
            del actions['delete_selected']
        return actions

    def task_status(self, journal):
        """Return the result of the journal's last stored task, or '' if none is stored."""
        if journal.last_task_id:
            task = AsyncResult(journal.last_task_id)
            if task:
                return task.result
        return ''


admin.site.register(Journal, JournalAdmin)
def is_empty(self):
    """Return True when no field of the cleaned form data holds a value.

    A field counts as empty when its cleaned value is ``None`` or ``''``.
    The previous implementation returned ``None`` on every path, so callers
    testing ``if self.is_empty()`` could never detect an empty form.
    """
    return all(
        field_value is None or field_value == ''
        for field_value in self.cleaned_data.values())
class CitableQuerySet(QuerySet):
    """Custom mongoengine queryset carrying the Citable-specific query helpers."""

    def cited_by(self, dois):
        """Filter on citables whose ``references`` contain the given DOI(s).

        ``dois`` may be a single DOI or a list of DOIs; only the
        ``references`` field is loaded on the returned documents.
        """
        if isinstance(dois, list):
            return self.only('references').filter(references__in=dois)
        else:
            return self.only('references').filter(references=dois)

    def simple(self):
        """Restrict the loaded fields to the subset listed below."""
        return self.only(
            'doi', 'title', 'authors', 'metadata.is-referenced-by-count', 'publication_date',
            'publisher', 'metadata.container-title', 'journal')

    def prl(self):
        """Filter on citables whose metadata ISSN is '0031-9007'."""
        return self.filter(metadata__ISSN='0031-9007')

    def omni_search(self, query, mode='and'):
        """Run a text search for ``query``.

        In ``'and'`` mode every plain word is wrapped in double quotes, while
        words starting with ``-`` (excludes) are passed through unquoted
        after the quoted words. In ``'or'`` mode the query is passed on
        unchanged. Any other mode raises ``ValueError``.
        """
        if mode == 'and':
            # split() without an argument drops the empty strings produced by
            # leading, trailing or repeated spaces; indexing such an empty
            # token is what made the old ``q[0]`` check raise IndexError.
            terms = query.split()

            # Treat words that start with '-' (exclude) differently
            required = ['"{0}"'.format(term) for term in terms if not term.startswith('-')]
            excluded = [term for term in terms if term.startswith('-')]

            return self.search_text(' '.join(required + excluded))
        elif mode == 'or':
            return self.search_text(query)
        else:
            raise ValueError('Invalid mode used in omni_search')
class Migration(migrations.Migration):
    """Add the nullable integer fields count_crossref and count_metacore to Journal."""

    dependencies = [
        ('metacore', '0001_initial'),
    ]

    operations = [
        migrations.AddField(
            model_name='journal',
            name='count_crossref',
            field=models.IntegerField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name='journal',
            name='count_metacore',
            field=models.IntegerField(blank=True, null=True),
        ),
    ]
class Migration(migrations.Migration):
    """Alter Journal.last_update to a DateTimeField with auto_now=True."""

    dependencies = [
        ('metacore', '0003_auto_20180508_0911'),
    ]

    operations = [
        migrations.AlterField(
            model_name='journal',
            name='last_update',
            field=models.DateTimeField(auto_now=True, null=True),
        ),
    ]
class Migration(migrations.Migration):
    """Alter Journal.last_task_id to a nullable CharField (max_length=250)."""

    dependencies = [
        ('metacore', '0005_journal_last_task_id'),
    ]

    operations = [
        migrations.AlterField(
            model_name='journal',
            name='last_task_id',
            field=models.CharField(blank=True, max_length=250, null=True),
        ),
    ]
# Make the connection to MongoDB - this could be put in settings.py as well.
# All parameters come from settings.MONGO_DATABASE. The port is configured as
# a string there (and may come from a secrets file), whereas the MongoDB
# client expects an integer port, hence the explicit int().
connect(
    settings.MONGO_DATABASE['database'], host=settings.MONGO_DATABASE['host'],
    username=settings.MONGO_DATABASE['user'], password=settings.MONGO_DATABASE['password'],
    port=int(settings.MONGO_DATABASE['port']), authSource='admin')


class Citable(DynamicDocument):
    """
    Citable is a generic object in the metacore database - either a version of record
    (with DOI) or a preprint of a published/unpublished document.

    NOTE: the extra text index for authors/title is defined through the mongo shell!
    For the text index, execute this in the mongo shell:
    use scipost
    db.citable.createIndex({authors: "text", title: "text", journal: "text"})
    """

    # Fields that are extracted from the source metadata in order to normalize
    # some of the data for searching / metrics
    references = ListField(StringField())
    authors = ListField(StringField())
    title = StringField()
    publisher = StringField()
    license = URLField()
    publication_date = DateTimeField()
    journal = StringField()

    # Dump all the raw source metadata here
    metadata = DynamicField()

    # Settings for mongoengine
    meta = {
        'queryset_class': CitableQuerySet,  # use the custom queryset
        'indexes': ['doi', 'title', 'publication_date', 'publisher', 'references', 'journal'],
        'allow_inheritance': True
    }

    def times_cited(self):
        """Return an empty list; the base class computes no citation metric."""
        return []

    def author_list(self, max_n=None):
        """Return the authors joined by '; '.

        When ``max_n`` is given and smaller than the number of authors, only
        the first ``max_n`` are listed, followed by ' et al.'.
        """
        if max_n and max_n < len(self.authors):
            return '; '.join(self.authors[:max_n]) + ' et al.'
        else:
            return '; '.join(self.authors)

    def crossref_ref_count(self):
        """Return the 'is-referenced-by-count' value of the raw metadata."""
        return self.metadata['is-referenced-by-count']


class CitableWithDOI(Citable):
    """
    CitableWithDOI is the subclass of Citable meant for documents that have a DOI,
    which enables the times_cited metric.
    """
    # The keyword is `required`; the previous `require=True` was a misspelling
    # and did not make the field mandatory.
    doi = StringField(required=True, unique=True)

    def times_cited(self):
        """Return the number of CitableWithDOI documents referencing this DOI."""
        return CitableWithDOI.objects.cited_by(self.doi).count()


class Journal(models.Model):
    """Provides interface for importing citables of a journal into Metacore."""

    name = models.CharField(max_length=250, blank=False)
    ISSN_digital = models.CharField(max_length=9, unique=True,
                                    validators=[RegexValidator(r'^[0-9]{4}-[0-9]{3}[0-9X]$')])
    # Print ISSN not used right now, but there for future use
    ISSN_print = models.CharField(
        max_length=9, blank=True,
        validators=[RegexValidator(r'^[0-9]{4}-[0-9]{3}[0-9X]$')])
    last_full_sync = models.DateTimeField(blank=True, null=True)
    last_cursor = models.CharField(max_length=250, blank=True)
    last_errors = models.TextField(blank=True)
    count_metacore = models.IntegerField(blank=True, null=True)
    count_crossref = models.IntegerField(blank=True, null=True)
    count_running = models.IntegerField(blank=True, null=True)  # Tracks progress during import tasks
    last_update = models.DateTimeField(blank=True, null=True, auto_now=True)  # Set during import tasks
    last_task_id = models.CharField(max_length=250, blank=True)  # Set after task related to journal is started

    def __str__(self):
        return self.name

    def update_count_metacore(self):
        """Set ``count_metacore`` to the number of stored citables with this ISSN.

        The instance is not saved here.
        """
        count = Citable.objects(metadata__ISSN=self.ISSN_digital).count()
        self.count_metacore = count

    def update_count_crossref(self):
        """
        Set ``count_crossref`` to the total number of works CR reports for this ISSN.

        The instance is not saved here.

        Needs to be merged with .services but need to work out imports first (circular)
        """

        # Formulate the CR query
        url = 'https://api.crossref.org/journals/{}/works'.format(self.ISSN_digital)

        # No items are needed, only the 'total-results' field of the response
        rows = 0

        params = {'rows': rows, 'mailto': 'b.g.t.ponsioen@uva.nl'}
        r = requests.get(url, params=params)
        r_json = r.json()

        result = r_json['message']

        if 'total-results' in result:
            # This is the Crossref total, so it belongs in count_crossref;
            # it used to overwrite count_metacore by mistake.
            self.count_crossref = result['total-results']

    def purge_citables(self):
        """
        This will delete all citables with their issn set to this journal's issn!
        """

        Citable.objects(metadata__ISSN=self.ISSN_digital).delete()
def import_journal(issn, cursor='*', from_index_date=None):
    """
    Fetch the works of the journal with the given ISSN from Crossref and upsert
    them into the Metacore mongo collection.

    The Crossref endpoint is read page by page (500 rows per request, at most
    2000 requests), following the 'next-cursor' value of each response. Every
    item is validated with CitableCrossrefSerializer; valid items are written
    with one unordered bulk write per page, invalid ones are counted and their
    errors collected. Progress is reported through `current_task.update_state`
    and the journal's `count_running` field.

    After the loop the journal's `count_metacore`, `count_crossref` and
    `last_task_id` are updated, and `last_full_sync` is set only if both counts
    are equal.

    NOTE(review): uses celery's `current_task` unconditionally, so this is
    presumably only meant to be called from within a running task -- confirm.

    Arguments:
    issn -- ISSN used in the Crossref URL and to select the Journal row
    cursor -- Crossref cursor to start from ('*' is the default start value)
    from_index_date -- optional value for Crossref's 'from-index-date' filter

    Returns a dict with the keys 'total processed', 'total inserted',
    'total modified' and 'validation errors'.
    """
    # Get journal to track progress

    # Formulate the CR query
    url = 'https://api.crossref.org/journals/{}/works'.format(issn)

    # If the loop is allowed to complete, it fetches (rows * batches) records
    rows = 500
    batches = 2000
    last_cursor = cursor  # only used for logging
    total_processed = 0
    error_count = 0
    total_upserted = 0
    total_modified = 0

    validation_errors = []

    for i in range(batches):
        logger.info("-------------------------------")
        logger.info("Batch %s" % (i, ))
        logger.info("Last cursor: {}".format(last_cursor))
        logger.info("Current cursor: {}".format(cursor))

        params = {'cursor': cursor, 'rows': rows, 'mailto': 'jorrandewit@scipost.org'}
        if from_index_date:
            params['filter'] = 'from-index-date:{}'.format(from_index_date)

        last_cursor = cursor
        r_json = requests.get(url, params=params).json()

        citables_json = r_json['message']['items']
        cursor = r_json['message']['next-cursor']  # cursor for the next request
        number_of_results = len(r_json['message']['items'])

        # `citables` holds a CitableWithDOI per valid item and False per invalid one;
        # `serialized_objects` holds the serializers of the valid items only.
        citables = []
        serialized_objects = []
        for cit in citables_json:
            serialized_object = CitableCrossrefSerializer(data=cit)
            if serialized_object.is_valid():
                citables.append(CitableWithDOI(**serialized_object.validated_data))
                serialized_objects.append(serialized_object)
            else:
                # TODO: insert the actual validation errors instead
                citables.append(False)
                logger.info("Error at {}".format(cit))
                validation_errors.append(serialized_object.errors)

        # Count the False placeholders (invalid items), then drop them
        errors = [not i for i in citables if i == False]
        error_count = error_count + len(errors)
        citables = [citable for citable in citables if citable]

        # Bulk write of the valid items; the operations come from
        # CitableCrossrefSerializer.to_UpdateOne (an upsert keyed on the DOI)
        if citables:
            operations = [obj.to_UpdateOne() for obj in serialized_objects]
            col = Citable._get_collection()
            bulk_res = col.bulk_write(operations, ordered=False)

            current_task.update_state(state='PROGRESS', meta={
                'current': total_processed,
                'errors': error_count,
                'last_upserted': bulk_res.upserted_count,
                'last_matched_count': bulk_res.matched_count,
                'last_inserted': bulk_res.inserted_count
            })

            total_upserted += bulk_res.upserted_count
            total_modified += bulk_res.modified_count

        # Save current count so progress can be tracked in the admin page
        total_processed += number_of_results
        Journal.objects.filter(ISSN_digital=issn).update(count_running=total_processed)
        current_task.update_state(state='PROGRESS',
                                  meta={'current': total_processed, 'errors': error_count})

        # For debugging purposes
        logger.info(current_task)
        if citables:
            # bulk_res is only bound when this page had valid items, hence the guard
            logger.info("Upserted: {}".format(bulk_res.upserted_count))
            logger.info("Modified: {}".format(bulk_res.modified_count))

        logger.info("Errors: {}".format(error_count))
        logger.info(validation_errors)

        # A page shorter than the requested size is treated as the last page
        if number_of_results < rows:
            logger.info(number_of_results)
            logger.info('End reached.')
            break

    count_crossref = get_crossref_work_count(issn)
    Journal.objects.filter(ISSN_digital=issn).update(
        count_metacore=Citable.objects(metadata__ISSN=issn).count(),
        count_crossref=count_crossref,
        last_task_id=current_task.id
    )
    # Only matches (and thus only stamps last_full_sync) when the stored count
    # equals the count reported by Crossref
    Journal.objects.filter(ISSN_digital=issn, count_metacore=count_crossref).update(
        last_full_sync=timezone.now())

    # Pack stuff for result
    return {
        'total processed': total_processed,
        'total inserted': total_upserted,
        'total modified': total_modified,
        'validation errors': len(validation_errors)
    }
class CitableCrossrefSerializer(serializers.BaseSerializer):
    """
    Class for deserializing a JSON object into the correct form to create a CitableWithDOI out of.
    Specifically for Crossref REST API format

    Usage:
        json_data = { ... }
        serialized_object = CitableCrossrefSerializer(data=json_data)
        serialized_object.is_valid()
        # Validated/parsed data: serialized_object.validated_data
        CitableWithDOI.create(**serialized_object.validated_data)
    """

    @staticmethod
    def _first(values, default=''):
        """Return the first element of `values`, or `default` if it is missing, None or empty."""
        return values[0] if values else default

    def to_internal_value(self, data):
        """
        Parse one Crossref work (a dict) into the keyword data for a CitableWithDOI.

        Raises serializers.ValidationError if the DOI, the author list, the
        title or the publication date is missing or empty. List-valued fields
        that are present but empty (or null) are treated as missing instead of
        raising IndexError/TypeError.
        """
        authors_raw = data.get('author')
        references_raw = data.get('reference')

        doi = data.get('DOI')
        publisher = data.get('publisher')
        # {'issued': {'date-parts': [['...']]}}
        publication_date_raw = self._first((data.get('issued') or {}).get('date-parts'))
        # {'title': ['...']}
        title = self._first(data.get('title'))
        # {'container-title': ['...']}
        journal = self._first(data.get('container-title'))
        # {'license': [{'URL': '...'}]}
        license_url = self._first(data.get('license'), {}).get('URL')

        # Validation errors
        if not doi:
            raise serializers.ValidationError({'DOI': 'DOI not given.'})
        if not authors_raw:
            raise serializers.ValidationError({'authors': 'Author list is empty.'})
        if not title:
            raise serializers.ValidationError({'title': 'Title is not present.'})
        # A date whose first part is null would otherwise be stored as the string 'None'
        if not publication_date_raw or publication_date_raw[0] is None:
            raise serializers.ValidationError({'publication_date': 'Publication date is missing.'})

        # More complex parsing logic
        publication_date = '-'.join(str(date_part) for date_part in publication_date_raw)

        # "<given> <family>", using whichever of the two parts is present
        authors = [
            ' '.join(author_names[part] for part in ('given', 'family') if part in author_names)
            for author_names in authors_raw
        ]

        # Only references that carry a DOI are kept, lower-cased
        references = [ref['DOI'].lower() for ref in references_raw or [] if 'DOI' in ref]

        return {
            '_cls': CitableWithDOI._class_name,
            'authors': authors,
            'doi': doi.lower(),
            'references': references,
            'publisher': publisher,
            # NOTE(review): hyphenated key; confirm it matches the field name on the document
            'publication-date': publication_date,
            'title': title,
            'journal': journal,
            'license': license_url,
            'metadata': data,
        }

    def to_UpdateOne(self):
        """
        Return a pymongo UpdateOne that upserts the validated data, matched on its DOI.

        Works on a copy, so `validated_data` keeps its 'doi' entry and the
        method can be called more than once.
        """
        fields = dict(self.validated_data)
        filters = {'doi': fields.pop('doi')}
        mods = {'$set': fields}

        return pymongo.UpdateOne(filters, mods, upsert=True)
if field.help_text %} + <p class="help">{{ field.help_text|safe }}</p> + {% endif %} + {% endif %} + </div> + {% endfor %} + </div> + + <div class="text-muted"> + Found {{ object_list.count|intcomma }} results + {% if object_list %} + · + Order by: + <a href="?{% url_replace orderby='citations' page='' %}" class="d-inline-block mb-1 ml-2 active-bold {% active_get_request 'orderby' 'citations' %}{% if not request.GET.orderby %} active{% endif %}">Citations</a> + <a href="?{% url_replace orderby='journal' page='' %}" class="d-inline-block mb-1 ml-2 active-bold {% active_get_request 'orderby' 'journal' %}">Journal</a> + <a href="?{% url_replace orderby='name' page='' %}" class="d-inline-block mb-1 ml-2 active-bold {% active_get_request 'orderby' 'name' %}">Name</a> + + · + Show + <select name="results" class="form-control d-inline-block w-auto" onchange="this.form.submit()"> + <option value="10"{% if not request.GET.results or request.GET.results == '10' %} selected{% endif %}>10</option> + <option value="20"{% if request.GET.results == '20' %} selected{% endif %}>20</option> + <option value="50"{% if request.GET.results == '50' %} selected{% endif %}>50</option> + <option value="100"{% if request.GET.results == '100' %} selected{% endif %}>100</option> + </select> + results + {% endif %} + </div> + + </form> + </div> + + <div class="col-12"> + <hr> + + {% if is_paginated %} + <div class="pb-3"> + {% include 'partials/pagination.html' with page_obj=page_obj %} + </div> + {% endif %} + + <ul class="list-group list-group-flush"> + {% for citable in object_list %} + <li class="list-group-item"> + <div class="card-body px-0"> + {% include 'partials/citable_card_content.html' with citable=citable %} + </div> + </li> + {% empty %} + <p>No match found for your search query.</p> + {% endfor %} + </ul> + + {% if is_paginated %} + <div class="p-3"> + {% include 'partials/pagination.html' with page_obj=page_obj %} + </div> + {% endif %} + </div> + +</div> + +{% endblock content 
@register.filter
def join_authors_list(authors, max_n=None):
    """
    Return the authors list as a single string.

    If `max_n` is given and the list is longer than `max_n`, the first
    `max_n - 1` authors are shown, followed by ' ... ' and the last author.
    Otherwise the authors are joined with commas and a final ' and '.
    An empty (or missing) list gives an empty string instead of raising IndexError.
    """
    if not authors:
        return ''
    if max_n and max_n < len(authors):
        return ', '.join(authors[:max_n - 1]) + ' ... ' + authors[-1]
    if len(authors) > 1:
        return ', '.join(authors[:-1]) + ' and ' + authors[-1]
    return authors[0]
class CitableListView(ListView):
    """List view of citables, filtered through CitableSearchForm and ordered/paginated via GET."""

    model = Citable
    template_name = 'citable_list.html'
    form = CitableSearchForm

    def get_queryset(self):
        """Return the search results if the form is valid, else a default selection of 10."""
        # Instantiate from the class attribute: `self.form` is replaced by the
        # bound instance below, so reading it again would give a form, not a class.
        self.form = type(self).form(self.request.GET or None)

        if self.form.is_valid():
            qs = self.form.search_results()
        else:
            qs = Citable.objects.simple().limit(10)
        return qs.order_by(self.get_ordering())

    def get_context_data(self, **kwargs):
        """Add the search form (set in get_queryset) to the template context."""
        # Call the base implementation first to get a context
        context = super().get_context_data(**kwargs)

        # Form into the context!
        context['form'] = self.form

        return context

    def get_paginate_by(self, queryset):
        """
        Dynamically compute pagination setting.

        Reads the 'results' GET parameter, capped at 100. Values that are not
        an integer or are below 1 fall back to 10, so the paginator never gets
        a zero or negative page size.
        """
        try:
            requested = int(self.request.GET.get('results', 10))
        except ValueError:
            return 10
        if requested < 1:
            return 10
        return min(requested, 100)

    def get_ordering(self):
        """Map the 'orderby' GET parameter to an ordering; default is by citation count."""
        orderings = {
            'name': '-title',
            'journal': '-journal',
        }
        return orderings.get(self.request.GET.get('orderby'),
                             '-metadata.is-referenced-by-count')
-- J Pillow==3.4.2 # Latest version is v4.2.1; need to know about usage before upgrade. -- JdW html2text +# Mongo (Metacore) +mongoengine==0.15.0 + # Possibly dead (most probably not used anymore and possibly not up-to-date packages) -- JdW (August 15th, 2017) imagesize==0.7.1 Jinja2==2.8 diff --git a/scipost/static/scipost/assets/config/preconfig.scss b/scipost/static/scipost/assets/config/preconfig.scss index 0e77b6eac1a33faa19344e0a8db42385a98735cb..d4cf18740f3c0ab5dbfd15163f4e6b59676ba2db 100644 --- a/scipost/static/scipost/assets/config/preconfig.scss +++ b/scipost/static/scipost/assets/config/preconfig.scss @@ -30,7 +30,6 @@ $scipost-lightblue: #6885c3; $scipost-lightestblue: #d3e3f6; $scipost-darkblue: #002b49; $scipost-orange: #f6a11a; -// $scipost-white: #f9f9f9; $scipost-white: #f6f6f6; $sidebar-border-color: #dfe0e4; @@ -89,7 +88,7 @@ $enable-rounded: true !important; $btn-transition: none; $input-height: calc(1.5rem + 2px); -$input-height-lg: calc(1.5rem + 2px); +$input-height-lg: calc(2.0rem + 2px); // Lists // diff --git a/scipost/static/scipost/assets/css/_typography.scss b/scipost/static/scipost/assets/css/_typography.scss index 50789f21660d69bc8064ce5a09c7eaff4ff89ec7..94f42554138026073a201dd9dbfdd661abb9bc5c 100644 --- a/scipost/static/scipost/assets/css/_typography.scss +++ b/scipost/static/scipost/assets/css/_typography.scss @@ -148,3 +148,8 @@ hr.hr12 { .fa[data-toggle="tooltip"] { font-size: 1.5em; } + +.active-bold.active { + font-weight: bold; + text-decoration: underline; +} diff --git a/templates/partials/pagination.html b/templates/partials/pagination.html index 28924103800f75c6f1e860c481ffe9ada73ca353..140db2bfb0f64087a197b447281a6b645be106b5 100644 --- a/templates/partials/pagination.html +++ b/templates/partials/pagination.html @@ -7,7 +7,6 @@ <span class="text-muted"><i class="fa fa-long-arrow-left" aria-hidden="true"></i> Previous</span> {% endif %} - {% for page in page_obj.pages %} {% if page %} {% if page == page_obj.number %}