SciPost Code Repository

Skip to content
Snippets Groups Projects
services.py 4.7 KiB
Newer Older
__copyright__ = "Copyright © Stichting SciPost (SciPost Foundation)"
__license__ = "AGPL v3"


import hashlib
import logging
import random
import string
import requests
import xml.etree.ElementTree as ET

from django.conf import settings
from django.utils import timezone

from .models import Publication


logger = logging.getLogger(__name__)


def update_citedby(doi_label):
    """
    Run an XML query at Crossref, to update the Cited-by data for a Publication
    """
    publication = Publication.objects.get(doi_label=doi_label)

    # create a doi_batch_id
    salt = ""
    for i in range(5):
        salt = salt + random.choice(string.ascii_letters)
    salt = salt.encode('utf8')
    idsalt = publication.title[:10]
    idsalt = idsalt.encode('utf8')
    doi_batch_id = hashlib.sha1(salt+idsalt).hexdigest()
    query_xml = ('<?xml version = "1.0" encoding="UTF-8"?>'
                 '<query_batch version="2.0" xmlns = "http://www.crossref.org/qschema/2.0"'
                 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
                 'xsi:schemaLocation="http://www.crossref.org/qschema/2.0 '
                 'http://www.crossref.org/qschema/crossref_query_input2.0.xsd">'
                 '<head>'
                 '<email_address>' + settings.CROSSREF_DEPOSIT_EMAIL + '</email_address>'
                 '<doi_batch_id>' + str(doi_batch_id) + '</doi_batch_id>'
                 '</head>'
                 '<body>'
                 '<fl_query alert="false">'
                 '<doi>' + publication.doi_string + '</doi>'
                 '</fl_query>'
                 '</body>'
                 '</query_batch>')
    url = 'http://doi.crossref.org/servlet/getForwardLinks'
    params = {'usr': settings.CROSSREF_LOGIN_ID,
              'pwd': settings.CROSSREF_LOGIN_PASSWORD,
              'qdata': query_xml,
              'doi': publication.doi_string, }
    r = requests.post(url, params=params)
    if r.status_code == 401:
        print('update_citedby: Crossref credentials are invalid. '
              'Please contact the SciPost Admin.')

        logger.info('update_citedby: Crossref credentials are invalid. '
                    'Please contact the SciPost Admin.')
        return

    try:
        response_deserialized = ET.fromstring(r.text)
    except ET.ParseError: # something went wrong, abort
        logger.info('Response parsing failed for doi: %s', publication.doi_string)
        return

    prefix = '{http://www.crossref.org/qrschema/2.0}'
    citations = []
    for link in response_deserialized.iter(prefix + 'forward_link'):
        citation = {}
        # Cited in Journal, Book, or whatever you want to be cited in.
        link_el = link[0]

        # The only required field in Crossref: doi.
        citation['doi'] = link_el.find(prefix + 'doi').text

        if link_el.find(prefix + 'article_title') is not None:
            citation['article_title'] = link_el.find(prefix + 'article_title').text

        if link_el.find(prefix + 'journal_abbreviation') is not None:
            citation['journal_abbreviation'] = link_el.find(prefix + 'journal_abbreviation').text

        if link_el.find(prefix + 'volume') is not None:
            citation['volume'] = link_el.find(prefix + 'volume').text

        if link_el.find(prefix + 'first_page') is not None:
            citation['first_page'] = link_el.find(prefix + 'first_page').text

        if link_el.find(prefix + 'item_number') is not None:
            citation['item_number'] = link_el.find(prefix + 'item_number').text

        if link_el.find(prefix + 'year') is not None:
            citation['year'] = link_el.find(prefix + 'year').text

        if link_el.find(prefix + 'issn') is not None:
            citation['issn'] = link_el.find(prefix + 'issn').text

        if link_el.find(prefix + 'isbn') is not None:
            citation['isbn'] = link_el.find(prefix + 'isbn').text

        multiauthors = False
        if link_el.find(prefix + 'contributors') is not None:
            for author in link_el.find(prefix + 'contributors').iter(prefix + 'contributor'):
                if author.get('sequence') == 'first':
                    citation['first_author_given_name'] = author.find(prefix + 'given_name').text
                    citation['first_author_surname'] = author.find(prefix + 'surname').text
                else:
                    multiauthors = True
        else:
            citation['first_author_given_name'] = ''
            citation['first_author_surname'] = '[undetermined]'

        citation['multiauthors'] = multiauthors
        citations.append(citation)

    # Update Publication object
    publication.citedby = citations
    publication.number_of_citations = len(citations)
    publication.latest_citedby_update = timezone.now()
    publication.save()