Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
__copyright__ = "Copyright © Stichting SciPost (SciPost Foundation)"
__license__ = "AGPL v3"
import hashlib
import logging
import random
import string
import requests
import xml.etree.ElementTree as ET
from django.conf import settings
from django.utils import timezone
from .models import Publication
logger = logging.getLogger(__name__)
def update_citedby(doi_label):
"""
Run an XML query at Crossref, to update the Cited-by data for a Publication
"""
publication = Publication.objects.get(doi_label=doi_label)
# create a doi_batch_id
salt = ""
for i in range(5):
salt = salt + random.choice(string.ascii_letters)
salt = salt.encode('utf8')
idsalt = publication.title[:10]
idsalt = idsalt.encode('utf8')
doi_batch_id = hashlib.sha1(salt+idsalt).hexdigest()
query_xml = ('<?xml version = "1.0" encoding="UTF-8"?>'
'<query_batch version="2.0" xmlns = "http://www.crossref.org/qschema/2.0"'
'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
'xsi:schemaLocation="http://www.crossref.org/qschema/2.0 '
'http://www.crossref.org/qschema/crossref_query_input2.0.xsd">'
'<head>'
'<email_address>' + settings.CROSSREF_DEPOSIT_EMAIL + '</email_address>'
'<doi_batch_id>' + str(doi_batch_id) + '</doi_batch_id>'
'</head>'
'<body>'
'<fl_query alert="false">'
'<doi>' + publication.doi_string + '</doi>'
'</fl_query>'
'</body>'
'</query_batch>')
url = 'http://doi.crossref.org/servlet/getForwardLinks'
params = {'usr': settings.CROSSREF_LOGIN_ID,
'pwd': settings.CROSSREF_LOGIN_PASSWORD,
'qdata': query_xml,
'doi': publication.doi_string, }
r = requests.post(url, params=params)
if r.status_code == 401:
print('update_citedby: Crossref credentials are invalid. '
'Please contact the SciPost Admin.')
logger.info('update_citedby: Crossref credentials are invalid. '
'Please contact the SciPost Admin.')
return
try:
response_deserialized = ET.fromstring(r.text)
except ET.ParseError: # something went wrong, abort
logger.info('Response parsing failed for doi: %s', publication.doi_string)
return
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
prefix = '{http://www.crossref.org/qrschema/2.0}'
citations = []
for link in response_deserialized.iter(prefix + 'forward_link'):
citation = {}
# Cited in Journal, Book, or whatever you want to be cited in.
link_el = link[0]
# The only required field in Crossref: doi.
citation['doi'] = link_el.find(prefix + 'doi').text
if link_el.find(prefix + 'article_title') is not None:
citation['article_title'] = link_el.find(prefix + 'article_title').text
if link_el.find(prefix + 'journal_abbreviation') is not None:
citation['journal_abbreviation'] = link_el.find(prefix + 'journal_abbreviation').text
if link_el.find(prefix + 'volume') is not None:
citation['volume'] = link_el.find(prefix + 'volume').text
if link_el.find(prefix + 'first_page') is not None:
citation['first_page'] = link_el.find(prefix + 'first_page').text
if link_el.find(prefix + 'item_number') is not None:
citation['item_number'] = link_el.find(prefix + 'item_number').text
if link_el.find(prefix + 'year') is not None:
citation['year'] = link_el.find(prefix + 'year').text
if link_el.find(prefix + 'issn') is not None:
citation['issn'] = link_el.find(prefix + 'issn').text
if link_el.find(prefix + 'isbn') is not None:
citation['isbn'] = link_el.find(prefix + 'isbn').text
multiauthors = False
if link_el.find(prefix + 'contributors') is not None:
for author in link_el.find(prefix + 'contributors').iter(prefix + 'contributor'):
if author.get('sequence') == 'first':
citation['first_author_given_name'] = author.find(prefix + 'given_name').text
citation['first_author_surname'] = author.find(prefix + 'surname').text
else:
multiauthors = True
else:
citation['first_author_given_name'] = ''
citation['first_author_surname'] = '[undetermined]'
citation['multiauthors'] = multiauthors
citations.append(citation)
# Update Publication object
publication.citedby = citations
publication.number_of_citations = len(citations)
publication.latest_citedby_update = timezone.now()
publication.save()