Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
__copyright__ = "Copyright © Stichting SciPost (SciPost Foundation)"
__license__ = "AGPL v3"
import hashlib
import logging
import random
import string
import requests
import xml.etree.ElementTree as ET
from django.conf import settings
from django.utils import timezone
from .models import Publication
logger = logging.getLogger(__name__)
# Namespace that qualifies the elements read from a Crossref forward-link result.
_CROSSREF_RESULT_NS = '{http://www.crossref.org/qrschema/2.0}'

# Optional child elements whose text is copied as-is into each citation dict,
# under a key equal to the element name.
_CITATION_OPTIONAL_FIELDS = (
    'article_title',
    'journal_abbreviation',
    'volume',
    'first_page',
    'item_number',
    'year',
    'issn',
    'isbn',
)

# Seconds to wait on Crossref before giving up; without a timeout
# requests.post() may block indefinitely.
_CROSSREF_TIMEOUT = 60


def _build_citedby_query(email, doi_batch_id, doi):
    """Return the XML body of a forward-link (Cited-by) query for `doi`."""
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    from xml.sax.saxutils import escape

    # NOTE: adjacent literals are concatenated verbatim, so every attribute
    # needs its own separating space or the resulting XML is not well-formed.
    return ('<?xml version = "1.0" encoding="UTF-8"?>'
            '<query_batch version="2.0" xmlns = "http://www.crossref.org/qschema/2.0" '
            'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
            'xsi:schemaLocation="http://www.crossref.org/qschema/2.0 '
            'http://www.crossref.org/qschema/crossref_query_input2.0.xsd">'
            '<head>'
            '<email_address>' + escape(email) + '</email_address>'
            '<doi_batch_id>' + str(doi_batch_id) + '</doi_batch_id>'
            '</head>'
            '<body>'
            '<fl_query alert="false">'
            # DOIs may legally contain characters such as '<' or '&'.
            '<doi>' + escape(doi) + '</doi>'
            '</fl_query>'
            '</body>'
            '</query_batch>')


def _parse_forward_link(link):
    """Turn one <forward_link> element into a citation dict.

    Returns None when the element has no child or the child carries no
    <doi>, so that the caller can skip it instead of crashing.
    """
    ns = _CROSSREF_RESULT_NS
    if len(link) == 0:
        return None

    # Cited in Journal, Book, or whatever you want to be cited in.
    cited_in = link[0]

    # The only required field in Crossref: doi.
    doi_el = cited_in.find(ns + 'doi')
    if doi_el is None:
        return None
    citation = {'doi': doi_el.text}

    for field in _CITATION_OPTIONAL_FIELDS:
        field_el = cited_in.find(ns + field)
        if field_el is not None:
            citation[field] = field_el.text

    multiauthors = False
    contributors = cited_in.find(ns + 'contributors')
    if contributors is not None:
        for author in contributors.iter(ns + 'contributor'):
            if author.get('sequence') == 'first':
                # Either name part may be absent; only store what is there.
                for tag, key in (('given_name', 'first_author_given_name'),
                                 ('surname', 'first_author_surname')):
                    name_el = author.find(ns + tag)
                    if name_el is not None:
                        citation[key] = name_el.text
            else:
                multiauthors = True
    citation['multiauthors'] = multiauthors
    return citation


def update_citedby(doi_label):
    """
    Run an XML query at Crossref, to update the Cited-by data for a Publication

    The Publication is looked up by `doi_label`. On a successful query its
    `citedby`, `number_of_citations` and `latest_citedby_update` fields are
    overwritten and the object is saved. If Crossref answers with an error
    status, the problem is logged and the Publication is left untouched.

    Returns None in all cases.

    Raises:
        Publication.DoesNotExist: no Publication has this `doi_label`.
        requests.RequestException: the request failed or timed out.
        xml.etree.ElementTree.ParseError: the response body is not valid XML.
    """
    publication = Publication.objects.get(doi_label=doi_label)

    # create a doi_batch_id
    salt = ''.join(random.choice(string.ascii_letters) for _ in range(5))
    idsalt = publication.title[:10]
    doi_batch_id = hashlib.sha1(
        salt.encode('utf8') + idsalt.encode('utf8')).hexdigest()

    query_xml = _build_citedby_query(
        settings.CROSSREF_DEPOSIT_EMAIL, doi_batch_id, publication.doi_string)

    # HTTPS: the request carries the Crossref login and password.
    url = 'https://doi.crossref.org/servlet/getForwardLinks'
    params = {
        'usr': settings.CROSSREF_LOGIN_ID,
        'pwd': settings.CROSSREF_LOGIN_PASSWORD,
        'qdata': query_xml,
        'doi': publication.doi_string,
    }
    r = requests.post(url, params=params, timeout=_CROSSREF_TIMEOUT)

    if r.status_code == 401:
        print('update_citedby: Crossref credentials are invalid. '
              'Please contact the SciPost Admin.')
        logger.info('update_citedby: Crossref credentials are invalid. '
                    'Please contact the SciPost Admin.')
        return
    if not r.ok:
        # Do not parse an error page: it would either raise or, worse, yield
        # zero links and wipe the citations already stored.
        logger.error('update_citedby: Crossref returned HTTP status %s for %s.',
                     r.status_code, publication.doi_string)
        return

    response_deserialized = ET.fromstring(r.text)
    citations = []
    for link in response_deserialized.iter(_CROSSREF_RESULT_NS + 'forward_link'):
        citation = _parse_forward_link(link)
        if citation is None:
            logger.warning('update_citedby: skipped a forward_link without doi '
                           'for %s.', publication.doi_string)
            continue
        citations.append(citation)

    # Update Publication object
    publication.citedby = citations
    publication.number_of_citations = len(citations)
    publication.latest_citedby_update = timezone.now()
    publication.save()