SciPost Code Repository

Skip to content
Snippets Groups Projects
Commit 00473b23 authored by Boris Ponsioen's avatar Boris Ponsioen
Browse files

Fixes reference list to lowercase dois, speeds up browsing and shows cited-by counts in template

parent 1cbdbba1
No related branches found
No related tags found
No related merge requests found
......@@ -4,7 +4,13 @@ from mongoengine import QuerySet
class CitableQuerySet(QuerySet):
def cited_by(self, dois):
return self.only('references').filter(references__in=dois)
if isinstance(dois, list):
return self.only('references').filter(references__in=dois)
else:
return self.only('references').filter(references=dois)
def simple(self):
return self.only('doi', 'title', 'authors', 'metadata.is-referenced-by-count', 'publication_date', 'publisher')
def prl(self):
return self.filter(metadata__ISSN='1079-7114')
......@@ -30,7 +30,7 @@ class Citable(DynamicDocument):
# Settings for mongoengine
meta = {
'queryset_class': CitableQuerySet, # use the custom queryset
'indexes': ['doi', 'authors', 'title', 'publication_date', 'publisher'], # define indices on database
'indexes': ['doi', 'authors', 'title', 'publication_date', 'publisher', 'references'], # define indices on database
'allow_inheritance': True
}
......@@ -52,5 +52,5 @@ class CitableWithDOI(Citable):
doi = StringField(require=True, unique=True)
def times_cited(self):
return CitableWithDOI.objects.cited_by([self.doi]).count()
return CitableWithDOI.objects.cited_by(self.doi).count()
......@@ -7,12 +7,16 @@ def get_crossref_test():
in the database, after parsing
"""
# Member 16 is APS
url = 'https://api.crossref.org/members/16/works'
cursor = '*'
# Last cursor I used (after 100.000 records from APS)
cursor = 'AoJ79tDrpd8CPwtodHRwOi8vZHguZG9pLm9yZy8xMC4xMTAzL3BoeXNyZXZiLjQyLjgxMjU='
# Member 16 is APS
# url = 'https://api.crossref.org/members/16/works'
# Last cursor I used (after 100.000 records from APS) for this
# cursor = 'AoJ79tDrpd8CPwtodHRwOi8vZHguZG9pLm9yZy8xMC4xMTAzL3BoeXNyZXZiLjQyLjgxMjU='
# This is PRL
url = 'https://api.crossref.org/journals/0031-9007/works'
cursor = 'AoJ4wfD37eACPxBodHRwOi8vZHguZG9pLm9yZy8xMC4xMTAzL3BoeXNyZXZsZXR0LjkwLjAzNTUwNA=='
# If the loop is allowed to complete, it fetches (rows * batches) records
rows = 1000
......@@ -22,7 +26,7 @@ def get_crossref_test():
print("Batch %s" % (i, ))
print("-------------------------------")
print(cursor)
# params = {'query.publisher-name': 'American Physical Society', 'cursor': cursor, 'rows': rows}
params = {'cursor': cursor, 'rows': rows, 'mailto': 'b.g.t.ponsioen@uva.nl'}
r = requests.get(url, params=params)
r_json = r.json()
......@@ -40,15 +44,30 @@ def get_crossref_test():
Citable.objects.insert(citables)
if number_of_results < rows:
print(number_of_results)
print('End reached.')
break
def convert_doi_to_lower_case():
    """Lower-case every DOI in the ``references`` list of affected citables.

    One-off repair for when you accidentally import 100.000+ records that
    have random uppercase characters in their reference DOI list.

    Selects each ``Citable`` whose ``references`` contain at least one
    uppercase ASCII letter, rewrites the list in lower case via ``modify``,
    and prints the running count after every 1000 processed documents.
    """
    # A bare character class is enough: the previous pattern '([A-Z])\w+'
    # required a word character *after* the uppercase letter, so references
    # whose only uppercase letter is the last character or precedes
    # punctuation (e.g. '...90001-X') were silently skipped.
    cits = Citable.objects(__raw__={'references': {'$regex': '[A-Z]'}})

    # Only the 'references' field is loaded; nothing else is needed here.
    for i, cit in enumerate(cits.only('references'), start=1):
        refs = [ref.lower() for ref in cit.references]
        cit.modify(references=refs)

        # Progress indicator for long runs.
        if i % 1000 == 0:
            print(i)
def parse_crossref_citable(citable_item):
if not citable_item['type'] == 'journal-article':
return
if 'DOI' in citable_item:
doi = citable_item['DOI']
doi = citable_item['DOI'].lower()
else:
return
......@@ -59,7 +78,7 @@ def parse_crossref_citable(citable_item):
if 'reference' in citable_item:
references_with_doi = [ref for ref in citable_item['reference'] if 'DOI' in ref]
references = [ref['DOI'] for ref in references_with_doi]
references = [ref['DOI'].lower() for ref in references_with_doi]
else:
references = []
......
......@@ -8,7 +8,8 @@
{% block card_footer %}
<p class="text-muted mb-0">
Cited {{ citable.crossref_ref_count }} times | DOI <a href='https://doi.org/{{ citable.doi }}'> {{ citable.doi }} </a>
Cited {{ citable.crossref_ref_count }} times (CrossRef) / {{ citable.times_cited}} times (SciPost Meta)
| DOI <a href='https://doi.org/{{ citable.doi }}'> {{ citable.doi }} </a>
<br>
Published {{ citable.publication_date|date:"d-m-Y" }} by {{ citable.publisher }}
</p>
......
......@@ -19,7 +19,7 @@ class CitableListView(ListView):
queryset = self.form.search_results()
else:
# queryset = Citable.objects.simple().limit(100)
queryset = Citable.objects.simple().order_by('-metadata.is-referenced-by-count').limit(100)
queryset = Citable.objects.simple().order_by('-metadata.is-referenced-by-count')
return queryset
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment