From 00473b230335619c7439881bdd8273e8076a6bd5 Mon Sep 17 00:00:00 2001 From: Boris Ponsioen <b.g.t.ponsioen@uva.nl> Date: Fri, 2 Feb 2018 17:52:48 +0100 Subject: [PATCH] Fixes reference list to lowercase dois, speeds up browsing and shows cited-by counts in template --- metacore/managers.py | 8 ++++- metacore/models.py | 4 +-- metacore/services.py | 33 +++++++++++++++---- .../partials/citable_card_content.html | 3 +- metacore/views.py | 2 +- 5 files changed, 38 insertions(+), 12 deletions(-) diff --git a/metacore/managers.py b/metacore/managers.py index d32ea5b7e..a98ac0789 100644 --- a/metacore/managers.py +++ b/metacore/managers.py @@ -4,7 +4,13 @@ from mongoengine import QuerySet class CitableQuerySet(QuerySet): def cited_by(self, dois): - return self.only('references').filter(references__in=dois) + if isinstance(dois, list): + return self.only('references').filter(references__in=dois) + else: + return self.only('references').filter(references=dois) def simple(self): return self.only('doi', 'title', 'authors', 'metadata.is-referenced-by-count', 'publication_date', 'publisher') + + def prl(self): + return self.filter(metadata__ISSN='1079-7114') diff --git a/metacore/models.py b/metacore/models.py index 776034d61..7a26da00b 100644 --- a/metacore/models.py +++ b/metacore/models.py @@ -30,7 +30,7 @@ class Citable(DynamicDocument): # Settings for mongoengine meta = { 'queryset_class': CitableQuerySet, # use the custom queryset - 'indexes': ['doi', 'authors', 'title', 'publication_date', 'publisher'], # define indices on database + 'indexes': ['doi', 'authors', 'title', 'publication_date', 'publisher', 'references'], # define indices on database 'allow_inheritance': True } @@ -52,5 +52,5 @@ class CitableWithDOI(Citable): doi = StringField(require=True, unique=True) def times_cited(self): - return CitableWithDOI.objects.cited_by([self.doi]).count() + return CitableWithDOI.objects.cited_by(self.doi).count() diff --git a/metacore/services.py b/metacore/services.py index 64db5a21c..2672ca364 100644 --- a/metacore/services.py +++ b/metacore/services.py @@ -7,12 +7,16 @@ def get_crossref_test(): in de database, after parsing """ - # Member 16 is APS - url = 'https://api.crossref.org/members/16/works' cursor = '*' - # Last cursor I used (after 100.000 records from APS) - cursor = 'AoJ79tDrpd8CPwtodHRwOi8vZHguZG9pLm9yZy8xMC4xMTAzL3BoeXNyZXZiLjQyLjgxMjU=' + # Member 16 is APS + # url = 'https://api.crossref.org/members/16/works' + # Last cursor I used (after 100.000 records from APS) for this + # cursor = 'AoJ79tDrpd8CPwtodHRwOi8vZHguZG9pLm9yZy8xMC4xMTAzL3BoeXNyZXZiLjQyLjgxMjU=' + + # This is PRL + url = 'https://api.crossref.org/journals/0031-9007/works' + cursor = 'AoJ4wfD37eACPxBodHRwOi8vZHguZG9pLm9yZy8xMC4xMTAzL3BoeXNyZXZsZXR0LjkwLjAzNTUwNA==' # If the loop is allowed to complete, it fetches (rows * batches) records rows = 1000 @@ -22,7 +26,7 @@ def get_crossref_test(): print("Batch %s" % (i, )) print("-------------------------------") print(cursor) - # params = {'query.publisher-name': 'American Physical Society', 'cursor': cursor, 'rows': rows} + params = {'cursor': cursor, 'rows': rows, 'mailto': 'b.g.t.ponsioen@uva.nl'} r = requests.get(url, params=params) r_json = r.json() @@ -40,15 +44,30 @@ def get_crossref_test(): Citable.objects.insert(citables) if number_of_results < rows: + print(number_of_results) print('End reached.') break +def convert_doi_to_lower_case(): + # If you accidentally import 100.000+ records that have random uppercase characters + # in their reference DOI list + i = 0 + cits = Citable.objects(__raw__={'references': {'$regex': '([A-Z])\w+'}}) + for cit in cits.only('references'): + i = i + 1 + refs = [ref.lower() for ref in cit.references] + cit.modify(references=refs) + + if i % 1000 == 0: + print(i) + + def parse_crossref_citable(citable_item): if not citable_item['type'] == 'journal-article': return if 'DOI' in citable_item: - doi = citable_item['DOI'] + doi = citable_item['DOI'].lower() else: return @@ -59,7 +78,7 @@ def parse_crossref_citable(citable_item): if 'reference' in citable_item: references_with_doi = [ref for ref in citable_item['reference'] if 'DOI' in ref] - references = [ref['DOI'] for ref in references_with_doi] + references = [ref['DOI'].lower() for ref in references_with_doi] else: references = [] diff --git a/metacore/templates/partials/citable_card_content.html b/metacore/templates/partials/citable_card_content.html index 64cc90dc0..f4a8a3688 100644 --- a/metacore/templates/partials/citable_card_content.html +++ b/metacore/templates/partials/citable_card_content.html @@ -8,7 +8,8 @@ {% block card_footer %} <p class="text-muted mb-0"> - Cited {{ citable.crossref_ref_count }} times | DOI <a href='https://doi.org/{{ citable.doi }}'> {{ citable.doi }} </a> + Cited {{ citable.crossref_ref_count }} times (CrossRef) / {{ citable.times_cited}} times (SciPost Meta) + | DOI <a href='https://doi.org/{{ citable.doi }}'> {{ citable.doi }} </a> <br> Published {{ citable.publication_date|date:"d-m-Y" }} by {{ citable.publisher }} </p> diff --git a/metacore/views.py b/metacore/views.py index 51650641a..b7fe95759 100644 --- a/metacore/views.py +++ b/metacore/views.py @@ -19,7 +19,7 @@ class CitableListView(ListView): queryset = self.form.search_results() else: # queryset = Citable.objects.simple().limit(100) - queryset = Citable.objects.simple().order_by('-metadata.is-referenced-by-count').limit(100) + queryset = Citable.objects.simple().order_by('-metadata.is-referenced-by-count') return queryset -- GitLab