From 00473b230335619c7439881bdd8273e8076a6bd5 Mon Sep 17 00:00:00 2001
From: Boris Ponsioen <b.g.t.ponsioen@uva.nl>
Date: Fri, 2 Feb 2018 17:52:48 +0100
Subject: [PATCH] Fixes reference list to lowercase dois, speeds up browsing
 and shows cited-by counts in template

---
 metacore/managers.py                          |  8 ++++-
 metacore/models.py                            |  4 +--
 metacore/services.py                          | 33 +++++++++++++++----
 .../partials/citable_card_content.html        |  3 +-
 metacore/views.py                             |  2 +-
 5 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/metacore/managers.py b/metacore/managers.py
index d32ea5b7e..a98ac0789 100644
--- a/metacore/managers.py
+++ b/metacore/managers.py
@@ -4,7 +4,13 @@ from mongoengine import QuerySet
 class CitableQuerySet(QuerySet):
 
     def cited_by(self, dois):
-        return self.only('references').filter(references__in=dois)
+        if isinstance(dois, list):  # a list of DOIs: match citables referencing any of them
+            return self.only('references').filter(references__in=dois)
+        else:  # anything that is not a list is treated as one single DOI value
+            return self.only('references').filter(references=dois)
 
     def simple(self):
         return self.only('doi', 'title', 'authors', 'metadata.is-referenced-by-count', 'publication_date', 'publisher')
+
+    def prl(self):
+        return self.filter(metadata__ISSN='1079-7114')  # presumably the online ISSN of Physical Review Letters - confirm
diff --git a/metacore/models.py b/metacore/models.py
index 776034d61..7a26da00b 100644
--- a/metacore/models.py
+++ b/metacore/models.py
@@ -30,7 +30,7 @@ class Citable(DynamicDocument):
     # Settings for mongoengine
     meta = {
             'queryset_class': CitableQuerySet, # use the custom queryset
-            'indexes': ['doi', 'authors', 'title', 'publication_date', 'publisher'], # define indices on database
+            'indexes': ['doi', 'authors', 'title', 'publication_date', 'publisher', 'references'], # define indices on database
             'allow_inheritance': True
             }
 
@@ -52,5 +52,5 @@ class CitableWithDOI(Citable):
     doi = StringField(require=True, unique=True)
 
     def times_cited(self):
-        return CitableWithDOI.objects.cited_by([self.doi]).count()
+        return CitableWithDOI.objects.cited_by(self.doi).count()
 
diff --git a/metacore/services.py b/metacore/services.py
index 64db5a21c..2672ca364 100644
--- a/metacore/services.py
+++ b/metacore/services.py
@@ -7,12 +7,16 @@ def get_crossref_test():
     in de database, after parsing
     """
 
-    # Member 16 is APS
-    url = 'https://api.crossref.org/members/16/works'
     cursor = '*'
 
-    # Last cursor I used (after 100.000 records from APS)
-    cursor = 'AoJ79tDrpd8CPwtodHRwOi8vZHguZG9pLm9yZy8xMC4xMTAzL3BoeXNyZXZiLjQyLjgxMjU='
+    # Member 16 is APS
+    # url = 'https://api.crossref.org/members/16/works'
+    # Last cursor I used (after 100.000 records from APS) for this
+    # cursor = 'AoJ79tDrpd8CPwtodHRwOi8vZHguZG9pLm9yZy8xMC4xMTAzL3BoeXNyZXZiLjQyLjgxMjU='
+
+    # This is PRL
+    url = 'https://api.crossref.org/journals/0031-9007/works'
+    cursor = 'AoJ4wfD37eACPxBodHRwOi8vZHguZG9pLm9yZy8xMC4xMTAzL3BoeXNyZXZsZXR0LjkwLjAzNTUwNA=='
 
     # If the loop is allowed to complete, it fetches (rows * batches) records
     rows = 1000
@@ -22,7 +26,7 @@ def get_crossref_test():
         print("Batch %s" % (i, ))
         print("-------------------------------")
         print(cursor)
-        # params = {'query.publisher-name': 'American Physical Society', 'cursor': cursor, 'rows': rows}
+
         params = {'cursor': cursor, 'rows': rows, 'mailto': 'b.g.t.ponsioen@uva.nl'}
         r = requests.get(url, params=params)
         r_json = r.json()
@@ -40,15 +44,30 @@ def get_crossref_test():
             Citable.objects.insert(citables)
 
         if number_of_results < rows:
+            print(number_of_results)
             print('End reached.')
             break
 
+def convert_doi_to_lower_case():
+    """
+    One-off repair: lowercase the reference DOIs of citables that were imported
+    with uppercase characters in their `references` list. Prints progress per 1000.
+    """
+    # Raw string keeps the regex's `\w` from being parsed as a Python string escape
+    cits = Citable.objects(__raw__={'references': {'$regex': r'([A-Z])\w+'}})
+    for i, cit in enumerate(cits.only('references'), start=1):
+        cit.modify(references=[ref.lower() for ref in cit.references])
+
+        if i % 1000 == 0:
+            print(i)
+
+
 def parse_crossref_citable(citable_item):
     if not citable_item['type'] == 'journal-article':
         return
     
     if 'DOI' in citable_item:
-        doi = citable_item['DOI']
+        doi = citable_item['DOI'].lower()
     else:
         return 
 
@@ -59,7 +78,7 @@ def parse_crossref_citable(citable_item):
 
             if 'reference' in citable_item:
                 references_with_doi = [ref for ref in citable_item['reference'] if 'DOI' in ref]
-                references = [ref['DOI'] for ref in references_with_doi]
+                references = [ref['DOI'].lower() for ref in references_with_doi]
             else:
                 references = []
 
diff --git a/metacore/templates/partials/citable_card_content.html b/metacore/templates/partials/citable_card_content.html
index 64cc90dc0..f4a8a3688 100644
--- a/metacore/templates/partials/citable_card_content.html
+++ b/metacore/templates/partials/citable_card_content.html
@@ -8,7 +8,8 @@
 
 {% block card_footer %}
     <p class="text-muted mb-0">
-    Cited {{ citable.crossref_ref_count }} times | DOI <a href='https://doi.org/{{ citable.doi }}'> {{ citable.doi }} </a>
+    Cited {{ citable.crossref_ref_count }} times (CrossRef) / {{ citable.times_cited}} times (SciPost Meta) 
+        | DOI <a href='https://doi.org/{{ citable.doi }}'> {{ citable.doi }} </a>
       <br>
       Published {{ citable.publication_date|date:"d-m-Y" }} by {{ citable.publisher }}
     </p>
diff --git a/metacore/views.py b/metacore/views.py
index 51650641a..b7fe95759 100644
--- a/metacore/views.py
+++ b/metacore/views.py
@@ -19,7 +19,7 @@ class CitableListView(ListView):
             queryset = self.form.search_results()
         else:
             # queryset = Citable.objects.simple().limit(100)
-            queryset = Citable.objects.simple().order_by('-metadata.is-referenced-by-count').limit(100)
+            queryset = Citable.objects.simple().order_by('-metadata.is-referenced-by-count')
 
         return queryset
 
-- 
GitLab