Newer
Older
__copyright__ = "Copyright © Stichting SciPost (SciPost Foundation)"
__license__ = "AGPL v3"
from decimal import Decimal
from typing import TYPE_CHECKING
from django.contrib.postgres.fields import ArrayField
from django.core.exceptions import ValidationError
from django.db import models
from django.db.models import Min, Sum, OuterRef, Exists, Q
from django.urls import reverse
from django.utils.functional import cached_property
from ..constants import (
STATUS_DRAFT,
STATUS_PUBLISHED,
PUBLICATION_PUBLISHED,
CCBY4,
CC_LICENSES,
CC_LICENSES_URI,
PUBLICATION_STATUSES,
)
from ..helpers import paper_nr_string
from ..managers import PublicationQuerySet
from ..validators import doi_publication_validator
from finances.models import PubFrac
from scipost.constants import SCIPOST_APPROACHES
from scipost.fields import ChoiceArrayField
from django.db.models.manager import RelatedManager
from production.models import ProofsRepository
from profiles.models import Profile
class PublicationAuthorsTable(models.Model):
"""
PublicationAuthorsTable represents an author of a Publication.
Fields:
* publication
* profile
* affiliations: for this author/Publication (supersede profile.affiliations)
* order: the ordinal position of this author in this Publication's list of authors.
"""
publication = models.ForeignKey(
"journals.Publication", on_delete=models.CASCADE, related_name="authors"
)
profile = models.ForeignKey["Profile"](
"profiles.Profile", on_delete=models.PROTECT, blank=True, null=True
)
affiliations = models.ManyToManyField("organizations.Organization", blank=True)
order = models.PositiveSmallIntegerField()
class Meta:
def __str__(self):
return str(self.profile)
def save(self, *args, **kwargs):
"""Auto increment order number if not explicitly set."""
if not self.order:
self.order = self.publication.authors.count() + 1
return super().save(*args, **kwargs)
def is_empty(self) -> bool:
"""Check if object is a temporary placeholder."""
return self.profile is None
@property
def first_name(self) -> str:
"""Return first name of author. If not available, return None."""
return self.profile.first_name if self.profile else None
@property
def last_name(self) -> str:
"""Return last name of author. If not available, return None."""
return self.profile.last_name if self.profile else None
class Publication(models.Model):
"""A Publication is an object directly related to an accepted Submission.
It contains metadata, the actual publication file, author data, etc. etc.
It may be directly related to a Journal or to an Issue.
"""
PUBTYPE_ARTICLE = "article"
PUBTYPE_CODEBASE_RELEASE = "codebase_release"
PUBTYPE_CHOICES = (
(PUBTYPE_ARTICLE, "Article"),
(PUBTYPE_CODEBASE_RELEASE, "Codebase release"),
)
pubtype = models.CharField(
max_length=32,
choices=PUBTYPE_CHOICES,
default=PUBTYPE_ARTICLE,
)
# Publication data
"submissions.Submission",
on_delete=models.CASCADE,
in_issue = models.ForeignKey(
"journals.Issue",
on_delete=models.CASCADE,
null=True,
blank=True,
help_text="Assign either an Issue or Journal to the Publication",
)
in_journal = models.ForeignKey(
"journals.Journal",
on_delete=models.CASCADE,
null=True,
blank=True,
help_text="Assign either an Issue or Journal to the Publication",
)
paper_nr = models.PositiveSmallIntegerField()
paper_nr_suffix = models.CharField(max_length=32, blank=True)
status = models.CharField(
max_length=8, choices=PUBLICATION_STATUSES, default=STATUS_DRAFT
)
# Core fields
title = models.CharField(max_length=300)
author_list = models.CharField(max_length=10000, verbose_name="author list")
abstract = models.TextField()
abstract_jats = models.TextField(
blank=True,
default="",
help_text="JATS version of abstract for Crossref deposit",
)
upload_to="UPLOADS/PUBLICATIONS/%Y/%m/", max_length=200, blank=True
# Ontology-based semantic linking
acad_field = models.ForeignKey(
"ontology.AcademicField", on_delete=models.PROTECT, related_name="publications"
)
specialties = models.ManyToManyField(
"ontology.Specialty", related_name="publications"
topics = models.ManyToManyField("ontology.Topic", blank=True)
approaches = ChoiceArrayField(
models.CharField(max_length=24, choices=SCIPOST_APPROACHES),
blank=True,
null=True,
)
cc_license = models.CharField(max_length=32, choices=CC_LICENSES, default=CCBY4)
# Funders
grants = models.ManyToManyField("funders.Grant", blank=True)
funders_generic = models.ManyToManyField(
"funders.Funder", blank=True
) # not linked to a grant
# Metadata
metadata = models.JSONField(default=dict, blank=True, null=True)
metadata_xml = models.TextField(blank=True) # for Crossref deposit
metadata_DOAJ = models.JSONField(default=dict, blank=True, null=True)
doi_label = models.CharField(
max_length=200,
unique=True,
db_index=True,
validators=[doi_publication_validator],
)
doideposit_needs_updating = models.BooleanField(default=False)
citedby = models.JSONField(default=dict, blank=True, null=True)
number_of_citations = models.PositiveIntegerField(default=0)
# To handle cases without parsable author info (e.g. docx)
author_info_source = models.TextField(blank=True, null=True)
# Date fields
submission_date = models.DateField(verbose_name="submission date")
acceptance_date = models.DateField(verbose_name="acceptance date")
publication_date = models.DateField(verbose_name="publication date")
latest_citedby_update = models.DateTimeField(null=True, blank=True)
latest_metadata_update = models.DateTimeField(blank=True, null=True)
latest_activity = models.DateTimeField(
auto_now=True
) # Needs `auto_now` as its not explicity updated anywhere?
# Calculated fields
cf_citation = models.CharField(
max_length=1024,
blank=True,
help_text="NB: calculated field. Do not modify.",
)
cf_author_affiliation_indices_list = ArrayField(
ArrayField(
models.PositiveSmallIntegerField(blank=True, null=True), default=list
),
help_text="NB: calculated field. Do not modify.",
)
objects = PublicationQuerySet.as_manager()
if TYPE_CHECKING:
authors = RelatedManager[PublicationAuthorsTable]
class Meta:
default_related_name = "publications"
ordering = ("-publication_date", "-paper_nr")
def __str__(self):
return "{cite}, {title} by {authors}, {date}".format(
cite=self.citation,
title=self.title[:30],
authors=self.author_list[:30],
date=self.publication_date.strftime("%Y-%m-%d"),
)
def clean(self):
"""Check if either a valid Journal or Issue is assigned to the Publication."""
if not (self.in_journal or self.in_issue):
raise ValidationError(
{
"in_journal": ValidationError(
"Either assign a Journal or Issue to this Publication",
code="required",
),
"in_issue": ValidationError(
"Either assign a Journal or Issue to this Publication",
code="required",
),
}
)
if self.in_journal and self.in_issue:
# Assigning both a Journal and an Issue will screw up the database
raise ValidationError(
{
"in_journal": ValidationError(
"Either assign only a Journal or Issue to this Publication",
code="invalid",
),
"in_issue": ValidationError(
"Either assign only a Journal or Issue to this Publication",
code="invalid",
),
}
)
if self.in_issue and not self.get_journal().has_issues:
# Assigning both a Journal and an Issue will screw up the database
raise ValidationError(
{
"in_issue": ValidationError(
"This journal does not allow the use of Issues", code="invalid"
),
}
)
if self.in_journal and self.get_journal().has_issues:
# Assigning both a Journal and an Issue will screw up the database
raise ValidationError(
{
"in_journal": ValidationError(
"This journal does not allow the use of individual Publications",
code="invalid",
),
}
)
def get_absolute_url(self):
return reverse("scipost:publication_detail", args=(self.doi_label,))
def get_cc_license_URI(self):
if key == self.cc_license:
return val
raise KeyError
def get_all_affiliations(self):
"""
Returns all author affiliations.
"""
from organizations.models import Organization
return (
Organization.objects.filter(publicationauthorstable__publication=self)
.annotate(order=Min("publicationauthorstable__order"))
.order_by("order")
)
def get_author_affiliation_indices_list(self):
"""
Return a list containing for each author an ordered list of affiliation indices.
This is for display on the publication detail page,
and is a calculated field (saved in the model) to avoid
unnecessary db queries (problematic for papers with large number of authors).
"""
if len(self.cf_author_affiliation_indices_list) > 0:
return self.cf_author_affiliation_indices_list
indexed_author_list = []
pub_affiliations = self.get_all_affiliations()
pub_authors = self.authors.all().prefetch_related("affiliations")
for author in pub_authors:
affnrs = []
for idx, aff in enumerate(pub_affiliations):
if aff in author.affiliations.all():
affnrs.append(idx + 1)
# If no affiliation is found, add a None to the list
# Prevents crashes since Django v4
if len(affnrs) == 0:
affnrs = [None]
indexed_author_list.append(affnrs)
# Since nested ArrayFields must have the same dimension,
# we pad the "empty" entries with Null:
max_length = 0
for entry in indexed_author_list:
max_length = max(max_length, len(entry))
padded_list = []
for entry in indexed_author_list:
padded_entry = entry + [None] * (max_length - len(entry))
padded_list.append(padded_entry)
# Save into the calculated field for future purposes.
Publication.objects.filter(id=self.id).update(
cf_author_affiliation_indices_list=padded_list
)
return padded_list
def get_all_funders(self):
from funders.models import Funder
return Funder.objects.annotate(
has_publication_in_grants=Exists(self.grants.filter(funder=OuterRef("pk"))),
has_publication_in_funders_generic=Exists(
self.funders_generic.filter(pk=OuterRef("pk"))
),
).filter(
Q(has_publication_in_grants=True)
| Q(has_publication_in_funders_generic=True)
)
def get_affiliations(self):
"""Returns the Organizations mentioned in author affiliations."""
from organizations.models import Organization
return Organization.objects.filter(
publicationauthorstable__publication=self
@property
return "10.21468/" + self.doi_label
@property
"""Check if the publication is in draft status, awaiting to be published."""
return self.status == STATUS_DRAFT
@property
if self.status != PUBLICATION_PUBLISHED:
return False
if self.in_issue:
return self.in_issue.status == STATUS_PUBLISHED
elif self.in_journal:
return self.in_journal.active
return False
@property
def has_abstract_jats(self) -> bool:
"""Check if there is a JATS version of the abstract used for Crossref deposits."""
return self.abstract_jats != ""
@property
def has_xml_metadata(self) -> bool:
"""Check if there is XML metadata used for Crossref deposits."""
return self.metadata_xml != ""
@property
bibtex_entry = ("@Article{%s,\n\ttitle={{%s}},\n\tauthor={%s},\n") % (
self.doi_string,
self.title,
self.author_list.replace(",", " and"),
)
if self.in_issue:
if self.in_issue.in_volume:
bibtex_entry += "\tjournal={%s},\n\tvolume={%i},\n" % (
self.in_issue.in_volume.in_journal.name_abbrev,
self.in_issue.in_volume.number,
)
elif self.in_issue.in_journal:
bibtex_entry += (
"\tjournal={%s},\n" % self.in_issue.in_journal.name_abbrev
)
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
elif self.in_journal:
bibtex_entry += "\tjournal={%s},\n" % self.in_journal.name_abbrev
bibtex_entry += (
"\tpages={%s},\n"
"\tyear={%s},\n"
"\tpublisher={SciPost},\n"
"\tdoi={%s},\n"
"\turl={https://%s/%s},\n"
"}"
) % (
self.get_paper_nr(),
self.publication_date.strftime("%Y"),
self.doi_string,
get_current_domain(),
self.doi_string,
)
return bibtex_entry
def resources_as_md(self):
"""Return a Markdown string representing the list of resources."""
if self.resources.all():
resources_md = "## Resources:\n\n"
for resource in self.resources.all():
resources_md += (
f"* {resource.get__type_display()} "
f"at [{resource.url}]({resource.url})\n"
)
return resources_md
return None
@property
def has_citation_list(self):
return (
"citation_list" in self.metadata and len(self.metadata["citation_list"]) > 0
)
@property
def has_funding_statement(self):
return (
"funding_statement" in self.metadata and self.metadata["funding_statement"]
)
def expenditures(self):
"""The expenditures (as defined by the Journal) to produce this Publication."""
return self.get_journal().cost_per_publication(self.publication_date.year)
def recalculate_pubfracs(self):
"""Recalculates PubFracs using the balanced affiliations algorithm."""
# First, remove non-affiliation-related PubFracs
aff_ids = [aff.id for aff in self.get_affiliations()]
PubFrac.objects.filter(publication=self).exclude(
organization__pk__in=aff_ids
).delete()
# Now recreate according to the balanced affiliations algorithm
nr_authors = self.authors.all().count()
affiliations = self.get_affiliations()
for org in affiliations.all():
pubfrac, created = PubFrac.objects.get_or_create(
publication=self, organization=org
)
fraction = {}
for org in affiliations.all():
fraction[org.id] = 0
for author in self.authors.all():
nr_affiliations = author.affiliations.all().count()
for aff in author.affiliations.all():
fraction[aff.id] += 1.0 / (nr_authors * nr_affiliations)
for org in affiliations.all():
pubfrac, created = PubFrac.objects.get_or_create(
publication=self,
organization=org,
)
# ensure 3 digit accuracy for all fractions through integer cast
pubfrac.fraction = 0.001 * int(1000 * fraction[org.id])
pubfrac.save()
self.ensure_pubfracs_sum_to_1()
def ensure_pubfracs_sum_to_1(self):
"""Ensure sum is 1 by putting any difference in the largest PubFrac."""
pubfrac_max = self.pubfracs.order_by("-fraction").first()
sum_pubfracs = self.pubfracs.aggregate(Sum("fraction"))["fraction__sum"]
pubfrac_max.fraction += 1 - sum_pubfracs
pubfrac_max.save()
@property
def pubfracs_sum_to_1(self):
"""Checks that the support fractions sum up to one."""
return self.pubfracs.aggregate(Sum("fraction"))["fraction__sum"] == 1
def proofs_repository(self) -> "ProofsRepository":
"""Return the proofs repository for the publication."""
return self.accepted_submission.production_stream.proofs_repository
@property
def compensated_expenditures(self):
"""Compensated part of expenditures for this Publication."""
qs = self.pubfracs.filter(compensated_by__isnull=False)
# Use the fraction to obtain an accurate result
return (
qs.aggregate(Sum("fraction"))["fraction__sum"] * self.expenditures
if qs.exists()
else 0
)
@property
def uncompensated_expenditures(self):
"""Unompensated part of expenditures for this Publication."""
return self.expenditures - self.compensated_expenditures
@property
def citation(self):
if self.cf_citation:
return self.cf_citation
citation = ""
"""Return Publication name in the preferred citation format."""
if self.in_issue and self.in_issue.in_volume:
citation = "{journal} {volume}, {paper_nr} ({year})".format(
journal=self.in_issue.in_volume.in_journal.name_abbrev,
volume=self.in_issue.in_volume.number,
paper_nr=self.get_paper_nr(),
year=self.publication_date.strftime("%Y"),
)
elif self.in_issue and self.in_issue.in_journal:
citation = "{journal} {issue}, {paper_nr} ({year})".format(
journal=self.in_issue.in_journal.name_abbrev,
issue=self.in_issue.number,
paper_nr=self.get_paper_nr(),
year=self.publication_date.strftime("%Y"),
)
elif self.in_journal:
citation = "{journal} {paper_nr} ({year})".format(
journal=self.in_journal.name_abbrev,
year=self.publication_date.strftime("%Y"),
)
else:
citation = "{paper_nr} ({year})".format(
paper_nr=self.get_paper_nr(), year=self.publication_date.strftime("%Y")
)
self.cf_citation = citation
self.save()
return citation
def get_journal(self):
if self.in_journal:
return self.in_journal
elif self.in_issue.in_journal:
return self.in_issue.in_journal
return self.in_issue.in_volume.in_journal
def journal_issn(self):
return self.get_journal().issn
def get_volume(self):
if self.in_issue and self.in_issue.in_volume:
return self.in_issue.in_volume
return None
def get_paper_nr(self):
"""Returns the paper number (including possible suffixes) as a string."""
s = f"{self.paper_nr}" if self.in_journal else paper_nr_string(self.paper_nr)
return f"{s}{f'-{self.paper_nr_suffix}' if self.paper_nr_suffix else ''}"
def citation_rate(self):
"""Returns the citation rate in units of nr citations per article per year."""
if self.citedby and self.latest_citedby_update:
ncites = len(self.citedby)
deltat = (self.latest_citedby_update.date() - self.publication_date).days
return ncites * 365.25 / deltat
else:
return 0
def get_issue_related_publications(self):
"""Return 4 Publications within same Issue."""
return (
Publication.objects.published()
.filter(in_issue=self.in_issue)
.exclude(id=self.id)[:4]
)
@property
def bundle(self):
"""Returns a QuerySet of all Publications with same DOI anchor."""
doi_anchor = self.doi_label.partition("-")[0]
return Publication.objects.filter(
models.Q(doi_label=doi_anchor)
| models.Q(doi_label__startswith=f"{doi_anchor}-")
).order_by("doi_label")
@cached_property
def tex_contents(self) -> str | None:
return self.author_info_source or self.proofs_repository.fetch_tex()
George Katsikas
committed
@cached_property
def tex_affiliations(self) -> list[tuple[str, str]]:
George Katsikas
committed
Extracts the affiliations from the TeX contents, constructing a list
with each affiliation as an (identifier, text) pair
Matches the pattern: `{\\bf #} Affiliation Text ...\\\\`
Returns a list of tuples with the affiliation identifier (number) and the affiliation text.
affiliations_block = re.search(
r"%+ TODO: AFFILIATIONS.*?%+ END TODO: AFFILIATIONS",
self.tex_contents or "",
re.DOTALL,
)
if affiliations_block is None:
return []
matches = re.findall(
r"\n(?:\{\\bf\s([^%]+?)\})?\s?([^%]+?)\n(?:\\\\|%)",
affiliations_block.group(),
re.DOTALL,
George Katsikas
committed
)
return [
(number or f"UN_{i}", text.replace("\\\\", " ").strip())
for i, (number, text) in enumerate(matches)
]
def tex_author_info(self) -> list[tuple[str, list[str]]]:
"""
Returns a list of tuples with the author name and a list of superscripts.
"""
authors_block = re.search(
r"%+ TODO: AUTHORS(.*?)%+ END TODO: AUTHORS",
self.tex_contents or "",
if authors_block is None:
# If matching against the tex file fails, we use the author_list of the submission.
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
return [(author.strip(), []) for author in self.author_list.split(",")]
author_lines = authors_block.group(1).strip().split("\n")
def extract_author_info(line: str) -> tuple[str, list[str]]:
"""Extracts the superscripts from a text."""
author_match = re.match(r"^(?:\\orcidlink\{.*?\}\\?\W?)?([^\\]+)", line)
superscripts_match = re.match(r".*?\\textsuperscript\{(.*?)\}", line)
author_text = author_match.group(1) if author_match else ""
superscripts_text = (
superscripts_match.group(1) if superscripts_match else ""
)
# Remove any trailing or leading "and"s.
author_text = re.sub(r"(?:^and(\W)|(\W)and$)", r"\1", author_text).strip()
# Prepend a comma to every $...$ expression to split them later
superscripts_text = re.sub(r"(\$.+?\$)", r",\1", superscripts_text)
return (
author_text.strip(),
[superscript.strip() for superscript in superscripts_text.split(",")],
)
return [extract_author_info(line) for line in author_lines]
def reset_author_associations(self) -> None:
"""Deletes all PublicationAuthorsTable entries and repopulates it based on the tex file uploaded on git."""
from profiles.models import Profile
PublicationAuthorsTable.objects.filter(publication=self).delete()
# We add the succesful authors from the author_render_list to the PublicationAuthorsTable.
author_entries: list[PublicationAuthorsTable] = []
author_names = [name for name, _ in self.tex_author_info]
for i, name in enumerate(author_names):
profile_matches = list(Profile.objects.search(name))
profile = profile_matches[0] if profile_matches else None
author_entries.append(
PublicationAuthorsTable(
publication=self,
profile=profile,
order=i + 1,
)
)
PublicationAuthorsTable.objects.bulk_create(author_entries)
self.cf_author_affiliation_indices_list = []
if author_names:
self.author_list = ", ".join(author_names)
self.save()
class Reference(models.Model):
"""A Refence is a reference used in a specific Publication."""
reference_number = models.IntegerField()
publication = models.ForeignKey("journals.Publication", on_delete=models.CASCADE)
authors = models.CharField(max_length=1028)
citation = models.CharField(max_length=1028, blank=True)
identifier = models.CharField(blank=True, max_length=128)
link = models.URLField(blank=True)
class Meta:
unique_together = ("reference_number", "publication")
ordering = ["reference_number"]
default_related_name = "references"
def __str__(self):
return "[{}] {}, {}".format(
self.reference_number, self.authors[:30], self.citation[:30]
)