diff --git a/scipost_django/common/utils.py b/scipost_django/common/utils.py
index 9e5b7fd34c56694cbd0d6bff988f7e0296134f81..550b3aa03426fc3a1c45696f605edf82da85222b 100644
--- a/scipost_django/common/utils.py
+++ b/scipost_django/common/utils.py
@@ -177,6 +177,17 @@ def get_current_domain():
         return "fake.domain"
 
 
+def remove_extra_spacing(text):
+    """
+    Collapse every run of whitespace in ``text`` into a single space.
+
+    Leading and trailing whitespace is dropped. ``str.split()`` without
+    arguments splits on any whitespace, so tabs and newlines (e.g. paragraph
+    breaks) are collapsed as well, not only repeated spaces.
+    """
+    return " ".join(text.split())
+
+
 # MARKED FOR DEPRECATION
 class BaseMailUtil(object):
     mail_sender = "no-reply@%s" % get_current_domain()
diff --git a/scipost_django/scipost/services.py b/scipost_django/scipost/services.py
index dec57ee0a1a43e03e8402e0f0e33fb6446459331..519940b117ddb1e53f87e24a7dcf8c8389f3f50d 100644
--- a/scipost_django/scipost/services.py
+++ b/scipost_django/scipost/services.py
@@ -9,6 +9,8 @@ import datetime
 import dateutil.parser
 import logging
 
+from common.utils import remove_extra_spacing
+
 from submissions.constants import FIGSHARE_PREPRINT_SERVERS
 from submissions.models import PreprintServer
 
@@ -155,12 +157,12 @@ class ArxivCaller:
 
     def _format_data(self):
         data = self._arxiv_data
-        title = data["title"]
+        title = remove_extra_spacing(data["title"])
         author_list = [author["name"] for author in data.get("authors", [])]
         # author_list is given as a comma separated list of names on the relevant models (Commentary, Submission)
         author_list = ", ".join(author_list)
         arxiv_link = data["id"].replace("http:", "https:")
-        abstract = data["summary"]
+        abstract = remove_extra_spacing(data["summary"])
         pub_date = dateutil.parser.parse(data["published"]).date()
 
         self.data = {
diff --git a/scipost_django/submissions/forms/__init__.py b/scipost_django/submissions/forms/__init__.py
index 3c01b9ad20138fa71398794402febd3bee0de343..8bb3534cae68f083ac220b5f06e9054746015885 100644
--- a/scipost_django/submissions/forms/__init__.py
+++ b/scipost_django/submissions/forms/__init__.py
@@ -67,7 +67,7 @@ from ..models import (
 from ..regexes import CHEMRXIV_DOI_PATTERN
 
 from colleges.models import Fellowship
-from common.utils import Q_with_alternative_spellings
+from common.utils import Q_with_alternative_spellings, remove_extra_spacing
 from journals.models import Journal, Publication
 from journals.constants import (
     PUBLISHABLE_OBJECT_TYPE_ARTICLE,
@@ -1617,6 +1617,14 @@ class SubmissionForm(forms.ModelForm):
                 "You must agree to the terms and conditions to submit a manuscript."
             )
 
+    def clean_title(self):
+        """Normalize whitespace in the submitted title."""
+        return remove_extra_spacing(self.cleaned_data["title"])
+
+    def clean_abstract(self):
+        """Normalize whitespace in the submitted abstract."""
+        return remove_extra_spacing(self.cleaned_data["abstract"])
+
     @transaction.atomic
     def save(self):
         """
diff --git a/scipost_django/submissions/management/commands/clean_multiple_submission_spacing.py b/scipost_django/submissions/management/commands/clean_multiple_submission_spacing.py
new file mode 100644
index 0000000000000000000000000000000000000000..ade163cc15a0ad39a8e4f0e5aecdf0753d95be65
--- /dev/null
+++ b/scipost_django/submissions/management/commands/clean_multiple_submission_spacing.py
@@ -0,0 +1,56 @@
+__copyright__ = "Copyright © Stichting SciPost (SciPost Foundation)"
+__license__ = "AGPL v3"
+
+
+from django.core.management.base import BaseCommand
+
+from common.utils import remove_extra_spacing
+from submissions.models.submission import Submission
+
+
+class Command(BaseCommand):
+    help = "Clean up multiple spaces in submission fields"
+    fields = ["title", "abstract"]
+
+    def _clean_fields(self, instance):
+        """
+        Normalize the spacing of ``self.fields`` on ``instance`` in memory.
+
+        Return the names of the fields whose value actually changed.
+        Fields that are missing or ``None`` on the instance are skipped.
+        """
+        changed = []
+        for field in self.fields:
+            if (value := getattr(instance, field, None)) is None:
+                continue
+
+            cleaned_value = remove_extra_spacing(value)
+            if cleaned_value != value:
+                setattr(instance, field, cleaned_value)
+                changed.append(field)
+
+        return changed
+
+    def handle(self, *args, **options):
+        counter = dict.fromkeys(self.fields, 0)
+
+        for submission in Submission.objects.all():
+            changed = self._clean_fields(submission)
+            if changed:
+                # Single write per object, limited to the cleaned fields
+                submission.save(update_fields=changed)
+                for field in changed:
+                    counter[field] += 1
+
+            # Also clean the same fields in all publications stemming
+            # from this submission, whether or not the submission changed
+            for publication in submission.publications.all():
+                if pub_changed := self._clean_fields(publication):
+                    publication.save(update_fields=pub_changed)
+
+        self.stdout.write(
+            self.style.SUCCESS(
+                "Cleaned up multiple spaces in "
+                + ", ".join(f"{counter[field]} {field}s" for field in self.fields)
+            )
+        )