From 997b87e367e4441015265bca235724f3a7b05737 Mon Sep 17 00:00:00 2001
From: George Katsikas <giorgakis.katsikas@gmail.com>
Date: Wed, 8 Nov 2023 16:21:53 +0100
Subject: [PATCH] remove multiple spaces for titles and abstracts

---
 scipost_django/common/utils.py                | 11 +++
 scipost_django/scipost/services.py            |  6 +-
 scipost_django/submissions/forms/__init__.py  | 10 ++-
 .../clean_multiple_submission_spacing.py      | 65 +++++++++++++++++++
 4 files changed, 89 insertions(+), 3 deletions(-)
 create mode 100644 scipost_django/submissions/management/commands/clean_multiple_submission_spacing.py

diff --git a/scipost_django/common/utils.py b/scipost_django/common/utils.py
index 9e5b7fd34..550b3aa03 100644
--- a/scipost_django/common/utils.py
+++ b/scipost_django/common/utils.py
@@ -177,6 +177,17 @@ def get_current_domain():
         return "fake.domain"
 
 
+def remove_extra_spacing(text):
+    """
+    Collapse every run of whitespace in ``text`` into a single space.
+
+    Leading and trailing whitespace is dropped. Note that ``str.split()``
+    splits on any whitespace, so tabs and newlines are collapsed as well,
+    not only repeated spaces.
+    """
+    return " ".join(text.split())
+
+
 # MARKED FOR DEPRECATION
 class BaseMailUtil(object):
     mail_sender = "no-reply@%s" % get_current_domain()
diff --git a/scipost_django/scipost/services.py b/scipost_django/scipost/services.py
index dec57ee0a..519940b11 100644
--- a/scipost_django/scipost/services.py
+++ b/scipost_django/scipost/services.py
@@ -9,6 +9,8 @@ import datetime
 import dateutil.parser
 import logging
 
+from common.utils import remove_extra_spacing
+
 from submissions.constants import FIGSHARE_PREPRINT_SERVERS
 from submissions.models import PreprintServer
 
@@ -155,12 +157,12 @@ class ArxivCaller:
 
     def _format_data(self):
         data = self._arxiv_data
-        title = data["title"]
+        title = remove_extra_spacing(data["title"])
         author_list = [author["name"] for author in data.get("authors", [])]
         # author_list is given as a comma separated list of names on the relevant models (Commentary, Submission)
         author_list = ", ".join(author_list)
         arxiv_link = data["id"].replace("http:", "https:")
-        abstract = data["summary"]
+        abstract = remove_extra_spacing(data["summary"])
         pub_date = dateutil.parser.parse(data["published"]).date()
 
         self.data = {
diff --git a/scipost_django/submissions/forms/__init__.py b/scipost_django/submissions/forms/__init__.py
index 3c01b9ad2..8bb3534ca 100644
--- a/scipost_django/submissions/forms/__init__.py
+++ b/scipost_django/submissions/forms/__init__.py
@@ -67,7 +67,7 @@ from ..models import (
 from ..regexes import CHEMRXIV_DOI_PATTERN
 
 from colleges.models import Fellowship
-from common.utils import Q_with_alternative_spellings
+from common.utils import Q_with_alternative_spellings, remove_extra_spacing
 from journals.models import Journal, Publication
 from journals.constants import (
     PUBLISHABLE_OBJECT_TYPE_ARTICLE,
@@ -1617,6 +1617,14 @@ class SubmissionForm(forms.ModelForm):
                 "You must agree to the terms and conditions to submit a manuscript."
             )
 
+    def clean_title(self):
+        """Return the title with runs of whitespace collapsed to one space."""
+        return remove_extra_spacing(self.cleaned_data["title"])
+
+    def clean_abstract(self):
+        """Return the abstract with runs of whitespace collapsed to one space."""
+        return remove_extra_spacing(self.cleaned_data["abstract"])
+
     @transaction.atomic
     def save(self):
         """
diff --git a/scipost_django/submissions/management/commands/clean_multiple_submission_spacing.py b/scipost_django/submissions/management/commands/clean_multiple_submission_spacing.py
new file mode 100644
index 000000000..ade163cc1
--- /dev/null
+++ b/scipost_django/submissions/management/commands/clean_multiple_submission_spacing.py
@@ -0,0 +1,65 @@
+__copyright__ = "Copyright © Stichting SciPost (SciPost Foundation)"
+__license__ = "AGPL v3"
+
+
+from django.core.management.base import BaseCommand
+
+from common.utils import remove_extra_spacing
+from submissions.models.submission import Submission
+
+
+class Command(BaseCommand):
+    """
+    Collapse repeated whitespace in the text fields of existing submissions.
+
+    Publications stemming from a submission whose field had to be cleaned
+    get the same field cleaned as well.
+    """
+
+    help = "Clean up multiple spaces in submission fields"
+    fields = ["title", "abstract"]
+
+    def handle(self, *args, **options):
+        """Clean all submissions and report how many fields were changed."""
+        counter = dict.fromkeys(self.fields, 0)
+
+        for submission in Submission.objects.all():
+            changed_fields = self._clean_fields(submission)
+            if not changed_fields:
+                continue
+
+            # Save once per submission, however many fields changed.
+            submission.save()
+            for field in changed_fields:
+                counter[field] += 1
+
+            # Also update the same fields in all publications
+            # stemming from this submission
+            for publication in submission.publications.all():
+                if self._clean_fields(publication, changed_fields):
+                    publication.save()
+
+        summary = ", ".join(f"{counter[field]} {field}s" for field in self.fields)
+        self.stdout.write(
+            self.style.SUCCESS(f"Cleaned up multiple spaces in {summary}")
+        )
+
+    def _clean_fields(self, instance, fields=None):
+        """
+        Clean the given text fields of ``instance`` in place, without saving.
+
+        Fields which are missing or ``None`` are skipped. Returns the list
+        of field names whose value was actually modified.
+        """
+        changed_fields = []
+        for field in fields or self.fields:
+            value = getattr(instance, field, None)
+            if value is None:
+                continue
+
+            cleaned_value = remove_extra_spacing(value)
+            if cleaned_value != value:
+                setattr(instance, field, cleaned_value)
+                changed_fields.append(field)
+
+        return changed_fields
-- 
GitLab