From 1074d8b1cb3dbab11e3ae917815741ce875608af Mon Sep 17 00:00:00 2001 From: George Katsikas <giorgakis.katsikas@gmail.com> Date: Thu, 11 Apr 2024 11:35:44 +0200 Subject: [PATCH] fix arxiv source fetcher for single files --- .../management/commands/advance_git_repos.py | 58 ++++++++++++------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/scipost_django/production/management/commands/advance_git_repos.py b/scipost_django/production/management/commands/advance_git_repos.py index 7f3424a3b..df9b0768d 100644 --- a/scipost_django/production/management/commands/advance_git_repos.py +++ b/scipost_django/production/management/commands/advance_git_repos.py @@ -3,6 +3,7 @@ __license__ = "AGPL v3" from datetime import datetime from functools import reduce +from io import UnsupportedOperation from itertools import chain, cycle from typing import Any, Callable, Dict, List, Tuple from time import sleep @@ -482,30 +483,43 @@ class Command(BaseCommand): # Create file creation actions for each file in the source tar actions = [] - with tarfile.open(fileobj=source_stream.raw) as tar: - for member in tar: - if not member.isfile(): - continue - - f = tar.extractfile(member) - try: - bin_content = f.read() - actions.append( - { - "action": "create", - "file_path": member.name, - "encoding": "base64", - # Encode the binary content in base64, required by the API - "content": b64encode(bin_content).decode("utf-8"), - } - ) + try: + with tarfile.open(fileobj=source_stream.raw) as tar: + for member in tar: + if not member.isfile(): + continue + + f = tar.extractfile(member) + try: + bin_content = f.read() + actions.append( + { + "action": "create", + "file_path": member.name, + "encoding": "base64", + # Encode the binary content in base64, required by the API + "content": b64encode(bin_content).decode("utf-8"), + } + ) - except: - self.stdout.write( - self.style.ERROR( - f"Could not read {member.name} from the arXiv source files, skipping..." + except: + self.stdout.write( + self.style.ERROR( + f"Could not read {member.name} from the arXiv source files, skipping..." + ) ) - ) + except UnsupportedOperation: + # The file is not a tar, but a single file (e.g. a TeX file) + # Refetch the source since the previous one was consumed + source = requests.get(paper.pdf_url.replace("pdf", "src")) + actions.append( + { + "action": "create", + "file_path": f"main.tex", + "encoding": "base64", + "content": b64encode(source.content).decode("utf-8"), + } + ) # Filter out the files that already exist in the repo to avoid conflicts project = self.GL.projects.get(repo.git_path) -- GitLab