SciPost Code Repository

Skip to content
Snippets Groups Projects
Commit 1074d8b1 authored by George Katsikas :goat:
Browse files

fix arxiv source fetcher for single files

parent 93c2e300
No related branches found
No related tags found
No related merge requests found
...@@ -3,6 +3,7 @@ __license__ = "AGPL v3" ...@@ -3,6 +3,7 @@ __license__ = "AGPL v3"
from datetime import datetime from datetime import datetime
from functools import reduce from functools import reduce
from io import UnsupportedOperation
from itertools import chain, cycle from itertools import chain, cycle
from typing import Any, Callable, Dict, List, Tuple from typing import Any, Callable, Dict, List, Tuple
from time import sleep from time import sleep
...@@ -482,30 +483,43 @@ class Command(BaseCommand): ...@@ -482,30 +483,43 @@ class Command(BaseCommand):
# Create file creation actions for each file in the source tar # Create file creation actions for each file in the source tar
actions = [] actions = []
with tarfile.open(fileobj=source_stream.raw) as tar: try:
for member in tar: with tarfile.open(fileobj=source_stream.raw) as tar:
if not member.isfile(): for member in tar:
continue if not member.isfile():
continue
f = tar.extractfile(member)
try: f = tar.extractfile(member)
bin_content = f.read() try:
actions.append( bin_content = f.read()
{ actions.append(
"action": "create", {
"file_path": member.name, "action": "create",
"encoding": "base64", "file_path": member.name,
# Encode the binary content in base64, required by the API "encoding": "base64",
"content": b64encode(bin_content).decode("utf-8"), # Encode the binary content in base64, required by the API
} "content": b64encode(bin_content).decode("utf-8"),
) }
)
except: except:
self.stdout.write( self.stdout.write(
self.style.ERROR( self.style.ERROR(
f"Could not read {member.name} from the arXiv source files, skipping..." f"Could not read {member.name} from the arXiv source files, skipping..."
)
) )
) except UnsupportedOperation:
# The file is not a tar, but a single file (e.g. a TeX file)
# Refetch the source since the previous one was consumed
source = requests.get(paper.pdf_url.replace("pdf", "src"))
actions.append(
{
"action": "create",
"file_path": f"main.tex",
"encoding": "base64",
"content": b64encode(source.content).decode("utf-8"),
}
)
# Filter out the files that already exist in the repo to avoid conflicts # Filter out the files that already exist in the repo to avoid conflicts
project = self.GL.projects.get(repo.git_path) project = self.GL.projects.get(repo.git_path)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment