From 6f3fef0caed870469aa7dde892f6f1c85b3bdff7 Mon Sep 17 00:00:00 2001 From: Geert Kapteijns <ghkapteijns@gmail.com> Date: Mon, 15 May 2017 15:06:18 +0200 Subject: [PATCH] move author list formatting to ArxivCaller --- scipost/services.py | 233 +------------------------------------------- 1 file changed, 2 insertions(+), 231 deletions(-) diff --git a/scipost/services.py b/scipost/services.py index f4f7fc7f2..17ca3811f 100644 --- a/scipost/services.py +++ b/scipost/services.py @@ -91,6 +91,8 @@ class ArxivCaller: data = self._arxiv_data pub_title = data['title'] author_list = [author['name'] for author in data['authors']] + # author_list is given as a comma separated list of names on the relevant models (Commentary, Submission) + author_list = ", ".join(author_list) arxiv_link = data['id'] abstract = data['summary'] pub_date = dateutil.parser.parse(data['published']).date() @@ -105,234 +107,3 @@ class ArxivCaller: def _search_result_present(self, data): return 'title' in data - - -# I'm going to revamp this whole thing... -class BaseCaller(object): - '''Base mixin for caller (Arxiv, DOI). - The basic workflow is to initiate the caller, call process() to make the actual call - followed by is_valid() to validate the response of the call. - - An actual caller should inherit at least the following: - > Properties: - - query_base_url - - caller_regex - > Methods: - - process() - ''' - # State of the caller - _is_processed = False - caller_regex = None - errorcode = None - errorvariables = {} - errormessages = {} - identifier_without_vn_nr = '' - identifier_with_vn_nr = '' - metadata = {} - query_base_url = None - target_object = None - version_nr = None - - def __init__(self, target_object, identifier, *args, **kwargs): - '''Initiate the Caller by assigning which object is used - the Arxiv identifier to be called. - - After initiating call in specific order: - - process() - - is_valid() - - Keyword arguments: - target_object -- The model calling the Caller (object) - identifier -- The identifier used for the call (string) - ''' - try: - self._check_valid_caller() - except NotImplementedError as e: - print('Caller invalid: %s' % e) - return - - # Set given arguments - self.target_object = target_object - self.identifier = identifier - self._precheck_if_valid() - super(BaseCaller, self).__init__(*args, **kwargs) - - def _check_identifier(self): - '''Split the given identifier in an article identifier and version number.''' - if not self.caller_regex: - raise NotImplementedError('No regex is set for this caller') - - if re.match(self.caller_regex, self.identifier): - self.identifier_without_vn_nr = self.identifier.rpartition('v')[0] - self.identifier_with_vn_nr = self.identifier - self.version_nr = int(self.identifier.rpartition('v')[2]) - else: - self.errorvariables['identifier_with_vn_nr'] = self.identifier - raise ValueError('bad_identifier') - - def _check_valid_caller(self): - '''Check if all methods and variables are set appropriately''' - if not self.query_base_url: - raise NotImplementedError('No `query_base_url` set') - - def _precheck_duplicate(self): - '''Check if identifier for object already exists.''' - if self.target_object.same_version_exists(self.identifier_with_vn_nr): - raise ValueError('preprint_already_submitted') - - def _precheck_previous_submissions_are_valid(self): - '''Check if previous submitted versions have the appropriate status.''' - try: - self.previous_submissions = self.target_object.different_versions( - self.identifier_without_vn_nr) - except AttributeError: - # Commentaries do not have previous version numbers? - pass - - if self.previous_submissions: - for submission in [self.previous_submissions[0]]: - if submission.status == 'revision_requested': - self.resubmission = True - elif submission.status in ['rejected', 'rejected_visible']: - raise ValueError('previous_submissions_rejected') - else: - raise ValueError('previous_submission_undergoing_refereeing') - - def _precheck_if_valid(self): - '''The master method to perform all checks required during initializing Caller.''' - try: - self._check_identifier() - self._precheck_duplicate() - self._precheck_previous_submissions_are_valid() - # More tests should be called right here...! - except ValueError as e: - self.errorcode = str(e) - - return not self.errorcode - - def _post_process_checks(self): - '''Perform checks after process, to check received data. - - Return: - None -- Raise ValueError with error code for an invalid check. - ''' - pass - - def is_valid(self): - '''Check if the process() call received valid data. - - If `is_valid()` is overwritten in the actual caller, be - sure to call this parent method in the last line! - - Return: - boolean -- True for valid data received. False otherwise. - ''' - if self.errorcode: - return False - if not self._is_processed: - raise ValueError('`process()` should be called first!') - return True - - def process(self): - '''Call to receive data. - - The `process()` should be implemented in the actual - caller be! Be sure to call this parent method in the last line! - ''' - try: - self._post_process_checks() - except ValueError as e: - self.errorcode = str(e) - - self._is_processed = True - - def get_error_message(self, errormessages={}): - '''Return the errormessages for a specific error code, with the possibility to - overrule the default errormessage dictionary for the specific Caller. - ''' - try: - t = Template(errormessages[self.errorcode]) - except KeyError: - t = Template(self.errormessages[self.errorcode]) - return t.render(Context(self.errorvariables)) - - - -class ArxivCallerOld(BaseCaller): - """ Performs an Arxiv article lookup for given identifier """ - - # State of the caller - resubmission = False - previous_submissions = [] - errormessages = arxiv_caller_errormessages - errorvariables = { - 'arxiv_journal_ref': '', - 'arxiv_doi': '', - 'identifier_with_vn_nr': '' - } - arxiv_journal_ref = '' - arxiv_doi = '' - metadata = {} - query_base_url = 'http://export.arxiv.org/api/query?id_list=%s' - caller_regex = "^[0-9]{4,}.[0-9]{4,5}v[0-9]{1,2}$" - - def __init__(self, target_object, identifier): - if not issubclass(target_object, ArxivCallable): - raise TypeError('Given target_object is not an ArxivCallable object.') - super(ArxivCaller, self).__init__(target_object, identifier) - - def process(self): - '''Do the actual call the receive Arxiv information.''' - if self.errorcode: - return - - queryurl = (self.query_base_url % self.identifier_with_vn_nr) - - try: - self._response = requests.get(queryurl, timeout=4.0) - except requests.ReadTimeout: - self.errorcode = 'arxiv_timeout' - return - except requests.ConnectionError: - self.errorcode = 'arxiv_timeout' - return - - self._response_content = feedparser.parse(self._response.content) - - super(ArxivCaller, self).process() - - def _post_process_checks(self): - # Check if response has at least one entry - if self._response.status_code == 400 or 'entries' not in self._response_content: - raise ValueError('arxiv_bad_request') - - # Check if preprint exists - if not self.preprint_exists(): - raise ValueError('preprint_does_not_exist') - - # Check via journal ref if already published - self.arxiv_journal_ref = self.published_journal_ref() - self.errorvariables['arxiv_journal_ref'] = self.arxiv_journal_ref - if self.arxiv_journal_ref: - raise ValueError('paper_published_journal_ref') - - # Check via DOI if already published - self.arxiv_doi = self.published_doi() - self.errorvariables['arxiv_doi'] = self.arxiv_doi - if self.arxiv_doi: - raise ValueError('paper_published_doi') - - self.metadata = self._response_content - - def preprint_exists(self): - return 'title' in self._response_content['entries'][0] - - def published_journal_ref(self): - if 'arxiv_journal_ref' in self._response_content['entries'][0]: - return self._response_content['entries'][0]['arxiv_journal_ref'] - return None - - def published_doi(self): - if 'arxiv_doi' in self._response_content['entries'][0]: - return self._response_content['entries'][0]['arxiv_doi'] - return None -- GitLab