From e7ef0cca6e57ba897df53d286a6434fa680e0dc2 Mon Sep 17 00:00:00 2001 From: "J.-S. Caux" <J.S.Caux@uva.nl> Date: Fri, 10 Jul 2020 15:04:42 +0200 Subject: [PATCH] Improve selection of Profiles for COI checks on arXiv --- .../commands/update_coi_via_arxiv.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/conflicts/management/commands/update_coi_via_arxiv.py b/conflicts/management/commands/update_coi_via_arxiv.py index 9c49692a6..f80ae1b0d 100644 --- a/conflicts/management/commands/update_coi_via_arxiv.py +++ b/conflicts/management/commands/update_coi_via_arxiv.py @@ -33,14 +33,17 @@ class Command(BaseCommand): fellow_profiles = Profile.objects.filter(contributor__fellowships__id__in=fellow_ids) # Get all possibly relevant Profiles - author_str_list = [a.split()[-1] for a in sub.author_list.split(',')] - if 'entries' in sub.metadata: - author_str_list += [ - a['name'].split()[-1] for a in sub.metadata['entries'][0]['authors']] - author_str_list = set(author_str_list) # Distinct operator - author_profiles = Profile.objects.filter( - Q(contributor__in=sub.authors.all()) | - Q(last_name__in=author_str_list)).distinct() + # Assume the author list is purely comma-separated, + # with entries in format [firstname or initial[.]] lastname + author_profile_ids = [] + for a in sub.author_list.split(','): + last = a.split()[-1] + first = a.split()[0].split('.')[0] + print("%s %s" % (first, last)) + author_profile_ids += [p.id for p in Profile.objects.filter( + last_name__endswith=last, first_name__startswith=first).all()] + author_profile_ids_set = set(author_profile_ids) + author_profiles = Profile.objects.filter(pk__in=author_profile_ids_set) n_new_conflicts += caller.compare(author_profiles, fellow_profiles, submission=sub) Submission.objects.filter(id=sub.id).update(needs_conflicts_update=False) -- GitLab