"""Importers that synchronise Octopus person profiles into local models and pages."""

import hashlib
import json
import logging
import os
from io import BytesIO

import requests
from django.conf import settings
from django.core.files.images import ImageFile
from gql import Client, gql
from gql.transport.aiohttp import AIOHTTPTransport
from gql.transport.exceptions import TransportServerError
from wagtail.images.models import Image
from wagtail.models.media import Collection

from shared.models import (
    OctopusPerson,
    OctopusPersonOriginatingGroup,
    OctopusPersonOriginatingTeam,
)

logger = logging.getLogger(__name__)

# Profile kinds to try for a person, most preferred first.
_PROFILE_KIND_PRIORITY = ("POLITICAL", "PIRATE")

# Chunk size (bytes) used when hashing files.
_HASH_CHUNK_SIZE = 65536

# Upper bound (seconds) for a profile photo download, so that an unresponsive
# image server cannot block the import forever.
_PHOTO_REQUEST_TIMEOUT = 30


def _graphql_string(value):
    """Return ``value`` as a quoted, escaped GraphQL string literal.

    JSON string escaping (quotes, backslashes, control characters) is also
    valid GraphQL string escaping, so a value containing ``"`` can no longer
    terminate the literal early. Values without such characters are rendered
    exactly as plain ``"value"``.
    """
    return json.dumps(str(value), ensure_ascii=False)


def _role_order(membership):
    """Return the membership's ``roleOrder`` as an int, treating null/missing as 0."""
    return int(membership.get("roleOrder") or 0)


def _profile_query(person_id, kind, include_person_id=False):
    """Build the ``allProfiles`` query for one person and one profile kind.

    ``kind`` is interpolated as a bare enum value; ``include_person_id``
    additionally requests ``person.id`` in the result.
    """
    person_id_field = "id" if include_person_id else ""

    return gql(
        f"""
        query {{
            allProfiles(
                filters: {{
                    person: {{ id: {_graphql_string(person_id)} }},
                    kind: {kind}
                }}
            ) {{
                edges {{
                    node {{
                        email
                        facebookUrl
                        flickrUrl
                        instagramUrl
                        kind
                        mastodonUrl
                        phone
                        photo
                        textLong
                        textShort
                        tiktokUrl
                        twitterUrl
                        url
                        webUrl
                        youtubeUrl
                        person {{
                            {person_id_field}
                            username
                            degreeAfterName
                            degreeBeforeName
                            displayName
                        }}
                    }}
                }}
            }}
        }}
        """
    )


class ImporterMixin:
    """Shared Octopus synchronisation logic for the importer classes.

    Classes using this mixin must set ``self.client`` (a gql ``Client``) and
    ``self.collection`` before calling the methods that read them;
    ``_connect`` does both.
    """

    def _connect(self):
        """Resolve the image collection and build the Octopus API client.

        Reads ``self.collection_id``; sets ``self.collection``,
        ``self.transport`` and ``self.client``.
        """
        self.collection = Collection.objects.filter(id=self.collection_id).first()

        if self.collection is None:
            # Fallback - Use the first collection in the database,
            # we'll have to assume there is one.
            self.collection = Collection.objects.first()

        self.transport = AIOHTTPTransport(url=settings.OCTOPUS_API_URL)
        self.client = Client(
            transport=self.transport, fetch_schema_from_transport=True
        )

    def _release_lock_file(self, lock_file_name):
        """Remove the lockfile at ``lock_file_name`` if it exists."""
        if os.path.exists(lock_file_name):
            os.remove(lock_file_name)

    def get_file_hash(self, file):
        """Return the hexadecimal MD5 digest of everything ``file.read`` yields.

        The digest is only used to detect changed images, not for security.
        """
        digest = hashlib.md5(usedforsecurity=False)

        while chunk := file.read(_HASH_CHUNK_SIZE):
            digest.update(chunk)

        return digest.hexdigest()

    def _sync_photo(self, person, person_id, photo_url, username):
        """Download the profile photo and attach it to ``person`` if it changed.

        Does nothing when there is no photo URL, when the download fails, or
        when the downloaded bytes hash the same as the currently stored photo.
        """
        # If there is no image, just skip setting it.
        # TODO: Delete images once they have been removed in Octopus
        if not isinstance(photo_url, str) or not photo_url:
            return

        try:
            image_response = requests.get(photo_url, timeout=_PHOTO_REQUEST_TIMEOUT)
        except requests.RequestException as exception:
            # A network failure is handled like an error response below:
            # keep whatever photo the person has and carry on with the sync.
            logger.warning(
                "Profile image for Octopus user %s could not be downloaded: %s",
                username,
                exception,
            )
            return

        if not image_response.ok:
            # Don't do anything if the image can't be retrieved
            # at the moment.
            logger.warning(
                "Profile image for Octopus user %s could not be retrieved due to an issue with its server:\n%s",
                username,
                image_response.content,
            )
            return

        image_bytes = image_response.content

        if person.photo is not None:
            existing_image_hash = self.get_file_hash(person.photo.file)
            new_image_hash = self.get_file_hash(BytesIO(image_bytes))

            if existing_image_hash == new_image_hash:
                logger.info(
                    "Skipping image update for Octopus user %s, hash is the same",
                    username,
                )
                return

            logger.info(
                "Deleting old profile image for Octopus user %s as it has changed",
                username,
            )
            person.photo.delete()

        logger.info("Creating new image for Octopus user %s", username)

        new_image = Image(
            title=f"Fotografie osoby {username}",
            file=ImageFile(
                BytesIO(image_bytes),
                name=f"profile-image-{person_id}",
            ),
            collection=self.collection,
        )
        new_image.save()

        person.photo = new_image

    def process_person(self, person_id, profile, person):
        """Create or update the ``OctopusPerson`` for one Octopus profile.

        Args:
            person_id: Octopus ID of the person.
            profile: Profile node as returned by the ``allProfiles`` query,
                optionally extended with ``position`` and ``order`` keys.
            person: Existing ``OctopusPerson`` to update, or ``None``.

        Returns:
            The saved ``OctopusPerson`` instance.
        """
        person_info = profile["person"]
        username = person_info["username"]

        attribute_mapping = {
            "octopus_id": person_id,
            "username": username,
            "email": profile["email"],
            "phone": profile["phone"],
            "position": profile.get("position"),
            "order": profile.get("order", 0),
            "facebook_url": profile["facebookUrl"],
            "flickr_url": profile["flickrUrl"],
            "instagram_url": profile["instagramUrl"],
            "mastodon_url": profile["mastodonUrl"],
            "twitter_url": profile["twitterUrl"],
            "tiktok_url": profile["tiktokUrl"],
            "web_url": profile["webUrl"],
            "youtube_url": profile["youtubeUrl"],
            "more_info_url": profile["url"],
            # TODO: Assume this is valid for the time being
            "profile_type": profile["kind"],
            "short_text": profile["textShort"],
            "long_text": profile["textLong"],
            "degree_before": person_info["degreeBeforeName"],
            "degree_after": person_info["degreeAfterName"],
            "display_name": person_info["displayName"],
        }

        if person is None:
            # For some reason, probably due to integrity issues somewhere, the
            # person can end up existing even though the caller found none.
            # FIXME
            person = OctopusPerson.objects.filter(octopus_id=person_id).first()

        if person is None:
            if hasattr(self, "new_user_count"):
                self.new_user_count += 1

            logger.info(
                "Creating new Octopus person profile - ID %s, username %s",
                person_id,
                username,
            )
            person = OctopusPerson()
        else:
            if hasattr(self, "existing_user_count"):
                self.existing_user_count += 1

            logger.info(
                "Synchronizing existing Octopus person profile - ID %s, username %s",
                person_id,
                username,
            )

        for person_attr, octopus_value in attribute_mapping.items():
            setattr(person, person_attr, octopus_value)

        # The photo needs a download and a comparison with the stored image,
        # so it is handled separately from the plain attributes.
        self._sync_photo(person, person_id, profile["photo"], username)

        person.save()

        return person

    def get_person_profile_from_id(self, id: str, kind: str):
        """Return the person's profile node of the given kind, or ``None``."""
        result = self.client.execute(_profile_query(id, kind))

        # Just return the first result, there should never be more than one
        # in this case. If there are no results, return None.
        edges = result["allProfiles"]["edges"]
        return edges[0]["node"] if edges else None

    def _get_prioritized_profile(self, person_id):
        """Return the first available profile in priority order, or ``None``.

        Lower-priority kinds are only queried when the preferred ones are
        missing.
        """
        for kind in _PROFILE_KIND_PRIORITY:
            profile = self.get_person_profile_from_id(person_id, kind)

            if profile is not None:
                return profile

        return None

    def create_and_update_people_models(self, people_profiles):
        """Sync every non-``None`` profile and return the resulting people.

        Args:
            people_profiles: Mapping of Octopus person ID to profile node
                (or ``None`` when the person has no usable profile).
        """
        people_instances = []

        for person_id, profile in people_profiles.items():
            if profile is None:
                continue

            existing_person = OctopusPerson.objects.filter(
                octopus_id=person_id
            ).first()
            people_instances.append(
                self.process_person(person_id, profile, existing_person)
            )

        return people_instances


class PeopleGroupImporter(ImporterMixin):
    """Imports all members of one Octopus group as child pages of a parent page."""

    def __init__(
        self,
        people_parent_page_id,
        people_parent_page_model,
        person_page_model,
        collection_id,
        group_shortcut,
        group_display,
        lock_file_name,
    ):
        """Store the configuration, load the parent page and build the client.

        A failure during setup removes the lockfile and is logged; it is not
        re-raised, so the importer may be left partially initialised.
        """
        self.people_parent_page_id = people_parent_page_id
        self.people_parent_page_model = people_parent_page_model
        self.person_page_model = person_page_model
        self.collection_id = collection_id
        self.group_shortcut = group_shortcut
        self.group_display = group_display
        self.lock_file_name = lock_file_name

        self.new_user_count = 0
        self.existing_user_count = 0

        try:
            self.people_parent_page = self.people_parent_page_model.objects.get(
                id=self.people_parent_page_id
            )
            self._connect()
        except Exception as exception:
            # No matter what happens, at least remove the lockfile.
            self._release_lock_file(lock_file_name)
            logger.exception(
                "People group importer crashed on init: %s", str(exception)
            )

    def get_people_ids_from_group(self):
        """Return the Octopus person IDs of all members of the configured group."""
        query = gql(
            f"""
            query {{
                allGroups(
                    filters: {{
                        shortcut: {{ iExact: {_graphql_string(self.group_shortcut)} }}
                    }}
                ) {{
                    edges {{
                        node {{
                            memberships {{
                                person {{
                                    id
                                }}
                            }}
                        }}
                    }}
                }}
            }}
            """
        )
        result = self.client.execute(query)

        return [
            membership["person"]["id"]
            for edge in result["allGroups"]["edges"]
            # A group without memberships may come back as null.
            for membership in edge["node"]["memberships"] or []
        ]

    def get_processed_people_profiles(self, people_ids):
        """Map each person ID to its preferred profile (``None`` if it has none)."""
        return {
            person_id: self._get_prioritized_profile(person_id)
            for person_id in people_ids
        }

    def perform_import(self):
        """Sync the group's people and their pages, then release the lockfile.

        Returns:
            A dict with the ``new_users`` and ``existing_users`` counts.
        """
        logger.info(
            "Started Octopus person sync for DistrictPeoplePage %s",
            self.people_parent_page_id,
        )

        try:
            people_ids = self.get_people_ids_from_group()
            people_profiles = self.get_processed_people_profiles(people_ids)
            people_instances = self.create_and_update_people_models(people_profiles)

            for person_instance in people_instances:
                person_page = (
                    self.person_page_model.objects.filter(person=person_instance)
                    .descendant_of(self.people_parent_page)
                    .first()
                )

                originating_group = OctopusPersonOriginatingGroup.objects.get_or_create(
                    name=self.group_shortcut
                )[0]

                if person_page is None:
                    if not isinstance(person_instance.display_name, str):
                        logger.warning(
                            "Skipping page creation for user %s, they have no display name",
                            person_instance.username,
                        )
                        continue

                    person_page = self.person_page_model(
                        person=person_instance,
                        is_automatically_created=True,
                        originating_display=self.group_display,
                        title=person_instance.display_name,
                    )

                    self.people_parent_page.add_child(instance=person_page)
                    person_page.save_revision().publish()

                    person_page.originating_groups.add(originating_group)
                else:
                    if not person_page.originating_groups.filter(
                        id=originating_group.id
                    ).exists():
                        person_page.originating_groups.add(originating_group)

                    person_page.originating_display = self.group_display
                    person_page.save_revision().publish()

            # TODO: Delete old automatically created pages (descendants of the
            # parent page, originating from this group) that correspond to
            # profiles which aren't part of the imported group anymore.
        finally:
            # No matter what happens, at least remove the lockfile.
            logger.info("Letting go of lockfile %s", self.lock_file_name)
            self._release_lock_file(self.lock_file_name)

        logger.info(
            "Successfully finished Octopus person sync for page ID %s. %s new users, %s updated",
            self.people_parent_page_id,
            self.new_user_count,
            self.existing_user_count,
        )

        return {
            "new_users": self.new_user_count,
            "existing_users": self.existing_user_count,
        }


class PeopleTeamImporter(ImporterMixin):
    """Imports members of one Octopus team, optionally limited to given roles."""

    def __init__(
        self,
        people_parent_page_id,
        people_parent_page_model,
        person_page_model,
        collection_id,
        team_shortcut,
        team_display,
        team_roles,
        lock_file_name,
    ):
        """Store the configuration, load the parent page and build the client.

        A failure during setup removes the lockfile and is logged; it is not
        re-raised, so the importer may be left partially initialised.
        """
        self.people_parent_page_id = people_parent_page_id
        self.people_parent_page_model = people_parent_page_model
        self.person_page_model = person_page_model
        self.collection_id = collection_id
        self.team_shortcut = team_shortcut
        self.team_display = team_display
        self.team_roles = team_roles
        self.lock_file_name = lock_file_name

        self.new_user_count = 0
        self.existing_user_count = 0

        try:
            self.people_parent_page = self.people_parent_page_model.objects.get(
                id=self.people_parent_page_id
            )
            self._connect()
        except Exception as exception:
            # No matter what happens, at least remove the lockfile.
            self._release_lock_file(lock_file_name)
            logger.exception(
                "People team importer crashed on init: %s", str(exception)
            )

    def get_people_ids_from_team(self):
        """Return ``{"id", "role", "order"}`` dicts for the team's members.

        Members are ordered by role order within each returned team and
        filtered by ``self.team_roles`` when that is non-empty. Returns an
        empty list when the API answers with a server error.
        """
        query = gql(
            f"""
            query {{
                allTeams(
                    filters: {{
                        shortcut: {{ iExact: {_graphql_string(self.team_shortcut)} }}
                    }}
                ) {{
                    edges {{
                        node {{
                            memberships {{
                                roleDisplay
                                roleOrder
                                person {{
                                    id
                                }}
                            }}
                        }}
                    }}
                }}
            }}
            """
        )

        try:
            result = self.client.execute(query)
        except TransportServerError as exception:
            logger.warning(
                "Error getting data for group %s: %s",
                self.team_shortcut,
                str(exception),
            )
            return []

        users = []

        for edge in result["allTeams"]["edges"]:
            memberships = edge["node"]["memberships"]

            if memberships is None:
                continue

            for membership in sorted(memberships, key=_role_order):
                # Can't do this in the query (yet), so just filter here
                if (
                    self.team_roles
                    and membership["roleDisplay"] not in self.team_roles
                ):
                    continue

                users.append(
                    {
                        "id": membership["person"]["id"],
                        "role": membership["roleDisplay"],
                        "order": _role_order(membership),
                    }
                )

        return users

    def get_processed_people_profiles(self, people_ids):
        """Map each person ID to its preferred profile, annotated with the role.

        Args:
            people_ids: Dicts as returned by ``get_people_ids_from_team``.

        Returns:
            Mapping of person ID to profile node with added ``position`` and
            ``order`` keys, or to ``None`` when the person has no profile.
        """
        people_profiles = {}

        for member in people_ids:
            person_id = member["id"]
            profile = self._get_prioritized_profile(person_id)

            if profile is not None:
                profile["position"] = member["role"]
                profile["order"] = member["order"]

            people_profiles[person_id] = profile

        return people_profiles

    def perform_import(self):
        """Sync the team's people and their pages, then release the lockfile.

        Returns:
            A dict with the ``new_users`` and ``existing_users`` counts.
        """
        logger.info(
            "Started Octopus person sync for DistrictPeoplePage %s",
            self.people_parent_page_id,
        )

        try:
            people_ids = self.get_people_ids_from_team()
            people_profiles = self.get_processed_people_profiles(people_ids)
            people_instances = self.create_and_update_people_models(people_profiles)

            for person_instance in people_instances:
                person_page = (
                    self.person_page_model.objects.filter(person=person_instance)
                    .descendant_of(self.people_parent_page)
                    .first()
                )

                originating_team = OctopusPersonOriginatingTeam.objects.get_or_create(
                    name=self.team_shortcut
                )[0]

                if person_page is None:
                    if not isinstance(person_instance.display_name, str):
                        logger.warning(
                            "Skipping page creation for user %s, they have no display name",
                            person_instance.username,
                        )
                        continue

                    person_page = self.person_page_model(
                        person=person_instance,
                        is_automatically_created=True,
                        originating_display=self.team_display,
                        title=person_instance.display_name,
                    )

                    # Pre-compute the tree path of the new page before adding it.
                    # NOTE(review): add_child presumably assigns the path itself;
                    # confirm whether this workaround is still needed.
                    last_child = self.people_parent_page.get_last_child()

                    if last_child is None:
                        # Handle empty parent case
                        new_path = self.people_parent_page.path + "0001"
                    else:
                        new_path = last_child._inc_path()

                    person_page.path = new_path

                    self.people_parent_page.add_child(instance=person_page)
                    person_page.save_revision().publish()

                    person_page.originating_teams.add(originating_team)

                    if self.team_roles:
                        person_page.originating_role = person_instance.position

                        person_page.save()
                else:
                    person_page.originating_display = self.team_display

                    currently_valid_teams = {
                        team["shortcut"]
                        for team in self.people_parent_page.get_syncable_octopus_teams()
                    }

                    # Materialise the queryset first, the relation is modified
                    # while looping.
                    for existing_originating_team in list(
                        person_page.originating_teams.all()
                    ):
                        if existing_originating_team.name not in currently_valid_teams:
                            person_page.originating_teams.remove(
                                existing_originating_team
                            )

                    if not person_page.originating_teams.filter(
                        id=originating_team.id
                    ).exists():
                        person_page.originating_teams.add(originating_team)

                    person_page.save()

                    if self.team_roles:
                        person_page.originating_role = person_instance.position

                    person_page.save_revision().publish()

            # TODO: Delete old automatically created pages (descendants of the
            # parent page, originating from this team and, when roles are
            # configured, having one of those roles) that correspond to
            # profiles which aren't part of the imported team anymore.
        finally:
            # No matter what happens, at least remove the lockfile.
            logger.info("Letting go of lockfile %s", self.lock_file_name)
            self._release_lock_file(self.lock_file_name)

        logger.info(
            "Successfully finished Octopus person sync for page ID %s. %s new users, %s updated",
            self.people_parent_page_id,
            self.new_user_count,
            self.existing_user_count,
        )

        return {
            "new_users": self.new_user_count,
            "existing_users": self.existing_user_count,
        }


class PersonImporter(ImporterMixin):
    """Imports a single Octopus person, looked up by the page's username."""

    def __init__(
        self,
        person_page_model,
        person_page_id,
        collection_id,
        lock_file_name,
    ):
        """Store the configuration, load the person page and build the client.

        A failure during setup removes the lockfile and is logged; it is not
        re-raised, so the importer may be left partially initialised.
        """
        self.person_page_id = person_page_id
        self.person_page_model = person_page_model
        self.collection_id = collection_id
        self.lock_file_name = lock_file_name

        try:
            self.person_page = self.person_page_model.objects.get(
                id=self.person_page_id
            )
            self._connect()
        except Exception as exception:
            # No matter what happens, at least remove the lockfile.
            self._release_lock_file(lock_file_name)
            logger.exception("Person importer crashed on init: %s", str(exception))

    def get_person_id_from_username(self, username):
        """Return the Octopus ID matching ``username``, or ``None``.

        When several people match, the ID of the last one returned is used.
        """
        query = gql(
            f"""
            query {{
                allPeople(
                    filters: {{
                        username: {{ iExact: {_graphql_string(username)} }}
                    }}
                ) {{
                    edges {{
                        node {{
                            id
                        }}
                    }}
                }}
            }}
            """
        )
        result = self.client.execute(query)

        edges = result["allPeople"]["edges"]
        return edges[-1]["node"]["id"] if edges else None

    def get_person_data(self, username: str, kind: str) -> dict | None:
        """Return the profile node (including ``person.id``) for a username.

        Returns ``None`` when the username is unknown or the person has no
        profile of the requested kind.
        """
        person_id = self.get_person_id_from_username(username)

        if person_id is None:
            return None

        result = self.client.execute(
            _profile_query(person_id, kind, include_person_id=True)
        )

        # Just return the first result, there should never be more than one
        # in this case. If there are no results, return None.
        edges = result["allProfiles"]["edges"]
        return edges[0]["node"] if edges else None

    def get_person_instance(self, profile):
        """Create or update the ``OctopusPerson`` described by ``profile``."""
        octopus_id = profile["person"]["id"]

        return self.process_person(
            octopus_id,
            profile,
            OctopusPerson.objects.filter(octopus_id=octopus_id).first(),
        )

    def perform_import(self):
        """Sync the page's person from Octopus, then release the lockfile.

        Does nothing (apart from releasing the lockfile) when no profile can
        be found for the page's username.
        """
        logger.info(
            "Started Octopus person sync for DistrictManualOctopusPersonPage %s",
            self.person_page_id,
        )

        try:
            person_data = None

            # Lower-priority kinds are only queried when the preferred ones
            # are missing.
            for kind in _PROFILE_KIND_PRIORITY:
                person_data = self.get_person_data(self.person_page.username, kind)

                if person_data is not None:
                    break

            if person_data is None:
                return

            person_instance = self.get_person_instance(person_data)

            if self.person_page.person_id != person_instance.id:
                self.person_page.person = person_instance
                self.person_page.save()
                self.person_page.save_revision().publish()
        finally:
            # No matter what happens, at least remove the lockfile.
            logger.info("Letting go of lockfile %s", self.lock_file_name)
            self._release_lock_file(self.lock_file_name)