# File: people_import.py
import hashlib
import json
import logging
import os
from io import BytesIO

import requests
from django.conf import settings
from django.core.files.images import ImageFile
from django.db import models
from gql import Client, gql
from gql.transport.aiohttp import AIOHTTPTransport
from wagtail.images.models import Image
from wagtail.models.media import Collection

from shared.models import OctopusPerson
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
class PeopleGroupImporter:
    """Synchronise the members of one Octopus group into local models and pages.

    The importer queries the Octopus GraphQL API for the people in the group
    identified by ``group_shortcut``, creates or updates an ``OctopusPerson``
    for each of them, makes sure each one has a page below the configured
    parent page, and deletes automatically created pages of people who are no
    longer in the group.

    A lock file path is passed in by the caller; the importer never creates
    it, it only removes it when it is done or when it fails.
    """

    # Seconds to wait for a profile photo download before giving up, so that a
    # stalled image server cannot block the import (and the lock file) forever.
    IMAGE_REQUEST_TIMEOUT = 30

    # Profile kinds requested for every person, in query order.
    PROFILE_KINDS = ("POLITICAL", "PIRATE")

    def __init__(
        self,
        people_parent_page_id,
        people_parent_page_model,
        person_page_model,
        collection_id,
        group_shortcut,
        lock_file_name,
    ):
        """Store the configuration and load the objects the import needs.

        Args:
            people_parent_page_id: ID of the page under which person pages live.
            people_parent_page_model: Model class used to load that parent page.
            person_page_model: Model class of the per-person pages.
            collection_id: ID of the ``Collection`` that receives profile images.
            group_shortcut: Shortcut of the Octopus group to import.
            lock_file_name: Path of the lock file to remove when finished.

        Raises:
            Exception: Whatever went wrong while loading the parent page or the
                collection, or while building the API client. The lock file is
                removed before the exception is re-raised.
        """
        try:
            self.people_parent_page_id = people_parent_page_id
            self.people_parent_page_model = people_parent_page_model
            self.person_page_model = person_page_model
            self.collection_id = collection_id
            self.group_shortcut = group_shortcut
            self.lock_file_name = lock_file_name

            self.new_user_count = 0
            self.existing_user_count = 0

            self.people_parent_page = self.people_parent_page_model.objects.get(
                id=self.people_parent_page_id
            )
            self.collection = Collection.objects.get(id=self.collection_id)

            self.transport = AIOHTTPTransport(url=settings.OCTOPUS_API_URL)
            self.client = Client(
                transport=self.transport, fetch_schema_from_transport=True
            )
        except Exception:
            # No matter what happens, at least remove the lockfile - but do not
            # hand a half-initialised importer back to the caller: it would
            # only fail later with an unrelated AttributeError.
            logger.exception(
                "Could not initialize Octopus person sync for page ID %s",
                people_parent_page_id,
            )
            self._remove_lock_file(lock_file_name)
            raise

    @staticmethod
    def _remove_lock_file(lock_file_name):
        """Delete the lock file, doing nothing if it is already gone."""
        try:
            os.remove(lock_file_name)
        except FileNotFoundError:
            pass

    @staticmethod
    def _graphql_string(value):
        """Return ``value`` as a quoted, escaped GraphQL string literal.

        JSON string escaping (quotes, backslashes, control characters) is also
        valid inside a GraphQL string, so a value containing such characters
        can no longer break out of the literal it is interpolated into.
        """
        return json.dumps(str(value), ensure_ascii=False)

    def get_people_ids_from_group(self):
        """Return the Octopus IDs of all people who are members of the group.

        Returns:
            A list with one ``person.id`` per membership of every group
            matching ``self.group_shortcut`` exactly.
        """
        shortcut_literal = self._graphql_string(self.group_shortcut)

        query = gql(
            f"""
            query {{
                allGroups(
                    filters:
                        {{
                            shortcut: {{exact: {shortcut_literal} }}
                        }}
                ) {{
                    edges {{
                        node {{
                            memberships {{
                                person {{
                                    id
                                }}
                            }}
                        }}
                    }}
                }}
            }}
            """
        )

        result = self.client.execute(query)

        return [
            membership["person"]["id"]
            for edge in result["allGroups"]["edges"]
            for membership in edge["node"]["memberships"]
        ]

    def get_person_profile_from_id(self, id: str, kind: str):
        """Fetch one profile of the given kind for a person.

        Args:
            id: Octopus ID of the person.
            kind: Profile kind; interpolated into the query as a bare GraphQL
                enum value, so it must be a valid enum name.

        Returns:
            The first matching profile node as a dict, or ``None`` if the
            person has no profile of that kind.
        """
        id_literal = self._graphql_string(id)

        query = gql(
            f"""
            query {{
                allProfiles(
                    filters: {{
                        person: {{
                            id: {id_literal}
                        }},
                        kind: {kind}
                    }}
                ) {{
                    edges {{
                        node {{
                            email
                            facebookUrl
                            flickrUrl
                            instagramUrl
                            kind
                            mastodonUrl
                            phone
                            photo
                            textLong
                            textShort
                            tiktokUrl
                            twitterUrl
                            url
                            webUrl
                            youtubeUrl
                            person {{
                                username
                                degreeAfterName
                                degreeBeforeName
                                displayName
                            }}
                        }}
                    }}
                }}
            }}
            """
        )

        result = self.client.execute(query)

        # Just return the first result, there should never be more than one
        # in this case. If there are no results, return None.
        first_edge = next(iter(result["allProfiles"]["edges"]), None)
        return None if first_edge is None else first_edge["node"]

    def get_processed_people_profiles(self, people_ids):
        """Pick the profile to import for every person.

        Every kind in ``PROFILE_KINDS`` is queried for every person, and the
        last kind that yields a profile is the one that is kept.

        NOTE(review): this means a PIRATE profile overrides a POLITICAL one.
        If the first available kind was meant to win, the selection needs to
        stop at the first hit - confirm the intended priority.

        Args:
            people_ids: Iterable of Octopus person IDs.

        Returns:
            A dict mapping each person ID to its selected profile dict, or to
            ``None`` when the person has no profile of any requested kind.
        """
        people_profiles = {}

        for person_id in people_ids:
            candidate_profiles = [
                self.get_person_profile_from_id(person_id, kind)
                for kind in self.PROFILE_KINDS
            ]

            selected_profile = None
            for profile in candidate_profiles:
                if profile is not None:
                    selected_profile = profile

            if selected_profile is not None:
                people_profiles[person_id] = selected_profile
            else:
                # Keep an entry for the person, without discarding a profile
                # recorded earlier for the same ID.
                people_profiles.setdefault(person_id, None)

        return people_profiles

    @staticmethod
    def _get_attribute_mapping(person_id, profile):
        """Map ``OctopusPerson`` attribute names to values from the profile.

        The photo is not part of the mapping; it needs to be downloaded and
        compared first, which ``_sync_photo`` takes care of.
        """
        octopus_person = profile["person"]

        return {
            "octopus_id": person_id,
            "username": octopus_person["username"],
            "email": profile["email"],
            "phone": profile["phone"],
            "facebook_url": profile["facebookUrl"],
            "flickr_url": profile["flickrUrl"],
            "instagram_url": profile["instagramUrl"],
            "mastodon_url": profile["mastodonUrl"],
            "twitter_url": profile["twitterUrl"],
            "tiktok_url": profile["tiktokUrl"],
            "web_url": profile["webUrl"],
            "youtube_url": profile["youtubeUrl"],
            "more_info_url": profile["url"],
            # TODO: Assume this is valid for the time being
            "profile_type": profile["kind"],
            "short_text": profile["textShort"],
            "long_text": profile["textLong"],
            "degree_before": octopus_person["degreeBeforeName"],
            "degree_after": octopus_person["degreeAfterName"],
            "display_name": octopus_person["displayName"],
        }

    def _sync_photo(self, person, person_id, username, photo_url):
        """Download the profile photo and attach it if it is new or changed.

        Nothing happens when there is no photo URL, when the download fails,
        or when the downloaded bytes hash to the same value as the photo the
        person already has.
        """
        # If there is no image, just skip setting it.
        # TODO: Delete images once they have been removed in Octopus
        if not isinstance(photo_url, str) or not photo_url:
            return

        try:
            image_response = requests.get(
                photo_url, timeout=self.IMAGE_REQUEST_TIMEOUT
            )
        except requests.RequestException as error:
            # An unreachable or stalled image server must not abort the whole
            # import; treat it like any other failed download.
            logger.warning(
                "Profile image for Octopus user %s could not be retrieved: %s",
                username,
                error,
            )
            return

        if not image_response.ok:
            # Don't do anything if the image can't be retrieved
            # at the moment.
            logger.warning(
                "Profile image for Octopus user %s could not be retrieved due to an issue with its server:\n%s",
                username,
                image_response.content,
            )
            return

        image_bytes = image_response.content

        if person.photo is not None:
            existing_image_hash = self.get_file_hash(person.photo.file)
            new_image_hash = self.get_file_hash(BytesIO(image_bytes))

            if existing_image_hash == new_image_hash:
                logger.info(
                    "Skipping image update for Octopus user %s, hash is the same",
                    username,
                )
                return

            logger.info(
                "Deleting old profile image for Octopus user %s as it has changed",
                username,
            )
            person.photo.delete()

        logger.info("Creating new image for Octopus user %s", username)

        new_image = Image(
            title=f"Fotografie osoby {username}",
            file=ImageFile(
                BytesIO(image_bytes),
                name=f"profile-image-{person_id}",
            ),
            collection=self.collection,
        )
        new_image.save()

        person.photo = new_image

    def create_and_update_people_models(self, people_profiles):
        """Create or update an ``OctopusPerson`` for every available profile.

        People whose profile is ``None`` are skipped. ``new_user_count`` and
        ``existing_user_count`` are incremented along the way.

        Args:
            people_profiles: Dict mapping person IDs to profile dicts or
                ``None``, as returned by ``get_processed_people_profiles``.

        Returns:
            The list of saved ``OctopusPerson`` instances.
        """
        people_instances = []

        for person_id, profile in people_profiles.items():
            if profile is None:
                continue

            username = profile["person"]["username"]
            attribute_mapping = self._get_attribute_mapping(person_id, profile)

            person = OctopusPerson.objects.filter(octopus_id=person_id).first()

            if person is None:
                self.new_user_count += 1
                logger.info(
                    "Creating new Octopus person profile - ID %s, username %s",
                    person_id,
                    username,
                )
                person = OctopusPerson()
            else:
                self.existing_user_count += 1
                logger.info(
                    "Synchronizing existing Octopus person profile - ID %s, username %s",
                    person_id,
                    username,
                )

            for person_attr, octopus_value in attribute_mapping.items():
                setattr(person, person_attr, octopus_value)

            self._sync_photo(person, person_id, username, profile["photo"])

            person.save()
            people_instances.append(person)

        return people_instances

    def get_file_hash(self, file):
        """Return the MD5 hex digest of what is left to read from ``file``.

        The file is read in chunks from its current position until it is
        exhausted. The digest is only used to detect changed content.
        """
        BUF_SIZE = 65536
        md5 = hashlib.md5()

        while True:
            data = file.read(BUF_SIZE)
            if not data:
                break
            md5.update(data)

        return md5.hexdigest()

    def _ensure_person_page(self, person_instance):
        """Create and publish a page for the person unless one already exists.

        People without a string display name get no page, because the display
        name is used as the page title.
        """
        person_page = (
            self.person_page_model.objects.filter(person=person_instance)
            .descendant_of(self.people_parent_page)
            .first()
        )

        if person_page is not None:
            return

        if not isinstance(person_instance.display_name, str):
            logger.warning(
                "Skipping page creation for user %s, they have no display name",
                person_instance.username,
            )
            return

        person_page = self.person_page_model(
            person=person_instance,
            is_automatically_created=True,
            originating_group=self.group_shortcut,
            title=person_instance.display_name,
        )

        self.people_parent_page.add_child(instance=person_page)
        person_page.save_revision().publish()

    def perform_import(self):
        """Run the whole synchronisation and release the lock file.

        Returns:
            A dict with the number of ``new_users`` created and
            ``existing_users`` updated.

        Raises:
            Exception: Any error raised during the import propagates to the
                caller; the lock file is removed first.
        """
        logger.info(
            "Started Octopus person sync for page %s", self.people_parent_page_id
        )

        try:
            people_ids = self.get_people_ids_from_group()
            people_profiles = self.get_processed_people_profiles(people_ids)
            people_instances = self.create_and_update_people_models(people_profiles)

            for person_instance in people_instances:
                self._ensure_person_page(person_instance)

            # Delete old pages that correspond to profiles which aren't
            # part of the group we are importing anymore.
            outdated_pages = self.person_page_model.objects.filter(
                ~models.Q(person__in=people_instances),
                originating_group=self.group_shortcut,
                is_automatically_created=True,
            ).descendant_of(self.people_parent_page)
            outdated_pages.delete()
        finally:
            # No matter what happens, at least remove the lockfile.
            logger.info("Letting go of lockfile %s", self.lock_file_name)
            self._remove_lock_file(self.lock_file_name)

        # Only reached when the import did not raise.
        logger.info(
            "Successfully finished Octopus person sync for page ID %s. %s new users, %s updated",
            self.people_parent_page_id,
            self.new_user_count,
            self.existing_user_count,
        )

        return {
            "new_users": self.new_user_count,
            "existing_users": self.existing_user_count,
        }