From d50aa43b643f100de4e266970b7f1904510f4f92 Mon Sep 17 00:00:00 2001
From: OndraRehounek <ondra.rehounek@seznam.cz>
Date: Fri, 18 Mar 2022 17:15:50 +0100
Subject: [PATCH] Working import prototype for at least the second website

---
 district/jekyll_import.py | 54 +++++++++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 17 deletions(-)

diff --git a/district/jekyll_import.py b/district/jekyll_import.py
index 70e60236..275ff54e 100644
--- a/district/jekyll_import.py
+++ b/district/jekyll_import.py
@@ -18,6 +18,7 @@ from markdown import Markdown
 from markdown.extensions import Extension
 from markdown.inlinepatterns import InlineProcessor
 from wagtail.contrib.redirects.models import Redirect
+from wagtail.core.rich_text import RichText
 from wagtail.images.models import Image
 
 # Wagtail to portrebuje https://docs.wagtail.io/en/stable/extending/rich_text_internals.html#data-format
@@ -54,7 +55,7 @@ class ImgProcessor(InlineProcessor):
         el.attrib["format"] = "left"
         collection = get_collection()
         image_obj = get_or_create_image(
-            params["path"], m.group(2), collection=collection
+            params["path"], m.group(2), collection=collection, repo_name=""
         )
         el.attrib["id"] = str(image_obj.pk)
         return el, m.start(0), m.end(0)
@@ -84,7 +85,7 @@ def get_site_config(path) -> dict:
     return config
 
 
-def import_post(path, file_path, parent, title_suffix, dry_run):
+def import_post(path, file_path, parent, title_suffix, dry_run, repo_name):
     from district.models import DistrictArticlePage
 
     with open(os.path.join(path, file_path), "rt") as f:
@@ -104,7 +105,8 @@ def import_post(path, file_path, parent, title_suffix, dry_run):
 
     article = DistrictArticlePage()
 
-    article.text = html
+    # article.text = html
+    article.content = [("text", RichText(html))]
     article.perex = get_perex(md)
 
     meta_date = meta["date"]
@@ -123,7 +125,10 @@ def import_post(path, file_path, parent, title_suffix, dry_run):
     #     article.tags.add(tag)
 
     collection = get_collection()
-    article.image = get_or_create_image(path, meta["image"], collection=collection)
+    if meta.get("image", None):
+        article.image = get_or_create_image(
+            path, meta["image"], collection=collection, repo_name=repo_name
+        )
 
     if dry_run:
         return article, True
@@ -132,7 +137,7 @@ def import_post(path, file_path, parent, title_suffix, dry_run):
         parent.add_child(instance=article)
         stdout.write("Creating article: %s" % article)
         rev = article.save_revision()
-        if meta["published"]:
+        if meta.get("published", True):
             rev.publish()
     except Exception as e:
         message_list.append(
@@ -150,22 +155,36 @@ def get_collection():
     return params["kolekce"]
 
 
-def get_path(url: str, use_git: bool) -> str:
+def get_path_and_repo_name(url: str, use_git: bool) -> (str, str):
     if use_git:
         return clone_repo(url)
     else:
         return download_repo_as_zip(url)
 
 
-def get_or_create_image(path, file_path, collection):
+def get_or_create_image(path, file_path, collection, repo_name):
     file_path = file_path.lstrip("/")
     if Image.objects.filter(title=file_path).exists():
         return Image.objects.filter(title=file_path).first()
     else:
-        file = ImageFile(open(os.path.join(path, file_path), "rb"), name=file_path)
-        image = Image(title=file_path, file=file, collection=collection)
-        image.save()
-        return image
+        try:
+            file = ImageFile(open(os.path.join(path, file_path), "rb"), name=file_path)
+            image = Image(title=file_path, file=file, collection=collection)
+            image.save()
+            return image
+        except FileNotFoundError:
+            img_name = file_path.split("/")[-1]
+            img_assets_folder = repo_name.split(".")[0]  # TODO make as form field
+            img_url = "https://a.pirati.cz/{}/img/posts/{}".format(
+                img_assets_folder, img_name
+            )
+            img_path = os.path.join(path, img_name)
+            urllib.request.urlretrieve(img_url, img_path)
+
+            file = ImageFile(open(img_path, "rb"), name=img_path)
+            image = Image(title=file_path, file=file, collection=collection)
+            image.save()
+            return image
 
 
 def get_title_from_site_config(site_config: dict) -> str:
@@ -174,7 +193,7 @@ def get_title_from_site_config(site_config: dict) -> str:
     return ""
 
 
-def clone_repo(url: str) -> str:
+def clone_repo(url: str) -> (str, str):
     """
     Naclonuje repo do tmp s využitím gitu a vrátí cestu k němu.
     Pokud URL končí lomítkem, odebereme ho, a vezmeme jako název repozitáře
@@ -193,10 +212,10 @@ def clone_repo(url: str) -> str:
     os.chdir(path)
     os.system("git clone --depth 1 {}".format(url))
 
-    return repo_path
+    return repo_path, repo_name
 
 
-def download_repo_as_zip(url: str) -> str:
+def download_repo_as_zip(url: str) -> (str, str):
     """
     Stáhne .zip repa, extrahuje a vrátí cestu k extrahovanému repu.
     Hodně nešikovné je, že extrahovaná složka má ještě suffix "-gh-pages"
@@ -215,7 +234,7 @@ def download_repo_as_zip(url: str) -> str:
     with zipfile.ZipFile(zip_path, "r") as zip_ref:
         zip_ref.extractall(path)
 
-    return os.path.join(path, "{}-gh-pages".format(repo_name))
+    return os.path.join(path, "{}-gh-pages".format(repo_name)), repo_name
 
 
 def perform_import(
@@ -231,7 +250,8 @@ def perform_import(
     params["kolekce"] = collection
     site = article_parent_page.get_site()
 
-    path = params["path"] = get_path(url=url, use_git=use_git)
+    params["path"], repo_name = get_path_and_repo_name(url=url, use_git=use_git)
+    path = params["path"]
     site_config = get_site_config(path)
     title_suffix = get_title_from_site_config(site_config)
 
@@ -249,7 +269,7 @@ def perform_import(
 
             if ext == "md":
                 article, success = import_post(
-                    path, fname, article_parent_page, title_suffix, dry_run
+                    path, fname, article_parent_page, title_suffix, dry_run, repo_name
                 )
 
                 if not success:
-- 
GitLab