Working import prototype for at least the second website

d50aa43b · OndraRehounek · jan.bednarik · fa5777f8 · d50aa43b
Commit d50aa43b authored 3 years ago by OndraRehounek Committed by jan.bednarik 3 years ago
--- a/district/jekyll_import.py
+++ b/district/jekyll_import.py
@@ -18,6 +18,7 @@ from markdown import Markdown
 from markdown.extensions import Extension
 from markdown.inlinepatterns import InlineProcessor
 from wagtail.contrib.redirects.models import Redirect
+from wagtail.core.rich_text import RichText
 from wagtail.images.models import Image
 # Wagtail needs this, see https://docs.wagtail.io/en/stable/extending/rich_text_internals.html#data-format
@@ -54,7 +55,7 @@ class ImgProcessor(InlineProcessor):
        el.attrib["format"] = "left"
        collection = get_collection()
        image_obj = get_or_create_image(
-            params["path"], m.group(2), collection=collection
+            params["path"], m.group(2), collection=collection, repo_name=""
        )
        el.attrib["id"] = str(image_obj.pk)
        return el, m.start(0), m.end(0)
@@ -84,7 +85,7 @@ def get_site_config(path) -> dict:
    return config
-def import_post(path, file_path, parent, title_suffix, dry_run):
+def import_post(path, file_path, parent, title_suffix, dry_run, repo_name):
    from district.models import DistrictArticlePage
    with open(os.path.join(path, file_path), "rt") as f:
@@ -104,7 +105,8 @@ def import_post(path, file_path, parent, title_suffix, dry_run):
    article = DistrictArticlePage()
-    article.text = html
+    # article.text = html
+    article.content = [("text", RichText(html))]
    article.perex = get_perex(md)
    meta_date = meta["date"]
@@ -123,7 +125,10 @@ def import_post(path, file_path, parent, title_suffix, dry_run):
    #     article.tags.add(tag)
    collection = get_collection()
-    article.image = get_or_create_image(path, meta["image"], collection=collection)
+    if meta.get("image", None):
+        article.image = get_or_create_image(
+            path, meta["image"], collection=collection, repo_name=repo_name
+        )
    if dry_run:
        return article, True
@@ -132,7 +137,7 @@ def import_post(path, file_path, parent, title_suffix, dry_run):
        parent.add_child(instance=article)
        stdout.write("Creating article: %s" % article)
        rev = article.save_revision()
-        if meta["published"]:
+        if meta.get("published", True):
            rev.publish()
    except Exception as e:
        message_list.append(
@@ -150,22 +155,36 @@ def get_collection():
    return params["kolekce"]
-def get_path(url: str, use_git: bool) -> str:
+def get_path_and_repo_name(url: str, use_git: bool) -> (str, str):
    if use_git:
        return clone_repo(url)
    else:
        return download_repo_as_zip(url)
-def get_or_create_image(path, file_path, collection):
+def get_or_create_image(path, file_path, collection, repo_name):
    file_path = file_path.lstrip("/")
    if Image.objects.filter(title=file_path).exists():
        return Image.objects.filter(title=file_path).first()
    else:
+        try:
            file = ImageFile(open(os.path.join(path, file_path), "rb"), name=file_path)
            image = Image(title=file_path, file=file, collection=collection)
            image.save()
            return image
+        except FileNotFoundError:
+            img_name = file_path.split("/")[-1]
+            img_assets_folder = repo_name.split(".")[0]  # TODO make as form field
+            img_url = "https://a.pirati.cz/{}/img/posts/{}".format(
+                img_assets_folder, img_name
+            )
+            img_path = os.path.join(path, img_name)
+            urllib.request.urlretrieve(img_url, img_path)
+            file = ImageFile(open(img_path, "rb"), name=img_path)
+            image = Image(title=file_path, file=file, collection=collection)
+            image.save()
+            return image
 def get_title_from_site_config(site_config: dict) -> str:
@@ -174,7 +193,7 @@ def get_title_from_site_config(site_config: dict) -> str:
    return ""
-def clone_repo(url: str) -> str:
+def clone_repo(url: str) -> (str, str):
    """
    Naclonuje repo do tmp s využitím gitu a vrátí cestu k němu.
    Pokud URL končí lomítkem, odebereme ho, a vezmeme jako název repozitáře
@@ -193,10 +212,10 @@ def clone_repo(url: str) -> str:
    os.chdir(path)
    os.system("git clone --depth 1 {}".format(url))
-    return repo_path
+    return repo_path, repo_name
-def download_repo_as_zip(url: str) -> str:
+def download_repo_as_zip(url: str) -> (str, str):
    """
    Stáhne .zip repa, extrahuje a vrátí cestu k extrahovanému repu.
    Hodně nešikovné je, že extrahovaná složka má ještě suffix "-gh-pages"
@@ -215,7 +234,7 @@ def download_repo_as_zip(url: str) -> str:
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(path)
-    return os.path.join(path, "{}-gh-pages".format(repo_name))
+    return os.path.join(path, "{}-gh-pages".format(repo_name)), repo_name
 def perform_import(
@@ -231,7 +250,8 @@ def perform_import(
    params["kolekce"] = collection
    site = article_parent_page.get_site()
-    path = params["path"] = get_path(url=url, use_git=use_git)
+    params["path"], repo_name = get_path_and_repo_name(url=url, use_git=use_git)
+    path = params["path"]
    site_config = get_site_config(path)
    title_suffix = get_title_from_site_config(site_config)
@@ -249,7 +269,7 @@ def perform_import(
            if ext == "md":
                article, success = import_post(
-                    path, fname, article_parent_page, title_suffix, dry_run
+                    path, fname, article_parent_page, title_suffix, dry_run, repo_name
                )
                if not success: