From b9bde0ef902bdf09bcfb9452fb5feacdbc9aecba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Valenta?= <git@imaniti.org>
Date: Fri, 20 Oct 2023 16:10:33 +0200
Subject: [PATCH] fix Jekyll article imports: remove Celery import tasks and
 run the import synchronously (for now)

---
 district/forms.py       | 16 ++++++++++++----
 district/models.py      | 15 +++++++++------
 district/tasks.py       | 27 ---------------------------
 main/forms.py           | 11 +++++++----
 main/tasks.py           | 27 ---------------------------
 shared/jekyll_import.py | 41 ++++++++++++++++++++++++++++++-----------
 6 files changed, 58 insertions(+), 79 deletions(-)
 delete mode 100644 district/tasks.py
 delete mode 100644 main/tasks.py

diff --git a/district/forms.py b/district/forms.py
index aaea2f30..03efb555 100644
--- a/district/forms.py
+++ b/district/forms.py
@@ -3,7 +3,7 @@ from django.utils.safestring import mark_safe
 from wagtail.admin.forms import WagtailAdminPageForm
 from wagtail.models.collections import Collection
 
-from .tasks import import_jekyll_articles
+from shared.jekyll_import import JekyllArticleImporter
 
 
 class JekyllImportForm(WagtailAdminPageForm):
@@ -69,6 +69,11 @@ class JekyllImportForm(WagtailAdminPageForm):
                 self.add_error(
                     "jekyll_repo_url", "Vložte odkaz pouze na repozitář, ne na zip"
                 )
+            elif cleaned_data.get("jekyll_repo_url", "").endswith(".git"):
+                self.add_error(
+                    "jekyll_repo_url",
+                    "Vložte odkaz pouze na repozitář, ne na '.git' soubor",
+                )
         else:
             if not cleaned_data.get("jekyll_repo_url", "").endswith(".zip"):
                 self.add_error("jekyll_repo_url", "Odkaz nesměřuje na soubor '.zip'")
@@ -76,13 +81,16 @@ class JekyllImportForm(WagtailAdminPageForm):
         return cleaned_data
 
     def handle_import(self):
-        import_jekyll_articles.delay(
-            article_parent_page_id=self.instance.id,
+        from .models import DistrictArticlePage
+
+        JekyllArticleImporter(
+            article_parent_page=self.instance,
             collection_id=self.cleaned_data["collection"].id,
             url=self.cleaned_data["jekyll_repo_url"],
             dry_run=self.cleaned_data["dry_run"],
             use_git=self.cleaned_data["use_git"],
-        )
+            page_model=DistrictArticlePage,
+        ).perform_import()
 
     def save(self, commit=True):
         if self.cleaned_data.get("do_import"):
diff --git a/district/models.py b/district/models.py
index 9963cb36..1d638815 100644
--- a/district/models.py
+++ b/district/models.py
@@ -7,6 +7,7 @@ from django.core.exceptions import ValidationError
 from django.db import models
 from django.http import HttpResponseNotFound, HttpResponseRedirect
 from django.shortcuts import render
+from django.utils.safestring import mark_safe
 from django.utils.translation import gettext_lazy
 from modelcluster.contrib.taggit import ClusterTaggableManager
 from modelcluster.fields import ParentalKey
@@ -535,12 +536,14 @@ class DistrictArticlesPage(
                 FieldPanel("jekyll_repo_url"),
                 FieldPanel("readonly_log"),
                 HelpPanel(
-                    "Import provádějte vždy až po vytvoření stránky aktualit. "
-                    'Pro uložení logu je nutné volit možnost "Publikovat", nikoliv'
-                    'pouze "Uložit koncept". '
-                    "Import proběhne na pozadí a může trvat až několik minut. "
-                    "Dejte si po spuštění importu kávu a potom obnovte stránku pro "
-                    "zobrazení výsledku importu."
+                    mark_safe(
+                        "Import provádějte vždy až po vytvoření stránky aktualit. "
+                        "Pro uložení logu je nutné volit možnost <strong>Publikovat</strong>, nikoliv "
+                        "pouze <strong>Uložit koncept</strong>. "
+                        "Import proběhne na pozadí a může trvat až několik minut. "
+                        "Dejte si po spuštění importu kávu a potom obnovte stránku pro "
+                        "zobrazení výsledku importu."
+                    )
                 ),
             ],
             "import z Jekyll repozitáře",
diff --git a/district/tasks.py b/district/tasks.py
deleted file mode 100644
index 0bb2aa59..00000000
--- a/district/tasks.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import logging
-
-from majak.celery import app
-from shared.jekyll_import import JekyllArticleImporter
-
-logger = logging.getLogger(__name__)
-
-
-@app.task
-def import_jekyll_articles(
-    article_parent_page_id: int,
-    collection_id: int,
-    url: str,
-    dry_run: bool,
-    use_git: bool,
-):
-    from .models import DistrictArticlePage, DistrictArticlesPage
-
-    return JekyllArticleImporter(
-        article_parent_page_id=article_parent_page_id,
-        collection_id=collection_id,
-        url=url,
-        dry_run=dry_run,
-        use_git=use_git,
-        parent_page_model=DistrictArticlesPage,
-        page_model=DistrictArticlePage,
-    ).perform_import()
diff --git a/main/forms.py b/main/forms.py
index a3155166..a8194047 100644
--- a/main/forms.py
+++ b/main/forms.py
@@ -2,7 +2,7 @@ from django import forms
 from wagtail.admin.forms import WagtailAdminPageForm
 from wagtail.models.collections import Collection
 
-from .tasks import import_jekyll_articles
+from shared.jekyll_import import JekyllArticleImporter
 
 
 class JekyllImportForm(WagtailAdminPageForm):
@@ -60,13 +60,16 @@ class JekyllImportForm(WagtailAdminPageForm):
         return cleaned_data
 
     def handle_import(self):
-        import_jekyll_articles.delay(
-            article_parent_page_id=self.instance.id,
+        from .models import MainArticlePage
+
+        JekyllArticleImporter(
+            article_parent_page=self.instance,
             collection_id=self.cleaned_data["collection"].id,
             url=self.cleaned_data["jekyll_repo_url"],
             dry_run=self.cleaned_data["dry_run"],
             use_git=True,
-        )
+            page_model=MainArticlePage,
+        ).perform_import()
 
     def save(self, commit=True):
         if self.cleaned_data.get("do_import"):
diff --git a/main/tasks.py b/main/tasks.py
deleted file mode 100644
index 03f20935..00000000
--- a/main/tasks.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import logging
-
-from majak.celery import app
-from shared.jekyll_import import JekyllArticleImporter
-
-logger = logging.getLogger(__name__)
-
-
-@app.task
-def import_jekyll_articles(
-    article_parent_page_id: int,
-    collection_id: int,
-    url: str,
-    dry_run: bool,
-    use_git: bool,
-):
-    from .models import MainArticlePage, MainArticlesPage
-
-    return JekyllArticleImporter(
-        article_parent_page_id=article_parent_page_id,
-        collection_id=collection_id,
-        url=url,
-        dry_run=dry_run,
-        use_git=use_git,
-        parent_page_model=MainArticlesPage,
-        page_model=MainArticlePage,
-    ).perform_import()
diff --git a/shared/jekyll_import.py b/shared/jekyll_import.py
index cdd77721..4ac04156 100644
--- a/shared/jekyll_import.py
+++ b/shared/jekyll_import.py
@@ -26,6 +26,7 @@ from wagtail.contrib.redirects.models import Redirect
 from wagtail.images.models import Image
 from wagtail.models import Page
 from wagtail.models.collections import Collection
+from willow.image import UnrecognisedImageFormatError
 from yaml.scanner import ScannerError
 
 logger = logging.getLogger(__name__)
@@ -114,6 +115,7 @@ def get_or_create_image(
     one-timer, tak jsme to pouze dotáhli do stavu, kdy to schroupne co nejvíce
     případů.
     """
+
     file_path = file_path.lstrip("/")
 
     if Image.objects.filter(title=file_path).exists():
@@ -125,7 +127,7 @@ def get_or_create_image(
         if not image_params["dry_run"]:
             image.save()
         return image, ""
-    except FileNotFoundError:
+    except (FileNotFoundError, UnrecognisedImageFormatError):
         pass  # cesta pomocí file_path neexisuje, jdeme dál
 
     try:
@@ -137,7 +139,7 @@ def get_or_create_image(
         if not image_params["dry_run"]:
             image.save()
         return image, ""
-    except FileNotFoundError:
+    except (FileNotFoundError, UnrecognisedImageFormatError):
         pass  # cesta s vložením "assets/img" před file_path neexisuje, jdeme dál
 
     try:
@@ -149,7 +151,7 @@ def get_or_create_image(
         if not image_params["dry_run"]:
             image.save()
         return image, ""
-    except FileNotFoundError:
+    except (FileNotFoundError, UnrecognisedImageFormatError):
         pass
 
     # na disku jsme nenašli, zkusíme stáhnout z webu
@@ -172,7 +174,7 @@ def get_or_create_image(
             )
             return None, log_message
 
-    # opačném případě jdeme zkusit assets server a.pirati.cz
+    # v opačném případě jdeme zkusit assets server a.pirati.cz
     else:
         img_assets_folder = repo_name.split(".")[0]  # např. "praha" z praha.pirati.cz
         img_url = "https://a.pirati.cz/resize/4000x-/{}/img/{}".format(
@@ -191,6 +193,7 @@ def get_or_create_image(
             except (HTTPError, UnicodeEncodeError, InvalidURL, IsADirectoryError):
                 msg = "Nedohledán obrázek při importu článků - ani na disku, ani na URL"
                 log_message = "{}: cesta {}, URL {}\n".format(msg, file_path, img_url)
+
                 logger.warning(
                     log_message,
                     extra={
@@ -199,10 +202,25 @@ def get_or_create_image(
                         "img_url": img_url,
                     },
                 )
+
                 return None, log_message
 
     file = ImageFile(open(img_path, "rb"), name=img_path)
-    image = Image(title=file_path, file=file, collection=collection)
+
+    try:
+        image = Image(title=file_path, file=file, collection=collection)
+    except UnrecognisedImageFormatError:
+        msg = "Obrázek byl nalezen, ale jeho formát nerozpoznán"
+        log_message = "{}: cesta {}\n".format(msg, file_path)
+
+        logger.warning(
+            log_message,
+            extra={
+                "file_path": file_path,
+            },
+        )
+
+        return None, log_message
 
     if not image_params["dry_run"]:
         try:
@@ -211,6 +229,7 @@ def get_or_create_image(
             msg = "Nelze uložit obrázek"
             logger.warning(msg, extra={"exc": e})
             return None, msg
+
     return image, ""
 
 
@@ -304,20 +323,17 @@ params = {}
 class JekyllArticleImporter:
     def __init__(
         self,
-        article_parent_page_id: int,
+        article_parent_page,
         collection_id: int,
         url: str,
         dry_run: bool,
         use_git: bool,
-        parent_page_model,
         page_model,
     ):
         self.page_model = page_model
 
         # Params
-        self.article_parent_page = parent_page_model.objects.get(
-            id=article_parent_page_id
-        ).specific  # TODO test if specific should be included or not
+        self.article_parent_page = article_parent_page
         self.collection = Collection.objects.get(id=collection_id)
         self.dry_run = dry_run
         self.use_git = use_git
@@ -384,7 +400,7 @@ class JekyllArticleImporter:
             self.page_log += "NELZE importovat {} článků\n".format(self.skipped_counter)
 
         self.article_parent_page.last_import_log = self.page_log
-        self.article_parent_page.save()
+        self.article_parent_page.save_revision()
 
     @staticmethod
     def get_parsed_file_path(path: str):
@@ -614,8 +630,10 @@ class JekyllArticleImporter:
         try:
             if not article.id:
                 self.article_parent_page.add_child(instance=article)
+
             logger.info("Vytvářím článek: %s" % article)
             rev = article.save_revision()
+
             if meta.get("published", True):
                 rev.publish()
         except Exception as e:
@@ -639,6 +657,7 @@ class JekyllArticleImporter:
         Vrací list dict pro django messages (klíč levelu, text).
         Začne vyčištěním logu.
         """
+
         self.article_parent_page.last_import_log = ""
         self.article_parent_page.save()
 
-- 
GitLab