From e4d1e1a269212aa7724b203dbd8e914981679879 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Valenta?= <git@imaniti.org>
Date: Wed, 17 Jan 2024 11:37:35 +0100
Subject: [PATCH] fully functioning import

---
 district/forms.py         | 76 ++++++++-------------------------------
 district/tasks.py         | 27 ++++++++++++++
 elections/forms.py        | 21 +++++++++++
 elections/models.py       |  3 ++
 elections/tasks.py        | 27 ++++++++++++++
 main/forms.py             | 21 +++++++++++
 main/models.py            |  3 ++
 {shared => main}/tasks.py |  0
 shared/forms.py           | 18 +---------
 shared/jekyll_import.py   | 52 ++++++++++++++-------------
 shared/models/main.py     |  4 +--
 11 files changed, 146 insertions(+), 106 deletions(-)
 create mode 100644 district/tasks.py
 create mode 100644 elections/forms.py
 create mode 100644 elections/tasks.py
 create mode 100644 main/forms.py
 rename {shared => main}/tasks.py (100%)

diff --git a/district/forms.py b/district/forms.py
index 03efb555..48600686 100644
--- a/district/forms.py
+++ b/district/forms.py
@@ -1,52 +1,16 @@
+from shared.forms import JekyllImportForm as SharedJekyllImportForm
+import os
 from django import forms
-from django.utils.safestring import mark_safe
-from wagtail.admin.forms import WagtailAdminPageForm
-from wagtail.models.collections import Collection
+from .tasks import import_jekyll_articles
 
-from shared.jekyll_import import JekyllArticleImporter
 
-
-class JekyllImportForm(WagtailAdminPageForm):
-    do_import = forms.BooleanField(
-        initial=False, required=False, label="Provést import z Jekyllu"
-    )
-    collection = forms.ModelChoiceField(
-        queryset=Collection.objects.all(), required=False, label="Kolekce obrázků"
-    )
-    dry_run = forms.BooleanField(
-        initial=True,
-        required=False,
-        label="Jenom na zkoušku",
-        help_text="Žádné články se neuloží, vypíše případné problémy či "
-        "již existující články - 'ostrému' importu existující "
-        "články nevadí, přeskočí je",
-    )
+class JekyllImportForm(SharedJekyllImportForm):
     use_git = forms.BooleanField(
         initial=False,
         required=False,
         label="Použít Git",
         help_text="Umožňuje jednodušší zpracování, ale vyžaduje nainstalovaný Git.",
     )
-    jekyll_repo_url = forms.URLField(
-        max_length=512,
-        required=False,
-        help_text=mark_safe(
-            "V GitHubu tlačítko Code -> a odkaz z <strong>Download zip</strong>, "
-            "např. <em>https://github.com/pirati-web/cb.pirati.cz/archive/refs/heads/gh-pages.zip</em>. "
-            "Pokud máte nainstalovaný Git, zvolte <strong>Použít Git</strong> a vložte jednoduše "
-            "URL repozitáře, např. <em>https://github.com/pirati-web/cb.pirati.cz</em>."
-        ),
-    )
-    readonly_log = forms.CharField(
-        disabled=True,
-        label="Log z posledního importu",
-        required=False,
-        widget=forms.Textarea,
-    )
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.fields["readonly_log"].initial = self.instance.last_import_log
 
     def clean(self):
         cleaned_data = super().clean()
@@ -54,16 +18,6 @@ class JekyllImportForm(WagtailAdminPageForm):
         if not cleaned_data.get("do_import"):
             return cleaned_data
 
-        if cleaned_data.get("do_import") and not self.instance.id:
-            self.add_error(
-                "do_import", "Import proveďte prosím až po vytvoření stránky"
-            )
-
-        if not cleaned_data.get("collection"):
-            self.add_error("collection", "Pro import je toto pole povinné")
-        if not cleaned_data.get("jekyll_repo_url"):
-            self.add_error("jekyll_repo_url", "Pro import je toto pole povinné")
-
         if cleaned_data.get("use_git"):
             if cleaned_data.get("jekyll_repo_url", "").endswith(".zip"):
                 self.add_error(
@@ -81,19 +35,32 @@ class JekyllImportForm(WagtailAdminPageForm):
         return cleaned_data
 
     def handle_import(self):
-        from .models import DistrictArticlePage
-
-        JekyllArticleImporter(
-            article_parent_page=self.instance,
-            collection_id=self.cleaned_data["collection"].id,
-            url=self.cleaned_data["jekyll_repo_url"],
-            dry_run=self.cleaned_data["dry_run"],
-            use_git=self.cleaned_data["use_git"],
-            page_model=DistrictArticlePage,
-        ).perform_import()
-
-    def save(self, commit=True):
-        if self.cleaned_data.get("do_import"):
-            self.handle_import()
-
-        return super().save(commit=commit)
+        """Queue the Jekyll import as a Celery task, at most once per page.
+
+        A lock file in /tmp, named after the page id, marks an import as
+        pending; while it exists, further requests are silently ignored.
+        The importer removes the file once the import has finished.
+        """
+        lock_file_name = f"/tmp/.{self.instance.id}.import-lock"
+
+        try:
+            # Mode "x" creates the file atomically and fails if it already
+            # exists, so two concurrent saves cannot both take the lock.
+            open(lock_file_name, "x").close()
+        except FileExistsError:
+            return
+
+        try:
+            import_jekyll_articles.delay(
+                article_parent_page_id=self.instance.id,
+                collection_id=self.cleaned_data["collection"].id,
+                url=self.cleaned_data["jekyll_repo_url"],
+                dry_run=self.cleaned_data["dry_run"],
+                # Honour the form's use_git checkbox instead of forcing Git.
+                use_git=self.cleaned_data["use_git"],
+            )
+        except Exception:
+            # The task was never queued, so nothing would ever release the
+            # lock; drop it here to keep later imports possible.
+            os.remove(lock_file_name)
+            raise
diff --git a/district/tasks.py b/district/tasks.py
new file mode 100644
index 00000000..8162ecb0
--- /dev/null
+++ b/district/tasks.py
@@ -0,0 +1,43 @@
+import contextlib
+import logging
+import os
+
+from celery import shared_task
+
+from shared.jekyll_import import JekyllArticleImporter
+
+logger = logging.getLogger(__name__)
+
+
+@shared_task()
+def import_jekyll_articles(
+    article_parent_page_id,
+    collection_id,
+    url,
+    dry_run,
+    use_git,
+):
+    """Run the Jekyll article import for one district articles page.
+
+    All arguments are forwarded to JekyllArticleImporter, together with the
+    district page models; the result of perform_import() is returned.
+    """
+    # Imported here rather than at module level (presumably to avoid a
+    # circular import with the models module - TODO confirm).
+    from .models import DistrictArticlePage, DistrictArticlesPage
+
+    try:
+        return JekyllArticleImporter(
+            article_parent_page_id=article_parent_page_id,
+            article_parent_page_model=DistrictArticlesPage,
+            collection_id=collection_id,
+            url=url,
+            dry_run=dry_run,
+            use_git=use_git,
+            page_model=DistrictArticlePage,
+        ).perform_import()
+    finally:
+        # perform_import() removes the lock itself, but it never runs if
+        # building the importer fails; make sure the lock is gone either way.
+        with contextlib.suppress(FileNotFoundError):
+            os.remove(f"/tmp/.{article_parent_page_id}.import-lock")
diff --git a/elections/forms.py b/elections/forms.py
new file mode 100644
index 00000000..b252a89a
--- /dev/null
+++ b/elections/forms.py
@@ -0,0 +1,39 @@
+import os
+
+from shared.forms import JekyllImportForm as SharedJekyllImportForm
+
+from .tasks import import_jekyll_articles
+
+
+class JekyllImportForm(SharedJekyllImportForm):
+    """Jekyll import form that queues the import as a Celery task."""
+
+    def handle_import(self):
+        """Queue the import for this page, at most once at a time.
+
+        A lock file in /tmp, named after the page id, marks an import as
+        pending; while it exists, further requests are silently ignored.
+        The importer removes the file once the import has finished.
+        """
+        lock_file_name = f"/tmp/.{self.instance.id}.import-lock"
+
+        try:
+            # Mode "x" creates the file atomically and fails if it already
+            # exists, so two concurrent saves cannot both take the lock.
+            open(lock_file_name, "x").close()
+        except FileExistsError:
+            return
+
+        try:
+            import_jekyll_articles.delay(
+                article_parent_page_id=self.instance.id,
+                collection_id=self.cleaned_data["collection"].id,
+                url=self.cleaned_data["jekyll_repo_url"],
+                dry_run=self.cleaned_data["dry_run"],
+                use_git=True,
+            )
+        except Exception:
+            # The task was never queued, so nothing would ever release the
+            # lock; drop it here to keep later imports possible.
+            os.remove(lock_file_name)
+            raise
diff --git a/elections/models.py b/elections/models.py
index 9f7f1bf8..72222927 100644
--- a/elections/models.py
+++ b/elections/models.py
@@ -46,6 +46,7 @@ from shared.models import (  # MenuMixin,
 )
 from shared.utils import make_promote_panels, subscribe_to_newsletter
 from tuning import admin_help
+from .forms import JekyllImportForm
 
 from . import blocks
 
@@ -118,6 +119,8 @@ class ElectionsHomePage(MainHomePageMixin):
 
 
 class ElectionsArticlesPage(MainArticlesPageMixin):
+    base_form_class = JekyllImportForm
+
     parent_page_types = ["elections.ElectionsHomePage"]
     subpage_types = ["elections.ElectionsArticlePage"]
 
diff --git a/elections/tasks.py b/elections/tasks.py
new file mode 100644
index 00000000..f3cce43e
--- /dev/null
+++ b/elections/tasks.py
@@ -0,0 +1,43 @@
+import contextlib
+import logging
+import os
+
+from celery import shared_task
+
+from shared.jekyll_import import JekyllArticleImporter
+
+logger = logging.getLogger(__name__)
+
+
+@shared_task()
+def import_jekyll_articles(
+    article_parent_page_id,
+    collection_id,
+    url,
+    dry_run,
+    use_git,
+):
+    """Run the Jekyll article import for one elections articles page.
+
+    All arguments are forwarded to JekyllArticleImporter, together with the
+    elections page models; the result of perform_import() is returned.
+    """
+    # Imported lazily: the models module imports the forms module, which in
+    # turn imports this one, so a module-level import would be circular.
+    from .models import ElectionsArticlePage, ElectionsArticlesPage
+
+    try:
+        return JekyllArticleImporter(
+            article_parent_page_id=article_parent_page_id,
+            article_parent_page_model=ElectionsArticlesPage,
+            collection_id=collection_id,
+            url=url,
+            dry_run=dry_run,
+            use_git=use_git,
+            page_model=ElectionsArticlePage,
+        ).perform_import()
+    finally:
+        # perform_import() removes the lock itself, but it never runs if
+        # building the importer fails; make sure the lock is gone either way.
+        with contextlib.suppress(FileNotFoundError):
+            os.remove(f"/tmp/.{article_parent_page_id}.import-lock")
diff --git a/main/forms.py b/main/forms.py
new file mode 100644
index 00000000..b252a89a
--- /dev/null
+++ b/main/forms.py
@@ -0,0 +1,39 @@
+import os
+
+from shared.forms import JekyllImportForm as SharedJekyllImportForm
+
+from .tasks import import_jekyll_articles
+
+
+class JekyllImportForm(SharedJekyllImportForm):
+    """Jekyll import form that queues the import as a Celery task."""
+
+    def handle_import(self):
+        """Queue the import for this page, at most once at a time.
+
+        A lock file in /tmp, named after the page id, marks an import as
+        pending; while it exists, further requests are silently ignored.
+        The importer removes the file once the import has finished.
+        """
+        lock_file_name = f"/tmp/.{self.instance.id}.import-lock"
+
+        try:
+            # Mode "x" creates the file atomically and fails if it already
+            # exists, so two concurrent saves cannot both take the lock.
+            open(lock_file_name, "x").close()
+        except FileExistsError:
+            return
+
+        try:
+            import_jekyll_articles.delay(
+                article_parent_page_id=self.instance.id,
+                collection_id=self.cleaned_data["collection"].id,
+                url=self.cleaned_data["jekyll_repo_url"],
+                dry_run=self.cleaned_data["dry_run"],
+                use_git=True,
+            )
+        except Exception:
+            # The task was never queued, so nothing would ever release the
+            # lock; drop it here to keep later imports possible.
+            os.remove(lock_file_name)
+            raise
diff --git a/main/models.py b/main/models.py
index c120f040..9f4cc167 100644
--- a/main/models.py
+++ b/main/models.py
@@ -46,6 +46,7 @@ from shared.models import (  # MenuMixin,
 )
 from shared.utils import make_promote_panels, subscribe_to_newsletter
 from tuning import admin_help
+from .forms import JekyllImportForm
 
 from . import blocks
 
@@ -132,6 +133,8 @@ class MainHomePage(MainHomePageMixin):
 
 
 class MainArticlesPage(MainArticlesPageMixin):
+    base_form_class = JekyllImportForm
+
     parent_page_types = ["main.MainHomePage"]
     subpage_types = ["main.MainArticlePage"]
 
diff --git a/shared/tasks.py b/main/tasks.py
similarity index 100%
rename from shared/tasks.py
rename to main/tasks.py
diff --git a/shared/forms.py b/shared/forms.py
index 934dd0bf..90972843 100644
--- a/shared/forms.py
+++ b/shared/forms.py
@@ -1,8 +1,7 @@
 from django import forms
 from wagtail.admin.forms import WagtailAdminPageForm
 from wagtail.models.collections import Collection
-
-from .tasks import import_jekyll_articles
+import os
 
 
 class SubscribeForm(forms.Form):
@@ -65,21 +64,6 @@ class JekyllImportForm(WagtailAdminPageForm):
 
         return cleaned_data
 
-    def handle_import(self):
-        # TODO: Portable function
-
-        print("handling import")
-
-        print(
-            import_jekyll_articles.delay(
-                article_parent_page_id=self.instance.id,
-                collection_id=self.cleaned_data["collection"].id,
-                url=self.cleaned_data["jekyll_repo_url"],
-                dry_run=self.cleaned_data["dry_run"],
-                use_git=True,
-            )
-        )
-
     def save(self, commit=True):
         if self.cleaned_data.get("do_import"):
             self.handle_import()
diff --git a/shared/jekyll_import.py b/shared/jekyll_import.py
index a58508c3..1d5baffa 100644
--- a/shared/jekyll_import.py
+++ b/shared/jekyll_import.py
@@ -13,6 +13,7 @@ from io import StringIO
 from typing import List
 from urllib.error import HTTPError
 from uuid import uuid4
+import os
 
 import bleach
 import markdown.serializers
@@ -665,30 +666,39 @@ class JekyllArticleImporter:
         Začne vyčištěním logu.
         """
 
-        self.article_parent_page.last_import_log = ""
-        self.article_parent_page.save()
-
-        msg = "{} Import započat".format(datetime.now())
-        logger.info(msg)
-        self.page_log += "{}\n\n".format(msg)
-
-        for file_name in os.listdir(os.path.join(self.path, POSTS_DIR)):
-            # Případ podsložek (typicky po jednotlivých letech)
-            if os.path.isdir(os.path.join(self.path, POSTS_DIR, file_name)):
-                posts_sub_folder = os.path.join(self.path, POSTS_DIR, file_name)
-                for sub_file_name in os.listdir(posts_sub_folder):
-                    file_path = os.path.join(posts_sub_folder, sub_file_name)
-                    self.process_article(sub_file_name, file_path)
-            # Případ všech článků v jedné složce
-            else:
-                file_path = os.path.join(POSTS_DIR, file_name)
-                self.process_article(file_name, file_path)
-
-        msg = "{} Import ukončen".format(datetime.now())
-        logger.info(msg)
-        self.page_log += "{}\n\n".format(msg)
-
-        self.create_summary_log()
+        try:
+            self.article_parent_page.last_import_log = ""
+            self.article_parent_page.save()
+
+            msg = "{} Import započat".format(datetime.now())
+            logger.info(msg)
+            self.page_log += "{}\n\n".format(msg)
+
+            for file_name in os.listdir(os.path.join(self.path, POSTS_DIR)):
+                # Posts stored in sub-folders (typically one per year)
+                if os.path.isdir(os.path.join(self.path, POSTS_DIR, file_name)):
+                    posts_sub_folder = os.path.join(self.path, POSTS_DIR, file_name)
+                    for sub_file_name in os.listdir(posts_sub_folder):
+                        file_path = os.path.join(posts_sub_folder, sub_file_name)
+                        self.process_article(sub_file_name, file_path)
+                # All posts in a single folder
+                else:
+                    file_path = os.path.join(POSTS_DIR, file_name)
+                    self.process_article(file_name, file_path)
+
+            msg = "{} Import ukončen".format(datetime.now())
+            logger.info(msg)
+            self.page_log += "{}\n\n".format(msg)
+
+            self.create_summary_log()
+        finally:
+            # The lock is created by the import form; it may be missing when
+            # the import was started some other way, and that must not mask
+            # an exception raised above.
+            try:
+                os.remove(f"/tmp/.{self.article_parent_page_id}.import-lock")
+            except FileNotFoundError:
+                pass
 
     def process_article(self, file_name: str, file_path: str):
         match = re.match(r"(\d*)-(\d*)-(\d*)-(.*)\.(.*)", file_name)
diff --git a/shared/models/main.py b/shared/models/main.py
index c9871aec..7f4848c6 100644
--- a/shared/models/main.py
+++ b/shared/models/main.py
@@ -41,7 +41,7 @@ from shared.blocks import (
     TwoTextColumnBlock,
 )
 from shared.const import MONTH_NAMES
-from shared.forms import JekyllImportForm, SubscribeForm
+from shared.forms import SubscribeForm
 from shared.utils import make_promote_panels, subscribe_to_newsletter
 from tuning import admin_help
 
@@ -438,8 +438,6 @@ class MainArticlesPageMixin(
 
     ### OTHERS
 
-    base_form_class = JekyllImportForm
-
     class Meta:
         verbose_name = "Rozcestník článků"
         abstract = True
-- 
GitLab