From b9bde0ef902bdf09bcfb9452fb5feacdbc9aecba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Valenta?= <git@imaniti.org> Date: Fri, 20 Oct 2023 16:10:33 +0200 Subject: [PATCH] fix imports, remove import tasks and execute directly (for now) --- district/forms.py | 16 ++++++++++++---- district/models.py | 15 +++++++++------ district/tasks.py | 27 --------------------------- main/forms.py | 11 +++++++---- main/tasks.py | 27 --------------------------- shared/jekyll_import.py | 41 ++++++++++++++++++++++++++++++----------- 6 files changed, 58 insertions(+), 79 deletions(-) delete mode 100644 district/tasks.py delete mode 100644 main/tasks.py diff --git a/district/forms.py b/district/forms.py index aaea2f30..03efb555 100644 --- a/district/forms.py +++ b/district/forms.py @@ -3,7 +3,7 @@ from django.utils.safestring import mark_safe from wagtail.admin.forms import WagtailAdminPageForm from wagtail.models.collections import Collection -from .tasks import import_jekyll_articles +from shared.jekyll_import import JekyllArticleImporter class JekyllImportForm(WagtailAdminPageForm): @@ -69,6 +69,11 @@ class JekyllImportForm(WagtailAdminPageForm): self.add_error( "jekyll_repo_url", "Vložte odkaz pouze na repozitář, ne na zip" ) + elif cleaned_data.get("jekyll_repo_url", "").endswith(".git"): + self.add_error( + "jekyll_repo_url", + "Vložte odkaz pouze na repozitář, ne na '.git' soubor", + ) else: if not cleaned_data.get("jekyll_repo_url", "").endswith(".zip"): self.add_error("jekyll_repo_url", "Odkaz nesměřuje na soubor '.zip'") @@ -76,13 +81,16 @@ class JekyllImportForm(WagtailAdminPageForm): return cleaned_data def handle_import(self): - import_jekyll_articles.delay( - article_parent_page_id=self.instance.id, + from .models import DistrictArticlePage + + JekyllArticleImporter( + article_parent_page=self.instance, collection_id=self.cleaned_data["collection"].id, url=self.cleaned_data["jekyll_repo_url"], dry_run=self.cleaned_data["dry_run"], use_git=self.cleaned_data["use_git"], - ) + page_model=DistrictArticlePage, + ).perform_import() def save(self, commit=True): if self.cleaned_data.get("do_import"): diff --git a/district/models.py b/district/models.py index 9963cb36..1d638815 100644 --- a/district/models.py +++ b/district/models.py @@ -7,6 +7,7 @@ from django.core.exceptions import ValidationError from django.db import models from django.http import HttpResponseNotFound, HttpResponseRedirect from django.shortcuts import render +from django.utils.safestring import mark_safe from django.utils.translation import gettext_lazy from modelcluster.contrib.taggit import ClusterTaggableManager from modelcluster.fields import ParentalKey @@ -535,12 +536,14 @@ class DistrictArticlesPage( FieldPanel("jekyll_repo_url"), FieldPanel("readonly_log"), HelpPanel( - "Import provádějte vždy až po vytvoření stránky aktualit. " - 'Pro uložení logu je nutné volit možnost "Publikovat", nikoliv' - 'pouze "Uložit koncept". ' - "Import proběhne na pozadí a může trvat až několik minut. " - "Dejte si po spuštění importu kávu a potom obnovte stránku pro " - "zobrazení výsledku importu." + mark_safe( + "Import provádějte vždy až po vytvoření stránky aktualit. " + "Pro uložení logu je nutné volit možnost <strong>Publikovat</strong>, nikoliv " + "pouze <strong>Uložit koncept</strong>. " + "Import proběhne na pozadí a může trvat až několik minut. " + "Dejte si po spuštění importu kávu a potom obnovte stránku pro " + "zobrazení výsledku importu." + ) ), ], "import z Jekyll repozitáře", diff --git a/district/tasks.py b/district/tasks.py deleted file mode 100644 index 0bb2aa59..00000000 --- a/district/tasks.py +++ /dev/null @@ -1,27 +0,0 @@ -import logging - -from majak.celery import app -from shared.jekyll_import import JekyllArticleImporter - -logger = logging.getLogger(__name__) - - -@app.task -def import_jekyll_articles( - article_parent_page_id: int, - collection_id: int, - url: str, - dry_run: bool, - use_git: bool, -): - from .models import DistrictArticlePage, DistrictArticlesPage - - return JekyllArticleImporter( - article_parent_page_id=article_parent_page_id, - collection_id=collection_id, - url=url, - dry_run=dry_run, - use_git=use_git, - parent_page_model=DistrictArticlesPage, - page_model=DistrictArticlePage, - ).perform_import() diff --git a/main/forms.py b/main/forms.py index a3155166..a8194047 100644 --- a/main/forms.py +++ b/main/forms.py @@ -2,7 +2,7 @@ from django import forms from wagtail.admin.forms import WagtailAdminPageForm from wagtail.models.collections import Collection -from .tasks import import_jekyll_articles +from shared.jekyll_import import JekyllArticleImporter class JekyllImportForm(WagtailAdminPageForm): @@ -60,13 +60,16 @@ class JekyllImportForm(WagtailAdminPageForm): return cleaned_data def handle_import(self): - import_jekyll_articles.delay( - article_parent_page_id=self.instance.id, + from .models import MainArticlePage + + JekyllArticleImporter( + article_parent_page=self.instance, collection_id=self.cleaned_data["collection"].id, url=self.cleaned_data["jekyll_repo_url"], dry_run=self.cleaned_data["dry_run"], use_git=True, - ) + page_model=MainArticlePage, + ).perform_import() def save(self, commit=True): if self.cleaned_data.get("do_import"): diff --git a/main/tasks.py b/main/tasks.py deleted file mode 100644 index 03f20935..00000000 --- a/main/tasks.py +++ /dev/null @@ -1,27 +0,0 @@ -import logging - -from majak.celery import app -from shared.jekyll_import import JekyllArticleImporter - -logger = logging.getLogger(__name__) - - -@app.task -def import_jekyll_articles( - article_parent_page_id: int, - collection_id: int, - url: str, - dry_run: bool, - use_git: bool, -): - from .models import MainArticlePage, MainArticlesPage - - return JekyllArticleImporter( - article_parent_page_id=article_parent_page_id, - collection_id=collection_id, - url=url, - dry_run=dry_run, - use_git=use_git, - parent_page_model=MainArticlesPage, - page_model=MainArticlePage, - ).perform_import() diff --git a/shared/jekyll_import.py b/shared/jekyll_import.py index cdd77721..4ac04156 100644 --- a/shared/jekyll_import.py +++ b/shared/jekyll_import.py @@ -26,6 +26,7 @@ from wagtail.contrib.redirects.models import Redirect from wagtail.images.models import Image from wagtail.models import Page from wagtail.models.collections import Collection +from willow.image import UnrecognisedImageFormatError from yaml.scanner import ScannerError logger = logging.getLogger(__name__) @@ -114,6 +115,7 @@ def get_or_create_image( one-timer, tak jsme to pouze dotáhli do stavu, kdy to schroupne co nejvíce případů. """ + file_path = file_path.lstrip("/") if Image.objects.filter(title=file_path).exists(): @@ -125,7 +127,7 @@ def get_or_create_image( if not image_params["dry_run"]: image.save() return image, "" - except FileNotFoundError: + except (FileNotFoundError, UnrecognisedImageFormatError): pass # cesta pomocí file_path neexisuje, jdeme dál try: @@ -137,7 +139,7 @@ def get_or_create_image( if not image_params["dry_run"]: image.save() return image, "" - except FileNotFoundError: + except (FileNotFoundError, UnrecognisedImageFormatError): pass # cesta s vložením "assets/img" před file_path neexisuje, jdeme dál try: @@ -149,7 +151,7 @@ def get_or_create_image( if not image_params["dry_run"]: image.save() return image, "" - except FileNotFoundError: + except (FileNotFoundError, UnrecognisedImageFormatError): pass # na disku jsme nenašli, zkusíme stáhnout z webu @@ -172,7 +174,7 @@ def get_or_create_image( ) return None, log_message - # opačném případě jdeme zkusit assets server a.pirati.cz + # v opačném případě jdeme zkusit assets server a.pirati.cz else: img_assets_folder = repo_name.split(".")[0] # např. "praha" z praha.pirati.cz img_url = "https://a.pirati.cz/resize/4000x-/{}/img/{}".format( @@ -191,6 +193,7 @@ def get_or_create_image( except (HTTPError, UnicodeEncodeError, InvalidURL, IsADirectoryError): msg = "Nedohledán obrázek při importu článků - ani na disku, ani na URL" log_message = "{}: cesta {}, URL {}\n".format(msg, file_path, img_url) + logger.warning( log_message, extra={ @@ -199,10 +202,25 @@ def get_or_create_image( "img_url": img_url, }, ) + return None, log_message file = ImageFile(open(img_path, "rb"), name=img_path) - image = Image(title=file_path, file=file, collection=collection) + + try: + image = Image(title=file_path, file=file, collection=collection) + except UnrecognisedImageFormatError: + msg = "Obrázek byl nalezen, ale jeho formát nerozpoznán" + log_message = "{}: cesta {}\n".format(msg, file_path) + + logger.warning( + log_message, + extra={ + "file_path": file_path, + }, + ) + + return None, log_message if not image_params["dry_run"]: try: @@ -211,6 +229,7 @@ def get_or_create_image( msg = "Nelze uložit obrázek" logger.warning(msg, extra={"exc": e}) return None, msg + return image, "" @@ -304,20 +323,17 @@ params = {} class JekyllArticleImporter: def __init__( self, - article_parent_page_id: int, + article_parent_page, collection_id: int, url: str, dry_run: bool, use_git: bool, - parent_page_model, page_model, ): self.page_model = page_model # Params - self.article_parent_page = parent_page_model.objects.get( - id=article_parent_page_id - ).specific # TODO test if specific should be included or not + self.article_parent_page = article_parent_page self.collection = Collection.objects.get(id=collection_id) self.dry_run = dry_run self.use_git = use_git @@ -384,7 +400,7 @@ class JekyllArticleImporter: self.page_log += "NELZE importovat {} článků\n".format(self.skipped_counter) self.article_parent_page.last_import_log = self.page_log - self.article_parent_page.save() + self.article_parent_page.save_revision() @staticmethod def get_parsed_file_path(path: str): @@ -614,8 +630,10 @@ class JekyllArticleImporter: try: if not article.id: self.article_parent_page.add_child(instance=article) + logger.info("Vytvářím článek: %s" % article) rev = article.save_revision() + if meta.get("published", True): rev.publish() except Exception as e: @@ -639,6 +657,7 @@ class JekyllArticleImporter: Vrací list dict pro django messages (klíč levelu, text). Začne vyčištěním logu. """ + self.article_parent_page.last_import_log = "" self.article_parent_page.save() -- GitLab