Skip to content
Snippets Groups Projects
Commit b371bee0 authored by OndraRehounek's avatar OndraRehounek Committed by jan.bednarik
Browse files

WIP celery import with logs

parent 48750364
Branches
No related tags found
2 merge requests: !442 Release, !432 Feature/majak imports
...@@ -36,9 +36,16 @@ class JekyllImportForm(WagtailAdminPageForm): ...@@ -36,9 +36,16 @@ class JekyllImportForm(WagtailAdminPageForm):
"jednoduše URL repozitáře " "jednoduše URL repozitáře "
"např. 'https://github.com/pirati-web/cb.pirati.cz'", "např. 'https://github.com/pirati-web/cb.pirati.cz'",
) )
readonly_log = forms.CharField(
disabled=True,
label="Log z posledního importu",
required=False,
widget=forms.Textarea,
)
# def __init__(self): def __init__(self, *args, **kwargs):
# pass super().__init__(*args, **kwargs)
self.fields["readonly_log"].initial = self.instance.last_import_log
def clean(self): def clean(self):
cleaned_data = super().clean() cleaned_data = super().clean()
...@@ -72,8 +79,8 @@ class JekyllImportForm(WagtailAdminPageForm): ...@@ -72,8 +79,8 @@ class JekyllImportForm(WagtailAdminPageForm):
return cleaned_data return cleaned_data
def handle_import(self): def handle_import(self):
# import_message_list = # import_jekyll_articles.delay( # TODO
import_jekyll_articles.delay( import_jekyll_articles(
article_parent_page_id=self.instance.id, article_parent_page_id=self.instance.id,
collection_id=self.cleaned_data["collection"].id, collection_id=self.cleaned_data["collection"].id,
url=self.cleaned_data["jekyll_repo_url"], url=self.cleaned_data["jekyll_repo_url"],
...@@ -81,9 +88,6 @@ class JekyllImportForm(WagtailAdminPageForm): ...@@ -81,9 +88,6 @@ class JekyllImportForm(WagtailAdminPageForm):
use_git=self.cleaned_data["use_git"], use_git=self.cleaned_data["use_git"],
) )
# self.instance.import_message_list = import_message_list
# return import_message_list
def save(self, commit=True): def save(self, commit=True):
if self.cleaned_data.get("do_import") and not self.cleaned_data["dry_run"]: if self.cleaned_data.get("do_import") and not self.cleaned_data["dry_run"]:
self.handle_import() self.handle_import()
......
...@@ -7,19 +7,18 @@ import zipfile ...@@ -7,19 +7,18 @@ import zipfile
from datetime import date from datetime import date
from http.client import InvalidURL from http.client import InvalidURL
from io import StringIO from io import StringIO
from sys import stdout
from typing import List from typing import List
from urllib.error import HTTPError from urllib.error import HTTPError
import markdown.serializers import markdown.serializers
import yaml import yaml
from django.contrib.messages import INFO, SUCCESS, WARNING
from django.core.files.images import ImageFile from django.core.files.images import ImageFile
from django.utils import timezone from django.utils import timezone
from markdown import Markdown from markdown import Markdown
from markdown.extensions import Extension from markdown.extensions import Extension
from markdown.inlinepatterns import InlineProcessor from markdown.inlinepatterns import InlineProcessor
from wagtail.contrib.redirects.models import Redirect from wagtail.contrib.redirects.models import Redirect
from wagtail.core.models import Page
from wagtail.core.models.collections import Collection from wagtail.core.models.collections import Collection
from wagtail.core.rich_text import RichText from wagtail.core.rich_text import RichText
from wagtail.images.models import Image from wagtail.images.models import Image
...@@ -28,9 +27,8 @@ from yaml.scanner import ScannerError ...@@ -28,9 +27,8 @@ from yaml.scanner import ScannerError
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# from django.utils.dateparse import parse_date TODO enable date check again... # from django.utils.dateparse import parse_date TODO enable date check again...
image_params = ( image_params = {} # filled on JekyllArticleImporter init and used globally
{}
) # filled on JekyllArticleImported init and used in markdown overwrites
POSTS_DIR = "_posts" POSTS_DIR = "_posts"
# ------------------------------- Misc helper functions ------------------------------- # ------------------------------- Misc helper functions -------------------------------
...@@ -114,13 +112,13 @@ def get_or_create_image( ...@@ -114,13 +112,13 @@ def get_or_create_image(
file_path = file_path.lstrip("/") file_path = file_path.lstrip("/")
if Image.objects.filter(title=file_path).exists(): if Image.objects.filter(title=file_path).exists():
return Image.objects.filter(title=file_path).first() return Image.objects.filter(title=file_path).first(), ""
else: else:
try: try:
file = ImageFile(open(os.path.join(path, file_path), "rb"), name=file_path) file = ImageFile(open(os.path.join(path, file_path), "rb"), name=file_path)
image = Image(title=file_path, file=file, collection=collection) image = Image(title=file_path, file=file, collection=collection)
image.save() image.save()
return image return image, ""
except FileNotFoundError: except FileNotFoundError:
try: try:
file = ImageFile( file = ImageFile(
...@@ -129,7 +127,7 @@ def get_or_create_image( ...@@ -129,7 +127,7 @@ def get_or_create_image(
) )
image = Image(title=file_path, file=file, collection=collection) image = Image(title=file_path, file=file, collection=collection)
image.save() image.save()
return image return image, ""
except FileNotFoundError: except FileNotFoundError:
img_name = file_path.split("/")[-1] img_name = file_path.split("/")[-1]
img_assets_folder = repo_name.split(".")[0] img_assets_folder = repo_name.split(".")[0]
...@@ -152,20 +150,22 @@ def get_or_create_image( ...@@ -152,20 +150,22 @@ def get_or_create_image(
InvalidURL, InvalidURL,
IsADirectoryError, IsADirectoryError,
): ):
msg = "Nedohledán obrázek při importu článků"
log_message = "{} - {}\n".format(msg, img_url)
logger.warning( logger.warning(
"Nedohledán obrázek při importu článků", msg,
extra={ extra={
"file_path": file_path, "file_path": file_path,
"img_name": img_name, "img_name": img_name,
"img_url": img_url, "img_url": img_url,
}, },
) )
return None return None, log_message
file = ImageFile(open(img_path, "rb"), name=img_path) file = ImageFile(open(img_path, "rb"), name=img_path)
image = Image(title=file_path, file=file, collection=collection) image = Image(title=file_path, file=file, collection=collection)
image.save() image.save()
return image return image, ""
def get_path_and_repo_name(url: str, use_git: bool) -> (str, str): def get_path_and_repo_name(url: str, use_git: bool) -> (str, str):
...@@ -220,7 +220,7 @@ class ImgProcessor(InlineProcessor): ...@@ -220,7 +220,7 @@ class ImgProcessor(InlineProcessor):
el.attrib["format"] = "left" el.attrib["format"] = "left"
parsed_image_path = JekyllArticleImporter.get_parsed_file_path(m.group(2)) parsed_image_path = JekyllArticleImporter.get_parsed_file_path(m.group(2))
image_obj = get_or_create_image( image_obj, _ = get_or_create_image(
path=image_params["path"], path=image_params["path"],
file_path=parsed_image_path, file_path=parsed_image_path,
collection=image_params["collection"], collection=image_params["collection"],
...@@ -263,13 +263,15 @@ class JekyllArticleImporter: ...@@ -263,13 +263,15 @@ class JekyllArticleImporter:
url: str, url: str,
dry_run: bool, dry_run: bool,
use_git: bool, use_git: bool,
parent_page_model,
page_model,
): ):
from district.models import DistrictArticlesPage self.page_model = page_model
# Params # Params
self.article_parent_page = DistrictArticlesPage.objects.get( self.article_parent_page = parent_page_model.objects.get(
id=article_parent_page_id id=article_parent_page_id
) ).specific # TODO test if specific should be included or not
self.collection = Collection.objects.get(id=collection_id) self.collection = Collection.objects.get(id=collection_id)
self.dry_run = dry_run self.dry_run = dry_run
self.use_git = use_git self.use_git = use_git
...@@ -288,46 +290,33 @@ class JekyllArticleImporter: ...@@ -288,46 +290,33 @@ class JekyllArticleImporter:
self.success_counter = 0 self.success_counter = 0
self.exists_counter = 0 self.exists_counter = 0
self.skipped_counter = 0 self.skipped_counter = 0
# self.image_warning_counter = 0 # TODO nějak vymyslet
self.message_list = [] # output for django.messages self.page_log = "" # output saved on page instance
# Filling global var for ImgParser # Filling global var for ImgParser
image_params["path"] = self.path image_params["path"] = self.path
image_params["collection"] = self.collection image_params["collection"] = self.collection
image_params["repo_name"] = self.repo_name image_params["repo_name"] = self.repo_name
def create_django_messages(self): def create_summary_log(self):
""" """
Podle (aktuálních) hodnot counterů přidá do self.message_list Podle (aktuálních) hodnot counterů přidá do self.page_log
různé zprávy pro uživatele. různé zprávy pro uživatele.
""" """
self.page_log += "==================================\n"
if self.success_counter: if self.success_counter:
base_msg = "Lze importovat" if self.dry_run else "Úspěšně naimportováno" base_msg = "Úspěšně otestováno" if self.dry_run else "Úspěšně naimportováno"
self.message_list.append( self.page_log += "{} {} článků\n".format(base_msg, self.success_counter)
{
"level": SUCCESS,
"text": "{} {} článků".format(base_msg, self.success_counter),
}
)
if self.exists_counter: if self.exists_counter:
self.message_list.append( self.page_log += "z toho {} již existovalo\n".format(self.exists_counter)
{
"level": INFO,
"text": "{} článků s tímto názvem již existuje".format(
self.exists_counter
),
}
)
if self.skipped_counter: if self.skipped_counter:
self.message_list.append( self.page_log += "NELZE importovat {} článků\n".format(self.skipped_counter)
{
"level": WARNING, self.article_parent_page.last_import_log = self.page_log
"text": "Nelze importovat {} článků".format(self.skipped_counter), self.article_parent_page.save()
}
)
@staticmethod @staticmethod
def get_parsed_file_path(path: str): def get_parsed_file_path(path: str):
...@@ -382,17 +371,15 @@ class JekyllArticleImporter: ...@@ -382,17 +371,15 @@ class JekyllArticleImporter:
return meta_dict return meta_dict
def import_post(self, file_path): def import_post(self, file_path):
from district.models import DistrictArticlePage
with open(os.path.join(self.path, file_path), "rt") as f: with open(os.path.join(self.path, file_path), "rt") as f:
r = re.split(r"^---\s*$", f.read(), maxsplit=3, flags=re.MULTILINE) r = re.split(r"^---\s*$", f.read(), maxsplit=3, flags=re.MULTILINE)
try: try:
meta = yaml.safe_load(r[1].replace("\t", "")) meta = yaml.safe_load(r[1].replace("\t", ""))
except (ScannerError, ValueError): except (ScannerError, ValueError):
logger.warning( msg = "Nelze importovat článek - neparsovatelný YAML"
"Nelze importovat článek - neparsovatelný YAML", logger.warning(msg, extra={"file_path": file_path})
extra={"file_path": file_path}, self.page_log += "{} - {}\n".format(msg, file_path)
)
self.skipped_counter += 1 self.skipped_counter += 1
return None return None
...@@ -405,24 +392,19 @@ class JekyllArticleImporter: ...@@ -405,24 +392,19 @@ class JekyllArticleImporter:
try: try:
title = meta["title"] title = meta["title"]
except TypeError: except TypeError:
logger.warning( msg = "Nelze importovat článek - nepodařilo se získat title"
"Nelze importovat článek - nepodařilo se získat title", logger.warning(msg, extra={"article_meta": meta})
extra={"article_meta": meta}, self.page_log += "{} - {}\n".format(msg, meta)
)
self.skipped_counter += 1 self.skipped_counter += 1
return None return None
if DistrictArticlePage.objects.filter(title=title).exists(): try:
for article in DistrictArticlePage.objects.filter(title=meta["title"]): article = (
# if article.date == parse_date(meta["date"].split()[0]): self.article_parent_page.get_descendants().get(title=title).specific
msg = "Článek již existuje: %s" % article )
stdout.write(msg)
# message_list.append({"level": INFO, "text": msg})
self.exists_counter += 1 self.exists_counter += 1
except (Page.DoesNotExist, Page.MultipleObjectsReturned):
return None article = self.page_model()
article = DistrictArticlePage()
# article.text = html # article.text = html
article.content = [("text", RichText(html))] article.content = [("text", RichText(html))]
...@@ -450,24 +432,29 @@ class JekyllArticleImporter: ...@@ -450,24 +432,29 @@ class JekyllArticleImporter:
# article.tags.add(tag) # article.tags.add(tag)
if meta.get("image", None): if meta.get("image", None):
article.image = get_or_create_image( article.image, log_message = get_or_create_image(
self.path, meta["image"], self.collection, self.repo_name self.path, meta["image"], self.collection, self.repo_name
) )
if log_message:
self.page_log += log_message
if self.dry_run: if self.dry_run:
return article return article
try: try:
if not article.id:
self.article_parent_page.add_child(instance=article) self.article_parent_page.add_child(instance=article)
stdout.write("Vytvářím článek: %s" % article) logger.info("Vytvářím článek: %s" % article)
rev = article.save_revision() rev = article.save_revision()
if meta.get("published", True): if meta.get("published", True):
rev.publish() rev.publish()
except Exception as e: except Exception as e:
msg = "Nelze uložit importovaný článek"
logger.warning( logger.warning(
"Nelze uložit importovaný článek", msg,
extra={"article_title": article.title, "exception": e}, extra={"article_title": article.title, "exception": e},
) )
self.page_log += "{} - {} - {}\n".format(msg, article.title, e)
self.skipped_counter += 1 self.skipped_counter += 1
return article return article
...@@ -479,6 +466,7 @@ class JekyllArticleImporter: ...@@ -479,6 +466,7 @@ class JekyllArticleImporter:
Projde adresář článků a pokusí se zprocesovat Markdown do article. Projde adresář článků a pokusí se zprocesovat Markdown do article.
Vrací list dict pro django messages (klíč levelu, text). Vrací list dict pro django messages (klíč levelu, text).
""" """
logger.info("Import započat")
for file_name in os.listdir(os.path.join(self.path, POSTS_DIR)): for file_name in os.listdir(os.path.join(self.path, POSTS_DIR)):
# Případ podsložek (typicky po jednotlivých letech) # Případ podsložek (typicky po jednotlivých letech)
if os.path.isdir(os.path.join(self.path, POSTS_DIR, file_name)): if os.path.isdir(os.path.join(self.path, POSTS_DIR, file_name)):
...@@ -491,8 +479,8 @@ class JekyllArticleImporter: ...@@ -491,8 +479,8 @@ class JekyllArticleImporter:
file_path = os.path.join(POSTS_DIR, file_name) file_path = os.path.join(POSTS_DIR, file_name)
self.process_article(file_name, file_path) self.process_article(file_name, file_path)
self.create_django_messages() self.create_summary_log()
return self.message_list logger.info("Import dokončen")
def process_article(self, file_name: str, file_path: str): def process_article(self, file_name: str, file_path: str):
match = re.match(r"(\d*)-(\d*)-(\d*)-(.*)\.(.*)", file_name) match = re.match(r"(\d*)-(\d*)-(\d*)-(.*)\.(.*)", file_name)
...@@ -527,8 +515,10 @@ class JekyllArticleImporter: ...@@ -527,8 +515,10 @@ class JekyllArticleImporter:
else: else:
msg = "Nepodporovaná přípona souboru: %s" % ext msg = "Nepodporovaná přípona souboru: %s" % ext
logger.warning(msg) logger.warning(msg)
self.page_log += "{}\n".format(msg)
self.skipped_counter += 1 self.skipped_counter += 1
else: else:
msg = "Přeskočeno: %s" % file_name msg = "Přeskočeno: %s" % file_name
logger.warning(msg) logger.warning(msg)
self.page_log += "{}\n".format(msg)
self.skipped_counter += 1 self.skipped_counter += 1
# Generated by Django 4.0.3 on 2022-04-01 11:30
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``last_import_log`` text field to ``districtarticlespage``."""

    # This migration is ordered after migration 0048 of the ``district`` app.
    dependencies = [
        (
            "district",
            "0048_districthomepage_footperson_coord_title_and_more",
        ),
    ]

    # A single schema change: one new text column that may be blank or NULL.
    operations = [
        migrations.AddField(
            model_name="districtarticlespage",
            name="last_import_log",
            field=models.TextField(
                verbose_name="Výstup z posledního importu",
                null=True,
                blank=True,
            ),
        ),
    ]
...@@ -365,6 +365,9 @@ class DistrictArticlePage(ArticleMixin, SubpageMixin, MetadataPageMixin, Page): ...@@ -365,6 +365,9 @@ class DistrictArticlePage(ArticleMixin, SubpageMixin, MetadataPageMixin, Page):
class DistrictArticlesPage(SubpageMixin, MetadataPageMixin, Page): class DistrictArticlesPage(SubpageMixin, MetadataPageMixin, Page):
### FIELDS ### FIELDS
last_import_log = models.TextField(
"Výstup z posledního importu", null=True, blank=True
)
max_items = models.IntegerField("Počet článků na stránce", default=12) max_items = models.IntegerField("Počet článků na stránce", default=12)
### PANELS ### PANELS
...@@ -383,9 +386,14 @@ class DistrictArticlesPage(SubpageMixin, MetadataPageMixin, Page): ...@@ -383,9 +386,14 @@ class DistrictArticlesPage(SubpageMixin, MetadataPageMixin, Page):
FieldPanel("dry_run"), FieldPanel("dry_run"),
FieldPanel("use_git"), FieldPanel("use_git"),
FieldPanel("jekyll_repo_url"), FieldPanel("jekyll_repo_url"),
FieldPanel("readonly_log"),
HelpPanel( HelpPanel(
"Import provádějte vždy až po vytvoření stránky aktualit. " "Import provádějte vždy až po vytvoření stránky aktualit. "
'Pro uložení logu je nutné volit možnost "Publikovat", nikoliv '
'pouze "Uložit koncept". '
"Import proběhne na pozadí a může trvat až několik minut. " "Import proběhne na pozadí a může trvat až několik minut. "
"Dejte si po spuštění importu kávu a potom obnovte stránku pro "
"zobrazení výsledku importu."
), ),
], ],
"import z Jekyll repozitáře", "import z Jekyll repozitáře",
...@@ -425,6 +433,9 @@ class DistrictArticlesPage(SubpageMixin, MetadataPageMixin, Page): ...@@ -425,6 +433,9 @@ class DistrictArticlesPage(SubpageMixin, MetadataPageMixin, Page):
).get_page(request.GET.get("page")) ).get_page(request.GET.get("page"))
return context return context
def save(self, clean=True, user=None, log_action=False, **kwargs):
    """Save the page, forwarding every argument to the parent ``save()``.

    The previous body called the parent ``save()`` with no arguments, so a
    caller's ``clean=False``, ``user``, ``log_action`` or extra keyword
    arguments were silently discarded. They are now passed through unchanged.

    Args:
        clean: Forwarded to the parent ``save()`` as ``clean``.
        user: Forwarded to the parent ``save()`` as ``user``.
        log_action: Forwarded to the parent ``save()`` as ``log_action``.
        **kwargs: Any further keyword arguments, forwarded untouched.

    Returns:
        Whatever the parent ``save()`` returns.
    """
    return super().save(clean=clean, user=user, log_action=log_action, **kwargs)
class DistrictContactPage(SubpageMixin, MetadataPageMixin, Page): class DistrictContactPage(SubpageMixin, MetadataPageMixin, Page):
### FIELDS ### FIELDS
......
...@@ -15,10 +15,14 @@ def import_jekyll_articles( ...@@ -15,10 +15,14 @@ def import_jekyll_articles(
dry_run: bool, dry_run: bool,
use_git: bool, use_git: bool,
): ):
from .models import DistrictArticlePage, DistrictArticlesPage
return JekyllArticleImporter( return JekyllArticleImporter(
article_parent_page_id=article_parent_page_id, article_parent_page_id=article_parent_page_id,
collection_id=collection_id, collection_id=collection_id,
url=url, url=url,
dry_run=dry_run, dry_run=dry_run,
use_git=use_git, use_git=use_git,
parent_page_model=DistrictArticlesPage,
page_model=DistrictArticlePage,
).perform_import() ).perform_import()
from django.contrib.messages import ERROR, SUCCESS, WARNING, add_message
from wagtail.admin import messages
from wagtail.core import hooks
from .models import DistrictArticlesPage
# Registered once only: a second identical registration would run the handler
# twice per edit and add every message twice.
@hooks.register("after_edit_page")
def handle_page_import(request, page):
    """Forward the import messages stored on an edited page to the request.

    Does nothing unless the request method is ``POST`` and
    ``page.specific_class`` is ``DistrictArticlesPage``. Otherwise every entry
    of ``page.import_message_list`` is passed to ``add_message`` using its
    ``"level"`` and ``"text"`` keys. A page without that attribute produces no
    messages.

    Args:
        request: The request passed to the hook; its ``method`` is inspected
            and it is the target of ``add_message``.
        page: The page passed to the hook.
    """
    if request.method != "POST":
        return
    if page.specific_class is not DistrictArticlesPage:
        return

    for message in getattr(page, "import_message_list", []):
        add_message(request, message["level"], message["text"])
# import re # import re
# #
# from wagtail.core import hooks # from wagtail.core import hooks
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment