Skip to content
Snippets Groups Projects
Commit b371bee0 authored by OndraRehounek, committed by jan.bednarik
Browse files

WIP celery import with logs

parent 48750364
Branches
No related tags found
2 merge requests: !442 Release, !432 Feature/majak imports
......@@ -36,9 +36,16 @@ class JekyllImportForm(WagtailAdminPageForm):
"jednoduše URL repozitáře "
"např. 'https://github.com/pirati-web/cb.pirati.cz'",
)
readonly_log = forms.CharField(
disabled=True,
label="Log z posledního importu",
required=False,
widget=forms.Textarea,
)
# def __init__(self):
# pass
def __init__(self, *args, **kwargs):
    """Initialise the form and pre-fill the read-only import log field.

    After the regular form initialisation, the ``readonly_log`` field's
    initial value is set to ``last_import_log`` of the form's instance, so
    the output of the previous import is displayed in the form.
    """
    super().__init__(*args, **kwargs)
    # Read the stored log first, then attach it to the display-only field.
    previous_log = self.instance.last_import_log
    self.fields["readonly_log"].initial = previous_log
def clean(self):
cleaned_data = super().clean()
......@@ -72,8 +79,8 @@ class JekyllImportForm(WagtailAdminPageForm):
return cleaned_data
def handle_import(self):
# import_message_list =
import_jekyll_articles.delay(
# import_jekyll_articles.delay( # TODO
import_jekyll_articles(
article_parent_page_id=self.instance.id,
collection_id=self.cleaned_data["collection"].id,
url=self.cleaned_data["jekyll_repo_url"],
......@@ -81,9 +88,6 @@ class JekyllImportForm(WagtailAdminPageForm):
use_git=self.cleaned_data["use_git"],
)
# self.instance.import_message_list = import_message_list
# return import_message_list
def save(self, commit=True):
if self.cleaned_data.get("do_import") and not self.cleaned_data["dry_run"]:
self.handle_import()
......
......@@ -7,19 +7,18 @@ import zipfile
from datetime import date
from http.client import InvalidURL
from io import StringIO
from sys import stdout
from typing import List
from urllib.error import HTTPError
import markdown.serializers
import yaml
from django.contrib.messages import INFO, SUCCESS, WARNING
from django.core.files.images import ImageFile
from django.utils import timezone
from markdown import Markdown
from markdown.extensions import Extension
from markdown.inlinepatterns import InlineProcessor
from wagtail.contrib.redirects.models import Redirect
from wagtail.core.models import Page
from wagtail.core.models.collections import Collection
from wagtail.core.rich_text import RichText
from wagtail.images.models import Image
......@@ -28,9 +27,8 @@ from yaml.scanner import ScannerError
logger = logging.getLogger(__name__)
# from django.utils.dateparse import parse_date TODO enable date check again...
image_params = (
{}
) # filled on JekyllArticleImported init and used in markdown overwrites
image_params = {} # filled on JekyllArticleImporter init and used globally
POSTS_DIR = "_posts"
# ------------------------------- Misc helper functions -------------------------------
......@@ -114,13 +112,13 @@ def get_or_create_image(
file_path = file_path.lstrip("/")
if Image.objects.filter(title=file_path).exists():
return Image.objects.filter(title=file_path).first()
return Image.objects.filter(title=file_path).first(), ""
else:
try:
file = ImageFile(open(os.path.join(path, file_path), "rb"), name=file_path)
image = Image(title=file_path, file=file, collection=collection)
image.save()
return image
return image, ""
except FileNotFoundError:
try:
file = ImageFile(
......@@ -129,7 +127,7 @@ def get_or_create_image(
)
image = Image(title=file_path, file=file, collection=collection)
image.save()
return image
return image, ""
except FileNotFoundError:
img_name = file_path.split("/")[-1]
img_assets_folder = repo_name.split(".")[0]
......@@ -152,20 +150,22 @@ def get_or_create_image(
InvalidURL,
IsADirectoryError,
):
msg = "Nedohledán obrázek při importu článků"
log_message = "{} - {}\n".format(msg, img_url)
logger.warning(
"Nedohledán obrázek při importu článků",
msg,
extra={
"file_path": file_path,
"img_name": img_name,
"img_url": img_url,
},
)
return None
return None, log_message
file = ImageFile(open(img_path, "rb"), name=img_path)
image = Image(title=file_path, file=file, collection=collection)
image.save()
return image
return image, ""
def get_path_and_repo_name(url: str, use_git: bool) -> (str, str):
......@@ -220,7 +220,7 @@ class ImgProcessor(InlineProcessor):
el.attrib["format"] = "left"
parsed_image_path = JekyllArticleImporter.get_parsed_file_path(m.group(2))
image_obj = get_or_create_image(
image_obj, _ = get_or_create_image(
path=image_params["path"],
file_path=parsed_image_path,
collection=image_params["collection"],
......@@ -263,13 +263,15 @@ class JekyllArticleImporter:
url: str,
dry_run: bool,
use_git: bool,
parent_page_model,
page_model,
):
from district.models import DistrictArticlesPage
self.page_model = page_model
# Params
self.article_parent_page = DistrictArticlesPage.objects.get(
self.article_parent_page = parent_page_model.objects.get(
id=article_parent_page_id
)
).specific # TODO test if specific should be included or not
self.collection = Collection.objects.get(id=collection_id)
self.dry_run = dry_run
self.use_git = use_git
......@@ -288,46 +290,33 @@ class JekyllArticleImporter:
self.success_counter = 0
self.exists_counter = 0
self.skipped_counter = 0
# self.image_warning_counter = 0 # TODO nějak vymyslet
self.message_list = [] # output for django.messages
self.page_log = "" # output saved on page instance
# Filling global var for ImgParser
image_params["path"] = self.path
image_params["collection"] = self.collection
image_params["repo_name"] = self.repo_name
def create_django_messages(self):
def create_summary_log(self):
"""
Podle (aktuálních) hodnot counterů přidá do self.message_list
Podle (aktuálních) hodnot counterů přidá do self.page_log
různé zprávy pro uživatele.
"""
self.page_log += "==================================\n"
if self.success_counter:
base_msg = "Lze importovat" if self.dry_run else "Úspěšně naimportováno"
self.message_list.append(
{
"level": SUCCESS,
"text": "{} {} článků".format(base_msg, self.success_counter),
}
)
base_msg = "Úspěšně otestováno" if self.dry_run else "Úspěšně naimportováno"
self.page_log += "{} {} článků\n".format(base_msg, self.success_counter)
if self.exists_counter:
self.message_list.append(
{
"level": INFO,
"text": "{} článků s tímto názvem již existuje".format(
self.exists_counter
),
}
)
self.page_log += "z toho {} již existovalo\n".format(self.exists_counter)
if self.skipped_counter:
self.message_list.append(
{
"level": WARNING,
"text": "Nelze importovat {} článků".format(self.skipped_counter),
}
)
self.page_log += "NELZE importovat {} článků\n".format(self.skipped_counter)
self.article_parent_page.last_import_log = self.page_log
self.article_parent_page.save()
@staticmethod
def get_parsed_file_path(path: str):
......@@ -382,17 +371,15 @@ class JekyllArticleImporter:
return meta_dict
def import_post(self, file_path):
from district.models import DistrictArticlePage
with open(os.path.join(self.path, file_path), "rt") as f:
r = re.split(r"^---\s*$", f.read(), maxsplit=3, flags=re.MULTILINE)
try:
meta = yaml.safe_load(r[1].replace("\t", ""))
except (ScannerError, ValueError):
logger.warning(
"Nelze importovat článek - neparsovatelný YAML",
extra={"file_path": file_path},
)
msg = "Nelze importovat článek - neparsovatelný YAML"
logger.warning(msg, extra={"file_path": file_path})
self.page_log += "{} - {}\n".format(msg, file_path)
self.skipped_counter += 1
return None
......@@ -405,24 +392,19 @@ class JekyllArticleImporter:
try:
title = meta["title"]
except TypeError:
logger.warning(
"Nelze importovat článek - nepodařilo se získat title",
extra={"article_meta": meta},
)
msg = "Nelze importovat článek - nepodařilo se získat title"
logger.warning(msg, extra={"article_meta": meta})
self.page_log += "{} - {}\n".format(msg, meta)
self.skipped_counter += 1
return None
if DistrictArticlePage.objects.filter(title=title).exists():
for article in DistrictArticlePage.objects.filter(title=meta["title"]):
# if article.date == parse_date(meta["date"].split()[0]):
msg = "Článek již existuje: %s" % article
stdout.write(msg)
# message_list.append({"level": INFO, "text": msg})
try:
article = (
self.article_parent_page.get_descendants().get(title=title).specific
)
self.exists_counter += 1
return None
article = DistrictArticlePage()
except (Page.DoesNotExist, Page.MultipleObjectsReturned):
article = self.page_model()
# article.text = html
article.content = [("text", RichText(html))]
......@@ -450,24 +432,29 @@ class JekyllArticleImporter:
# article.tags.add(tag)
if meta.get("image", None):
article.image = get_or_create_image(
article.image, log_message = get_or_create_image(
self.path, meta["image"], self.collection, self.repo_name
)
if log_message:
self.page_log += log_message
if self.dry_run:
return article
try:
if not article.id:
self.article_parent_page.add_child(instance=article)
stdout.write("Vytvářím článek: %s" % article)
logger.info("Vytvářím článek: %s" % article)
rev = article.save_revision()
if meta.get("published", True):
rev.publish()
except Exception as e:
msg = "Nelze uložit importovaný článek"
logger.warning(
"Nelze uložit importovaný článek",
msg,
extra={"article_title": article.title, "exception": e},
)
self.page_log += "{} - {} - {}\n".format(msg, article.title, e)
self.skipped_counter += 1
return article
......@@ -479,6 +466,7 @@ class JekyllArticleImporter:
Projde adresář článků a pokusí se zprocesovat Markdown do article.
Vrací list dict pro django messages (klíč levelu, text).
"""
logger.info("Import započat")
for file_name in os.listdir(os.path.join(self.path, POSTS_DIR)):
# Případ podsložek (typicky po jednotlivých letech)
if os.path.isdir(os.path.join(self.path, POSTS_DIR, file_name)):
......@@ -491,8 +479,8 @@ class JekyllArticleImporter:
file_path = os.path.join(POSTS_DIR, file_name)
self.process_article(file_name, file_path)
self.create_django_messages()
return self.message_list
self.create_summary_log()
logger.info("Import dokončen")
def process_article(self, file_name: str, file_path: str):
match = re.match(r"(\d*)-(\d*)-(\d*)-(.*)\.(.*)", file_name)
......@@ -527,8 +515,10 @@ class JekyllArticleImporter:
else:
msg = "Nepodporovaná přípona souboru: %s" % ext
logger.warning(msg)
self.page_log += "{}\n".format(msg)
self.skipped_counter += 1
else:
msg = "Přeskočeno: %s" % file_name
logger.warning(msg)
self.page_log += "{}\n".format(msg)
self.skipped_counter += 1
# Generated by Django 4.0.3 on 2022-04-01 11:30
from django.db import migrations, models
class Migration(migrations.Migration):
    """Schema migration for the ``district`` app.

    Adds an optional ``last_import_log`` text column to the
    ``districtarticlespage`` model.
    """

    # Must be applied after migration 0048 of the same app.
    dependencies = [
        (
            "district",
            "0048_districthomepage_footperson_coord_title_and_more",
        ),
    ]

    operations = [
        migrations.AddField(
            model_name="districtarticlespage",
            name="last_import_log",
            # Both blank and NULL are allowed, so existing rows need no default.
            field=models.TextField(
                verbose_name="Výstup z posledního importu",
                null=True,
                blank=True,
            ),
        ),
    ]
......@@ -365,6 +365,9 @@ class DistrictArticlePage(ArticleMixin, SubpageMixin, MetadataPageMixin, Page):
class DistrictArticlesPage(SubpageMixin, MetadataPageMixin, Page):
### FIELDS
last_import_log = models.TextField(
"Výstup z posledního importu", null=True, blank=True
)
max_items = models.IntegerField("Počet článků na stránce", default=12)
### PANELS
......@@ -383,9 +386,14 @@ class DistrictArticlesPage(SubpageMixin, MetadataPageMixin, Page):
FieldPanel("dry_run"),
FieldPanel("use_git"),
FieldPanel("jekyll_repo_url"),
FieldPanel("readonly_log"),
HelpPanel(
"Import provádějte vždy až po vytvoření stránky aktualit. "
'Pro uložení logu je nutné volit možnost "Publikovat", nikoliv '
'pouze "Uložit koncept". '
"Import proběhne na pozadí a může trvat až několik minut. "
"Dejte si po spuštění importu kávu a potom obnovte stránku pro "
"zobrazení výsledku importu."
),
],
"import z Jekyll repozitáře",
......@@ -425,6 +433,9 @@ class DistrictArticlesPage(SubpageMixin, MetadataPageMixin, Page):
).get_page(request.GET.get("page"))
return context
def save(self, clean=True, user=None, log_action=False, **kwargs):
    """Save the page by delegating to the parent class implementation.

    Args:
        clean: Forwarded to the parent ``save()``.
        user: Forwarded to the parent ``save()``.
        log_action: Forwarded to the parent ``save()``.
        **kwargs: Any further keyword arguments (for example
            ``update_fields``), forwarded unchanged.

    Returns:
        Whatever the parent ``save()`` returns.
    """
    # Every argument must be passed on explicitly: calling the parent with
    # no arguments would silently discard the caller's choices (e.g.
    # ``clean=False`` or ``update_fields``) and always save with defaults.
    # NOTE(review): assumes the next ``save()`` in the MRO accepts the same
    # parameters this signature mirrors — confirm against the mixins.
    return super().save(clean=clean, user=user, log_action=log_action, **kwargs)
class DistrictContactPage(SubpageMixin, MetadataPageMixin, Page):
### FIELDS
......
......@@ -15,10 +15,14 @@ def import_jekyll_articles(
dry_run: bool,
use_git: bool,
):
from .models import DistrictArticlePage, DistrictArticlesPage
return JekyllArticleImporter(
article_parent_page_id=article_parent_page_id,
collection_id=collection_id,
url=url,
dry_run=dry_run,
use_git=use_git,
parent_page_model=DistrictArticlesPage,
page_model=DistrictArticlePage,
).perform_import()
from django.contrib.messages import ERROR, SUCCESS, WARNING, add_message
from wagtail.admin import messages
from wagtail.core import hooks
from .models import DistrictArticlesPage
# Register exactly once: every ``hooks.register`` call adds the function to
# the hook again, so a duplicated decorator would emit each message twice.
@hooks.register("after_edit_page")
def handle_page_import(request, page):
    """Relay import messages stored on the page to the editing user.

    Runs on the ``after_edit_page`` hook. For a POST request on a page whose
    specific class is ``DistrictArticlesPage``, each entry of the page's
    optional ``import_message_list`` attribute is added to the request
    through ``add_message``. Entries are dicts with ``"level"`` and
    ``"text"`` keys. A page without that attribute produces no messages.

    Args:
        request: The current HTTP request.
        page: The page instance that has just been edited.
    """
    if request.method != "POST":
        return
    if page.specific_class not in (DistrictArticlesPage,):
        return
    for message in getattr(page, "import_message_list", []):
        add_message(request, message["level"], message["text"])
# import re
#
# from wagtail.core import hooks
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment