class JekyllImportForm(WagtailAdminPageForm):
    """Page admin form extended with the inputs of a Jekyll article import.

    ``save`` hands the validated import inputs to ``perform_import`` and then
    saves the page through the parent form.

    NOTE(review): this form is installed as ``DistrictHomePage.base_form_class``,
    so the import inputs are validated, and the import is run, on every save
    of that page - confirm this is intended.
    """

    # pk of the page the imported articles are created under.
    article_root_page_id = (
        forms.IntegerField()
    )  # TODO resolve circular import and make ModelChoiceField
    # Image collection that imported images are stored in.
    collection = forms.ModelChoiceField(queryset=Collection.objects.all())
    # required=False: a required BooleanField rejects an unchecked box, which
    # would make it impossible to ever switch the flag off.
    # NOTE(review): the value is not forwarded yet; perform_import has no
    # dry-run parameter, so the import always runs for real.
    dry_run = forms.BooleanField(initial=True, required=False)
    # NOTE(review): perform_import reads this value with os.path.join()/open(),
    # i.e. as a local directory; a URL will not work until the repository is
    # fetched somewhere first - confirm the intended input.
    jekyll_repo_url = forms.URLField()
    # NOTE(review): validated but currently unused by save().
    site = forms.ModelChoiceField(queryset=Site.objects.all())

    def save(self, commit=True):
        """Run the Jekyll import, then save the page via the parent form.

        Args:
            commit: Passed through unchanged to the parent ``save``.

        Returns:
            Whatever the parent form's ``save`` returns.
        """
        perform_import(
            article_root_page_id=self.cleaned_data["article_root_page_id"],
            # Was cleaned_data["article_root_page_id"]: an integer was passed
            # where the Collection instance is expected.
            collection=self.cleaned_data["collection"],
            path=self.cleaned_data["jekyll_repo_url"],
        )

        return super().save(commit=commit)
# Plain-text output format used for the perex (article lead paragraph).
def unmark_element(element, stream=None):
    """Serialize an element tree to plain text.

    Writes the ``text`` of ``element``, then every child recursively, then the
    ``tail`` of ``element``, dropping all markup.

    Args:
        element: Element whose text content is collected.
        stream: ``StringIO`` the text is written to; created when ``None``
            (the recursive calls pass the shared stream along).

    Returns:
        Everything written to ``stream`` so far.
    """
    if stream is None:
        stream = StringIO()
    if element.text:
        stream.write(element.text)
    for sub in element:
        unmark_element(sub, stream)
    if element.tail:
        stream.write(element.tail)
    return stream.getvalue()


# Register the serializer above as the "plain" output format.
Markdown.output_formats["plain"] = unmark_element
plain_md = Markdown(output_format="plain")
# Plain output has no wrapping top-level tag for Markdown to strip.
plain_md.stripTopLevelTags = False

# State of the current import run, shared with ImgProcessor (which Markdown
# calls without extra arguments). perform_import sets "kolekce" (target image
# collection) and "path" (root directory of the Jekyll repository).
params = {}


class ImgProcessor(InlineProcessor):
    """Inline processor turning Markdown images into Wagtail image embeds."""

    def handleMatch(self, m, data):
        """Build an ``<embed embedtype="image">`` element for one image match.

        Group 1 of the match is used as the alt text and group 2 as the image
        path relative to ``params["path"]``; the image is looked up or created
        through ``get_or_create_image``.

        Returns:
            Tuple of the new element and the start/end offsets of the match.
        """
        el = ET.Element("embed")
        el.attrib["embedtype"] = "image"
        el.attrib["alt"] = m.group(1)
        el.attrib["format"] = "left"
        image_obj = get_or_create_image(
            params["path"], m.group(2), collection=get_collection()
        )
        el.attrib["id"] = str(image_obj.pk)
        return el, m.start(0), m.end(0)


class ImgExtension(Extension):
    """Markdown extension that registers ``ImgProcessor``."""

    def extendMarkdown(self, md):
        """Register the image pattern on ``md`` under the name ``img``."""
        IMG_PATTERN = r"!\[(.*?)\]\((.*?)\)"
        # NOTE(review): priority 175 is presumably chosen so this pattern runs
        # before Markdown's built-in image handling - confirm.
        md.inlinePatterns.register(ImgProcessor(IMG_PATTERN, md), "img", 175)


html_md = Markdown(extensions=[ImgExtension()])


def get_perex(text):
    """Return the first paragraph of ``text`` as plain text.

    The text is cut at the first blank line and the part before it is rendered
    with the "plain" Markdown output format.
    """
    first_paragraph = re.split(r"^\s*$", text.strip(), flags=re.MULTILINE)[0]
    # The instance is shared between posts; reset drops state left behind by
    # the previous conversion.
    plain_md.reset()
    return plain_md.convert(first_paragraph)


POSTS_DIR = "_posts"
# Not referenced by the code in this module; kept for existing importers.
TITLE_SUFFIX = " - Piráti České Budějovice"

# <year>-<month>-<day>-<slug>.<extension>, the file name layout of Jekyll posts.
_POST_FILENAME_RE = re.compile(r"(\d*)-(\d*)-(\d*)-(.*)\.(.*)")


def get_site_config(path):
    """Load ``_config.yml`` from the repository root ``path``.

    Returns:
        The parsed configuration, or an empty dict when the file is empty.
    """
    with open(os.path.join(path, "_config.yml"), encoding="utf-8") as f:
        # safe_load returns None for an empty document; callers expect a dict.
        return yaml.safe_load(f) or {}


def import_post(path, file_path, parrent, title_suffix):
    """Import one Jekyll post as a ``DistrictArticlePage``.

    Args:
        path: Root directory of the Jekyll repository.
        file_path: Path of the post file relative to ``path``.
        parrent: Page the new article is attached to with ``add_child``.
        title_suffix: Text appended to the title to build ``seo_title``.

    Returns:
        The existing article with the same title and date when there is one,
        otherwise the newly created article.

    Raises:
        KeyError: When the front matter lacks ``title``, ``date``, ``author``
            or ``image``.
    """
    from district.models import DistrictArticlePage

    with open(os.path.join(path, file_path), "rt", encoding="utf-8") as f:
        # maxsplit=2 yields [text before front matter, front matter, body];
        # a later "---" line inside the body no longer truncates the article.
        parts = re.split(r"^---\s*$", f.read(), maxsplit=2, flags=re.MULTILINE)
    meta = yaml.safe_load(parts[1])
    md = parts[2]
    # Shared instance: clear state left behind by the previous post.
    html_md.reset()
    html = html_md.convert(md)

    # YAML may deliver the date as str, date or datetime; str() normalizes all
    # of them and the first whitespace-separated token is the date part.
    date_str = str(meta["date"]).split()[0]
    post_date = parse_date(date_str)

    for article in DistrictArticlePage.objects.filter(title=meta["title"]):
        if article.date == post_date:
            # sys.stdout.write adds no line ending, so append it explicitly.
            stdout.write("Article already imported: %s\n" % article)
            return article

    article = DistrictArticlePage()

    article.text = html
    article.perex = get_perex(md)
    article.date = date_str
    article.title = meta["title"]
    article.author = meta["author"]

    article.seo_title = article.title + title_suffix
    article.search_description = meta.get("description", "")

    article.image = get_or_create_image(
        path, meta["image"], collection=get_collection()
    )

    parrent.add_child(instance=article)

    stdout.write("Creating article: %s\n" % article)
    rev = article.save_revision()
    # Jekyll treats a post without "published" as published.
    if meta.get("published", True):
        rev.publish()
    return article


def get_collection():
    """Return the image collection of the current import run."""
    return params["kolekce"]


def get_or_create_image(path, file_path, collection):
    """Return the image titled ``file_path``, creating it when missing.

    Args:
        path: Root directory of the Jekyll repository.
        file_path: Image path relative to ``path``; leading slashes are
            stripped. Also used as the image title and stored file name.
        collection: Collection a newly created image is assigned to.

    Returns:
        The existing or newly saved ``Image``.
    """
    file_path = file_path.lstrip("/")
    existing = Image.objects.filter(title=file_path).first()
    if existing is not None:
        return existing

    # Keep the source file open only while the image is being saved.
    with open(os.path.join(path, file_path), "rb") as fh:
        image = Image(
            title=file_path,
            file=ImageFile(fh, name=file_path),
            collection=collection,
        )
        image.save()
    return image


def perform_import(article_root_page_id, collection, path):
    """Import all Markdown posts of a Jekyll repository.

    For every ``_posts/<y>-<m>-<d>-<slug>.md`` file an article is imported
    under the root page and a permanent redirect from the old Jekyll URL to
    the article is created if it does not exist yet. Files with another
    extension or a non-matching name are reported and skipped.

    Args:
        article_root_page_id: pk of the ``DistrictArticlesPage`` the articles
            are created under.
        collection: Collection used for imported images.
        path: Root directory of the Jekyll repository.

    Raises:
        KeyError: When ``_config.yml`` has no ``articlepath`` entry.
    """
    from wagtail.contrib.redirects.models import Redirect

    from district.models import DistrictArticlesPage

    articles = DistrictArticlesPage.objects.get(pk=article_root_page_id)
    params["kolekce"] = collection
    params["path"] = path
    site = articles.get_site()

    site_config = get_site_config(path)
    title_suffix = " - " + site_config["title"] if "title" in site_config else ""
    articlepath = site_config["articlepath"]

    # Sorted so that repeated runs process the posts in the same order.
    for fn in sorted(os.listdir(os.path.join(path, POSTS_DIR))):
        match = _POST_FILENAME_RE.match(fn)
        if not match:
            stdout.write("WARNING: Skipping: %s\n" % fn)
            continue

        year, month, day, slug, ext = match.groups()
        if ext != "md":
            stdout.write("ERROR: Not Implemented: %s\n" % ext)
            continue

        article = import_post(
            path, os.path.join(POSTS_DIR, fn), articles, title_suffix
        )
        Redirect.objects.get_or_create(
            site=site,
            old_path="/%s/%s/%s/%s/%s/"
            % (articlepath, year, month.zfill(2), day.zfill(2), slug),
            defaults={"is_permanent": True, "redirect_page": article},
        )
-plain_md.stripTopLevelTags = False - -params = {} - - -class ImgProcessor(InlineProcessor): - def handleMatch(self, m, data): - el = ET.Element("embed") - el.attrib["embedtype"] = "image" - el.attrib["alt"] = m.group(1) - el.attrib["format"] = "left" - collection = get_collection() - image_obj = get_or_create_image( - params["path"], m.group(2), collection=collection - ) - el.attrib["id"] = str(image_obj.pk) - return el, m.start(0), m.end(0) - - -class ImgExtension(Extension): - def extendMarkdown(self, md): - IMG_PATTERN = r"!\[(.*?)\]\((.*?)\)" - md.inlinePatterns.register(ImgProcessor(IMG_PATTERN, md), "img", 175) - - -html_md = Markdown(extensions=[ImgExtension()]) - - -def get_perex(text): - text = re.split(r"^\s*$", text.strip(), flags=re.MULTILINE)[0] - return plain_md.convert(text) - - -POSTS_DIR = "_posts" -TITLE_SUFFIX = " - Piráti České Budějovice" - - -def get_site_config(path): - with open(os.path.join(path, "_config.yml")) as f: - config = yaml.safe_load(f.read()) - return config - -def import_post(path, file_path, parrent, title_suffix, command): - with open(os.path.join(path, file_path), "rt") as f: - r = re.split(r"^---\s*$", f.read(), maxsplit=3, flags=re.MULTILINE) - meta = yaml.safe_load(r[1]) - md = r[2] - html = html_md.convert(md) - - if DistrictArticlePage.objects.filter(title=meta["title"]).exists(): - for article in DistrictArticlePage.objects.filter(title=meta["title"]): - if article.date == parse_date(meta["date"].split()[0]): - command.stdout.write("Article already imported: %s" % article) - return article - - article = DistrictArticlePage() - - article.text = html - article.perex = get_perex(md) - article.date = meta["date"].split()[0] - article.title = meta["title"] - article.author = meta["author"] - - article.seo_title = article.title + title_suffix - article.search_description = meta.get("description", "") - - # for tag in meta['tags']: - # article.tags.add(tag) - - collection = get_collection() - article.image = 
def handle(self, *args, **options):
    """Run the Jekyll import with the values given on the command line.

    Reads ``clanky_id``, ``kolekce_id`` and ``path`` from ``options``, loads
    the collection with that pk and delegates to ``perform_import``.
    """
    root_page_id = options["clanky_id"]
    target_collection = Collection.objects.get(pk=options["kolekce_id"])
    repo_path = options["path"]

    perform_import(
        article_root_page_id=root_page_id,
        collection=target_collection,
        path=repo_path,
    )