Skip to content
Snippets Groups Projects
Commit 291495f5 authored by OndraRehounek's avatar OndraRehounek Committed by jan.bednarik
Browse files

WIP

parent 08451d0e
Branches
No related tags found
2 merge requests!442Release,!432Feature/majak imports
from django import forms
from wagtail.admin.forms import WagtailAdminPageForm
from wagtail.core.models.collections import Collection
from wagtail.core.models.sites import Site
from .jekyll_import import perform_import
class JekyllImportForm(WagtailAdminPageForm):
    """Wagtail page admin form that runs a Jekyll import when it is saved.

    The extra fields collect the arguments for ``perform_import``; ``save``
    runs the import first and then delegates to the regular page-form save.

    NOTE(review): ``dry_run`` and ``site`` are collected but not yet passed
    on to ``perform_import``; ``jekyll_repo_url`` is handed over as ``path``,
    which the importer treats as a filesystem directory - confirm the
    intended behaviour before this leaves WIP.
    """

    # TODO resolve circular import and make ModelChoiceField
    article_root_page_id = forms.IntegerField()
    collection = forms.ModelChoiceField(queryset=Collection.objects.all())
    # required=False: a required BooleanField rejects an unticked checkbox,
    # which would make it impossible to submit the form with dry_run off.
    dry_run = forms.BooleanField(initial=True, required=False)
    jekyll_repo_url = forms.URLField()
    site = forms.ModelChoiceField(queryset=Site.objects.all())

    def save(self, commit=True):
        """Run the Jekyll import with the cleaned form data, then save the page.

        Args:
            commit: forwarded unchanged to the parent form's ``save``.

        Returns:
            Whatever the parent form's ``save`` returns.
        """
        perform_import(
            article_root_page_id=self.cleaned_data["article_root_page_id"],
            # Pass the selected Collection, not the root page id.
            collection=self.cleaned_data["collection"],
            path=self.cleaned_data["jekyll_repo_url"],
        )
        return super().save(commit=commit)
import os
import re
import xml.etree.ElementTree as ET
from io import StringIO
from sys import stdout
import markdown.serializers
import yaml
from django.core.files.images import ImageFile
from django.utils.dateparse import parse_date
from markdown import Markdown
from markdown.extensions import Extension
from markdown.inlinepatterns import InlineProcessor
from wagtail.images.models import Image
# Wagtail needs this, see
# https://docs.wagtail.io/en/stable/extending/rich_text_internals.html#data-format
# Adds "embed" to Python-Markdown's set of empty HTML elements
# (presumably so <embed> is serialised without a closing tag - confirm).
markdown.serializers.HTML_EMPTY.add("embed")
# Plain-text output format used for the perex (lead paragraph)
def unmark_element(element, stream=None):
    """Serialise an ElementTree element to plain text.

    Writes the text and tail of ``element`` and of all its descendants, in
    document order, to ``stream`` and returns the stream's full contents.

    Args:
        element: the element to flatten.
        stream: optional writable text buffer providing ``write`` and
            ``getvalue``; a fresh ``StringIO`` is used when omitted.

    Returns:
        ``stream.getvalue()`` after the element has been written.
    """

    def fragments(node):
        # Document order: own text, every child subtree, then own tail.
        yield node.text
        for child in node:
            yield from fragments(child)
        yield node.tail

    sink = StringIO() if stream is None else stream
    for fragment in fragments(element):
        if fragment:
            sink.write(fragment)
    return sink.getvalue()
# Register unmark_element as an additional Markdown output format named
# "plain" and build a converter that uses it.
Markdown.output_formats["plain"] = unmark_element
plain_md = Markdown(output_format="plain")
plain_md.stripTopLevelTags = False
# Module-level state shared by the import helpers: get_collection() reads
# params["kolekce"] and ImgProcessor reads params["path"].
params = {}
class ImgProcessor(InlineProcessor):
    """Inline processor that turns a Markdown image into an ``<embed>`` element."""

    def handleMatch(self, m, data):
        """Build the ``<embed>`` element for one matched image.

        Group 1 of the match becomes the ``alt`` attribute; group 2 is the
        image path, resolved to an image object through
        ``get_or_create_image`` relative to ``params["path"]``.

        Returns:
            A tuple ``(element, start, end)`` where start/end are the bounds
            of the whole match.
        """
        alt_text, image_path = m.group(1), m.group(2)
        image_obj = get_or_create_image(
            params["path"], image_path, collection=get_collection()
        )
        embed = ET.Element(
            "embed",
            {
                "embedtype": "image",
                "alt": alt_text,
                "format": "left",
                "id": str(image_obj.pk),
            },
        )
        return embed, m.start(0), m.end(0)
class ImgExtension(Extension):
    """Markdown extension that installs ``ImgProcessor`` for image syntax."""

    def extendMarkdown(self, md):
        """Register the image processor under the name ``img`` with priority 175."""
        processor = ImgProcessor(r"!\[(.*?)\]\((.*?)\)", md)
        md.inlinePatterns.register(processor, "img", 175)
# Shared converter for article bodies; ImgExtension makes it emit <embed>
# elements for Markdown images.
html_md = Markdown(extensions=[ImgExtension()])
def get_perex(text):
    """Return the first paragraph of Markdown ``text`` converted to plain text.

    The stripped text is split on blank (whitespace-only) lines; only the
    part before the first such line is converted with ``plain_md``.
    """
    paragraphs = re.split(r"^\s*$", text.strip(), flags=re.MULTILINE)
    first_paragraph = paragraphs[0]
    return plain_md.convert(first_paragraph)
# Directory, relative to the import path, that is scanned for post files.
POSTS_DIR = "_posts"
# NOTE(review): not referenced anywhere in the visible code; the suffix that
# is actually used is built from the "title" key of _config.yml. Confirm
# whether this constant can be removed.
TITLE_SUFFIX = " - Piráti České Budějovice"
def get_site_config(path):
    """Load the ``_config.yml`` found in directory ``path``.

    Args:
        path: directory that contains ``_config.yml``.

    Returns:
        The parsed YAML document, or an empty dict when the file is empty
        (``yaml.safe_load`` returns ``None`` for an empty document).

    Raises:
        OSError: if the file cannot be opened.
        yaml.YAMLError: if the file is not valid YAML.
    """
    # Read as UTF-8 explicitly so non-ASCII values do not depend on the
    # locale's default encoding.
    with open(os.path.join(path, "_config.yml"), encoding="utf-8") as f:
        config = yaml.safe_load(f)
    return config or {}
def import_post(path, file_path, parrent, title_suffix):
    """Import one post file as a ``DistrictArticlePage`` under ``parrent``.

    The file is split into YAML front matter and a Markdown body. If a page
    with the same title and date already exists it is returned unchanged;
    otherwise a new page is created, added as a child of ``parrent`` and
    published when the front matter does not say ``published: false``.

    Args:
        path: root directory of the import.
        file_path: path of the post file relative to ``path``.
        parrent: page that receives the new article via ``add_child``.
        title_suffix: text appended to the title to form ``seo_title``.

    Returns:
        The existing or newly created article page.

    Raises:
        ValueError: if the file has no front matter or its date is unparsable.
        KeyError: if a required front-matter key (title, date, author,
            image) is missing.
    """
    from district.models import DistrictArticlePage

    # Read as UTF-8 explicitly; do not depend on the locale's default encoding.
    with open(os.path.join(path, file_path), "rt", encoding="utf-8") as f:
        # maxsplit=2 yields [prefix, front matter, body]; any later "---"
        # line (a Markdown horizontal rule) stays part of the body.
        parts = re.split(r"^---\s*$", f.read(), maxsplit=2, flags=re.MULTILINE)
    if len(parts) < 3:
        raise ValueError("Missing YAML front matter in %s" % file_path)
    meta = yaml.safe_load(parts[1])
    md = parts[2]

    # The converter instance is shared; reset per-document state first.
    html_md.reset()
    html = html_md.convert(md)

    # YAML may hand back a str, date or datetime; str() makes all three
    # start with "YYYY-MM-DD".
    post_date = parse_date(str(meta["date"]).split()[0])
    if post_date is None:
        raise ValueError("Unparsable date %r in %s" % (meta["date"], file_path))

    for article in DistrictArticlePage.objects.filter(title=meta["title"]):
        if article.date == post_date:
            # sys.stdout.write does not add a line ending by itself.
            stdout.write("Article already imported: %s\n" % article)
            return article

    article = DistrictArticlePage()
    article.text = html
    article.perex = get_perex(md)
    article.date = post_date
    article.title = meta["title"]
    article.author = meta["author"]
    article.seo_title = article.title + title_suffix
    article.search_description = meta.get("description", "")
    # for tag in meta['tags']:
    # article.tags.add(tag)
    collection = get_collection()
    article.image = get_or_create_image(path, meta["image"], collection=collection)
    parrent.add_child(instance=article)
    stdout.write("Creating article: %s\n" % article)
    rev = article.save_revision()
    # Jekyll treats posts as published unless they say otherwise.
    if meta.get("published", True):
        rev.publish()
    return article
def get_collection():
    """Return the collection stored under ``params["kolekce"]``.

    Raises:
        KeyError: if the entry has not been set yet.
    """
    collection = params["kolekce"]
    return collection
def get_or_create_image(path, file_path, collection):
    """Return the image titled ``file_path``, creating it from disk if needed.

    Args:
        path: root directory of the import.
        file_path: image path relative to ``path``; leading slashes are
            stripped and the result doubles as the image title.
        collection: collection assigned to a newly created image.

    Returns:
        The existing or newly saved ``Image``.

    Raises:
        OSError: if a new image is needed and the file cannot be opened.
    """
    file_path = file_path.lstrip("/")
    # One query instead of exists() followed by first().
    existing = Image.objects.filter(title=file_path).first()
    if existing is not None:
        return existing
    # Keep the source file open only while the image is saved, then close it
    # instead of leaking the handle.
    with open(os.path.join(path, file_path), "rb") as source:
        image = Image(
            title=file_path,
            file=ImageFile(source, name=file_path),
            collection=collection,
        )
        image.save()
    return image
def perform_import(article_root_page_id, collection, path):
    """Import every Markdown post below ``path`` and register redirects.

    Each file in the ``POSTS_DIR`` directory whose name matches
    ``<year>-<month>-<day>-<slug>.<ext>`` and has the ``md`` extension is
    imported with ``import_post``. A permanent redirect from
    ``/<articlepath>/<year>/<month>/<day>/<slug>/`` to the article is created
    unless one already exists for that site and path.

    Args:
        article_root_page_id: primary key of the ``DistrictArticlesPage``
            that becomes the parent of the imported articles.
        collection: collection for imported images.
        path: root directory of the import; must contain ``_config.yml``.

    Raises:
        KeyError: if ``_config.yml`` has no ``articlepath`` key.
    """
    from district.models import DistrictArticlesPage
    from wagtail.contrib.redirects.models import Redirect

    articles = DistrictArticlesPage.objects.get(pk=article_root_page_id)
    site = articles.get_site()

    # Shared state read by get_collection() and ImgProcessor.
    params["kolekce"] = collection
    params["path"] = path

    site_config = get_site_config(path)
    title = site_config.get("title")
    title_suffix = " - %s" % title if title else ""
    articlepath = site_config["articlepath"]

    # Sorted so that repeated runs process the files in the same order.
    for fn in sorted(os.listdir(os.path.join(path, POSTS_DIR))):
        match = re.match(r"(\d*)-(\d*)-(\d*)-(.*)\.(.*)", fn)
        if not match:
            stdout.write("WARNING: Skipping: %s\n" % fn)
            continue
        y, m, d, slug, ext = match.groups()
        if ext != "md":
            stdout.write("ERROR: Not Implemented: %s\n" % ext)
            continue

        fname = os.path.join(POSTS_DIR, fn)
        article = import_post(path, fname, articles, title_suffix)

        old_path = "/%s/%s/%s/%s/%s/" % (
            articlepath,
            y,
            m.zfill(2),
            d.zfill(2),
            slug,
        )
        # get_or_create() bypasses Redirect.clean(), so normalise explicitly;
        # redirect lookups use the normalised form (no trailing slash).
        Redirect.objects.get_or_create(
            site=site,
            old_path=Redirect.normalise_path(old_path),
            defaults={"is_permanent": True, "redirect_page": article},
        )
import os
import re
import xml.etree.ElementTree as ET
from io import StringIO
import markdown
import markdown.serializers
import yaml
from django.core.files.images import ImageFile
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.db.models.expressions import Col
from django.utils.dateparse import (
parse_date,
parse_datetime,
parse_duration,
parse_time,
)
from django.utils.text import slugify
from markdown import Markdown
from markdown.extensions import Extension
from markdown.inlinepatterns import InlineProcessor
from wagtail.core.models import Site
from wagtail.core.models.collections import Collection from wagtail.core.models.collections import Collection
from wagtail.images.models import Image
from district.models import DistrictArticlePage, DistrictArticlesPage
# Wagtail needs this, see
# https://docs.wagtail.io/en/stable/extending/rich_text_internals.html#data-format
# Adds "embed" to Python-Markdown's set of empty HTML elements
# (presumably so <embed> is serialised without a closing tag - confirm).
markdown.serializers.HTML_EMPTY.add("embed")
# Plain-text output format used for the perex (lead paragraph)
def unmark_element(element, stream=None):
    """Serialise an ElementTree element to plain text.

    Writes the text and tail of ``element`` and of all its descendants, in
    document order, to ``stream`` and returns the stream's full contents.

    Args:
        element: the element to flatten.
        stream: optional writable text buffer providing ``write`` and
            ``getvalue``; a fresh ``StringIO`` is used when omitted.

    Returns:
        ``stream.getvalue()`` after the element has been written.
    """

    def fragments(node):
        # Document order: own text, every child subtree, then own tail.
        yield node.text
        for child in node:
            yield from fragments(child)
        yield node.tail

    sink = StringIO() if stream is None else stream
    for fragment in fragments(element):
        if fragment:
            sink.write(fragment)
    return sink.getvalue()
# Register unmark_element as an additional Markdown output format named
# "plain" and build a converter that uses it.
Markdown.output_formats["plain"] = unmark_element
plain_md = Markdown(output_format="plain")
plain_md.stripTopLevelTags = False
# Module-level state shared by the import helpers: get_collection() reads
# params["kolekce"] and ImgProcessor reads params["path"].
params = {}
class ImgProcessor(InlineProcessor):
    """Inline processor that turns a Markdown image into an ``<embed>`` element."""

    def handleMatch(self, m, data):
        """Build the ``<embed>`` element for one matched image.

        Group 1 of the match becomes the ``alt`` attribute; group 2 is the
        image path, resolved to an image object through
        ``get_or_create_image`` relative to ``params["path"]``.

        Returns:
            A tuple ``(element, start, end)`` where start/end are the bounds
            of the whole match.
        """
        alt_text, image_path = m.group(1), m.group(2)
        image_obj = get_or_create_image(
            params["path"], image_path, collection=get_collection()
        )
        embed = ET.Element(
            "embed",
            {
                "embedtype": "image",
                "alt": alt_text,
                "format": "left",
                "id": str(image_obj.pk),
            },
        )
        return embed, m.start(0), m.end(0)
class ImgExtension(Extension):
    """Markdown extension that installs ``ImgProcessor`` for image syntax."""

    def extendMarkdown(self, md):
        """Register the image processor under the name ``img`` with priority 175."""
        processor = ImgProcessor(r"!\[(.*?)\]\((.*?)\)", md)
        md.inlinePatterns.register(processor, "img", 175)
# Shared converter for article bodies; ImgExtension makes it emit <embed>
# elements for Markdown images.
html_md = Markdown(extensions=[ImgExtension()])
def get_perex(text):
    """Return the first paragraph of Markdown ``text`` converted to plain text.

    The stripped text is split on blank (whitespace-only) lines; only the
    part before the first such line is converted with ``plain_md``.
    """
    paragraphs = re.split(r"^\s*$", text.strip(), flags=re.MULTILINE)
    first_paragraph = paragraphs[0]
    return plain_md.convert(first_paragraph)
# Directory, relative to the import path, that is scanned for post files.
POSTS_DIR = "_posts"
# NOTE(review): not referenced anywhere in the visible code; the suffix that
# is actually used is built from the "title" key of _config.yml. Confirm
# whether this constant can be removed.
TITLE_SUFFIX = " - Piráti České Budějovice"
def get_site_config(path):
    """Load the ``_config.yml`` found in directory ``path``.

    Args:
        path: directory that contains ``_config.yml``.

    Returns:
        The parsed YAML document, or an empty dict when the file is empty
        (``yaml.safe_load`` returns ``None`` for an empty document).

    Raises:
        OSError: if the file cannot be opened.
        yaml.YAMLError: if the file is not valid YAML.
    """
    # Read as UTF-8 explicitly so non-ASCII values do not depend on the
    # locale's default encoding.
    with open(os.path.join(path, "_config.yml"), encoding="utf-8") as f:
        config = yaml.safe_load(f)
    return config or {}
from ...jekyll_import import perform_import
def import_post(path, file_path, parrent, title_suffix, command):
    """Import one post file as a ``DistrictArticlePage`` under ``parrent``.

    The file is split into YAML front matter and a Markdown body. If a page
    with the same title and date already exists it is returned unchanged;
    otherwise a new page is created, added as a child of ``parrent`` and
    published when the front matter does not say ``published: false``.

    Args:
        path: root directory of the import.
        file_path: path of the post file relative to ``path``.
        parrent: page that receives the new article via ``add_child``.
        title_suffix: text appended to the title to form ``seo_title``.
        command: object whose ``stdout.write`` receives progress messages.

    Returns:
        The existing or newly created article page.

    Raises:
        ValueError: if the file has no front matter or its date is unparsable.
        KeyError: if a required front-matter key (title, date, author,
            image) is missing.
    """
    # Read as UTF-8 explicitly; do not depend on the locale's default encoding.
    with open(os.path.join(path, file_path), "rt", encoding="utf-8") as f:
        # maxsplit=2 yields [prefix, front matter, body]; any later "---"
        # line (a Markdown horizontal rule) stays part of the body.
        parts = re.split(r"^---\s*$", f.read(), maxsplit=2, flags=re.MULTILINE)
    if len(parts) < 3:
        raise ValueError("Missing YAML front matter in %s" % file_path)
    meta = yaml.safe_load(parts[1])
    md = parts[2]

    # The converter instance is shared; reset per-document state first.
    html_md.reset()
    html = html_md.convert(md)

    # YAML may hand back a str, date or datetime; str() makes all three
    # start with "YYYY-MM-DD".
    post_date = parse_date(str(meta["date"]).split()[0])
    if post_date is None:
        raise ValueError("Unparsable date %r in %s" % (meta["date"], file_path))

    for article in DistrictArticlePage.objects.filter(title=meta["title"]):
        if article.date == post_date:
            command.stdout.write("Article already imported: %s" % article)
            return article

    article = DistrictArticlePage()
    article.text = html
    article.perex = get_perex(md)
    article.date = post_date
    article.title = meta["title"]
    article.author = meta["author"]
    article.seo_title = article.title + title_suffix
    article.search_description = meta.get("description", "")
    # for tag in meta['tags']:
    # article.tags.add(tag)
    collection = get_collection()
    article.image = get_or_create_image(path, meta["image"], collection=collection)
    parrent.add_child(instance=article)
    command.stdout.write("Creating article: %s" % article)
    rev = article.save_revision()
    # Jekyll treats posts as published unless they say otherwise.
    if meta.get("published", True):
        rev.publish()
    return article
def get_collection():
    """Return the collection stored under ``params["kolekce"]``.

    Raises:
        KeyError: if the entry has not been set yet.
    """
    collection = params["kolekce"]
    return collection
def get_or_create_image(path, file_path, collection):
    """Return the image titled ``file_path``, creating it from disk if needed.

    Args:
        path: root directory of the import.
        file_path: image path relative to ``path``; leading slashes are
            stripped and the result doubles as the image title.
        collection: collection assigned to a newly created image.

    Returns:
        The existing or newly saved ``Image``.

    Raises:
        OSError: if a new image is needed and the file cannot be opened.
    """
    file_path = file_path.lstrip("/")
    # One query instead of exists() followed by first().
    existing = Image.objects.filter(title=file_path).first()
    if existing is not None:
        return existing
    # Keep the source file open only while the image is saved, then close it
    # instead of leaking the handle.
    with open(os.path.join(path, file_path), "rb") as source:
        image = Image(
            title=file_path,
            file=ImageFile(source, name=file_path),
            collection=collection,
        )
        image.save()
    return image
class Command(BaseCommand): class Command(BaseCommand):
...@@ -166,44 +30,8 @@ class Command(BaseCommand): ...@@ -166,44 +30,8 @@ class Command(BaseCommand):
) )
def handle(self, *args, **options): def handle(self, *args, **options):
perform_import(
articles = DistrictArticlesPage.objects.get(pk=options["clanky_id"]) article_root_page_id=options["clanky_id"],
params["kolekce"] = Collection.objects.get(pk=options["kolekce_id"]) collection=Collection.objects.get(pk=options["kolekce_id"]),
site = articles.get_site() path=options["path"],
path = params["path"] = options["path"]
site_config = get_site_config(path)
if "title" in site_config:
title_suffix = " - " + site_config.get("title", "")
else:
title_suffix = ""
articlepath = site_config["articlepath"]
for fn in os.listdir(os.path.join(path, POSTS_DIR)):
fname = os.path.join(POSTS_DIR, fn)
match = re.match(r"(\d*)-(\d*)-(\d*)-(.*)\.(.*)", fn)
if match:
y = match.group(1)
m = match.group(2)
d = match.group(3)
slug = match.group(4)
ext = match.group(5)
if ext == "md":
article = import_post(
path, fname, articles, title_suffix, command=self
)
from wagtail.contrib.redirects.models import Redirect
r, created = Redirect.objects.get_or_create(
site=site,
old_path="/%s/%s/%s/%s/%s/"
% (articlepath, y, m.zfill(2), d.zfill(2), slug),
defaults={"is_permanent": True, "redirect_page": article},
) )
else:
self.stdout.write(self.style.ERROR("Not Implemented: %s" % ext))
else:
self.stdout.write(self.style.WARNING("Skipping: %s" % fn))
...@@ -30,6 +30,7 @@ from shared.models import ArticleMixin, MenuMixin, SubpageMixin ...@@ -30,6 +30,7 @@ from shared.models import ArticleMixin, MenuMixin, SubpageMixin
from uniweb.constants import RICH_TEXT_FEATURES from uniweb.constants import RICH_TEXT_FEATURES
from . import blocks from . import blocks
from .forms import JekyllImportForm
class DistrictHomePage(MenuMixin, MetadataPageMixin, CalendarMixin, Page): class DistrictHomePage(MenuMixin, MetadataPageMixin, CalendarMixin, Page):
...@@ -256,6 +257,8 @@ class DistrictHomePage(MenuMixin, MetadataPageMixin, CalendarMixin, Page): ...@@ -256,6 +257,8 @@ class DistrictHomePage(MenuMixin, MetadataPageMixin, CalendarMixin, Page):
### OTHERS ### OTHERS
base_form_class = JekyllImportForm
class Meta: class Meta:
verbose_name = "Místní sdružení" verbose_name = "Místní sdružení"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment