Skip to content
Snippets Groups Projects
Commit ea202873 authored by jan.hosek's avatar jan.hosek
Browse files

district import jekyll

parent c55468c4
No related branches found
No related tags found
2 merge requests!423Release,!419district import jekyll
Pipeline #6729 passed
......@@ -151,3 +151,5 @@ static_files/
update_election_statics.sh
download_static.sh
matice.csv
.vscode/
import os
import yaml, markdown, re
from django.core.management.base import BaseCommand
from django.db.models.expressions import Col
from django.utils.text import slugify
from django.core.files.images import ImageFile
from wagtail.core.models.collections import Collection
from wagtail.images.models import Image
from wagtail.core.models import Site
from district.models import DistrictArticlePage, DistrictArticlesPage
from markdown import Markdown
from markdown.inlinepatterns import InlineProcessor
from markdown.extensions import Extension
import xml.etree.ElementTree as ET
from io import StringIO
from django.utils.dateparse import (
parse_date,
parse_datetime,
parse_duration,
parse_time,
)
import markdown.serializers
# Wagtail requires this, see
# https://docs.wagtail.io/en/stable/extending/rich_text_internals.html#data-format
# Adds "embed" to the serializer's HTML_EMPTY set — presumably so that the
# <embed> elements built by ImgProcessor are written as empty elements.
markdown.serializers.HTML_EMPTY.add("embed")
# Plain-text output format, used for the perex
def unmark_element(element, stream=None):
    """Serialize an ElementTree node to plain text, dropping all markup.

    The node's own text is written first, then every child (recursively, in
    document order), then the node's tail.

    Args:
        element: The ElementTree element to serialize.
        stream: Optional writable text buffer to append to; a new ``StringIO``
            is created when omitted.

    Returns:
        The full contents of the buffer (``stream.getvalue()``).
    """
    sink = StringIO() if stream is None else stream
    leading, trailing = element.text, element.tail
    if leading:
        sink.write(leading)
    for child in element:
        # Children share the same buffer, so their output lands in order.
        unmark_element(child, sink)
    if trailing:
        sink.write(trailing)
    return sink.getvalue()
# Register unmark_element as a Markdown output format named "plain".
Markdown.output_formats["plain"] = unmark_element
# Converter that renders Markdown to plain text; get_perex() uses it.
plain_md = Markdown(output_format="plain")
# Turn off Markdown's top-level tag stripping — presumably because the plain
# serializer emits no wrapper tags to strip (TODO confirm).
plain_md.stripTopLevelTags = False
# NOTE(review): a `global` statement at module level has no effect. The
# lowercase `path` global is assigned in Command.handle and read by
# ImgProcessor.handleMatch.
global path
# NOTE(review): PATH is not referenced anywhere else in the visible code.
PATH = os.path.abspath("../cb.pirati.cz/")
class ImgProcessor(InlineProcessor):
    """Inline processor that replaces a Markdown image with an ``<embed>`` element."""

    def handleMatch(self, m, data):
        """Build the image embed element for a single pattern match.

        Group 1 of the match becomes the ``alt`` attribute and group 2 is the
        image file path, which is looked up (or imported) via
        ``get_or_create_image`` in the collection from ``get_collection``.

        Returns:
            A tuple of the new element and the start and end offsets of the
            whole match.
        """
        embed = ET.Element("embed")
        embed.set("embedtype", "image")
        embed.set("alt", m.group(1))
        embed.set("format", "left")

        target_collection = get_collection()
        # TODO path: the repo root still comes from the module-level `path`
        # global that Command.handle assigns.
        image = get_or_create_image(path, m.group(2), collection=target_collection)
        embed.set("id", str(image.pk))

        span_start, span_end = m.start(0), m.end(0)
        return embed, span_start, span_end
class ImgExtension(Extension):
    """Markdown extension that installs ``ImgProcessor`` for image syntax."""

    def extendMarkdown(self, md):
        """Register the image processor on ``md`` as "img" with priority 175."""
        # Captures the alt text (group 1) and the image path (group 2).
        image_syntax = r"!\[(.*?)\]\((.*?)\)"
        processor = ImgProcessor(image_syntax, md)
        md.inlinePatterns.register(processor, "img", 175)
# Shared converter that renders post Markdown to HTML with the image-embed
# extension enabled; import_post() calls html_md.convert().
html_md = Markdown(extensions=[ImgExtension()])
def get_perex(text):
    """Return the first paragraph of ``text`` rendered as plain text.

    The stripped input is cut at the first whitespace-only line; the part
    before it is converted with the module-level ``plain_md`` converter.
    """
    blank_line = re.compile(r"^\s*$", flags=re.MULTILINE)
    first_paragraph, *_rest = blank_line.split(text.strip())
    return plain_md.convert(first_paragraph)
# Directory, relative to the Jekyll repo root, that Command.handle lists for posts.
POSTS_DIR = "_posts"
# NOTE(review): not referenced in the visible code; Command.handle builds the
# title suffix from the "title" key of _config.yml instead.
TITLE_SUFFIX = " - Piráti České Budějovice"
def get_site_config(path):
    """Load the Jekyll site configuration from ``<path>/_config.yml``.

    Args:
        path: Root directory of the Jekyll repository.

    Returns:
        The parsed YAML document, or an empty dict when the file contains no
        document (so callers can test for keys without hitting ``None``).

    Raises:
        OSError: If the configuration file cannot be opened.
    """
    # Read explicitly as UTF-8 so non-ASCII values (e.g. the site title) do not
    # depend on the platform's default encoding.
    with open(os.path.join(path, "_config.yml"), encoding="utf-8") as f:
        config = yaml.safe_load(f)
    return config or {}
def import_post(path, file_path, parrent, title_suffix):
    """Import one Jekyll post as a ``DistrictArticlePage`` under ``parrent``.

    The file is split into YAML front matter and a Markdown body. The body is
    rendered to HTML with ``html_md`` (which also imports any referenced images
    through ``ImgProcessor``). If an article with the same title and date
    already exists it is returned unchanged; otherwise a new page is created,
    added as a child of ``parrent`` and saved as a revision.

    Args:
        path: Root directory of the Jekyll repository.
        file_path: Path of the post file, relative to ``path``.
        parrent: Page object providing ``add_child(instance=...)``.
        title_suffix: Text appended to the title to form ``seo_title``.

    Returns:
        The existing or newly created article page.

    Raises:
        KeyError: If the front matter lacks ``title``, ``date``, ``author`` or
            ``image``.
        IndexError: If the file has no ``---`` delimited front matter.

    Note:
        ``meta["date"]`` is split on whitespace, so it must be loaded from YAML
        as a string (e.g. ``2020-01-31 12:00:00 +0100``).
    """
    # Jekyll sources are UTF-8; do not rely on the platform default encoding.
    with open(os.path.join(path, file_path), "rt", encoding="utf-8") as f:
        # Exactly two splits -> [prefix, front matter, body]. A larger maxsplit
        # would cut the body at its first "---" line (a Markdown horizontal
        # rule) and silently drop the rest of the article.
        parts = re.split(r"^---\s*$", f.read(), maxsplit=2, flags=re.MULTILINE)
    meta = yaml.safe_load(parts[1])
    md = parts[2]
    html = html_md.convert(md)

    # Only the calendar-day token of the front-matter date is used.
    post_day = meta["date"].split()[0]
    post_date = parse_date(post_day)

    # Same title and same date means the post was imported before; reuse it.
    for existing in DistrictArticlePage.objects.filter(title=meta["title"]):
        if existing.date == post_date:
            return existing

    article = DistrictArticlePage()
    article.text = html
    article.perex = get_perex(md)
    article.date = post_day
    article.title = meta["title"]
    article.author = meta["author"]
    article.seo_title = article.title + title_suffix
    article.search_description = meta.get("description", "")
    # TODO: tags from meta["tags"] are not imported yet.
    collection = get_collection()
    article.image = get_or_create_image(path, meta["image"], collection=collection)
    parrent.add_child(instance=article)
    rev = article.save_revision()
    # Jekyll treats a post as published unless the front matter says
    # otherwise, so a missing key must not abort after the page is saved.
    if meta.get("published", True):
        rev.publish()
    return article
def get_collection():
    """Return the collection named "import", creating it as a root node if absent.

    Returns:
        The first ``Collection`` whose name is "import", or a newly added root
        collection with that name when none exists.
    """
    # One query instead of exists() followed by first().
    collection = Collection.objects.filter(name="import").first()
    if collection is None:
        collection = Collection.add_root(name="import")
    return collection
def get_or_create_image(path, file_path, collection):
    """Return the image titled ``file_path``, importing it from disk if needed.

    Args:
        path: Root directory of the Jekyll repository.
        file_path: Image path relative to ``path``; leading slashes are
            stripped, and the result doubles as the image title and file name.
        collection: Collection the newly created image is placed in.

    Returns:
        The existing ``Image`` with that title, or the newly saved one.

    Raises:
        OSError: If the image file cannot be opened.
    """
    file_path = file_path.lstrip("/")

    # One query instead of exists() followed by first().
    existing = Image.objects.filter(title=file_path).first()
    if existing is not None:
        return existing

    # The context manager closes the source handle once the image is saved;
    # previously it stayed open for the rest of the process.
    # NOTE(review): callers that need the file contents afterwards should
    # re-fetch the Image rather than read from the returned instance.
    with open(os.path.join(path, file_path), "rb") as source:
        upload = ImageFile(source, name=file_path)
        image = Image(title=file_path, file=upload, collection=collection)
        image.save()
    return image
class Command(BaseCommand):
    """Management command that imports posts from a Jekyll repo as district articles."""

    # Kept byte-for-byte, including the trailing newline of the original literal.
    help = "Importuje články z pirátského jekyll webu.\n"

    def add_arguments(self, parser):
        """Declare the positional repo path and the optional site/parent selectors."""
        parser.add_argument("path", help="Cesta k jekyll repu")
        parser.add_argument(
            "--hostname",
            default=None,
            help="Hostname webu, defaultně vezme první site (/admin/sites/)",
        )
        parser.add_argument(
            "--clanky-id",
            default=None,
            type=int,
            help="Id nadstránky článků, defaultně vezme první",
        )

    def handle(self, *args, **options):
        """Import every ``*.md`` post and create a redirect from its old URL.

        Files in ``<path>/_posts`` named ``<y>-<m>-<d>-<slug>.<ext>`` are
        processed in sorted order. Each ``md`` file is imported with
        ``import_post`` and gets a permanent redirect from
        ``/<articlepath>/<y>/<mm>/<dd>/<slug>/`` to the imported page. Other
        extensions and non-matching names are reported on stdout.

        Raises:
            CommandError: If no articles parent page exists.
            KeyError: If ``_config.yml`` has no ``articlepath`` key.
        """
        # ImgProcessor.handleMatch reads this module-level global to locate
        # image files, so it must be set before any post is converted.
        global path

        # Imported here, once, rather than on every loop iteration.
        from django.core.management.base import CommandError
        from wagtail.contrib.redirects.models import Redirect

        if options["clanky_id"] is None:
            articles = DistrictArticlesPage.objects.first()
        else:
            articles = DistrictArticlesPage.objects.get(pk=options["clanky_id"])
        if articles is None:
            # Fail before any image or page is created instead of crashing
            # inside the first import.
            raise CommandError("No DistrictArticlesPage found to import articles under.")

        path = options["path"]
        site_config = get_site_config(path)
        title_suffix = " - " + site_config["title"] if "title" in site_config else ""

        if options["hostname"] is None:
            site = Site.objects.first()
        else:
            site = Site.objects.get(hostname=options["hostname"])

        articlepath = site_config["articlepath"]

        # Sorted so the import order is deterministic (file names start with the date).
        for fn in sorted(os.listdir(os.path.join(path, POSTS_DIR))):
            match = re.match(r"(\d*)-(\d*)-(\d*)-(.*)\.(.*)", fn)
            if not match:
                self.stdout.write(self.style.WARNING("Skipping: %s" % fn))
                continue

            y, m, d, slug, ext = match.groups()
            if ext != "md":
                self.stdout.write(self.style.ERROR("Not Implemented: %s" % ext))
                continue

            fname = os.path.join(POSTS_DIR, fn)
            article = import_post(path, fname, articles, title_suffix)
            # NOTE(review): old_path is stored with a trailing slash; confirm
            # this matches how Wagtail's redirect lookup normalises request
            # paths (see Redirect.normalise_path).
            Redirect.objects.get_or_create(
                site=site,
                old_path="/%s/%s/%s/%s/%s/"
                % (articlepath, y, m.zfill(2), d.zfill(2), slug),
                defaults={"is_permanent": True, "redirect_page": article},
            )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment