diff --git a/district/jekyll_import.py b/district/jekyll_import.py index 70e60236ef5846b11c634939a78c517dca3191d2..275ff54e64c4926f0fcbc2ee7bcf8f2524c119ff 100644 --- a/district/jekyll_import.py +++ b/district/jekyll_import.py @@ -18,6 +18,7 @@ from markdown import Markdown from markdown.extensions import Extension from markdown.inlinepatterns import InlineProcessor from wagtail.contrib.redirects.models import Redirect +from wagtail.core.rich_text import RichText from wagtail.images.models import Image # Wagtail to portrebuje https://docs.wagtail.io/en/stable/extending/rich_text_internals.html#data-format @@ -54,7 +55,7 @@ class ImgProcessor(InlineProcessor): el.attrib["format"] = "left" collection = get_collection() image_obj = get_or_create_image( - params["path"], m.group(2), collection=collection + params["path"], m.group(2), collection=collection, repo_name="" ) el.attrib["id"] = str(image_obj.pk) return el, m.start(0), m.end(0) @@ -84,7 +85,7 @@ def get_site_config(path) -> dict: return config -def import_post(path, file_path, parent, title_suffix, dry_run): +def import_post(path, file_path, parent, title_suffix, dry_run, repo_name): from district.models import DistrictArticlePage with open(os.path.join(path, file_path), "rt") as f: @@ -104,7 +105,8 @@ def import_post(path, file_path, parent, title_suffix, dry_run): article = DistrictArticlePage() - article.text = html + # article.text = html + article.content = [("text", RichText(html))] article.perex = get_perex(md) meta_date = meta["date"] @@ -123,7 +125,10 @@ def import_post(path, file_path, parent, title_suffix, dry_run): # article.tags.add(tag) collection = get_collection() - article.image = get_or_create_image(path, meta["image"], collection=collection) + if meta.get("image", None): + article.image = get_or_create_image( + path, meta["image"], collection=collection, repo_name=repo_name + ) if dry_run: return article, True @@ -132,7 +137,7 @@ def import_post(path, file_path, parent, title_suffix, dry_run): parent.add_child(instance=article) stdout.write("Creating article: %s" % article) rev = article.save_revision() - if meta["published"]: + if meta.get("published", True): rev.publish() except Exception as e: message_list.append( @@ -150,22 +155,36 @@ def get_collection(): return params["kolekce"] -def get_path(url: str, use_git: bool) -> str: +def get_path_and_repo_name(url: str, use_git: bool) -> (str, str): if use_git: return clone_repo(url) else: return download_repo_as_zip(url) -def get_or_create_image(path, file_path, collection): +def get_or_create_image(path, file_path, collection, repo_name): file_path = file_path.lstrip("/") if Image.objects.filter(title=file_path).exists(): return Image.objects.filter(title=file_path).first() else: - file = ImageFile(open(os.path.join(path, file_path), "rb"), name=file_path) - image = Image(title=file_path, file=file, collection=collection) - image.save() - return image + try: + file = ImageFile(open(os.path.join(path, file_path), "rb"), name=file_path) + image = Image(title=file_path, file=file, collection=collection) + image.save() + return image + except FileNotFoundError: + img_name = file_path.split("/")[-1] + img_assets_folder = repo_name.split(".")[0] # TODO make as form field + img_url = "https://a.pirati.cz/{}/img/posts/{}".format( + img_assets_folder, img_name + ) + img_path = os.path.join(path, img_name) + urllib.request.urlretrieve(img_url, img_path) + + file = ImageFile(open(img_path, "rb"), name=img_path) + image = Image(title=file_path, file=file, collection=collection) + image.save() + return image def get_title_from_site_config(site_config: dict) -> str: @@ -174,7 +193,7 @@ def get_title_from_site_config(site_config: dict) -> str: return "" -def clone_repo(url: str) -> str: +def clone_repo(url: str) -> (str, str): """ Naclonuje repo do tmp s využitím gitu a vrátí cestu k němu. Pokud URL končí lomítkem, odebereme ho, a vezmeme jako název repozitáře @@ -193,10 +212,10 @@ def clone_repo(url: str) -> str: os.chdir(path) os.system("git clone --depth 1 {}".format(url)) - return repo_path + return repo_path, repo_name -def download_repo_as_zip(url: str) -> str: +def download_repo_as_zip(url: str) -> (str, str): """ Stáhne .zip repa, extrahuje a vrátí cestu k extrahovanému repu. Hodně nešikovné je, že extrahovaná složka má ještě suffix "-gh-pages" @@ -215,7 +234,7 @@ def download_repo_as_zip(url: str) -> str: with zipfile.ZipFile(zip_path, "r") as zip_ref: zip_ref.extractall(path) - return os.path.join(path, "{}-gh-pages".format(repo_name)) + return os.path.join(path, "{}-gh-pages".format(repo_name)), repo_name def perform_import( @@ -231,7 +250,8 @@ def perform_import( params["kolekce"] = collection site = article_parent_page.get_site() - path = params["path"] = get_path(url=url, use_git=use_git) + params["path"], repo_name = get_path_and_repo_name(url=url, use_git=use_git) + path = params["path"] site_config = get_site_config(path) title_suffix = get_title_from_site_config(site_config) @@ -249,7 +269,7 @@ def perform_import( if ext == "md": article, success = import_post( - path, fname, article_parent_page, title_suffix, dry_run + path, fname, article_parent_page, title_suffix, dry_run, repo_name ) if not success: