diff --git a/app/config.py b/app/config.py
new file mode 100644
index 0000000..14f1c16
--- /dev/null
+++ b/app/config.py
@@ -0,0 +1,82 @@
+import os
+
+import yaml
+from pydantic import BaseSettings, BaseModel
+
+
+def yaml_config_settings_source(settings: BaseSettings) -> dict:
+    """
+    Custom settings source that reads the settings from a YAML file.
+
+    The file path is read from the CONFIG_PATH environment variable and
+    defaults to "config.yaml".
+
+    Parameters
+    ----------
+    settings : BaseSettings
+        The settings instance being populated. Not used by this source.
+
+    Returns
+    -------
+    dict
+        The parsed settings, or an empty dict when the file has no content.
+    """
+    path = os.getenv("CONFIG_PATH", "config.yaml")
+    with open(path, "r", encoding="utf-8") as fh:
+        # safe_load returns None for an empty document; fall back to a dict.
+        return yaml.safe_load(fh) or {}
+
+
+class ConverterOptions(BaseModel):
+    """
+    Converter options.
+
+    Attributes
+    ----------
+    author_rewrite : str
+        Will rewrite the author to this value for all the posts.
+    links_rewrite : list[dict]
+        Link rewrite rules applied to all the posts. Each rule is a mapping
+        with a "source" key (text to find) and a "target" key (replacement).
+    """
+
+    author_rewrite: str
+    links_rewrite: list[dict]
+
+
+class Configurator(BaseSettings):
+    """
+    Configurator class for the app.
+
+    Attributes
+    ----------
+    logging_level : str
+        The logging level.
+    source_path : str
+        The path to the Jekyll posts.
+    output_path : str
+        The path to the Hugo posts.
+    converter : str
+        The converter that converts the markdown.
+    converter_options : ConverterOptions
+        The options read by the converter.
+    """
+
+    logging_level: str = "INFO"
+    source_path: str
+    output_path: str
+    converter: str
+    converter_options: ConverterOptions
+
+    class Config:
+        env_file_encoding = "utf-8"
+
+        @classmethod
+        def customise_sources(
+            cls,
+            init_settings,
+            env_settings,
+            file_secret_settings,
+        ):
+            # Only the YAML file is used as a settings source.
+            return (yaml_config_settings_source,)
diff --git a/app/converter/__init__.py b/app/converter/__init__.py
index a679a33..08ced26 100644
--- a/app/converter/__init__.py
+++ b/app/converter/__init__.py
@@ -1,2 +1,2 @@
-from converter import Converter
-from wordpress_markdown import WordpressMarkdownConverter
+from .converter import Converter
+from .wordpress_markdown import WordpressMarkdownConverter
diff --git a/app/converter/converter.py b/app/converter/converter.py
index 0d04ab0..4f0e608 100644
--- a/app/converter/converter.py
+++ b/app/converter/converter.py
@@ -1,7 +1,9 @@
+import logging
 import os
 from pathlib import Path
 
 from app import utils
+from app.config import Configurator
 from app.converter.wordpress_markdown import WordpressMarkdownConverter
 
 
@@ -10,25 +12,28 @@ class Converter:
     Convert Jekyll posts to Hugo posts
     """
 
-    def __init__(self, jekyll_posts_path: str, hugo_posts_path: str):
+    def __init__(self, configurator: Configurator):
         """
         Initializes the converter
 
         Parameters
         ----------
-        jekyll_posts_path : str
-            The path to the Jekyll posts
-        hugo_posts_path : str
-            The path to the Hugo posts
+        configurator : Configurator
+            The configurator instance.
         """
-        utils.guard_against_none_or_empty_str(jekyll_posts_path, "jekyll_posts_path")
-        utils.guard_against_none_or_empty_str(hugo_posts_path, "hugo_posts_path")
+        utils.guard_against_none(configurator, "configurator")
 
-        self._jekyll_posts_path = jekyll_posts_path
-        self._hugo_posts_path = hugo_posts_path
+        self._logger = logging.getLogger(__name__)
+
+        self._jekyll_posts_path = configurator.source_path
+        self._hugo_posts_path = configurator.output_path
+
+        self._logger.info(
+            f"Using source: {self._jekyll_posts_path} output: {self._hugo_posts_path}"
+        )
 
         # The converter that converts the markdown
-        self.markdown_converter = WordpressMarkdownConverter()
+        self.markdown_converter = WordpressMarkdownConverter(configurator)
 
     def convert(self):
         """
diff --git a/app/converter/wordpress_markdown.py b/app/converter/wordpress_markdown.py
index 76b366e..5cb989e 100644
--- a/app/converter/wordpress_markdown.py
+++ b/app/converter/wordpress_markdown.py
@@ -3,6 +3,8 @@ from pathlib import Path
 
 import yaml
 from bs4 import BeautifulSoup, Tag
+from app import utils
+from app.config import Configurator
 from app.utils import key_error_silence
 
 
@@ -11,6 +13,18 @@ class WordpressMarkdownConverter:
     Markdown converter that converts jekyll posts to hugo posts.
     """
 
+    def __init__(self, configurator: Configurator):
+        """
+        Initializes the WordpressMarkdownConverter
+
+        Parameters
+        ----------
+        configurator : Configurator
+            The configurator instance.
+        """
+        utils.guard_against_none(configurator, "configurator")
+        self.configurator = configurator
+
     def fix_hugo_header(self, header: dict) -> dict:
         """
         Fix the Hugo header
@@ -34,7 +48,7 @@ class WordpressMarkdownConverter:
         with key_error_silence():
             del header["wordads_ufa"]
         header["guid"] = header["guid"].replace("http://localhost", "")
-        header["author"] = "Denis Nuțiu"
+        header["author"] = self.configurator.converter_options.author_rewrite
         return header
 
     def remove_html_tags(self, post_lines):
@@ -46,11 +60,13 @@ class WordpressMarkdownConverter:
             soup = BeautifulSoup(line)
             for content in soup.contents:
                 if isinstance(content, Tag):
+                    # Check if it is a youtube video and add it as a shortcode.
                     if "is-provider-youtube" in content.attrs.get("class", []):
                         video_link = content.findNext("iframe").attrs["src"]
                         video_id_part = video_link.rsplit("/")
                         video_id = video_id_part[-1].split("?")[0]
                         fixed_lines.append(f"{{{{< youtube {video_id} >}}}}\n")
+                    # Fix unknown tags.
                     else:
                         tags = list(map(str, content.contents))
                         if tags:
@@ -58,6 +74,7 @@ class WordpressMarkdownConverter:
                             if fixed_tags:
                                 fixed_lines.extend(fixed_tags)
                 else:
+                    # Add the content as is.
                     fixed_lines.append(str(content))
         return fixed_lines
 
@@ -76,10 +93,13 @@ class WordpressMarkdownConverter:
             The converted post content
         """
         # fix link
-        post_content = post_content.replace("http://localhost/", "/")
-        post_content = post_content.replace(
-            "https://nuculabs.wordpress.com/", "https://nuculabs.dev/posts/"
-        )
+        for task in self.configurator.converter_options.links_rewrite:
+            source_link = task.get("source")
+            target_link = task.get("target")
+            if not source_link or not target_link:
+                continue
+            post_content = post_content.replace(source_link, target_link)
+
         # fix unknown tags
         post_lines = post_content.split("\n")
         fixed_lines = self.remove_html_tags(post_lines)
@@ -113,6 +133,9 @@ class WordpressMarkdownConverter:
         post_content : str
             The post content
         """
+        # ensure that output path exists
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
         with open(output_path, "w") as fo:
             header = ["---\n", yaml.dump(post_header), "---\n"]
             fo.writelines(header)
diff --git a/app/tests/utils_test.py b/app/tests/utils_test.py
index 60294f3..a43894a 100644
--- a/app/tests/utils_test.py
+++ b/app/tests/utils_test.py
@@ -1,6 +1,10 @@
 import pytest
 
-from app.utils import key_error_silence, guard_against_none_or_empty_str
+from app.utils import (
+    key_error_silence,
+    guard_against_none_or_empty_str,
+    guard_against_none,
+)
 
 
 def test_key_error_silence():
@@ -33,3 +37,9 @@ def test_guard_against_none_or_empty_str(input_data):
 
 def test_guard_against_none_or_empty_str_happy():
     guard_against_none_or_empty_str("a", "test")
+
+
+def test_guard_against_none():
+    with pytest.raises(ValueError):
+        guard_against_none(None, "test")
+    guard_against_none(1, "test")
diff --git a/app/utils.py b/app/utils.py
index 8880fd4..f8a7353 100644
--- a/app/utils.py
+++ b/app/utils.py
@@ -26,3 +26,24 @@ def guard_against_none_or_empty_str(value: str, name: str):
 
     if value is None or not isinstance(value, str) or value == "":
         raise ValueError(f"{name} cannot be None or empty")
+
+
+def guard_against_none(value, name: str):
+    """
+    Guard against None.
+
+    Parameters
+    ----------
+    value : Any
+        The value to check.
+    name : str
+        The name of the value, used in the error message.
+
+    Raises
+    ------
+    ValueError
+        If the value is None.
+    """
+
+    if value is None:
+        raise ValueError(f"{name} cannot be None")
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000..c931de4
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,11 @@
+logging_level: "INFO"
+source_path: "/home/denis/PycharmProjects/jekyll-to-hugo/my_test_data/_posts"
+output_path: "/home/denis/PycharmProjects/jekyll-to-hugo/my_test_data/_posts_hugo"
+converter: "wordpress_markdown_converter"
+converter_options:
+  author_rewrite: "NucuLabs.dev"
+  links_rewrite:
+    - source: "http://localhost/"
+      target: "/"
+    - source: "https://nuculabs.wordpress.com/"
+      target: "https://nuculabs.dev/posts/"
diff --git a/main.py b/main.py
index 2fc1642..a05ee38 100644
--- a/main.py
+++ b/main.py
@@ -1,26 +1,23 @@
 import logging
 import sys
 
+from app.config import Configurator
 from app.converter import Converter
 
 
 def main():
+    # Configurator
+    configurator = Configurator()
+
     # Logging configuration
     logging.basicConfig(
         format="%(asctime)s %(process)d %(levelname)s %(message)s",
-        level=logging.INFO,
+        level=configurator.logging_level.upper(),
         datefmt="%Y-%m-%d %H:%M:%S",
     )
-    logger = logging.getLogger(__name__)
-
-    if len(sys.argv) != 3:
-        logger.error(
-            "Usage: python main.py "
-        )
-        sys.exit(1)
 
     # Converter
-    converter = Converter(sys.argv[1], sys.argv[2])
+    converter = Converter(configurator)
     converter.convert()
 
 
diff --git a/readme.md b/readme.md
index 06e6ac7..fa5426a 100644
--- a/readme.md
+++ b/readme.md
@@ -4,6 +4,10 @@
 Jekyll to Hugo Converter is a simple tool to convert Jekyll posts to Hugo posts.
 You can also use it to convert your WordPress blog into a Hugo blog. Tutorial coming soon.
 
+Note:
+- This tool is still under development.
+- This tool is not perfect, it will not convert everything.
+
 ## Usage
 
 ```bash
diff --git a/requirements.txt b/requirements.txt
index 8dbee84..fe1f124 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 beautifulsoup4==4.12.2
 PyYAML==6.0
 soupsieve==2.4.1
+pydantic==1.10.8