add config via yaml

This commit is contained in:
Denis-Cosmin Nutiu 2023-05-29 21:58:32 +03:00
parent 8afd96294f
commit 98a188fe35
10 changed files with 158 additions and 27 deletions

64
app/config.py Normal file
View file

@ -0,0 +1,64 @@
import os
import yaml
from pydantic import BaseSettings, BaseModel
def yaml_config_settings_source(settings: BaseSettings) -> dict:
    """
    Custom settings source that reads the settings from a YAML file.

    The file location is taken from the ``CONFIG_PATH`` environment variable
    and falls back to ``config.yaml`` (relative to the working directory).

    Parameters
    ----------
    settings : BaseSettings
        The settings instance being populated. It is not used here.

    Returns
    -------
    dict
        The top-level mapping parsed from the YAML file, or an empty dict
        when the file contains no document.

    Raises
    ------
    OSError
        If the configuration file cannot be opened.
    TypeError
        If the top level of the YAML document is not a mapping.
    """
    path = os.getenv("CONFIG_PATH", "config.yaml")
    # Pin the encoding so parsing does not depend on the platform locale.
    with open(path, "r", encoding="utf-8") as fh:
        data = yaml.safe_load(fh)

    # safe_load returns None for an empty file; normalise that to "no values"
    # so the caller always receives a mapping.
    if data is None:
        return {}
    if not isinstance(data, dict):
        raise TypeError(
            f"{path} must contain a mapping at the top level, "
            f"got {type(data).__name__}"
        )
    return data
class ConverterOptions(BaseModel):
    """
    Options that tune the markdown converter.

    Attributes
    ----------
    author_rewrite : str
        Author value written into the header of every converted post.
    links_rewrite : list[dict]
        Link rewrite rules applied to the content of every post. The
        converter reads a ``source`` and a ``target`` key from each entry
        and skips entries where either one is missing or empty.
    """

    author_rewrite: str
    links_rewrite: list[dict]
class Configurator(BaseSettings):
    """
    Configurator class for the app.

    Values are loaded from the YAML configuration file. Keyword arguments
    passed to the constructor take precedence over the file.

    Attributes
    ----------
    logging_level : str
        The logging level.
    source_path : str
        The path to the Jekyll posts.
    output_path : str
        The path to the Hugo posts.
    converter : str
        The converter that converts the markdown.
    converter_options : ConverterOptions
        The options read by the markdown converter.
    """

    logging_level: str = "INFO"
    source_path: str
    output_path: str
    converter: str
    converter_options: ConverterOptions

    class Config:
        env_file_encoding = "utf-8"

        @classmethod
        def customise_sources(
            cls,
            init_settings,
            env_settings,
            file_secret_settings,
        ):
            """
            Select the settings sources, highest priority first.

            ``init_settings`` is listed first so that values passed
            explicitly to the constructor are honoured instead of being
            silently dropped; everything else comes from the YAML file.
            Environment and secret-file sources remain disabled.
            """
            return (init_settings, yaml_config_settings_source)

View file

@ -1,2 +1,2 @@
from converter import Converter
from wordpress_markdown import WordpressMarkdownConverter
from .converter import Converter
from .wordpress_markdown import WordpressMarkdownConverter

View file

@ -1,7 +1,9 @@
import logging
import os
from pathlib import Path
from app import utils
from app.config import Configurator
from app.converter.wordpress_markdown import WordpressMarkdownConverter
@ -10,25 +12,28 @@ class Converter:
Convert Jekyll posts to Hugo posts
"""
def __init__(self, jekyll_posts_path: str, hugo_posts_path: str):
def __init__(self, configurator: Configurator):
"""
Initializes the converter
Parameters
----------
jekyll_posts_path : str
The path to the Jekyll posts
hugo_posts_path : str
The path to the Hugo posts
configurator : Configurator
The configurator instance.
"""
utils.guard_against_none_or_empty_str(jekyll_posts_path, "jekyll_posts_path")
utils.guard_against_none_or_empty_str(hugo_posts_path, "hugo_posts_path")
utils.guard_against_none(configurator, "configurator")
self._jekyll_posts_path = jekyll_posts_path
self._hugo_posts_path = hugo_posts_path
self._logger = logging.getLogger(__name__)
self._jekyll_posts_path = configurator.source_path
self._hugo_posts_path = configurator.output_path
self._logger.info(
f"Using source: {self._jekyll_posts_path} output: {self._hugo_posts_path}"
)
# The converter that converts the markdown
self.markdown_converter = WordpressMarkdownConverter()
self.markdown_converter = WordpressMarkdownConverter(configurator)
def convert(self):
"""

View file

@ -3,6 +3,8 @@ from pathlib import Path
import yaml
from bs4 import BeautifulSoup, Tag
from app import utils
from app.config import Configurator
from app.utils import key_error_silence
@ -11,6 +13,18 @@ class WordpressMarkdownConverter:
Markdown converter that converts jekyll posts to hugo posts.
"""
def __init__(self, configurator: Configurator):
    """
    Creates a WordpressMarkdownConverter bound to a configuration.

    Parameters
    ----------
    configurator : Configurator
        The configurator instance. Must not be None.

    Raises
    ------
    ValueError
        If ``configurator`` is None.
    """
    utils.guard_against_none(value=configurator, name="configurator")
    self.configurator = configurator
def fix_hugo_header(self, header: dict) -> dict:
"""
Fix the Hugo header
@ -34,7 +48,7 @@ class WordpressMarkdownConverter:
with key_error_silence():
del header["wordads_ufa"]
header["guid"] = header["guid"].replace("http://localhost", "")
header["author"] = "Denis Nuțiu"
header["author"] = self.configurator.converter_options.author_rewrite
return header
def remove_html_tags(self, post_lines):
@ -46,11 +60,13 @@ class WordpressMarkdownConverter:
soup = BeautifulSoup(line)
for content in soup.contents:
if isinstance(content, Tag):
# Check if it is a youtube video and add it as a shortcode.
if "is-provider-youtube" in content.attrs.get("class", []):
video_link = content.findNext("iframe").attrs["src"]
video_id_part = video_link.rsplit("/")
video_id = video_id_part[-1].split("?")[0]
fixed_lines.append(f"{{{{< youtube {video_id} >}}}}\n")
# Fix unknown tags.
else:
tags = list(map(str, content.contents))
if tags:
@ -58,6 +74,7 @@ class WordpressMarkdownConverter:
if fixed_tags:
fixed_lines.extend(fixed_tags)
else:
# Add the content as is.
fixed_lines.append(str(content))
return fixed_lines
@ -76,10 +93,13 @@ class WordpressMarkdownConverter:
The converted post content
"""
# fix link
post_content = post_content.replace("http://localhost/", "/")
post_content = post_content.replace(
"https://nuculabs.wordpress.com/", "https://nuculabs.dev/posts/"
)
for task in self.configurator.converter_options.links_rewrite:
source_link = task.get("source")
target_link = task.get("target")
if not source_link or not target_link:
continue
post_content = post_content.replace(source_link, target_link)
# fix unknown tags
post_lines = post_content.split("\n")
fixed_lines = self.remove_html_tags(post_lines)
@ -113,6 +133,9 @@ class WordpressMarkdownConverter:
post_content : str
The post content
"""
# ensure that output path exists
output_path.parent.mkdir(parents=True, exist_ok=True)
with open(output_path, "w") as fo:
header = ["---\n", yaml.dump(post_header), "---\n"]
fo.writelines(header)

View file

@ -1,6 +1,10 @@
import pytest
from app.utils import key_error_silence, guard_against_none_or_empty_str
from app.utils import (
key_error_silence,
guard_against_none_or_empty_str,
guard_against_none,
)
def test_key_error_silence():
@ -33,3 +37,9 @@ def test_guard_against_none_or_empty_str(input_data):
def test_guard_against_none_or_empty_str_happy():
    """A non-empty string passes the guard without raising."""
    guard_against_none_or_empty_str("a", name="test")
def test_guard_against_none():
    """None is rejected with a ValueError; any other value passes."""
    # Pin the message too, so a ValueError raised for another reason
    # cannot make this test pass.
    with pytest.raises(ValueError, match="test cannot be None"):
        guard_against_none(None, "test")

    # Kept outside the ``raises`` block: a statement placed after the raising
    # call inside it would never execute.
    guard_against_none(1, "test")

View file

@ -26,3 +26,19 @@ def guard_against_none_or_empty_str(value: str, name: str):
if value is None or not isinstance(value, str) or value == "":
raise ValueError(f"{name} cannot be None or empty")
def guard_against_none(value, name: str):
    """
    Reject a None value.

    Only ``None`` is refused; other falsy values such as ``0`` or ``""``
    are accepted.

    Parameters
    ----------
    value
        The value to check. May be of any type.
    name : str
        The name of the value, used in the error message.

    Raises
    ------
    ValueError
        If ``value`` is None.
    """
    if value is not None:
        return
    raise ValueError(f"{name} cannot be None")

11
config.yaml Normal file
View file

@ -0,0 +1,11 @@
logging_level: "INFO"
source_path: "/home/denis/PycharmProjects/jekyll-to-hugo/my_test_data/_posts"
output_path: "/home/denis/PycharmProjects/jekyll-to-hugo/my_test_data/_posts_hugo"
converter: "wordpress_markdown_converter"
converter_options:
author_rewrite: "NucuLabs.dev"
links_rewrite:
- source: "http://localhost/"
target: "/"
- source: "https://nuculabs.wordpress.com/"
target: "https://nuculabs.dev/posts/"

15
main.py
View file

@ -1,26 +1,23 @@
import logging
import sys
from app.config import Configurator
from app.converter import Converter
def main():
# Configurator
configurator = Configurator()
# Logging configuration
logging.basicConfig(
format="%(asctime)s %(process)d %(levelname)s %(message)s",
level=logging.INFO,
level=configurator.logging_level,
datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)
if len(sys.argv) != 3:
logger.error(
"Usage: python main.py <source path to jekyll posts> <output path to hugo posts>"
)
sys.exit(1)
# Converter
converter = Converter(sys.argv[1], sys.argv[2])
converter = Converter(configurator)
converter.convert()

View file

@ -4,6 +4,10 @@ Jekyll to Hugo Converter is a simple tool to convert Jekyll posts to Hugo posts.
You can also use it to convert your WordPress blog into a Hugo blog. Tutorial coming soon.
Note:
- This tool is still under development.
- This tool is not perfect; it will not convert everything.
## Usage
```bash

View file

@ -1,3 +1,4 @@
beautifulsoup4==4.12.2
PyYAML==6.0
soupsieve==2.4.1
pydantic==1.10.8