add config via yaml
This commit is contained in:
parent
8afd96294f
commit
98a188fe35
10 changed files with 158 additions and 27 deletions
64
app/config.py
Normal file
64
app/config.py
Normal file
|
@ -0,0 +1,64 @@
|
|||
import os
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseSettings, BaseModel
|
||||
|
||||
|
||||
def yaml_config_settings_source(settings: BaseSettings):
    """
    Custom settings source that reads the settings from a YAML file.

    The file location is taken from the ``CONFIG_PATH`` environment variable
    and falls back to ``config.yaml`` (relative to the working directory).

    Parameters
    ----------
    settings : BaseSettings
        The settings instance requesting the values. It is not used here; it
        is only present because settings sources are called with it.

    Returns
    -------
    dict
        The mapping parsed from the YAML document, or an empty dict when the
        document is empty.

    Raises
    ------
    OSError
        If the configuration file cannot be opened.
    yaml.YAMLError
        If the file does not contain valid YAML.
    TypeError
        If the top-level YAML value is not a mapping.
    """
    path = os.getenv("CONFIG_PATH", "config.yaml")

    # Decode explicitly as UTF-8 so non-ASCII values do not depend on the
    # platform's default encoding.
    with open(path, "r", encoding="utf-8") as fh:
        data = yaml.safe_load(fh)

    # safe_load yields None for an empty document; a settings source must
    # hand back a mapping, so treat "empty file" as "no values provided".
    if data is None:
        return {}

    # Fail with a clear message instead of an obscure error further down
    # when the file holds a list or a scalar at the top level.
    if not isinstance(data, dict):
        raise TypeError(
            f"{path} must contain a YAML mapping at the top level, "
            f"got {type(data).__name__}"
        )
    return data
|
||||
|
||||
|
||||
class ConverterOptions(BaseModel):
    """
    Options that tune how posts are converted.

    Attributes
    ----------
    author_rewrite : str
        Value written to the ``author`` field of every converted post header.
    links_rewrite : list[dict]
        Link replacement rules applied to the content of every post. Each
        rule is a mapping with a ``source`` key (the text to look for) and a
        ``target`` key (the text to put in its place).
    """

    author_rewrite: str
    links_rewrite: list[dict]
|
||||
|
||||
|
||||
class Configurator(BaseSettings):
    """
    Configurator class for the app.

    Values come from keyword arguments passed to the constructor and from the
    YAML file read by ``yaml_config_settings_source``; keyword arguments take
    precedence over the file.

    Attributes
    ----------
    logging_level : str
        The logging level. Defaults to ``"INFO"``.
    source_path : str
        The path to the Jekyll posts.
    output_path : str
        The path to the Hugo posts.
    converter : str
        The name of the converter that converts the markdown.
    converter_options : ConverterOptions
        The options passed on to the converter.
    """

    logging_level: str = "INFO"
    source_path: str
    output_path: str
    converter: str
    converter_options: ConverterOptions

    class Config:
        env_file_encoding = "utf-8"

        @classmethod
        def customise_sources(
            cls,
            init_settings,
            env_settings,
            file_secret_settings,
        ):
            # Sources listed first win. Keeping init_settings ensures that
            # explicit constructor keyword arguments are honoured instead of
            # being silently discarded; the YAML file supplies the rest.
            # Environment variables and secret files stay disabled.
            return (init_settings, yaml_config_settings_source)
|
|
@ -1,2 +1,2 @@
|
|||
from converter import Converter
|
||||
from wordpress_markdown import WordpressMarkdownConverter
|
||||
from .converter import Converter
|
||||
from .wordpress_markdown import WordpressMarkdownConverter
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from app import utils
|
||||
from app.config import Configurator
|
||||
from app.converter.wordpress_markdown import WordpressMarkdownConverter
|
||||
|
||||
|
||||
|
@ -10,25 +12,28 @@ class Converter:
|
|||
Convert Jekyll posts to Hugo posts
|
||||
"""
|
||||
|
||||
def __init__(self, jekyll_posts_path: str, hugo_posts_path: str):
|
||||
def __init__(self, configurator: Configurator):
|
||||
"""
|
||||
Initializes the converter
|
||||
|
||||
Parameters
|
||||
----------
|
||||
jekyll_posts_path : str
|
||||
The path to the Jekyll posts
|
||||
hugo_posts_path : str
|
||||
The path to the Hugo posts
|
||||
configurator : Configurator
|
||||
The configurator instance.
|
||||
"""
|
||||
utils.guard_against_none_or_empty_str(jekyll_posts_path, "jekyll_posts_path")
|
||||
utils.guard_against_none_or_empty_str(hugo_posts_path, "hugo_posts_path")
|
||||
utils.guard_against_none(configurator, "configurator")
|
||||
|
||||
self._jekyll_posts_path = jekyll_posts_path
|
||||
self._hugo_posts_path = hugo_posts_path
|
||||
self._logger = logging.getLogger(__name__)
|
||||
|
||||
self._jekyll_posts_path = configurator.source_path
|
||||
self._hugo_posts_path = configurator.output_path
|
||||
|
||||
self._logger.info(
|
||||
f"Using source: {self._jekyll_posts_path} output: {self._hugo_posts_path}"
|
||||
)
|
||||
|
||||
# The converter that converts the markdown
|
||||
self.markdown_converter = WordpressMarkdownConverter()
|
||||
self.markdown_converter = WordpressMarkdownConverter(configurator)
|
||||
|
||||
def convert(self):
|
||||
"""
|
||||
|
|
|
@ -3,6 +3,8 @@ from pathlib import Path
|
|||
import yaml
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
|
||||
from app import utils
|
||||
from app.config import Configurator
|
||||
from app.utils import key_error_silence
|
||||
|
||||
|
||||
|
@ -11,6 +13,18 @@ class WordpressMarkdownConverter:
|
|||
Markdown converter that converts jekyll posts to hugo posts.
|
||||
"""
|
||||
|
||||
def __init__(self, configurator: Configurator):
    """
    Create a WordpressMarkdownConverter bound to the given configuration.

    Parameters
    ----------
    configurator : Configurator
        The configurator instance; stored on the converter as
        ``self.configurator``.

    Raises
    ------
    ValueError
        If ``configurator`` is None.
    """
    # Reject a missing configurator up front, before storing it.
    utils.guard_against_none(configurator, "configurator")

    self.configurator = configurator
|
||||
|
||||
def fix_hugo_header(self, header: dict) -> dict:
|
||||
"""
|
||||
Fix the Hugo header
|
||||
|
@ -34,7 +48,7 @@ class WordpressMarkdownConverter:
|
|||
with key_error_silence():
|
||||
del header["wordads_ufa"]
|
||||
header["guid"] = header["guid"].replace("http://localhost", "")
|
||||
header["author"] = "Denis Nuțiu"
|
||||
header["author"] = self.configurator.converter_options.author_rewrite
|
||||
return header
|
||||
|
||||
def remove_html_tags(self, post_lines):
|
||||
|
@ -46,11 +60,13 @@ class WordpressMarkdownConverter:
|
|||
soup = BeautifulSoup(line)
|
||||
for content in soup.contents:
|
||||
if isinstance(content, Tag):
|
||||
# Check if it is a youtube video and add it as a shortcode.
|
||||
if "is-provider-youtube" in content.attrs.get("class", []):
|
||||
video_link = content.findNext("iframe").attrs["src"]
|
||||
video_id_part = video_link.rsplit("/")
|
||||
video_id = video_id_part[-1].split("?")[0]
|
||||
fixed_lines.append(f"{{{{< youtube {video_id} >}}}}\n")
|
||||
# Fix unknown tags.
|
||||
else:
|
||||
tags = list(map(str, content.contents))
|
||||
if tags:
|
||||
|
@ -58,6 +74,7 @@ class WordpressMarkdownConverter:
|
|||
if fixed_tags:
|
||||
fixed_lines.extend(fixed_tags)
|
||||
else:
|
||||
# Add the content as is.
|
||||
fixed_lines.append(str(content))
|
||||
return fixed_lines
|
||||
|
||||
|
@ -76,10 +93,13 @@ class WordpressMarkdownConverter:
|
|||
The converted post content
|
||||
"""
|
||||
# fix link
|
||||
post_content = post_content.replace("http://localhost/", "/")
|
||||
post_content = post_content.replace(
|
||||
"https://nuculabs.wordpress.com/", "https://nuculabs.dev/posts/"
|
||||
)
|
||||
for task in self.configurator.converter_options.links_rewrite:
|
||||
source_link = task.get("source")
|
||||
target_link = task.get("target")
|
||||
if not source_link or not target_link:
|
||||
continue
|
||||
post_content = post_content.replace(source_link, target_link)
|
||||
|
||||
# fix unknown tags
|
||||
post_lines = post_content.split("\n")
|
||||
fixed_lines = self.remove_html_tags(post_lines)
|
||||
|
@ -113,6 +133,9 @@ class WordpressMarkdownConverter:
|
|||
post_content : str
|
||||
The post content
|
||||
"""
|
||||
# ensure that output path exists
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with open(output_path, "w") as fo:
|
||||
header = ["---\n", yaml.dump(post_header), "---\n"]
|
||||
fo.writelines(header)
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
import pytest
|
||||
|
||||
from app.utils import key_error_silence, guard_against_none_or_empty_str
|
||||
from app.utils import (
|
||||
key_error_silence,
|
||||
guard_against_none_or_empty_str,
|
||||
guard_against_none,
|
||||
)
|
||||
|
||||
|
||||
def test_key_error_silence():
|
||||
|
@ -33,3 +37,9 @@ def test_guard_against_none_or_empty_str(input_data):
|
|||
|
||||
def test_guard_against_none_or_empty_str_happy():
|
||||
guard_against_none_or_empty_str("a", "test")
|
||||
|
||||
|
||||
def test_guard_against_none():
    """guard_against_none rejects None and accepts a non-None value."""
    # None must be rejected with a ValueError.
    with pytest.raises(ValueError):
        guard_against_none(None, "test")

    # A non-None value must pass through without raising.
    guard_against_none(1, "test")
|
||||
|
|
16
app/utils.py
16
app/utils.py
|
@ -26,3 +26,19 @@ def guard_against_none_or_empty_str(value: str, name: str):
|
|||
|
||||
if value is None or not isinstance(value, str) or value == "":
|
||||
raise ValueError(f"{name} cannot be None or empty")
|
||||
|
||||
|
||||
def guard_against_none(value, name: str):
    """
    Ensure that a value is not None.

    Parameters
    ----------
    value : Any
        The value to check. Only ``None`` is rejected; any other value,
        including falsy ones such as ``0`` or ``""``, is accepted.
    name : str
        The name of the value, used in the error message.

    Raises
    ------
    ValueError
        If ``value`` is None.
    """
    if value is not None:
        return

    raise ValueError(f"{name} cannot be None")
|
||||
|
|
11
config.yaml
Normal file
11
config.yaml
Normal file
|
@ -0,0 +1,11 @@
|
|||
logging_level: "INFO"
|
||||
source_path: "/home/denis/PycharmProjects/jekyll-to-hugo/my_test_data/_posts"
|
||||
output_path: "/home/denis/PycharmProjects/jekyll-to-hugo/my_test_data/_posts_hugo"
|
||||
converter: "wordpress_markdown_converter"
|
||||
converter_options:
|
||||
author_rewrite: "NucuLabs.dev"
|
||||
links_rewrite:
|
||||
- source: "http://localhost/"
|
||||
target: "/"
|
||||
- source: "https://nuculabs.wordpress.com/"
|
||||
target: "https://nuculabs.dev/posts/"
|
15
main.py
15
main.py
|
@ -1,26 +1,23 @@
|
|||
import logging
|
||||
import sys
|
||||
|
||||
from app.config import Configurator
|
||||
from app.converter import Converter
|
||||
|
||||
|
||||
def main():
|
||||
# Configurator
|
||||
configurator = Configurator()
|
||||
|
||||
# Logging configuration
|
||||
logging.basicConfig(
|
||||
format="%(asctime)s %(process)d %(levelname)s %(message)s",
|
||||
level=logging.INFO,
|
||||
level=configurator.logging_level,
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
if len(sys.argv) != 3:
|
||||
logger.error(
|
||||
"Usage: python main.py <source path to jekyll posts> <output path to hugo posts>"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
# Converter
|
||||
converter = Converter(sys.argv[1], sys.argv[2])
|
||||
converter = Converter(configurator)
|
||||
converter.convert()
|
||||
|
||||
|
||||
|
|
|
@ -4,6 +4,10 @@ Jekyll to Hugo Converter is a simple tool to convert Jekyll posts to Hugo posts.
|
|||
|
||||
You can also use it to convert your WordPress blog into a Hugo blog. Tutorial coming soon.
|
||||
|
||||
Note:
|
||||
- This tool is still under development.
|
||||
- This tool is not perfect; it will not convert everything.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
beautifulsoup4==4.12.2
|
||||
PyYAML==6.0
|
||||
soupsieve==2.4.1
|
||||
pydantic==1.10.8
|
Loading…
Reference in a new issue