add config via yaml
This commit is contained in:
parent
8afd96294f
commit
98a188fe35
10 changed files with 158 additions and 27 deletions
64
app/config.py
Normal file
64
app/config.py
Normal file
|
@ -0,0 +1,64 @@
|
||||||
|
import os
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
from pydantic import BaseSettings, BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
def yaml_config_settings_source(settings: BaseSettings) -> dict:
    """
    Custom settings source that reads the settings from a YAML file.

    The file location is taken from the ``CONFIG_PATH`` environment variable
    and falls back to ``config.yaml`` (resolved against the current working
    directory) when the variable is not set.

    Parameters
    ----------
    settings : BaseSettings
        The settings object the source is invoked for. It is not used here;
        it is accepted because settings sources are called with it.

    Returns
    -------
    dict
        The top-level mapping of the YAML document, or an empty dict when
        the file contains no document.

    Raises
    ------
    OSError
        If the configuration file cannot be opened.
    ValueError
        If the top level of the YAML document is not a mapping.
    """
    path = os.getenv("CONFIG_PATH", "config.yaml")

    # Decode explicitly as UTF-8 so parsing does not depend on the locale.
    with open(path, "r", encoding="utf-8") as fh:
        data = yaml.safe_load(fh)

    # An empty file parses to None; report "no values" instead so missing
    # fields surface as ordinary validation errors.
    if data is None:
        return {}

    if not isinstance(data, dict):
        raise ValueError(
            f"{path} must contain a mapping at the top level, "
            f"got {type(data).__name__}"
        )

    return data
|
||||||
|
|
||||||
|
|
||||||
|
class ConverterOptions(BaseModel):
    """
    Converter options.

    Attributes
    ----------
    author_rewrite : str
        Will rewrite the author to this value for all the posts.
    links_rewrite : list[dict]
        Link rewrite rules applied to the content of all the posts. Each
        entry is a mapping with a ``source`` and a ``target`` key, e.g.
        ``{"source": "http://localhost/", "target": "/"}``; occurrences of
        ``source`` are replaced with ``target``.
    """

    author_rewrite: str
    links_rewrite: list[dict]
|
||||||
|
|
||||||
|
|
||||||
|
class Configurator(BaseSettings):
    """
    Configurator class for the app.

    Values are read from the YAML file loaded by
    ``yaml_config_settings_source``; keyword arguments passed to the
    constructor take precedence over the file.

    Attributes
    ----------
    logging_level : str
        The logging level. Defaults to ``"INFO"``.
    source_path : str
        The path to the Jekyll posts.
    output_path : str
        The path to the Hugo posts.
    converter : str
        Identifier of the converter that converts the markdown,
        e.g. ``"wordpress_markdown_converter"``.
    converter_options : ConverterOptions
        Options passed on to the converter.
    """

    logging_level: str = "INFO"
    source_path: str
    output_path: str
    converter: str
    converter_options: ConverterOptions

    class Config:
        env_file_encoding = "utf-8"

        @classmethod
        def customise_sources(
            cls,
            init_settings,
            env_settings,
            file_secret_settings,
        ):
            """
            Select the settings sources, highest priority first.

            Explicit constructor arguments win over the YAML file so callers
            (and tests) can override individual values. Environment variables
            and secret files stay disabled, as before.
            """
            return (init_settings, yaml_config_settings_source)
|
|
@ -1,2 +1,2 @@
|
||||||
from converter import Converter
|
from .converter import Converter
|
||||||
from wordpress_markdown import WordpressMarkdownConverter
|
from .wordpress_markdown import WordpressMarkdownConverter
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from app import utils
|
from app import utils
|
||||||
|
from app.config import Configurator
|
||||||
from app.converter.wordpress_markdown import WordpressMarkdownConverter
|
from app.converter.wordpress_markdown import WordpressMarkdownConverter
|
||||||
|
|
||||||
|
|
||||||
|
@ -10,25 +12,28 @@ class Converter:
|
||||||
Convert Jekyll posts to Hugo posts
|
Convert Jekyll posts to Hugo posts
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, jekyll_posts_path: str, hugo_posts_path: str):
|
def __init__(self, configurator: Configurator):
|
||||||
"""
|
"""
|
||||||
Initializes the converter
|
Initializes the converter
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
jekyll_posts_path : str
|
configurator : Configurator
|
||||||
The path to the Jekyll posts
|
The configurator instance.
|
||||||
hugo_posts_path : str
|
|
||||||
The path to the Hugo posts
|
|
||||||
"""
|
"""
|
||||||
utils.guard_against_none_or_empty_str(jekyll_posts_path, "jekyll_posts_path")
|
utils.guard_against_none(configurator, "configurator")
|
||||||
utils.guard_against_none_or_empty_str(hugo_posts_path, "hugo_posts_path")
|
|
||||||
|
|
||||||
self._jekyll_posts_path = jekyll_posts_path
|
self._logger = logging.getLogger(__name__)
|
||||||
self._hugo_posts_path = hugo_posts_path
|
|
||||||
|
self._jekyll_posts_path = configurator.source_path
|
||||||
|
self._hugo_posts_path = configurator.output_path
|
||||||
|
|
||||||
|
self._logger.info(
|
||||||
|
f"Using source: {self._jekyll_posts_path} output: {self._hugo_posts_path}"
|
||||||
|
)
|
||||||
|
|
||||||
# The converter that converts the markdown
|
# The converter that converts the markdown
|
||||||
self.markdown_converter = WordpressMarkdownConverter()
|
self.markdown_converter = WordpressMarkdownConverter(configurator)
|
||||||
|
|
||||||
def convert(self):
|
def convert(self):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -3,6 +3,8 @@ from pathlib import Path
|
||||||
import yaml
|
import yaml
|
||||||
from bs4 import BeautifulSoup, Tag
|
from bs4 import BeautifulSoup, Tag
|
||||||
|
|
||||||
|
from app import utils
|
||||||
|
from app.config import Configurator
|
||||||
from app.utils import key_error_silence
|
from app.utils import key_error_silence
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,6 +13,18 @@ class WordpressMarkdownConverter:
|
||||||
Markdown converter that converts jekyll posts to hugo posts.
|
Markdown converter that converts jekyll posts to hugo posts.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def __init__(self, configurator: Configurator):
    """
    Initializes the WordpressMarkdownConverter

    Parameters
    ----------
    configurator : Configurator
        The configurator instance. It is stored on the converter and
        supplies the converter options.

    Raises
    ------
    ValueError
        If ``configurator`` is None.
    """
    # Fail fast here rather than with an AttributeError when the options
    # are first read during conversion.
    utils.guard_against_none(configurator, "configurator")
    self.configurator = configurator
|
||||||
|
|
||||||
def fix_hugo_header(self, header: dict) -> dict:
|
def fix_hugo_header(self, header: dict) -> dict:
|
||||||
"""
|
"""
|
||||||
Fix the Hugo header
|
Fix the Hugo header
|
||||||
|
@ -34,7 +48,7 @@ class WordpressMarkdownConverter:
|
||||||
with key_error_silence():
|
with key_error_silence():
|
||||||
del header["wordads_ufa"]
|
del header["wordads_ufa"]
|
||||||
header["guid"] = header["guid"].replace("http://localhost", "")
|
header["guid"] = header["guid"].replace("http://localhost", "")
|
||||||
header["author"] = "Denis Nuțiu"
|
header["author"] = self.configurator.converter_options.author_rewrite
|
||||||
return header
|
return header
|
||||||
|
|
||||||
def remove_html_tags(self, post_lines):
|
def remove_html_tags(self, post_lines):
|
||||||
|
@ -46,11 +60,13 @@ class WordpressMarkdownConverter:
|
||||||
soup = BeautifulSoup(line)
|
soup = BeautifulSoup(line)
|
||||||
for content in soup.contents:
|
for content in soup.contents:
|
||||||
if isinstance(content, Tag):
|
if isinstance(content, Tag):
|
||||||
|
# Check if it is a youtube video and add it as a shortcode.
|
||||||
if "is-provider-youtube" in content.attrs.get("class", []):
|
if "is-provider-youtube" in content.attrs.get("class", []):
|
||||||
video_link = content.findNext("iframe").attrs["src"]
|
video_link = content.findNext("iframe").attrs["src"]
|
||||||
video_id_part = video_link.rsplit("/")
|
video_id_part = video_link.rsplit("/")
|
||||||
video_id = video_id_part[-1].split("?")[0]
|
video_id = video_id_part[-1].split("?")[0]
|
||||||
fixed_lines.append(f"{{{{< youtube {video_id} >}}}}\n")
|
fixed_lines.append(f"{{{{< youtube {video_id} >}}}}\n")
|
||||||
|
# Fix unknown tags.
|
||||||
else:
|
else:
|
||||||
tags = list(map(str, content.contents))
|
tags = list(map(str, content.contents))
|
||||||
if tags:
|
if tags:
|
||||||
|
@ -58,6 +74,7 @@ class WordpressMarkdownConverter:
|
||||||
if fixed_tags:
|
if fixed_tags:
|
||||||
fixed_lines.extend(fixed_tags)
|
fixed_lines.extend(fixed_tags)
|
||||||
else:
|
else:
|
||||||
|
# Add the content as is.
|
||||||
fixed_lines.append(str(content))
|
fixed_lines.append(str(content))
|
||||||
return fixed_lines
|
return fixed_lines
|
||||||
|
|
||||||
|
@ -76,10 +93,13 @@ class WordpressMarkdownConverter:
|
||||||
The converted post content
|
The converted post content
|
||||||
"""
|
"""
|
||||||
# fix link
|
# fix link
|
||||||
post_content = post_content.replace("http://localhost/", "/")
|
for task in self.configurator.converter_options.links_rewrite:
|
||||||
post_content = post_content.replace(
|
source_link = task.get("source")
|
||||||
"https://nuculabs.wordpress.com/", "https://nuculabs.dev/posts/"
|
target_link = task.get("target")
|
||||||
)
|
if not source_link or not target_link:
|
||||||
|
continue
|
||||||
|
post_content = post_content.replace(source_link, target_link)
|
||||||
|
|
||||||
# fix unknown tags
|
# fix unknown tags
|
||||||
post_lines = post_content.split("\n")
|
post_lines = post_content.split("\n")
|
||||||
fixed_lines = self.remove_html_tags(post_lines)
|
fixed_lines = self.remove_html_tags(post_lines)
|
||||||
|
@ -113,6 +133,9 @@ class WordpressMarkdownConverter:
|
||||||
post_content : str
|
post_content : str
|
||||||
The post content
|
The post content
|
||||||
"""
|
"""
|
||||||
|
# ensure that output path exists
|
||||||
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
with open(output_path, "w") as fo:
|
with open(output_path, "w") as fo:
|
||||||
header = ["---\n", yaml.dump(post_header), "---\n"]
|
header = ["---\n", yaml.dump(post_header), "---\n"]
|
||||||
fo.writelines(header)
|
fo.writelines(header)
|
||||||
|
|
|
@ -1,6 +1,10 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from app.utils import key_error_silence, guard_against_none_or_empty_str
|
from app.utils import (
|
||||||
|
key_error_silence,
|
||||||
|
guard_against_none_or_empty_str,
|
||||||
|
guard_against_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_key_error_silence():
|
def test_key_error_silence():
|
||||||
|
@ -33,3 +37,9 @@ def test_guard_against_none_or_empty_str(input_data):
|
||||||
|
|
||||||
def test_guard_against_none_or_empty_str_happy():
|
def test_guard_against_none_or_empty_str_happy():
|
||||||
guard_against_none_or_empty_str("a", "test")
|
guard_against_none_or_empty_str("a", "test")
|
||||||
|
|
||||||
|
|
||||||
|
def test_guard_against_none():
    """guard_against_none rejects None and accepts any other value."""
    with pytest.raises(ValueError):
        guard_against_none(None, "test")

    # Deliberately outside the ``raises`` context: a statement placed after
    # the raising call inside the ``with`` body would never execute.
    # Falsy values are included because only None itself must be rejected.
    for value in (1, 0, "", [], False):
        guard_against_none(value, "test")
|
||||||
|
|
16
app/utils.py
16
app/utils.py
|
@ -26,3 +26,19 @@ def guard_against_none_or_empty_str(value: str, name: str):
|
||||||
|
|
||||||
if value is None or not isinstance(value, str) or value == "":
|
if value is None or not isinstance(value, str) or value == "":
|
||||||
raise ValueError(f"{name} cannot be None or empty")
|
raise ValueError(f"{name} cannot be None or empty")
|
||||||
|
|
||||||
|
|
||||||
|
def guard_against_none(value, name: str) -> None:
    """
    Guard against None.

    Parameters
    ----------
    value : object
        The value to check. Only ``None`` is rejected; other falsy values
        such as ``0`` or ``""`` are accepted.
    name : str
        The name of the value, used in the error message.

    Raises
    ------
    ValueError
        If ``value`` is None.
    """
    if value is None:
        raise ValueError(f"{name} cannot be None")
|
||||||
|
|
11
config.yaml
Normal file
11
config.yaml
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
logging_level: "INFO"
|
||||||
|
source_path: "/home/denis/PycharmProjects/jekyll-to-hugo/my_test_data/_posts"
|
||||||
|
output_path: "/home/denis/PycharmProjects/jekyll-to-hugo/my_test_data/_posts_hugo"
|
||||||
|
converter: "wordpress_markdown_converter"
|
||||||
|
converter_options:
|
||||||
|
author_rewrite: "NucuLabs.dev"
|
||||||
|
links_rewrite:
|
||||||
|
- source: "http://localhost/"
|
||||||
|
target: "/"
|
||||||
|
- source: "https://nuculabs.wordpress.com/"
|
||||||
|
target: "https://nuculabs.dev/posts/"
|
15
main.py
15
main.py
|
@ -1,26 +1,23 @@
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
from app.config import Configurator
|
||||||
from app.converter import Converter
|
from app.converter import Converter
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
# Configurator
|
||||||
|
configurator = Configurator()
|
||||||
|
|
||||||
# Logging configuration
|
# Logging configuration
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
format="%(asctime)s %(process)d %(levelname)s %(message)s",
|
format="%(asctime)s %(process)d %(levelname)s %(message)s",
|
||||||
level=logging.INFO,
|
level=configurator.logging_level,
|
||||||
datefmt="%Y-%m-%d %H:%M:%S",
|
datefmt="%Y-%m-%d %H:%M:%S",
|
||||||
)
|
)
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
if len(sys.argv) != 3:
|
|
||||||
logger.error(
|
|
||||||
"Usage: python main.py <source path to jekyll posts> <output path to hugo posts>"
|
|
||||||
)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# Converter
|
# Converter
|
||||||
converter = Converter(sys.argv[1], sys.argv[2])
|
converter = Converter(configurator)
|
||||||
converter.convert()
|
converter.convert()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,10 @@ Jekyll to Hugo Converter is a simple tool to convert Jekyll posts to Hugo posts.
|
||||||
|
|
||||||
You can also use it to convert your WordPress blog into a Hugo blog. Tutorial coming soon.
|
You can also use it to convert your WordPress blog into a Hugo blog. Tutorial coming soon.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
- This tool is still under development.
|
||||||
|
- This tool is not perfect; it will not convert everything.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
beautifulsoup4==4.12.2
|
beautifulsoup4==4.12.2
|
||||||
PyYAML==6.0
|
PyYAML==6.0
|
||||||
soupsieve==2.4.1
|
soupsieve==2.4.1
|
||||||
|
pydantic==1.10.8
|
Loading…
Reference in a new issue