diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..550bf39
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,290 @@
+# Created by https://www.toptal.com/developers/gitignore/api/pycharm,python
+# Edit at https://www.toptal.com/developers/gitignore?templates=pycharm,python
+
+### PyCharm ###
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+
+# AWS User-specific
+.idea/**/aws.xml
+
+# Generated files
+.idea/**/contentModel.xml
+
+# Sensitive or high-churn files
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+
+# Gradle
+.idea/**/gradle.xml
+.idea/**/libraries
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn. Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# SonarLint plugin
+.idea/sonarlint/
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based Rest Client
+.idea/httpRequests
+
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+
+### PyCharm Patch ###
+# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
+
+# *.iml
+# modules.xml
+# .idea/misc.xml
+# *.ipr
+
+# Sonarlint plugin
+# https://plugins.jetbrains.com/plugin/7973-sonarlint
+.idea/**/sonarlint/
+
+# SonarQube Plugin
+# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
+.idea/**/sonarIssues.xml
+
+# Markdown Navigator plugin
+# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
+.idea/**/markdown-navigator.xml
+.idea/**/markdown-navigator-enh.xml
+.idea/**/markdown-navigator/
+
+# Cache file creation bug
+# See https://youtrack.jetbrains.com/issue/JBR-2257
+.idea/$CACHE_FILE$
+
+# CodeStream plugin
+# https://plugins.jetbrains.com/plugin/12206-codestream
+.idea/codestream.xml
+
+# Azure Toolkit for IntelliJ plugin
+# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
+.idea/**/azureSettings.xml
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+
+# ruff
+.ruff_cache/
+
+# LSP config files
+pyrightconfig.json
+.my_test_data
+my_test_data/
+# End of https://www.toptal.com/developers/gitignore/api/pycharm,python
\ No newline at end of file
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/jekyll-to-hugo.iml b/.idea/jekyll-to-hugo.iml
new file mode 100644
index 0000000..8428c11
--- /dev/null
+++ b/.idea/jekyll-to-hugo.iml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..8cf1f08
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..9bec349
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8158c13
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,6 @@
+# Format the code with black and sort the imports with isort
+format:
+ black . && isort -r .
+# Run the test suite with pytest
+test:
+ pytest .
\ No newline at end of file
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/converter/__init__.py b/app/converter/__init__.py
new file mode 100644
index 0000000..a679a33
--- /dev/null
+++ b/app/converter/__init__.py
@@ -0,0 +1,2 @@
+from app.converter.converter import Converter
+from app.converter.wordpress_markdown import WordpressMarkdownConverter
diff --git a/app/converter/converter.py b/app/converter/converter.py
new file mode 100644
index 0000000..0d04ab0
--- /dev/null
+++ b/app/converter/converter.py
@@ -0,0 +1,45 @@
+import os
+from pathlib import Path
+
+from app import utils
+from app.converter.wordpress_markdown import WordpressMarkdownConverter
+
+
+class Converter:
+    """
+    Convert Jekyll posts to Hugo posts
+    """
+
+    def __init__(self, jekyll_posts_path: str, hugo_posts_path: str):
+        """
+        Initializes the converter
+
+        Parameters
+        ----------
+        jekyll_posts_path : str
+            The path to the directory that holds the Jekyll posts
+        hugo_posts_path : str
+            The path to the directory the Hugo posts are written to
+        """
+        utils.guard_against_none_or_empty_str(jekyll_posts_path, "jekyll_posts_path")
+        utils.guard_against_none_or_empty_str(hugo_posts_path, "hugo_posts_path")
+        self._jekyll_posts_path = jekyll_posts_path
+        self._hugo_posts_path = hugo_posts_path
+        # The converter that converts the markdown
+        self.markdown_converter = WordpressMarkdownConverter()
+
+    def convert(self):
+        """
+        Converts every file directly inside the Jekyll directory (no recursion)
+
+        Raises
+        ------
+        FileNotFoundError
+            If the Jekyll posts path is not an existing directory
+        """
+        source_path = Path(self._jekyll_posts_path)
+        if not source_path.is_dir():
+            raise FileNotFoundError(f"Jekyll posts directory not found: {source_path}")
+        output_path = Path(self._hugo_posts_path)
+        for post in sorted(p for p in source_path.iterdir() if p.is_file()):
+            self.markdown_converter.convert_jekyll_to_hugo(post, output_path)
diff --git a/app/converter/wordpress_markdown.py b/app/converter/wordpress_markdown.py
new file mode 100644
index 0000000..76b366e
--- /dev/null
+++ b/app/converter/wordpress_markdown.py
@@ -0,0 +1,145 @@
+from pathlib import Path
+
+import yaml
+from bs4 import BeautifulSoup, Tag
+
+from app.utils import key_error_silence
+
+
+class WordpressMarkdownConverter:
+    """
+    Markdown converter that converts jekyll posts to hugo posts.
+    """
+
+    def fix_hugo_header(self, header: dict) -> dict:
+        """
+        Fix the Hugo header
+
+        Parameters
+        ----------
+        header : dict
+            The header to fix; it is changed in place
+
+        Returns
+        -------
+        dict
+            The fixed header
+        """
+        for key in (
+            "restapi_import_id",
+            "original_post_id",
+            "timeline_notification",
+            "wordads_ufa",
+        ):
+            header.pop(key, None)
+        # The guid is optional as well: only rewrite it when the post has one.
+        if "guid" in header:
+            header["guid"] = header["guid"].replace("http://localhost", "")
+        header["author"] = "Denis Nuțiu"
+        return header
+
+    def remove_html_tags(self, post_lines):
+        """
+        Strip HTML tags from the lines and keep their text
+
+        YouTube embeds become a Hugo ``youtube`` shortcode. Takes and returns a
+        list of str; every text node that is kept becomes its own list entry.
+        """
+        fixed_lines = []
+        for line in post_lines:
+            if line == "":
+                fixed_lines.append("\n")
+                continue
+            # Name the parser; bs4 otherwise picks whichever one is installed.
+            soup = BeautifulSoup(line, "html.parser")
+            for content in soup.contents:
+                if not isinstance(content, Tag):
+                    fixed_lines.append(str(content))
+                    continue
+                is_youtube = "is-provider-youtube" in content.attrs.get("class", [])
+                iframe = content.find_next("iframe") if is_youtube else None
+                if iframe is not None and iframe.get("src"):
+                    # The video id is the last path segment, minus the query string.
+                    video_id = iframe["src"].rsplit("/", 1)[-1].split("?")[0]
+                    fixed_lines.append(f"{{{{< youtube {video_id} >}}}}\n")
+                else:
+                    # Everything else: recurse so only the nested text survives.
+                    children = list(map(str, content.contents))
+                    fixed_lines.extend(self.remove_html_tags(children))
+        return fixed_lines
+
+    def convert_post_content(self, post_content: str) -> str:
+        """
+        Converts the post content
+
+        Parameters
+        ----------
+        post_content : str
+            The post content
+
+        Returns
+        -------
+        str
+            The converted post content
+        """
+        # fix link
+        post_content = post_content.replace("http://localhost/", "/")
+        post_content = post_content.replace(
+            "https://nuculabs.wordpress.com/", "https://nuculabs.dev/posts/"
+        )
+        # fix unknown tags
+        fixed_lines = self.remove_html_tags(post_content.split("\n"))
+        return "\n".join(fixed_lines)
+
+    def read_jekyll_post(self, path: Path):
+        """
+        Read a Jekyll post, decoded as UTF-8, from the specified path
+
+        Parameters
+        ----------
+        path : Path
+            The path to the Jekyll post
+        """
+        with open(path, "r", encoding="utf-8") as fh:
+            return fh.read()
+
+    def write_hugo_post(self, output_path, post_header: dict, post_content: str):
+        """
+        Write a Hugo post, encoded as UTF-8, to the specified path
+
+        Parameters
+        ----------
+        output_path : Path
+            The path to the Hugo post
+        post_header : dict
+            The post header
+        post_content : str
+            The post content
+        """
+        with open(output_path, "w", encoding="utf-8") as fo:
+            fo.writelines(["---\n", yaml.dump(post_header), "---\n"])
+            fo.write(post_content)
+
+    def convert_jekyll_to_hugo(self, jekyll_post_path: Path, hugo_post_output: Path):
+        """
+        Convert a Jekyll post to a Hugo post
+
+        Raises ValueError if the post has no ``---`` delimited front matter.
+
+        Parameters
+        ----------
+        jekyll_post_path : Path
+            The path to the Jekyll post
+        hugo_post_output : Path
+            The directory the Hugo post is written to
+        """
+        contents = self.read_jekyll_post(jekyll_post_path)
+        # Split once: text before the first ---, the YAML header, then the body.
+        parts = contents.split("---", 2)
+        if len(parts) < 3:
+            raise ValueError(f"{jekyll_post_path} has no front matter")
+        # An empty front matter loads as None; use an empty header instead.
+        fixed_header = self.fix_hugo_header(yaml.safe_load(parts[1]) or {})
+        fixed_post_content = self.convert_post_content(parts[2].lstrip())
+        output_file = hugo_post_output.joinpath(jekyll_post_path.name)
+        self.write_hugo_post(output_file, fixed_header, fixed_post_content)
diff --git a/app/tests/__init__.py b/app/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/tests/utils_test.py b/app/tests/utils_test.py
new file mode 100644
index 0000000..60294f3
--- /dev/null
+++ b/app/tests/utils_test.py
@@ -0,0 +1,35 @@
+import pytest
+
+from app.utils import key_error_silence, guard_against_none_or_empty_str
+
+
+def test_key_error_silence():
+    """key_error_silence swallows KeyError and nothing else."""
+    # A KeyError raised inside the block must not propagate
+    with key_error_silence():
+        raise KeyError
+    # Any other exception must still reach the caller
+    with pytest.raises(ValueError), key_error_silence():
+        raise ValueError
+
+
+@pytest.mark.parametrize(
+    "input_data",
+    [
+        "",  # empty string
+        None,  # missing value
+        1,  # int
+        True,  # bool
+        False,  # bool
+        {},  # empty dict
+        [],  # empty list
+        {"a": 1},  # non-empty dict
+    ],
+)
+def test_guard_against_none_or_empty_str(input_data):
+    with pytest.raises(ValueError):
+        guard_against_none_or_empty_str(input_data, "test")
+
+
+def test_guard_against_none_or_empty_str_happy():
+    guard_against_none_or_empty_str(value="a", name="test")  # must not raise
diff --git a/app/utils.py b/app/utils.py
new file mode 100644
index 0000000..8880fd4
--- /dev/null
+++ b/app/utils.py
@@ -0,0 +1,28 @@
+import contextlib
+
+
+@contextlib.contextmanager
+def key_error_silence():
+    """
+    Swallow any KeyError raised inside the ``with`` body.
+
+    Every other exception type propagates to the caller unchanged.
+    """
+    with contextlib.suppress(KeyError):
+        yield
+
+
+def guard_against_none_or_empty_str(value: str, name: str):
+    """
+    Validate that ``value`` is a non-empty string.
+
+    Parameters
+    ----------
+    value : str
+        The value to validate; anything that is not a ``str`` is rejected too.
+    name : str
+        The argument name used in the error message.
+    """
+    if isinstance(value, str) and value != "":
+        return
+    raise ValueError(f"{name} cannot be None or empty")
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..2fc1642
--- /dev/null
+++ b/main.py
@@ -0,0 +1,29 @@
+import logging
+import sys
+
+from app.converter import Converter
+
+
+def main():
+ # Logging configuration
+ logging.basicConfig(
+ format="%(asctime)s %(process)d %(levelname)s %(message)s",
+ level=logging.INFO,
+ datefmt="%Y-%m-%d %H:%M:%S",
+ )
+ logger = logging.getLogger(__name__)
+
+ if len(sys.argv) != 3:
+ logger.error(
+ "Usage: python main.py