jekyll-to-hugo/app/converter/regex_heuristics.py

54 lines
1.4 KiB
Python
Raw Permalink Normal View History

2023-06-05 08:52:50 +00:00
import dataclasses
2023-06-02 06:58:20 +00:00
import re
2023-06-05 08:52:50 +00:00
from typing import Callable
2023-06-02 06:58:20 +00:00
from app import utils
2023-06-05 08:52:50 +00:00
@dataclasses.dataclass
class RegexCallback:
    """
    Pairs a regex callback with the name used to look up its option.
    """

    # Receives the regex match object and returns the replacement line.
    callback: Callable[[re.Match], str]
    # Key under which the rule is enabled or disabled in the regex options.
    name: str
2023-06-02 08:12:24 +00:00
2023-06-02 06:58:20 +00:00
class RegexHeuristics:
    """
    Applies regex-based heuristics to rewrite a single line.

    Each rule maps a regex pattern to a RegexCallback. A rule is only tried
    when the option named after its callback is enabled in the
    ``regex_heuristics`` section of the converter options.
    """

    # Matches a line that starts with an opening or closing <pre ...> tag,
    # optionally followed by up to three backticks, and captures the rest of
    # the line (minus an optional trailing </pre>) as ``content``.
    # Kept as a raw string so that ``\/`` is passed to the regex engine
    # verbatim instead of being an invalid string escape sequence.
    _PRE_TAG_REGEX = r"^(</*pre.*?>)`{0,3}(?P<content>.*?)(<\/pre>)?$"

    def __init__(self, configurator):
        """
        Stores the configurator and builds the rule table.

        configurator: object exposing ``converter_options``; it is checked
        with ``utils.guard_against_none`` first (presumably raises when the
        value is None -- confirm against app.utils).
        """
        utils.guard_against_none(configurator, "configurator")
        self.configurator = configurator
        # Per-rule on/off switches, keyed by RegexCallback.name.
        self._regex_options = (
            self.configurator.converter_options.regex_heuristics.dict()
        )
        # Pattern string -> callback applied when the pattern matches.
        self._rules = {
            self._PRE_TAG_REGEX: RegexCallback(
                self._remove_pre_tag, "remove_pre_tag"
            ),
        }

    def _remove_pre_tag(self, match: re.Match) -> str:
        """
        Returns the ``content`` group of the match, i.e. the line without
        the surrounding pre tag(s) and leading backticks.
        """
        return match.group("content")

    def handle_regex_heuristics(self, line: str) -> str:
        """
        Manipulates a line by using regex heuristics.

        Returns the line unchanged when regex heuristics are disabled
        globally. Otherwise every enabled rule is tried in order, and each
        matching rule's callback result replaces the line before the next
        rule is tried.
        """
        if not self.configurator.converter_options.enable_regex_heuristics:
            return line

        for regex, callback in self._rules.items():
            # Rules missing from the options are treated as disabled.
            if not self._regex_options.get(callback.name, False):
                continue
            match = re.match(regex, line)
            if match:
                line = callback.callback(match)
        return line