add config option for RegexHeuristics
This commit is contained in:
parent
f4696b35b2
commit
ce06c201ee
3 changed files with 32 additions and 5 deletions
|
@ -13,6 +13,16 @@ def yaml_config_settings_source(settings: BaseSettings):
|
|||
return yaml.safe_load(fh)
|
||||
|
||||
|
||||
class RegexHeuristics(BaseModel):
|
||||
"""
|
||||
Regex heuristics options that control how a line is modified using regex rules.
|
||||
|
||||
True means option is enabled, False means option is disabled.
|
||||
"""
|
||||
|
||||
remove_pre_tag: bool = True
|
||||
|
||||
|
||||
class ConverterOptions(BaseModel):
|
||||
"""
|
||||
Converter options.
|
||||
|
@ -30,6 +40,8 @@ class ConverterOptions(BaseModel):
|
|||
author_rewrite: str = ""
|
||||
links_rewrite: list[dict] = []
|
||||
header_fields_drop: list[str] = []
|
||||
enable_regex_heuristics: bool = True
|
||||
regex_heuristics: RegexHeuristics = RegexHeuristics()
|
||||
|
||||
|
||||
class Configurator(BaseSettings):
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
import re
|
||||
from collections import namedtuple
|
||||
|
||||
from app import utils
|
||||
|
||||
RegexCallback = namedtuple("RegexCallback", ["callback", "name"])
|
||||
|
||||
|
||||
class RegexHeuristics:
|
||||
"""
|
||||
|
@ -12,11 +15,16 @@ class RegexHeuristics:
|
|||
utils.guard_against_none(configurator, "configurator")
|
||||
|
||||
self.configurator = configurator
|
||||
self._regex_options = (
|
||||
self.configurator.converter_options.regex_heuristics.dict()
|
||||
)
|
||||
self._rules = {
|
||||
"^(</*pre.*?>)`{0,3}(?P<content>.*?)(<\/pre>)?$": self._remove_pre_tag,
|
||||
"^(</*pre.*?>)`{0,3}(?P<content>.*?)(<\/pre>)?$": RegexCallback(
|
||||
self._remove_pre_tag, "remove_pre_tag"
|
||||
),
|
||||
}
|
||||
|
||||
def _remove_pre_tag(self, match) -> str:
|
||||
def _remove_pre_tag(self, match: re.Match) -> str:
|
||||
"""
|
||||
Removes the pre tag from the match.
|
||||
"""
|
||||
|
@ -26,9 +34,15 @@ class RegexHeuristics:
|
|||
"""
|
||||
Manipulates a line by using regex heuristics.
|
||||
"""
|
||||
if not self.configurator.converter_options.enable_regex_heuristics:
|
||||
return line
|
||||
|
||||
for regex, callback in self._rules.items():
|
||||
option_enabled = self._regex_options.get(callback.name, False)
|
||||
if not option_enabled:
|
||||
continue
|
||||
|
||||
match = re.match(regex, line)
|
||||
if match:
|
||||
return callback(match)
|
||||
else:
|
||||
return line
|
||||
line = callback.callback(match)
|
||||
return line
|
||||
|
|
|
@ -3,6 +3,7 @@ source_path: "/Users/dnutiu/PycharmProjects/jekyll-to-hugo/my_test_data/_posts"
|
|||
output_path: "/Users/dnutiu/NucuLabsProjects/NucuLabsDevBlog/content/posts"
|
||||
converter: "wordpress_markdown_converter"
|
||||
converter_options:
|
||||
enable_regex_heuristics: true
|
||||
author_rewrite: "Denis Nuțiu"
|
||||
links_rewrite:
|
||||
- source: "http://localhost/"
|
||||
|
|
Loading…
Reference in a new issue