add config option for RegexHeuristics
This commit is contained in:
parent
f4696b35b2
commit
ce06c201ee
3 changed files with 32 additions and 5 deletions
|
@ -13,6 +13,16 @@ def yaml_config_settings_source(settings: BaseSettings):
|
||||||
return yaml.safe_load(fh)
|
return yaml.safe_load(fh)
|
||||||
|
|
||||||
|
|
||||||
|
class RegexHeuristics(BaseModel):
|
||||||
|
"""
|
||||||
|
Regex heuristics options for modifying a line using regex rules.
|
||||||
|
|
||||||
|
True means option is enabled, False means option is disabled.
|
||||||
|
"""
|
||||||
|
|
||||||
|
remove_pre_tag: bool = True
|
||||||
|
|
||||||
|
|
||||||
class ConverterOptions(BaseModel):
|
class ConverterOptions(BaseModel):
|
||||||
"""
|
"""
|
||||||
Converter options.
|
Converter options.
|
||||||
|
@ -30,6 +40,8 @@ class ConverterOptions(BaseModel):
|
||||||
author_rewrite: str = ""
|
author_rewrite: str = ""
|
||||||
links_rewrite: list[dict] = []
|
links_rewrite: list[dict] = []
|
||||||
header_fields_drop: list[str] = []
|
header_fields_drop: list[str] = []
|
||||||
|
enable_regex_heuristics: bool = True
|
||||||
|
regex_heuristics: RegexHeuristics = RegexHeuristics()
|
||||||
|
|
||||||
|
|
||||||
class Configurator(BaseSettings):
|
class Configurator(BaseSettings):
|
||||||
|
|
|
@ -1,7 +1,10 @@
|
||||||
import re
|
import re
|
||||||
|
from collections import namedtuple
|
||||||
|
|
||||||
from app import utils
|
from app import utils
|
||||||
|
|
||||||
|
RegexCallback = namedtuple("RegexCallback", ["callback", "name"])
|
||||||
|
|
||||||
|
|
||||||
class RegexHeuristics:
|
class RegexHeuristics:
|
||||||
"""
|
"""
|
||||||
|
@ -12,11 +15,16 @@ class RegexHeuristics:
|
||||||
utils.guard_against_none(configurator, "configurator")
|
utils.guard_against_none(configurator, "configurator")
|
||||||
|
|
||||||
self.configurator = configurator
|
self.configurator = configurator
|
||||||
|
self._regex_options = (
|
||||||
|
self.configurator.converter_options.regex_heuristics.dict()
|
||||||
|
)
|
||||||
self._rules = {
|
self._rules = {
|
||||||
"^(</*pre.*?>)`{0,3}(?P<content>.*?)(<\/pre>)?$": self._remove_pre_tag,
|
"^(</*pre.*?>)`{0,3}(?P<content>.*?)(<\/pre>)?$": RegexCallback(
|
||||||
|
self._remove_pre_tag, "remove_pre_tag"
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _remove_pre_tag(self, match) -> str:
|
def _remove_pre_tag(self, match: re.Match) -> str:
|
||||||
"""
|
"""
|
||||||
Removes the pre tag from the match.
|
Removes the pre tag from the match.
|
||||||
"""
|
"""
|
||||||
|
@ -26,9 +34,15 @@ class RegexHeuristics:
|
||||||
"""
|
"""
|
||||||
Manipulates a line by using regex heuristics.
|
Manipulates a line by using regex heuristics.
|
||||||
"""
|
"""
|
||||||
|
if not self.configurator.converter_options.enable_regex_heuristics:
|
||||||
|
return line
|
||||||
|
|
||||||
for regex, callback in self._rules.items():
|
for regex, callback in self._rules.items():
|
||||||
|
option_enabled = self._regex_options.get(callback.name, False)
|
||||||
|
if not option_enabled:
|
||||||
|
continue
|
||||||
|
|
||||||
match = re.match(regex, line)
|
match = re.match(regex, line)
|
||||||
if match:
|
if match:
|
||||||
return callback(match)
|
line = callback.callback(match)
|
||||||
else:
|
|
||||||
return line
|
return line
|
||||||
|
|
|
@ -3,6 +3,7 @@ source_path: "/Users/dnutiu/PycharmProjects/jekyll-to-hugo/my_test_data/_posts"
|
||||||
output_path: "/Users/dnutiu/NucuLabsProjects/NucuLabsDevBlog/content/posts"
|
output_path: "/Users/dnutiu/NucuLabsProjects/NucuLabsDevBlog/content/posts"
|
||||||
converter: "wordpress_markdown_converter"
|
converter: "wordpress_markdown_converter"
|
||||||
converter_options:
|
converter_options:
|
||||||
|
enable_regex_heuristics: true
|
||||||
author_rewrite: "Denis Nuțiu"
|
author_rewrite: "Denis Nuțiu"
|
||||||
links_rewrite:
|
links_rewrite:
|
||||||
- source: "http://localhost/"
|
- source: "http://localhost/"
|
||||||
|
|
Loading…
Reference in a new issue