2023-06-02 06:58:20 +00:00
|
|
|
import re
|
2023-06-02 08:12:24 +00:00
|
|
|
from collections import namedtuple
|
2023-06-02 06:58:20 +00:00
|
|
|
|
|
|
|
from app import utils
|
|
|
|
|
2023-06-02 08:12:24 +00:00
|
|
|
# Pairs a rule's handler (``callback``) with the option key (``name``) that
# RegexHeuristics looks up to decide whether the rule is enabled.
RegexCallback = namedtuple(
    "RegexCallback",
    ("callback", "name"),
)
|
|
|
|
|
2023-06-02 06:58:20 +00:00
|
|
|
|
|
|
|
class RegexHeuristics:
    """
    Regex heuristics class for modifying a line using regex rules.

    Each rule maps a regex pattern to a ``RegexCallback`` that holds the
    handler to run on a match and the option name that toggles the rule.
    """

    # Raw string: ``\/`` must reach the regex engine unchanged, and in a
    # non-raw literal it is an invalid string escape that Python warns about.
    _PRE_TAG_PATTERN = r"^(</*pre.*?>)`{0,3}(?P<content>.*?)(<\/pre>)?$"

    def __init__(self, configurator):
        """
        Stores the configurator and builds the rule table.

        :param configurator: object exposing ``converter_options``; it is
            first passed to ``utils.guard_against_none``.
        """
        utils.guard_against_none(configurator, "configurator")

        self.configurator = configurator
        # Snapshot of the per-rule switches, keyed by rule name.
        self._regex_options = (
            self.configurator.converter_options.regex_heuristics.dict()
        )
        self._rules = {
            self._PRE_TAG_PATTERN: RegexCallback(
                self._remove_pre_tag, "remove_pre_tag"
            ),
        }

    def _remove_pre_tag(self, match: re.Match) -> str:
        """
        Removes the pre tag from the match.

        :param match: a match that provides a ``content`` named group.
        :return: the text captured by the ``content`` group.
        """
        return match.group("content")

    def handle_regex_heuristics(self, line: str) -> str:
        """
        Manipulates a line by using regex heuristics.

        The line is returned untouched when ``enable_regex_heuristics`` is
        falsy. Otherwise every rule whose option is enabled is tried in turn,
        and each matching rule rewrites the line before the next one runs.

        :param line: the line to process.
        :return: the line after all enabled, matching rules were applied.
        """
        if not self.configurator.converter_options.enable_regex_heuristics:
            return line

        for regex, callback in self._rules.items():
            # Rules missing from the options mapping count as disabled.
            if not self._regex_options.get(callback.name, False):
                continue

            match = re.match(regex, line)
            if match:
                line = callback.callback(match)

        return line
|