add fix for code blocks
This commit is contained in:
parent
3c20d726cf
commit
efbe771799
5 changed files with 125 additions and 9 deletions
|
@ -4,7 +4,7 @@
|
||||||
<content url="file://$MODULE_DIR$">
|
<content url="file://$MODULE_DIR$">
|
||||||
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
||||||
</content>
|
</content>
|
||||||
<orderEntry type="jdk" jdkName="Python 3.11 (jekyll-to-hugo)" jdkType="Python SDK" />
|
<orderEntry type="inheritedJdk" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
</component>
|
</component>
|
||||||
</module>
|
</module>
|
|
@ -1,4 +1,4 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project version="4">
|
<project version="4">
|
||||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (jekyll-to-hugo)" project-jdk-type="Python SDK" />
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (jekyll-to-hugo)" project-jdk-type="Python SDK" />
|
||||||
</project>
|
</project>
|
|
@ -1,5 +1,34 @@
|
||||||
def handle_regex_heuristics(line: str) -> str:
|
import re
|
||||||
|
|
||||||
|
from app import utils
|
||||||
|
|
||||||
|
|
||||||
|
class RegexHeuristics:
|
||||||
"""
|
"""
|
||||||
Manipulates a line by using regex heuristics.
|
Applies regex-based heuristics to modify a line.
|
||||||
"""
|
"""
|
||||||
return line
|
|
||||||
|
def __init__(self, configurator):
    """
    Stores the configurator and registers the regex rules.

    :param configurator: configuration object; it is checked with
        utils.guard_against_none before being stored
        (NOTE(review): presumably raises when None - confirm in app.utils).
    """
    utils.guard_against_none(configurator, "configurator")

    self.configurator = configurator
    # Maps a regex pattern to the callback that rewrites a matching line.
    # The pattern is a raw string: "\/" inside a normal string literal is an
    # invalid escape sequence and triggers a warning on modern Python.
    self._rules = {
        r"^(</*pre.*?>)(?P<content>.*?)(<\/pre>)?$": self._remove_pre_tag,
    }
|
||||||
|
|
||||||
|
def _remove_pre_tag(self, match) -> str:
|
||||||
|
"""
|
||||||
|
Removes the pre tag from the match.
|
||||||
|
"""
|
||||||
|
return match.group("content")
|
||||||
|
|
||||||
|
def handle_regex_heuristics(self, line: str) -> str:
    """
    Manipulates a line by using regex heuristics.

    The registered rules are tried in insertion order. The callback of the
    first pattern that matches at the start of the line produces the result.

    :param line: the line to inspect.
    :return: the callback's result for the first matching rule, or the
        unchanged line when no rule matches or no rules are registered.
    """
    for regex, callback in self._rules.items():
        match = re.match(regex, line)
        if match:
            return callback(match)
    # Only give up after every rule has been tried, so later rules are not
    # skipped when an earlier one does not match.
    return line
|
||||||
|
|
|
@ -3,7 +3,7 @@ from bs4 import BeautifulSoup, Tag
|
||||||
|
|
||||||
from app import utils
|
from app import utils
|
||||||
from app.config import Configurator
|
from app.config import Configurator
|
||||||
from app.converter.regex_heuristics import handle_regex_heuristics
|
from app.converter.regex_heuristics import RegexHeuristics
|
||||||
from app.converter.tags_heuristics import convert_figure_tag_to_shortcode
|
from app.converter.tags_heuristics import convert_figure_tag_to_shortcode
|
||||||
from app.io.reader import IoReader
|
from app.io.reader import IoReader
|
||||||
from app.io.writer import IoWriter
|
from app.io.writer import IoWriter
|
||||||
|
@ -26,6 +26,7 @@ class WordpressMarkdownConverter:
|
||||||
"""
|
"""
|
||||||
utils.guard_against_none(configurator, "configurator")
|
utils.guard_against_none(configurator, "configurator")
|
||||||
self.configurator = configurator
|
self.configurator = configurator
|
||||||
|
self.regex_heuristics = RegexHeuristics(configurator)
|
||||||
|
|
||||||
def fix_header(self, header: dict) -> dict:
|
def fix_header(self, header: dict) -> dict:
|
||||||
"""
|
"""
|
||||||
|
@ -51,12 +52,60 @@ class WordpressMarkdownConverter:
|
||||||
header["author"] = self.configurator.converter_options.author_rewrite
|
header["author"] = self.configurator.converter_options.author_rewrite
|
||||||
return header
|
return header
|
||||||
|
|
||||||
|
def fix_pre_content(self, post_lines: list[str]) -> list[str]:
    """
    Fixes the pre content from the post lines when enclosed in backtick code blocks.

    For every pair of bare "```" fence lines, the line directly after the
    opening fence is passed through the regex heuristics. If the result is
    empty, that line is dropped. All other lines are copied unchanged, and a
    fence without a closing counterpart is copied as is.

    :param post_lines: the lines of the post.
    :return: a new list with the fixed lines; post_lines is not modified.
    """
    fence = "```"
    fixed_lines = []
    index = 0
    while index < len(post_lines):
        line = post_lines[index]
        if line != fence:
            fixed_lines.append(line)
            index += 1
            continue

        try:
            closing_index = post_lines.index(fence, index + 1)
        except ValueError:
            # Unclosed fence: keep it verbatim and move on.
            fixed_lines.append(line)
            index += 1
            continue

        block = post_lines[index : closing_index + 1]
        for offset, block_line in enumerate(block):
            if offset != 1:
                fixed_lines.append(block_line)
                continue
            # Only the first line after the opening fence may carry a
            # leftover pre tag; an empty result means the line held nothing
            # but the tag, so it is skipped.
            regex_line = self.regex_heuristics.handle_regex_heuristics(
                str(block_line)
            )
            if regex_line:
                fixed_lines.append(regex_line)
        # Resume right after the closing fence.
        index = closing_index + 1
    return fixed_lines
|
||||||
|
|
||||||
def fix_html_tags(self, post_lines):
|
def fix_html_tags(self, post_lines):
|
||||||
"""
|
"""
|
||||||
Fixes the html tags from the post lines.
|
Fixes the html tags from the post lines.
|
||||||
"""
|
"""
|
||||||
fixed_lines = []
|
fixed_lines = []
|
||||||
|
is_in_code_block = False
|
||||||
for line in post_lines:
|
for line in post_lines:
|
||||||
|
if line.startswith("```"):
|
||||||
|
if is_in_code_block:
|
||||||
|
is_in_code_block = False
|
||||||
|
else:
|
||||||
|
is_in_code_block = True
|
||||||
|
fixed_lines.append(line)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if is_in_code_block:
|
||||||
|
fixed_lines.append(line)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Treat empty string as a new line.
|
||||||
if line == "":
|
if line == "":
|
||||||
fixed_lines.append("\n")
|
fixed_lines.append("\n")
|
||||||
continue
|
continue
|
||||||
|
@ -66,10 +115,12 @@ class WordpressMarkdownConverter:
|
||||||
self._fix_html_tag(content, fixed_lines)
|
self._fix_html_tag(content, fixed_lines)
|
||||||
else:
|
else:
|
||||||
# Add the content.
|
# Add the content.
|
||||||
fixed_lines.append(handle_regex_heuristics(str(content)))
|
fixed_lines.append(
|
||||||
|
self.regex_heuristics.handle_regex_heuristics(str(content))
|
||||||
|
)
|
||||||
return fixed_lines
|
return fixed_lines
|
||||||
|
|
||||||
def _fix_html_tag(self, content, fixed_lines):
|
def _fix_html_tag(self, content: Tag, fixed_lines: list):
|
||||||
"""
|
"""
|
||||||
Fixes the html tag.
|
Fixes the html tag.
|
||||||
"""
|
"""
|
||||||
|
@ -108,7 +159,8 @@ class WordpressMarkdownConverter:
|
||||||
|
|
||||||
# fix unknown tags
|
# fix unknown tags
|
||||||
post_lines = post_content.split("\n")
|
post_lines = post_content.split("\n")
|
||||||
fixed_lines = self.fix_html_tags(post_lines)
|
fixed_lines = self.fix_pre_content(post_lines)
|
||||||
|
fixed_lines = self.fix_html_tags(fixed_lines)
|
||||||
|
|
||||||
return "\n".join(fixed_lines)
|
return "\n".join(fixed_lines)
|
||||||
|
|
||||||
|
|
|
@ -105,3 +105,38 @@ def test_fix_html_tags_stripe_tag(input_lines, expected_lines):
|
||||||
)
|
)
|
||||||
converter = WordpressMarkdownConverter(configurator)
|
converter = WordpressMarkdownConverter(configurator)
|
||||||
assert converter.fix_html_tags(input_lines) == expected_lines
|
assert converter.fix_html_tags(input_lines) == expected_lines
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "input_lines, expected_lines",
    [
        # No code fences: the lines are expected back untouched.
        (
            ["<pre>", "<p>Te<span>st</span></p>", "</pre>"],
            ["<pre>", "<p>Te<span>st</span></p>", "</pre>"],
        ),
        # Pre tag followed by content on the first line of the block.
        (
            [
                "```",
                '<pre class="wp-block-syntaxhighlighter-code"> <ItemGroup>',
                "```",
            ],
            ["```", " <ItemGroup>", "```"],
        ),
        # Pre tag alone on the first line of the block: the line disappears.
        (
            [
                "```",
                '<pre class="wp-block-syntaxhighlighter-code">',
                "<ItemGroup>",
                "```",
            ],
            ["```", "<ItemGroup>", "```"],
        ),
    ],
)
def test_fix_pre_content(input_lines, expected_lines):
    """
    Checks fix_pre_content against the parametrized input/expected pairs.
    """
    converter = WordpressMarkdownConverter(
        make_fake_configurator(
            "wordpress_markdown_converter",
            ConverterOptions(),
        )
    )
    actual_lines = converter.fix_pre_content(input_lines)
    assert actual_lines == expected_lines
|
||||||
|
|
Loading…
Reference in a new issue