add comments

This commit is contained in:
Denis-Cosmin NUTIU 2023-06-02 10:02:14 +03:00
parent efbe771799
commit 7c11c105c5

View file

@ -93,6 +93,8 @@ class WordpressMarkdownConverter:
fixed_lines = [] fixed_lines = []
is_in_code_block = False is_in_code_block = False
for line in post_lines: for line in post_lines:
# A line that starts with ``` is a code fence and toggles code block mode:
# it enters the mode when outside a code block and leaves it when inside one.
if line.startswith("```"): if line.startswith("```"):
if is_in_code_block: if is_in_code_block:
is_in_code_block = False is_in_code_block = False
@ -101,6 +103,7 @@ class WordpressMarkdownConverter:
fixed_lines.append(line) fixed_lines.append(line)
continue continue
# Skip modifying the line if it is in code block mode.
if is_in_code_block: if is_in_code_block:
fixed_lines.append(line) fixed_lines.append(line)
continue continue
@ -109,12 +112,15 @@ class WordpressMarkdownConverter:
if line == "": if line == "":
fixed_lines.append("\n") fixed_lines.append("\n")
continue continue
# Parse the line as HTML so that tags and plain text can be handled separately.
soup = BeautifulSoup(line, features="html.parser") soup = BeautifulSoup(line, features="html.parser")
for content in soup.contents: for content in soup.contents:
if isinstance(content, Tag): if isinstance(content, Tag):
# Found an HTML tag; let _fix_html_tag handle it.
self._fix_html_tag(content, fixed_lines) self._fix_html_tag(content, fixed_lines)
else: else:
# Add the content. # found text, add it to the fixed lines
fixed_lines.append( fixed_lines.append(
self.regex_heuristics.handle_regex_heuristics(str(content)) self.regex_heuristics.handle_regex_heuristics(str(content))
) )
@ -124,13 +130,15 @@ class WordpressMarkdownConverter:
""" """
Fixes the html tag. Fixes the html tag.
""" """
# Check if it is a YouTube video and add it as a shortcode. # Check if tag is a YouTube video and add it as a shortcode.
if "is-provider-youtube" in content.attrs.get("class", []): if "is-provider-youtube" in content.attrs.get("class", []):
convert_figure_tag_to_shortcode(content, fixed_lines) convert_figure_tag_to_shortcode(content, fixed_lines)
# Fix unknown tags. # Fix unknown tags by removing the tag and only add inner content.
# content.contents is a list of all the inner content of the tag.
else: else:
tags = list(map(str, content.contents)) tags = list(map(str, content.contents))
if tags: if tags:
# Recursively fix the inner content of the tag.
fixed_tags = self.fix_html_tags(tags) fixed_tags = self.fix_html_tags(tags)
if fixed_tags: if fixed_tags:
fixed_lines.append("".join(fixed_tags)) fixed_lines.append("".join(fixed_tags))
@ -149,7 +157,7 @@ class WordpressMarkdownConverter:
str str
The converted post content The converted post content
""" """
# fix link # fix links inside post content with simple replace
for task in self.configurator.converter_options.links_rewrite: for task in self.configurator.converter_options.links_rewrite:
source_link = task.get("source") source_link = task.get("source")
target_link = task.get("target") target_link = task.get("target")