Skip to content

Commit

Permalink
Fix issue where HTML was allowed in headings
Browse files Browse the repository at this point in the history
Fixes #114
  • Loading branch information
baynezy committed Aug 2, 2024
1 parent 436d849 commit 7b76c5a
Show file tree
Hide file tree
Showing 8 changed files with 365 additions and 140 deletions.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed

- Fixed issue where HTML was allowed in headings

## [6.2.3.6] - 2024-08-02

### Fixed
Expand Down
19 changes: 9 additions & 10 deletions src/Html2Markdown/Replacement/AnchorTagReplacer.cs
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
namespace Html2Markdown.Replacement
namespace Html2Markdown.Replacement;

/// <summary>
/// Replaces an anchor tag with the link text and the link URL in Markdown format.
/// </summary>
public class AnchorTagReplacer : CustomReplacer
{
/// <summary>
/// Replaces an anchor tag with the link text and the link URL in Markdown format.
/// Initializes a new instance of the <see cref="AnchorTagReplacer"/> class.
/// Sets the custom action to replace anchor tags with Markdown formatted links.
/// </summary>
public class AnchorTagReplacer : CustomReplacer
public AnchorTagReplacer()
{
/// <summary>
/// Initializes a new instance of the <see cref="AnchorTagReplacer"/> class.
/// Sets the custom action to replace anchor tags with Markdown formatted links.
/// </summary>
public AnchorTagReplacer()
{
CustomAction = HtmlParser.ReplaceAnchor;
}
}
}
14 changes: 2 additions & 12 deletions src/Html2Markdown/Replacement/HeadingTagReplacer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
/// <summary>
/// Replaces the HTML heading tag with its Markdown equivalent.
/// </summary>
public class HeadingTagReplacer : CompositeReplacer
public class HeadingTagReplacer : CustomReplacer
{
/// <summary>
/// Initializes a new instance of the <see cref="HeadingTagReplacer"/> class.
Expand All @@ -14,16 +14,6 @@ public HeadingTagReplacer(Heading heading)
{
var headingNumber = (int) heading;

AddReplacer(new PatternReplacer
{
Pattern = $"</h{headingNumber}>",
Replacement = Environment.NewLine + Environment.NewLine
});

AddReplacer(new PatternReplacer
{
Pattern = $"<h{headingNumber}[^>]*>",
Replacement = Environment.NewLine + Environment.NewLine + new string('#', headingNumber) + " "
});
CustomAction = html => HtmlParser.ReplaceHeading(html, headingNumber);
}
}
23 changes: 23 additions & 0 deletions src/Html2Markdown/Replacement/HtmlParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,27 @@ internal static string ReplaceEntities(string html)
}

internal static string ReplaceParagraph(string html) => ReplaceParagraph(html, false);

/// <summary>
/// Rewrites every heading element of the requested level as a Markdown heading.
/// </summary>
/// <param name="html">The HTML to process.</param>
/// <param name="headingNumber">
/// The heading level. It selects the <c>h{headingNumber}</c> elements and is also
/// the number of <c>#</c> characters written in front of the heading text.
/// </param>
/// <returns>
/// <paramref name="html"/> unchanged when no matching heading element is found;
/// otherwise the outer HTML of the parsed document after each matching heading has
/// been handed to <c>ReplaceNode</c> with its Markdown replacement.
/// </returns>
internal static string ReplaceHeading(string html, int headingNumber)
{
    var document = GetHtmlDocument(html);
    var headings = document.DocumentNode.SelectNodes($"//h{headingNumber}");

    // No heading of this level: return the caller's string as-is rather than
    // the re-serialized document.
    if (headings is null)
    {
        return html;
    }

    // Both pieces are the same for every heading, so build them once.
    var hashes = new string('#', headingNumber);
    var blankLine = Environment.NewLine + Environment.NewLine;

    // Iterate over a copy of the node list, since each node is replaced as we go.
    foreach (var heading in headings.ToList())
    {
        // Drop anything that looks like a tag from the heading's inner markup.
        var tagFree = HtmlTags().Replace(heading.InnerHtml, "");

        // Apply the Spaces() pattern first, then turn platform newlines into
        // single spaces so the heading ends up on one line.
        var singleLine = Spaces().Replace(tagFree, " ").Replace(Environment.NewLine, " ");

        ReplaceNode(heading, $"{blankLine}{hashes} {singleLine}{blankLine}");
    }

    return document.DocumentNode.OuterHtml;
}

private static string ReplaceParagraph(string html, bool nestedIntoList)
{
Expand Down Expand Up @@ -346,4 +367,6 @@ private static void ReplaceNode(HtmlNode node, string markdown)
private static partial Regex FinalCrLf();
[GeneratedRegex(@"<\s*?/?\s*?br\s*?>")]
private static partial Regex BreakTag();
[GeneratedRegex(@"<[^>]+>")]
private static partial Regex HtmlTags();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
## Support

This project will currently convert the following HTML tags:-

* `<a>`
* `<strong>`
* `<b>`
* `<em>`
* `<i>`
* `<br>`
* `<code>`
* `<h1>`
* `<h2>`
* `<h3>`
* `<h4>`
* `<h5>`
* `<h6>`
* `<blockquote>`
* `<img>`
* `<hr>`
* `<p>`
* `<pre>`
* `<ul>`
* `<ol>`

## [](https://github.com/baynezy/Html2Markdown#installing-via-nuget)Installing via NuGet

[![NuGet version](https://camo.githubusercontent.com/2ee778ef534fdd413d5055d3202813398f39235a3d60b13974d43bc1bf1523a1/68747470733a2f2f62616467652e667572792e696f2f6e752f48746d6c324d61726b646f776e2e737667)](http://badge.fury.io/nu/Html2Markdown)
<div class="highlight highlight-source-powershell position-relative" style="box-sizing: border-box; position: relative !important; margin-bottom: 16px; color: rgb(201, 209, 217); font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji"; font-size: 16px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(13, 17, 23); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial;">

<span class="pl-c1" style="box-sizing: border-box; color: var(--color-prettylights-syntax-constant);">Install-Package</span> Html2Markdown
</div>

## [](https://github.com/baynezy/Html2Markdown#usage)Usage

### [](https://github.com/baynezy/Html2Markdown#strings)Strings

<div class="highlight highlight-source-cs position-relative" style="box-sizing: border-box; position: relative !important; margin-bottom: 16px; color: rgb(201, 209, 217); font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji"; font-size: 16px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(13, 17, 23); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial;">

<span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">var</span> <span class="pl-en" style="box-sizing: border-box; color: var(--color-prettylights-syntax-entity);">html</span> <span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">=</span> <span class="pl-s" style="box-sizing: border-box; color: var(--color-prettylights-syntax-string);"><span class="pl-pds" style="box-sizing: border-box; color: var(--color-prettylights-syntax-string);">"</span>Something to <strong>convert</strong><span class="pl-pds" style="box-sizing: border-box; color: var(--color-prettylights-syntax-string);">"</span></span>;
<span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">var</span> <span class="pl-en" style="box-sizing: border-box; color: var(--color-prettylights-syntax-entity);">converter</span> <span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">=</span> <span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">new</span> <span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">Converter</span>();
<span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">var</span> <span class="pl-en" style="box-sizing: border-box; color: var(--color-prettylights-syntax-entity);">markdown</span> <span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">=</span> <span class="pl-smi" style="box-sizing: border-box; color: var(--color-prettylights-syntax-storage-modifier-import);">converter</span>.<span class="pl-en" style="box-sizing: border-box; color: var(--color-prettylights-syntax-entity);">Convert</span>(<span class="pl-smi" style="box-sizing: border-box; color: var(--color-prettylights-syntax-storage-modifier-import);">html</span>);
</div>
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
## Support

This project will currently convert the following HTML tags:-

* `<a>`
* `<strong>`
* `<b>`
* `<em>`
* `<i>`
* `<br>`
* `<code>`
* `<h1>`
* `<h2>`
* `<h3>`
* `<h4>`
* `<h5>`
* `<h6>`
* `<blockquote>`
* `<img>`
* `<hr>`
* `<p>`
* `<pre>`
* `<ul>`
* `<ol>`

## [](https://github.com/baynezy/Html2Markdown#installing-via-nuget)Installing via NuGet

[![NuGet version](https://camo.githubusercontent.com/2ee778ef534fdd413d5055d3202813398f39235a3d60b13974d43bc1bf1523a1/68747470733a2f2f62616467652e667572792e696f2f6e752f48746d6c324d61726b646f776e2e737667)](http://badge.fury.io/nu/Html2Markdown)
<div class="highlight highlight-source-powershell position-relative" style="box-sizing: border-box; position: relative !important; margin-bottom: 16px; color: rgb(201, 209, 217); font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji"; font-size: 16px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(13, 17, 23); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial;">

<span class="pl-c1" style="box-sizing: border-box; color: var(--color-prettylights-syntax-constant);">Install-Package</span> Html2Markdown
</div>

## [](https://github.com/baynezy/Html2Markdown#usage)Usage

### [](https://github.com/baynezy/Html2Markdown#strings)Strings

<div class="highlight highlight-source-cs position-relative" style="box-sizing: border-box; position: relative !important; margin-bottom: 16px; color: rgb(201, 209, 217); font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji"; font-size: 16px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(13, 17, 23); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial;">

<span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">var</span> <span class="pl-en" style="box-sizing: border-box; color: var(--color-prettylights-syntax-entity);">html</span> <span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">=</span> <span class="pl-s" style="box-sizing: border-box; color: var(--color-prettylights-syntax-string);"><span class="pl-pds" style="box-sizing: border-box; color: var(--color-prettylights-syntax-string);">"</span>Something to <strong>convert</strong><span class="pl-pds" style="box-sizing: border-box; color: var(--color-prettylights-syntax-string);">"</span></span>;
<span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">var</span> <span class="pl-en" style="box-sizing: border-box; color: var(--color-prettylights-syntax-entity);">converter</span> <span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">=</span> <span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">new</span> <span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">Converter</span>();
<span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">var</span> <span class="pl-en" style="box-sizing: border-box; color: var(--color-prettylights-syntax-entity);">markdown</span> <span class="pl-k" style="box-sizing: border-box; color: var(--color-prettylights-syntax-keyword);">=</span> <span class="pl-smi" style="box-sizing: border-box; color: var(--color-prettylights-syntax-storage-modifier-import);">converter</span>.<span class="pl-en" style="box-sizing: border-box; color: var(--color-prettylights-syntax-entity);">Convert</span>(<span class="pl-smi" style="box-sizing: border-box; color: var(--color-prettylights-syntax-storage-modifier-import);">html</span>);
</div>
Loading

0 comments on commit 7b76c5a

Please sign in to comment.