Skip to content

Commit

Permalink
Merge pull request #363 from baynezy/feature/issue-362-refactor
Browse files Browse the repository at this point in the history
Create concrete implementations of replacers
  • Loading branch information
baynezy authored Dec 21, 2023
2 parents d51e4cc + a7c5b44 commit f68630e
Show file tree
Hide file tree
Showing 39 changed files with 453 additions and 268 deletions.
8 changes: 4 additions & 4 deletions src/Html2Markdown/Converter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,17 @@ public partial class Converter
/// </summary>
public Converter()
{
_replacers = new Markdown().Replacers();
}
_replacers = new Markdown().Replacers();
}

/// <summary>
/// Create a converter with a custom conversion scheme
/// </summary>
/// <param name="scheme">Conversion scheme to control conversion</param>
public Converter(IScheme scheme)
{
_replacers = scheme.Replacers();
}
_replacers = scheme.Replacers();
}

/// <summary>
/// Converts Html contained in a file to a Markdown string
Expand Down
11 changes: 11 additions & 0 deletions src/Html2Markdown/Replacement/AnchorTagReplacer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
namespace Html2Markdown.Replacement;
/// <summary>
/// Replacer for HTML anchor tags: the actual conversion to a Markdown link
/// (link text plus URL) is delegated to <c>HtmlParser.ReplaceAnchor</c>.
/// </summary>
public class AnchorTagReplacer : CustomReplacer
{
    /// <summary>
    /// Creates the replacer and binds its custom action to <c>HtmlParser.ReplaceAnchor</c>.
    /// </summary>
    public AnchorTagReplacer() => CustomAction = HtmlParser.ReplaceAnchor;
}
11 changes: 11 additions & 0 deletions src/Html2Markdown/Replacement/BlockquoteTagReplacer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
namespace Html2Markdown.Replacement;
/// <summary>
/// Replaces the HTML blockquote tag with its Markdown equivalent.
/// The conversion itself is delegated to <c>HtmlParser.ReplaceBlockquote</c>.
/// </summary>
public class BlockquoteTagReplacer : CustomReplacer
{
    /// <summary>
    /// Creates the replacer and binds its custom action to <c>HtmlParser.ReplaceBlockquote</c>.
    /// </summary>
    public BlockquoteTagReplacer()
    {
        CustomAction = HtmlParser.ReplaceBlockquote;
    }
}
12 changes: 12 additions & 0 deletions src/Html2Markdown/Replacement/BodyTagReplacer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
namespace Html2Markdown.Replacement;
/// <summary>
/// Strips opening and closing body tags from the HTML, leaving their content in place.
/// </summary>
public class BodyTagReplacer : PatternReplacer
{
    /// <summary>
    /// Configures the pattern for <c>&lt;body ...&gt;</c> / <c>&lt;/body&gt;</c> and an empty replacement.
    /// </summary>
    public BodyTagReplacer()
    {
        const string bodyTagPattern = "</?body[^>]*>";

        Pattern = bodyTagPattern;
        Replacement = string.Empty;
    }
}
12 changes: 12 additions & 0 deletions src/Html2Markdown/Replacement/BreakTagReplacer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
namespace Html2Markdown.Replacement;
/// <summary>
/// Turns HTML break tags into a Markdown line break
/// (trailing whitespace followed by the platform newline).
/// </summary>
public class BreakTagReplacer : PatternReplacer
{
    /// <summary>
    /// Configures the pattern for <c>&lt;br ...&gt;</c> and its line-break replacement.
    /// </summary>
    public BreakTagReplacer()
    {
        const string breakTagPattern = "<br[^>]*>";

        // NOTE(review): Markdown hard line breaks conventionally need two trailing
        // spaces; confirm the whitespace inside this literal matches the repository copy.
        var lineBreak = " " + Environment.NewLine;

        Pattern = breakTagPattern;
        Replacement = lineBreak;
    }
}
16 changes: 16 additions & 0 deletions src/Html2Markdown/Replacement/CodeTagReplacer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
namespace Html2Markdown.Replacement;
/// <summary>
/// Replacer for HTML code tags; the conversion to Markdown is delegated to
/// <c>HtmlParser.ReplaceCode</c>.
/// </summary>
public class CodeTagReplacer : CustomReplacer
{
    /// <summary>
    /// Creates a replacer that calls the single-argument <c>HtmlParser.ReplaceCode</c>.
    /// </summary>
    public CodeTagReplacer() => CustomAction = HtmlParser.ReplaceCode;

    /// <summary>
    /// Creates a replacer that forwards an explicit syntax-highlighting flag to
    /// <c>HtmlParser.ReplaceCode</c>.
    /// </summary>
    /// <param name="supportSyntaxHighlighting">Value passed through as the second argument of <c>HtmlParser.ReplaceCode</c>.</param>
    public CodeTagReplacer(bool supportSyntaxHighlighting) =>
        CustomAction = markup => HtmlParser.ReplaceCode(markup, supportSyntaxHighlighting);
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,41 +7,16 @@ namespace Html2Markdown.Replacement.CommonMark;
public class CommonMarkLayoutReplacementGroup : IReplacementGroup
{
private readonly IList<IReplacer> _replacements = new List<IReplacer> {
new PatternReplacer
{
Pattern = "<hr[^>]*>",
Replacement = Environment.NewLine + Environment.NewLine + "* * *" + Environment.NewLine
},
new CustomReplacer
{
CustomAction = ReplaceCode
},
new CustomReplacer
{
CustomAction = HtmlParser.ReplacePre
},
new CustomReplacer
{
CustomAction = HtmlParser.ReplaceParagraph
},
new PatternReplacer
{
Pattern = "<br[^>]*>",
Replacement = " " + Environment.NewLine
},
new CustomReplacer
{
CustomAction = HtmlParser.ReplaceBlockquote
}
new HorizontalRuleTagReplacer(),
new CodeTagReplacer(true),
new PreTagReplacer(),
new ParagraphTagReplacer(),
new BreakTagReplacer(),
new BlockquoteTagReplacer()
};

private static string ReplaceCode(string html)
{
return HtmlParser.ReplaceCode(html, true);
}

public IEnumerable<IReplacer> Replacers()
{
return _replacements;
}
return _replacements;
}
}
18 changes: 18 additions & 0 deletions src/Html2Markdown/Replacement/CompositeReplacer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
namespace Html2Markdown.Replacement;
/// <summary>
/// Base class for a replacer made up of several child replacers that are
/// applied to the HTML one after another, in the order they were added.
/// </summary>
public abstract class CompositeReplacer : IReplacer
{
    private readonly IList<IReplacer> _steps = new List<IReplacer>();

    /// <summary>
    /// Appends a child replacer to the end of the chain.
    /// </summary>
    /// <param name="replacer">The replacer to run after all previously added ones.</param>
    protected void AddReplacer(IReplacer replacer) => _steps.Add(replacer);

    /// <summary>
    /// Runs every child replacer in insertion order, feeding each one the output of the previous.
    /// </summary>
    /// <param name="html">The HTML to transform.</param>
    /// <returns>The result of the last child replacer, or <paramref name="html"/> unchanged when none were added.</returns>
    public string Replace(string html)
    {
        var output = html;

        foreach (var step in _steps)
        {
            output = step.Replace(output);
        }

        return output;
    }
}
8 changes: 5 additions & 3 deletions src/Html2Markdown/Replacement/CustomReplacer.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
namespace Html2Markdown.Replacement;

internal class CustomReplacer : IReplacer
/// <summary>
/// Allows custom replacement of HTML tags utilising external functions.
/// </summary>
public class CustomReplacer : IReplacer
{
public string Replace(string html)
{
return CustomAction.Invoke(html);
}

public Func<string, string> CustomAction { get; init; }
protected Func<string, string> CustomAction { get; init; }
}
12 changes: 12 additions & 0 deletions src/Html2Markdown/Replacement/DocTypeReplacer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
namespace Html2Markdown.Replacement;
/// <summary>
/// Strips the <c>&lt;!DOCTYPE ...&gt;</c> declaration from the HTML.
/// </summary>
public class DocTypeReplacer : PatternReplacer
{
    /// <summary>
    /// Configures the DOCTYPE pattern and an empty replacement.
    /// </summary>
    public DocTypeReplacer()
    {
        const string docTypePattern = "<!DOCTYPE[^>]*>";

        Pattern = docTypePattern;
        Replacement = string.Empty;
    }
}
33 changes: 33 additions & 0 deletions src/Html2Markdown/Replacement/EmphasisTagReplacer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
namespace Html2Markdown.Replacement;
/// <summary>
/// Converts HTML emphasis tags (<c>em</c> and <c>i</c>) into Markdown asterisks.
/// </summary>
public class EmphasisTagReplacer : CompositeReplacer
{
    /// <summary>
    /// Registers four pattern replacements, in this order: opening tag followed by
    /// whitespace, bare opening tag, closing tag preceded by whitespace, bare closing tag.
    /// The whitespace-aware rules run first so the whitespace ends up outside the asterisk.
    /// </summary>
    public EmphasisTagReplacer()
    {
        // Opening tags: whitespace after the tag is moved in front of the asterisk.
        Register(@"<(?:em|i)>(\s+)", " *");
        Register("<(?:em|i)>", "*");

        // Closing tags: whitespace before the tag is moved behind the asterisk.
        Register(@"(\s+)</(em|i)>", "* ");
        Register("</(em|i)>", "*");

        void Register(string pattern, string replacement)
        {
            AddReplacer(new PatternReplacer { Pattern = pattern, Replacement = replacement });
        }
    }
}
9 changes: 3 additions & 6 deletions src/Html2Markdown/Replacement/EntitiesReplacementGroup.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,11 @@ namespace Html2Markdown.Replacement;
public class EntitiesReplacementGroup : IReplacementGroup
{
private readonly IList<IReplacer> _replacements = new List<IReplacer> {
new CustomReplacer
{
CustomAction = HtmlParser.ReplaceEntities
}
new HtmlEntitiesReplacer()
};

public IEnumerable<IReplacer> Replacers()
{
return _replacements;
}
return _replacements;
}
}
12 changes: 12 additions & 0 deletions src/Html2Markdown/Replacement/HeadTagReplacer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
namespace Html2Markdown.Replacement;
/// <summary>
/// Removes the opening and closing head tags, leaving their content in place.
/// </summary>
public class HeadTagReplacer : PatternReplacer
{
    /// <summary>
    /// Configures the pattern for <c>&lt;head ...&gt;</c> / <c>&lt;/head&gt;</c> and an empty replacement.
    /// </summary>
    public HeadTagReplacer()
    {
        // NOTE(review): assuming Pattern is applied as a regular expression, the
        // "[^>]*" directly after "head" also lets this match <header ...> and
        // </header> - confirm whether that is intended before tightening it.
        Pattern = "</?head[^>]*>";
        Replacement = "";
    }
}
13 changes: 13 additions & 0 deletions src/Html2Markdown/Replacement/Heading.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
namespace Html2Markdown.Replacement;
/// <summary>
/// HTML heading tags H1 to H6. Each member's numeric value is its heading level.
/// </summary>
public enum Heading
{
/// <summary>Heading level 1.</summary>
H1 = 1,
/// <summary>Heading level 2.</summary>
H2 = 2,
/// <summary>Heading level 3.</summary>
H3 = 3,
/// <summary>Heading level 4.</summary>
H4 = 4,
/// <summary>Heading level 5.</summary>
H5 = 5,
/// <summary>Heading level 6.</summary>
H6 = 6
}
45 changes: 8 additions & 37 deletions src/Html2Markdown/Replacement/HeadingReplacementGroup.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,45 +6,16 @@ namespace Html2Markdown.Replacement;
public class HeadingReplacementGroup : IReplacementGroup
{
private readonly IList<IReplacer> _replacements = new List<IReplacer> {
new PatternReplacer
{
Pattern = "</h[1-6]>",
Replacement = Environment.NewLine + Environment.NewLine
},
new PatternReplacer
{
Pattern = "<h1[^>]*>",
Replacement = Environment.NewLine + Environment.NewLine + "# "
},
new PatternReplacer
{
Pattern = "<h2[^>]*>",
Replacement = Environment.NewLine + Environment.NewLine + "## "
},
new PatternReplacer
{
Pattern = "<h3[^>]*>",
Replacement = Environment.NewLine + Environment.NewLine + "### "
},
new PatternReplacer
{
Pattern = "<h4[^>]*>",
Replacement = Environment.NewLine + Environment.NewLine + "#### "
},
new PatternReplacer
{
Pattern = "<h5[^>]*>",
Replacement = Environment.NewLine + Environment.NewLine + "##### "
},
new PatternReplacer
{
Pattern = "<h6[^>]*>",
Replacement = Environment.NewLine + Environment.NewLine + "###### "
}
new HeadingTagReplacer(Heading.H1),
new HeadingTagReplacer(Heading.H2),
new HeadingTagReplacer(Heading.H3),
new HeadingTagReplacer(Heading.H4),
new HeadingTagReplacer(Heading.H5),
new HeadingTagReplacer(Heading.H6)
};

public IEnumerable<IReplacer> Replacers()
{
return _replacements;
}
return _replacements;
}
}
22 changes: 22 additions & 0 deletions src/Html2Markdown/Replacement/HeadingTagReplacer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
namespace Html2Markdown.Replacement;
/// <summary>
/// Converts one level of HTML heading tag into its Markdown form: a blank line
/// followed by one <c>#</c> per level and a space.
/// </summary>
public class HeadingTagReplacer : CompositeReplacer
{
    /// <summary>
    /// Registers the closing-tag and opening-tag replacements for the given heading level.
    /// </summary>
    /// <param name="heading">The heading level; its integer value selects the tag name and the number of <c>#</c> characters.</param>
    public HeadingTagReplacer(Heading heading)
    {
        var level = (int) heading;
        var blankLine = Environment.NewLine + Environment.NewLine;

        // Closing tag becomes a blank line.
        var closingTag = new PatternReplacer
        {
            Pattern = $"</h{level}>",
            Replacement = blankLine
        };

        // Opening tag (with any attributes) becomes a blank line plus the hash prefix.
        var openingTag = new PatternReplacer
        {
            Pattern = $"<h{level}[^>]*>",
            Replacement = blankLine + new string('#', level) + " "
        };

        AddReplacer(closingTag);
        AddReplacer(openingTag);
    }
}
12 changes: 12 additions & 0 deletions src/Html2Markdown/Replacement/HorizontalRuleTagReplacer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
namespace Html2Markdown.Replacement;
/// <summary>
/// Converts HTML horizontal rule tags into the Markdown rule <c>* * *</c>,
/// preceded by two newlines and followed by one.
/// </summary>
public class HorizontalRuleTagReplacer : PatternReplacer
{
    /// <summary>
    /// Configures the pattern for <c>&lt;hr ...&gt;</c> and its Markdown replacement.
    /// </summary>
    public HorizontalRuleTagReplacer()
    {
        var newLine = Environment.NewLine;

        Pattern = "<hr[^>]*>";
        Replacement = $"{newLine}{newLine}* * *{newLine}";
    }
}
12 changes: 12 additions & 0 deletions src/Html2Markdown/Replacement/HtmlCommentReplacer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
namespace Html2Markdown.Replacement;
/// <summary>
/// Removes HTML comments (<c>&lt;!-- ... --&gt;</c>) from the HTML.
/// </summary>
public class HtmlCommentReplacer : PatternReplacer
{
    /// <summary>
    /// Configures the comment pattern and an empty replacement.
    /// </summary>
    public HtmlCommentReplacer()
    {
        // The body is matched lazily with [\s\S] so that comments containing
        // hyphens (e.g. "<!-- foo-bar -->"), spanning several lines, or having an
        // empty body are removed too; the previous "[^-]+" body skipped all of those.
        // Lazy matching stops at the first "-->", so adjacent comments stay separate.
        Pattern = @"<!--[\s\S]*?-->";
        Replacement = "";
    }
}
11 changes: 11 additions & 0 deletions src/Html2Markdown/Replacement/HtmlEntitiesReplacer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
namespace Html2Markdown.Replacement;
/// <summary>
/// Replacer for HTML entities; the conversion to their Markdown equivalent is
/// delegated to <c>HtmlParser.ReplaceEntities</c>.
/// </summary>
public class HtmlEntitiesReplacer : CustomReplacer
{
    /// <summary>
    /// Creates the replacer and binds its custom action to <c>HtmlParser.ReplaceEntities</c>.
    /// </summary>
    public HtmlEntitiesReplacer() => CustomAction = HtmlParser.ReplaceEntities;
}
Loading

0 comments on commit f68630e

Please sign in to comment.