diff --git a/README.md b/README.md index 6bce00a..0035d7e 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ Or more explicitly, like this: ```php $converter = new HtmlConverter(); -$converter->getConfig()->setOption('strip_tags', true); +$converter->setOptions(['strip_tags' => true]); $html = 'Turnips!'; $markdown = $converter->convert($html); // $markdown now contains "Turnips!" @@ -124,14 +124,20 @@ $markdown = $converter->convert($html); // $markdown now contains "Github" ### Style options By default bold tags are converted using the asterisk syntax, and italic tags are converted using the underlined syntax. Change these by using the `bold_style` and `italic_style` options. +If you want to clear the format of some of them, set them to `null`. +If you want their format to remain as HTML tags, set them to the empty string. ```php $converter = new HtmlConverter(); -$converter->getConfig()->setOption('italic_style', '*'); -$converter->getConfig()->setOption('bold_style', '__'); - -$html = 'Italic and a bold'; -$markdown = $converter->convert($html); // $markdown now contains "*Italic* and a __bold__" +$converter->setOptions([ + 'italic_style' => '*', + 'bold_style' => '__', + 'underline_style' => null, + 'strikethrough_style' => '' +]); + +$html = 'Underline, Strikethrough and Italic and a bold'; +$markdown = $converter->convert($html); // $markdown now contains "Underline, Strikethrough and *Italic* and a __bold__" ``` ### Line break options @@ -142,11 +148,11 @@ By default, `br` tags are converted to two spaces followed by a newline characte $converter = new HtmlConverter(); $html = '

<p>test<br>line break</p>

'; -$converter->getConfig()->setOption('hard_break', true); -$markdown = $converter->convert($html); // $markdown now contains "test\nline break" +$converter->setOptions(['hard_break' => true]); +$markdown = $converter->convert($html); // $markdown now contains "test\nline break" -$converter->getConfig()->setOption('hard_break', false); // default -$markdown = $converter->convert($html); // $markdown now contains "test \nline break" +$converter->setOptions(['hard_break' => false]); // default +$markdown = $converter->convert($html); // $markdown now contains "test \nline break" ``` ### Autolinking options @@ -157,10 +163,10 @@ By default, `a` tags are converted to the easiest possible link syntax, i.e. if $converter = new HtmlConverter(); $html = '

<p><a href="https://thephpleague.com">https://thephpleague.com</a></p>

'; -$converter->getConfig()->setOption('use_autolinks', true); +$converter->setOptions(['use_autolinks' => true]); $markdown = $converter->convert($html); // $markdown now contains "<https://thephpleague.com>" -$converter->getConfig()->setOption('use_autolinks', false); // default +$converter->setOptions(['use_autolinks' => false]); // default $markdown = $converter->convert($html); // $markdown now contains "[https://thephpleague.com](https://thephpleague.com)" ``` diff --git a/src/Converter/EmphasisConverter.php b/src/Converter/EmphasisConverter.php index a122f40..8b9430a 100644 --- a/src/Converter/EmphasisConverter.php +++ b/src/Converter/EmphasisConverter.php @@ -17,15 +17,35 @@ protected function getNormTag(?ElementInterface $element): string { if ($element !== null && ! $element->isText()) { $tag = $element->getTagName(); - if ($tag === 'i' || $tag === 'em') { - return 'em'; - } - - if ($tag === 'b' || $tag === 'strong') { - return 'strong'; + switch($tag) { + case 'i': + case 'em': + case 'cite': + case 'dfn': + case 'var': + return 'em'; + case 'b': + case 'strong': + return 'strong'; + case 'strike': + case 's': + case 'del': + return 'del'; + case 'sub': + return 'sub'; + case 'sup': + return 'sup'; + case 'u': + case 'ins': + return 'u'; + case 'kbd': + return 'kbd'; + case 'span': + case 'small': + case 'abbr': + return $tag; } } - return ''; } @@ -42,22 +62,38 @@ public function convert(ElementInterface $element): string if (! 
\trim($value)) { return $value; } - - if ($tag === 'em') { - $style = $this->config->getOption('italic_style'); - } else { - $style = $this->config->getOption('bold_style'); + switch ($tag) { + case 'em': + $style = $this->config->getOption('italic_style'); + break; + case 'del': + $style = $this->config->getOption('strikethrough_style'); + break; + case 'sub': + $style = $this->config->getOption('subscript_style'); + break; + case 'sup': + $style = $this->config->getOption('superscript_style'); + break; + case 'strong': + $style = $this->config->getOption('bold_style'); + break; + case 'u': + $style = $this->config->getOption('underline_style'); + break; + case 'kbd': + $style = $this->config->getOption('keyboard_style'); + break; + default: + $style = $this->config->getOption('undefined_style'); + break; } $prefix = \ltrim($value) !== $value ? ' ' : ''; $suffix = \rtrim($value) !== $value ? ' ' : ''; - /* If this node is immediately preceded or followed by one of the same type don't emit - * the start or end $style, respectively. This prevents foobar from - * being converted to *foo**bar* which is incorrect. We want *foobar* instead. - */ - $preStyle = $this->getNormTag($element->getPreviousSibling()) === $tag ? '' : $style; - $postStyle = $this->getNormTag($element->getNextSibling()) === $tag ? '' : $style; + $preStyle = $this->makeDelimiter($element, $tag, $style); + $postStyle = $this->makeDelimiter($element, $tag, $style, false); return $prefix . $preStyle . \trim($value) . $postStyle . 
$suffix; } @@ -67,6 +103,29 @@ public function convert(ElementInterface $element): string */ public function getSupportedTags(): array { - return ['em', 'i', 'strong', 'b']; + return [ + 'em', 'i', 'cite', 'dfn', 'var', + 'strong', 'b', + 'del', 'strike', 's', + 'sub', 'sup', + 'u', 'ins', + 'kbd', + 'span', 'small', 'abbr' + ]; + } + + protected function makeDelimiter($element, string $tag, $style, bool $prev = true): string + { + /* If this node is immediately preceded or followed by one of the same type don't emit + * the start or end $style, respectively. This prevents foobar from + * being converted to *foo**bar* which is incorrect. We want *foobar* instead. + */ + if($prev) { + $ignore = $this->getNormTag($element->getPreviousSibling()) === $tag; + } else { + $ignore = $this->getNormTag($element->getNextSibling()) === $tag; + } + if (!is_string($style ?? null) || $ignore) return ''; + return empty($style) ? "<" . ($prev ? "" : "/") ."{$tag}>" : $style; } } diff --git a/src/Element.php b/src/Element.php index ef3ecfa..86e4483 100644 --- a/src/Element.php +++ b/src/Element.php @@ -56,6 +56,11 @@ public function isWhitespace(): bool return $this->getTagName() === '#text' && \trim($this->getValue()) === ''; } + public function getNode(): ?\DOMNode + { + return $this->node; + } + public function getTagName(): string { return $this->node->nodeName; @@ -221,6 +226,24 @@ public function getAttribute(string $name): string return ''; } + + public function getSelector(): string { + $element = $this; + if (!empty($element->getAttribute('id'))) { + return '#' . $element->getAttribute('id'); + } + $path = []; + while ($element && $element->getTagName() !== 'body') { + $part = $element->getTagName(); + $index = $element->getSiblingPosition(); + if ($index > 0) { + $part .= ':nth-child(' . $index . 
')'; + } + array_unshift($path, $part); + $element = $element->getParent(); + } + return implode(' > ', $path); + } public function equals(ElementInterface $element): bool { diff --git a/src/ElementInterface.php b/src/ElementInterface.php index d8477cf..26fb0c4 100644 --- a/src/ElementInterface.php +++ b/src/ElementInterface.php @@ -12,6 +12,8 @@ public function isText(): bool; public function isWhitespace(): bool; + public function getNode(): ?\DOMNode; + public function getTagName(): string; public function getValue(): string; @@ -47,4 +49,6 @@ public function setFinalMarkdown(string $markdown): void; public function getListItemLevel(): int; public function getAttribute(string $name): string; + + public function getSelector(): string; } diff --git a/src/HtmlConverter.php b/src/HtmlConverter.php index 944cb08..fcd3d0d 100644 --- a/src/HtmlConverter.php +++ b/src/HtmlConverter.php @@ -36,6 +36,12 @@ public function __construct($options = []) 'strip_placeholder_links' => false, // Set to true to remove that doesn't have href. 'bold_style' => '**', // DEPRECATED: Set to '__' if you prefer the underlined style 'italic_style' => '*', // DEPRECATED: Set to '_' if you prefer the underlined style + 'strikethrough_style' => '~~', + 'superscript_style' => '', // Set to '^' to use the superscript style + 'subscript_style' => '', // Set to '~' to use the subscript style + 'keyboard_style' => '\'', + 'underline_style' => '', // Set to null to clear this style + 'undefined_style' => '', // Set to null to clear this style 'remove_nodes' => '', // space-separated list of dom nodes that should be removed. example: 'meta style script' 'hard_break' => false, // Set to true to turn
<br> into `\n` instead of ` \n` 'list_item_style' => '-', // Set the default character for each <li> in a <ul>. Can be '-', '*', or '+'