diff --git a/README.md b/README.md
index 6bce00a..0035d7e 100644
--- a/README.md
+++ b/README.md
@@ -77,7 +77,7 @@ Or more explicitly, like this:
```php
$converter = new HtmlConverter();
-$converter->getConfig()->setOption('strip_tags', true);
+$converter->setOptions(['strip_tags' => true]);
$html = 'Turnips!';
$markdown = $converter->convert($html); // $markdown now contains "Turnips!"
@@ -124,14 +124,20 @@ $markdown = $converter->convert($html); // $markdown now contains "Github"
### Style options
By default bold tags are converted using the asterisk syntax, and italic tags are converted using the underlined syntax. Change these by using the `bold_style` and `italic_style` options.
+To drop a style's formatting entirely (only the text is kept), set its option to `null`.
+To leave the text wrapped in an HTML tag instead of a Markdown delimiter, set its option to an empty string.
```php
$converter = new HtmlConverter();
-$converter->getConfig()->setOption('italic_style', '*');
-$converter->getConfig()->setOption('bold_style', '__');
-
-$html = 'Italic and a bold';
-$markdown = $converter->convert($html); // $markdown now contains "*Italic* and a __bold__"
+$converter->setOptions([
+ 'italic_style' => '*',
+ 'bold_style' => '__',
+ 'underline_style' => null,
+ 'strikethrough_style' => ''
+]);
+
+$html = '<u>Underline</u>, <s>Strikethrough</s> and <em>Italic</em> and a <strong>bold</strong>';
+$markdown = $converter->convert($html); // $markdown now contains "Underline, <del>Strikethrough</del> and *Italic* and a __bold__"
```
### Line break options
@@ -142,11 +148,11 @@ By default, `br` tags are converted to two spaces followed by a newline characte
$converter = new HtmlConverter();
$html = '
test
line break
';
-$converter->getConfig()->setOption('hard_break', true);
-$markdown = $converter->convert($html); // $markdown now contains "test\nline break"
+$converter->setOptions(['hard_break' => true]);
+$markdown = $converter->convert($html); // $markdown now contains "test\nline break"
-$converter->getConfig()->setOption('hard_break', false); // default
-$markdown = $converter->convert($html); // $markdown now contains "test \nline break"
+$converter->setOptions(['hard_break' => false]); // default
+$markdown = $converter->convert($html); // $markdown now contains "test  \nline break"
```
### Autolinking options
@@ -157,10 +163,10 @@ By default, `a` tags are converted to the easiest possible link syntax, i.e. if
$converter = new HtmlConverter();
$html = 'https://thephpleague.com
';
-$converter->getConfig()->setOption('use_autolinks', true);
+$converter->setOptions(['use_autolinks' => true]);
 $markdown = $converter->convert($html); // $markdown now contains "<https://thephpleague.com>"
-$converter->getConfig()->setOption('use_autolinks', false); // default
+$converter->setOptions(['use_autolinks' => false]); // default
$markdown = $converter->convert($html); // $markdown now contains "[https://thephpleague.com](https://thephpleague.com)"
```
diff --git a/src/Converter/EmphasisConverter.php b/src/Converter/EmphasisConverter.php
index a122f40..8b9430a 100644
--- a/src/Converter/EmphasisConverter.php
+++ b/src/Converter/EmphasisConverter.php
@@ -17,15 +17,35 @@ protected function getNormTag(?ElementInterface $element): string
{
if ($element !== null && ! $element->isText()) {
$tag = $element->getTagName();
- if ($tag === 'i' || $tag === 'em') {
- return 'em';
- }
-
- if ($tag === 'b' || $tag === 'strong') {
- return 'strong';
+            // Collapse equivalent HTML tags onto one canonical name; the result is
+            // what sibling elements are compared against when deciding whether
+            // two adjacent runs share a delimiter.
+            switch ($tag) {
+                case 'i':
+                case 'em':
+                case 'cite':
+                case 'dfn':
+                case 'var':
+                    return 'em';
+                case 'b':
+                case 'strong':
+                    return 'strong';
+                case 'strike':
+                case 's':
+                case 'del':
+                    return 'del';
+                case 'sub':
+                    return 'sub';
+                case 'sup':
+                    return 'sup';
+                case 'u':
+                case 'ins':
+                    return 'u';
+                case 'kbd': // HTML keyboard-input tag, as listed in getSupportedTags()
+                    return 'kbd';
+                case 'span':
+                case 'small':
+                case 'abbr':
+                    // These keep their own tag name rather than being folded into another.
+                    return $tag;
+            }
}
-
return '';
}
@@ -42,22 +62,38 @@ public function convert(ElementInterface $element): string
if (! \trim($value)) {
return $value;
}
-
- if ($tag === 'em') {
- $style = $this->config->getOption('italic_style');
- } else {
- $style = $this->config->getOption('bold_style');
+        // Config option that holds the delimiter for each normalized tag.
+        // Any tag not listed here falls back to the 'undefined_style' option.
+        $styleOptions = [
+            'em'     => 'italic_style',
+            'strong' => 'bold_style',
+            'del'    => 'strikethrough_style',
+            'sub'    => 'subscript_style',
+            'sup'    => 'superscript_style',
+            'u'      => 'underline_style',
+            'kbd'    => 'keyboard_style', // key must be 'kbd' (the HTML tag name) to be reachable
+        ];
+        $style = $this->config->getOption($styleOptions[$tag] ?? 'undefined_style');
$prefix = \ltrim($value) !== $value ? ' ' : '';
$suffix = \rtrim($value) !== $value ? ' ' : '';
- /* If this node is immediately preceded or followed by one of the same type don't emit
- * the start or end $style, respectively. This prevents foobar from
- * being converted to *foo**bar* which is incorrect. We want *foobar* instead.
- */
- $preStyle = $this->getNormTag($element->getPreviousSibling()) === $tag ? '' : $style;
- $postStyle = $this->getNormTag($element->getNextSibling()) === $tag ? '' : $style;
+ $preStyle = $this->makeDelimiter($element, $tag, $style);
+ $postStyle = $this->makeDelimiter($element, $tag, $style, false);
return $prefix . $preStyle . \trim($value) . $postStyle . $suffix;
}
@@ -67,6 +103,29 @@ public function convert(ElementInterface $element): string
*/
public function getSupportedTags(): array
{
- return ['em', 'i', 'strong', 'b'];
+        // One tag per line so future additions produce single-line diffs.
+        return [
+            'em',
+            'i',
+            'cite',
+            'dfn',
+            'var',
+            'strong',
+            'b',
+            'del',
+            'strike',
+            's',
+            'sub',
+            'sup',
+            'u',
+            'ins',
+            'kbd',
+            'span',
+            'small',
+            'abbr',
+        ];
+ }
+
+    /**
+     * Builds the opening or closing delimiter for an emphasis-like element.
+     *
+     * @param ElementInterface $element The element being converted
+     * @param string           $tag     Normalized tag name of $element, as returned by getNormTag()
+     * @param string|null      $style   Configured delimiter: a non-empty string is emitted as-is,
+     *                                  '' emits the HTML tag itself, any non-string value emits nothing
+     * @param bool             $prev    true for the opening delimiter, false for the closing one
+     *
+     * @return string The delimiter to emit, or '' when none is needed
+     */
+    protected function makeDelimiter(ElementInterface $element, string $tag, $style, bool $prev = true): string
+    {
+        /* If this node is immediately preceded or followed by one of the same type don't emit
+         * the start or end $style, respectively. This prevents <em>foo</em><em>bar</em> from
+         * being converted to *foo**bar* which is incorrect. We want *foobar* instead.
+         */
+        $sibling = $prev ? $element->getPreviousSibling() : $element->getNextSibling();
+        if (! \is_string($style) || $this->getNormTag($sibling) === $tag) {
+            return '';
+        }
+
+        // Strict comparison instead of empty(): a delimiter such as '0' is a valid style.
+        if ($style !== '') {
+            return $style;
+        }
+
+        // Empty style means "keep the HTML tag"; without a tag name there is nothing valid to emit.
+        if ($tag === '') {
+            return '';
+        }
+
+        return $prev ? "<{$tag}>" : "</{$tag}>";
     }
}
diff --git a/src/Element.php b/src/Element.php
index ef3ecfa..86e4483 100644
--- a/src/Element.php
+++ b/src/Element.php
@@ -56,6 +56,11 @@ public function isWhitespace(): bool
return $this->getTagName() === '#text' && \trim($this->getValue()) === '';
}
+    /**
+     * Returns the DOM node held by this element.
+     */
+    public function getNode(): ?\DOMNode
+    {
+        return $this->node;
+    }
+
public function getTagName(): string
{
return $this->node->nodeName;
@@ -221,6 +226,24 @@ public function getAttribute(string $name): string
return '';
}
+
+    /**
+     * Builds a CSS-like selector describing where this element sits in the document.
+     *
+     * When the element has a non-empty `id` attribute the selector is `#<id>`.
+     * Otherwise it is the chain of tag names from the outermost ancestor below
+     * `body` down to this element, joined with ` > `; each part is suffixed with
+     * `:nth-child(n)` when its sibling position is greater than zero.
+     */
+    public function getSelector(): string
+    {
+        // Compare against '' rather than using empty(), so that an id of "0" is honoured.
+        $id = $this->getAttribute('id');
+        if ($id !== '') {
+            return '#' . $id;
+        }
+
+        $path    = [];
+        $element = $this;
+        while ($element !== null && $element->getTagName() !== 'body') {
+            $part  = $element->getTagName();
+            $index = $element->getSiblingPosition();
+            if ($index > 0) {
+                $part .= ':nth-child(' . $index . ')';
+            }
+
+            // Prepend so the outermost ancestor ends up first.
+            \array_unshift($path, $part);
+            $element = $element->getParent();
+        }
+
+        return \implode(' > ', $path);
+    }
public function equals(ElementInterface $element): bool
{
diff --git a/src/ElementInterface.php b/src/ElementInterface.php
index d8477cf..26fb0c4 100644
--- a/src/ElementInterface.php
+++ b/src/ElementInterface.php
@@ -12,6 +12,8 @@ public function isText(): bool;
public function isWhitespace(): bool;
+ public function getNode(): ?\DOMNode;
+
public function getTagName(): string;
public function getValue(): string;
@@ -47,4 +49,6 @@ public function setFinalMarkdown(string $markdown): void;
public function getListItemLevel(): int;
public function getAttribute(string $name): string;
+
+ public function getSelector(): string;
}
diff --git a/src/HtmlConverter.php b/src/HtmlConverter.php
index 944cb08..fcd3d0d 100644
--- a/src/HtmlConverter.php
+++ b/src/HtmlConverter.php
@@ -36,6 +36,12 @@ public function __construct($options = [])
'strip_placeholder_links' => false, // Set to true to remove that doesn't have href.
'bold_style' => '**', // DEPRECATED: Set to '__' if you prefer the underlined style
'italic_style' => '*', // DEPRECATED: Set to '_' if you prefer the underlined style
+ 'strikethrough_style' => '~~',
+ 'superscript_style' => '', // Set to '^' to use the superscript style
+ 'subscript_style' => '', // Set to '~' to use the subscript style
+ 'keyboard_style' => '\'',
+ 'underline_style' => '', // Set to null to clear this style
+ 'undefined_style' => '', // Set to null to clear this style
'remove_nodes' => '', // space-separated list of dom nodes that should be removed. example: 'meta style script'
'hard_break' => false, // Set to true to turn
into `\n` instead of ` \n`
'list_item_style' => '-', // Set the default character for each in a . Can be '-', '*', or '+'