From 5d49c0afb6cf4df81d8ff58c58dc22fb58814765 Mon Sep 17 00:00:00 2001 From: Zachary Quintana Date: Wed, 19 Nov 2014 15:16:04 -0800 Subject: [PATCH 1/3] Adding table class attributes --- Michelf/Markdown.php | 122 ++++++++++++++++++------------------------- 1 file changed, 52 insertions(+), 70 deletions(-) diff --git a/Michelf/Markdown.php b/Michelf/Markdown.php index c5245fdc..d2e3ac66 100644 --- a/Michelf/Markdown.php +++ b/Michelf/Markdown.php @@ -59,9 +59,6 @@ public static function defaultTransform($text) { public $predef_urls = array(); public $predef_titles = array(); - # Optional filter function for URLs - public $url_filter_func = null; - ### Parser Implementation ### @@ -596,7 +593,7 @@ protected function _doAnchors_reference_callback($matches) { if (isset($this->urls[$link_id])) { $url = $this->urls[$link_id]; - $url = $this->encodeURLAttribute($url); + $url = $this->encodeAttribute($url); $result = "titles[$link_id] ) ) { @@ -626,7 +623,7 @@ protected function _doAnchors_inline_callback($matches) { if ($unhashed != $url) $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); - $url = $this->encodeURLAttribute($url); + $url = $this->encodeAttribute($url); $result = "encodeAttribute($alt_text); if (isset($this->urls[$link_id])) { - $url = $this->encodeURLAttribute($this->urls[$link_id]); + $url = $this->encodeAttribute($this->urls[$link_id]); $result = "\"$alt_text\"";titles[$link_id])) { $title = $this->titles[$link_id]; @@ -731,7 +728,7 @@ protected function _doImages_inline_callback($matches) { $title =& $matches[7]; $alt_text = $this->encodeAttribute($alt_text); - $url = $this->encodeURLAttribute($url); + $url = $this->encodeAttribute($url); $result = "\"$alt_text\"";encodeAttribute($title); @@ -1263,33 +1260,6 @@ protected function encodeAttribute($text) { $text = str_replace('"', '"', $text); return $text; } - - - protected function encodeURLAttribute($url, &$text = null) { - # - # Encode text for a double-quoted HTML attribute containing a URL, - # applying the URL filter if set. Also generates the textual - # representation for the URL (removing mailto: or tel:) storing it in $text. - # This function is *not* suitable for attributes enclosed in single quotes. - # - if ($this->url_filter_func) - $url = call_user_func($this->url_filter_func, $url); - - if (preg_match('{^mailto:}i', $url)) - $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7); - else if (preg_match('{^tel:}i', $url)) - { - $url = $this->encodeAttribute($url); - $text = substr($url, 4); - } - else - { - $url = $this->encodeAttribute($url); - $text = $url; - } - - return $url; - } protected function encodeAmpsAndAngles($text) { @@ -1314,7 +1284,7 @@ protected function encodeAmpsAndAngles($text) { protected function doAutoLinks($text) { - $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i', + $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', array($this, '_doAutoLinks_url_callback'), $text); # Email addresses: @@ -1337,46 +1307,48 @@ protected function doAutoLinks($text) { > }xi', array($this, '_doAutoLinks_email_callback'), $text); + $text = preg_replace_callback('{<(tel:([^\'">\s]+))>}i',array($this, '_doAutoLinks_tel_callback'), $text); return $text; } + protected function _doAutoLinks_tel_callback($matches) { + $url = $this->encodeAttribute($matches[1]); + $tel = $this->encodeAttribute($matches[2]); + $link = "$tel"; + return $this->hashPart($link); + } protected function _doAutoLinks_url_callback($matches) { - $url = $this->encodeURLAttribute($matches[1], $text); - $link = "$text"; + $url = $this->encodeAttribute($matches[1]); + $link = "$url"; return $this->hashPart($link); } protected function _doAutoLinks_email_callback($matches) { - $addr = $matches[1]; - $url = $this->encodeURLAttribute("mailto:$addr", $text); - $link = "$text"; + $address = $matches[1]; + $link = $this->encodeEmailAddress($address); return $this->hashPart($link); } - protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) { + protected function encodeEmailAddress($addr) { # - # Input: some text to obfuscate, e.g. "mailto:foo@example.com" + # Input: an email address, e.g. "foo@example.com" # - # Output: the same text but with most characters encoded as either a - # decimal or hex entity, in the hopes of foiling most address - # harvesting spam bots. E.g.: + # Output: the email address as a mailto link, with each character + # of the address encoded as either a decimal or hex entity, in + # the hopes of foiling most address harvesting spam bots. E.g.: # - # mailto:foo + #

foo@exampl + # e.com

# # Based by a filter by Matthew Wickline, posted to BBEdit-Talk. - # With some optimizations by Milian Wolff. Forced encoding of HTML - # attribute special characters by Allan Odgaard. + # With some optimizations by Milian Wolff. # - if ($text == "") return $tail = ""; - - $chars = preg_split('/(? $char) { $ord = ord($char); # Ignore non-ascii chars. @@ -1384,17 +1356,18 @@ protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_l $r = ($seed * (1 + $key)) % 100; # Pseudo-random function. # roughly 10% raw, 45% hex, 45% dec # '@' *must* be encoded. I insist. - # '"' and '>' have to be encoded inside the attribute - if ($r > 90 && strpos('@"&>', $char) === false) /* do nothing */; + # '"' has to be encoded inside the attribute + if ($r > 90 && $char != '@' && $char != '"') /* do nothing */; else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';'; else $chars[$key] = '&#'.$ord.';'; } } + + $addr = implode('', $chars); + $text = implode('', array_slice($chars, 7)); # text without `mailto:` + $addr = "$text"; - $text = implode('', $chars); - $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text; - - return $text; + return $addr; } @@ -2323,7 +2296,7 @@ protected function _doAnchors_reference_callback($matches) { if (isset($this->urls[$link_id])) { $url = $this->urls[$link_id]; - $url = $this->encodeURLAttribute($url); + $url = $this->encodeAttribute($url); $result = "titles[$link_id] ) ) { @@ -2356,7 +2329,7 @@ protected function _doAnchors_inline_callback($matches) { if ($unhashed != $url) $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); - $url = $this->encodeURLAttribute($url); + $url = $this->encodeAttribute($url); $result = "encodeAttribute($alt_text); if (isset($this->urls[$link_id])) { - $url = $this->encodeURLAttribute($this->urls[$link_id]); + $url = $this->encodeAttribute($this->urls[$link_id]); $result = "\"$alt_text\"";titles[$link_id])) { $title = $this->titles[$link_id]; @@ -2466,7 +2439,7 @@ protected function _doImages_inline_callback($matches) { $attr = $this->doExtraAttributes("img", $dummy =& $matches[8]); $alt_text = $this->encodeAttribute($alt_text); - $url = $this->encodeURLAttribute($url); + $url = $this->encodeAttribute($url); $result = "\"$alt_text\"";encodeAttribute($title); @@ -2564,6 +2537,7 @@ protected function doTables($text) { [|] .* \n # Row content. )* ) + ('.$this->id_class_attr_catch_re.')? # $4 = id/class attributes (?=\n|\Z) # Stop at final double newline. }xm', array($this, '_doTable_leadingPipe_callback'), $text); @@ -2590,9 +2564,10 @@ protected function doTables($text) { .* [|] .* \n # Row content )* ) + ('.$this->id_class_attr_catch_re.')? # $4 = id/class attributes (?=\n|\Z) # Stop at final double newline. }xm', - array($this, '_DoTable_callback'), $text); + array($this, '_doTable_callback'), $text); return $text; } @@ -2600,11 +2575,12 @@ protected function _doTable_leadingPipe_callback($matches) { $head = $matches[1]; $underline = $matches[2]; $content = $matches[3]; + $attrs = (isset($matches[4])) ? $matches[4] : null; # Remove leading pipe for each row. $content = preg_replace('/^ *[|]/m', '', $content); - return $this->_doTable_callback(array($matches[0], $head, $underline, $content)); + return $this->_doTable_callback(array($matches[0], $head, $underline, $content, $attrs)); } protected function _doTable_makeAlignAttr($alignname) { @@ -2618,6 +2594,7 @@ protected function _doTable_callback($matches) { $head = $matches[1]; $underline = $matches[2]; $content = $matches[3]; + $attrs = (isset($matches[4])) ? $matches[4] : null; # Remove any tailing pipes for each line. $head = preg_replace('/[|] *$/m', '', $head); @@ -2645,7 +2622,9 @@ protected function _doTable_callback($matches) { $attr = array_pad($attr, $col_count, ''); # Write column headers. - $text = "\n"; + $text = "doExtraAttributes('', $matches[4]) : ''; + $text .= ">\n"; $text .= "\n"; $text .= "\n"; foreach ($headers as $n => $header) @@ -2673,6 +2652,9 @@ protected function _doTable_callback($matches) { } $text .= "\n"; $text .= "
"; + + if ($attrs) + preg_replace('/(?:[ ]? '.$this->id_class_attr_catch_re.' )/', '', $text); return $this->hashBlock($text) . "\n"; } From 6fd7cbe1b5a6dc4eeb46fe1ce81dea53efb86124 Mon Sep 17 00:00:00 2001 From: Zachary Quintana Date: Wed, 19 Nov 2014 15:20:04 -0800 Subject: [PATCH 2/3] Making sure all is uptodate --- Michelf/Markdown.php | 114 ++++++++++++++++++++++++++----------------- 1 file changed, 68 insertions(+), 46 deletions(-) diff --git a/Michelf/Markdown.php b/Michelf/Markdown.php index d2e3ac66..6f37829d 100644 --- a/Michelf/Markdown.php +++ b/Michelf/Markdown.php @@ -59,6 +59,9 @@ public static function defaultTransform($text) { public $predef_urls = array(); public $predef_titles = array(); + # Optional filter function for URLs + public $url_filter_func = null; + ### Parser Implementation ### @@ -593,7 +596,7 @@ protected function _doAnchors_reference_callback($matches) { if (isset($this->urls[$link_id])) { $url = $this->urls[$link_id]; - $url = $this->encodeAttribute($url); + $url = $this->encodeURLAttribute($url); $result = "
titles[$link_id] ) ) { @@ -623,7 +626,7 @@ protected function _doAnchors_inline_callback($matches) { if ($unhashed != $url) $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); - $url = $this->encodeAttribute($url); + $url = $this->encodeURLAttribute($url); $result = "encodeAttribute($alt_text); if (isset($this->urls[$link_id])) { - $url = $this->encodeAttribute($this->urls[$link_id]); + $url = $this->encodeURLAttribute($this->urls[$link_id]); $result = "\"$alt_text\"";titles[$link_id])) { $title = $this->titles[$link_id]; @@ -728,7 +731,7 @@ protected function _doImages_inline_callback($matches) { $title =& $matches[7]; $alt_text = $this->encodeAttribute($alt_text); - $url = $this->encodeAttribute($url); + $url = $this->encodeURLAttribute($url); $result = "\"$alt_text\"";encodeAttribute($title); @@ -1260,6 +1263,33 @@ protected function encodeAttribute($text) { $text = str_replace('"', '"', $text); return $text; } + + + protected function encodeURLAttribute($url, &$text = null) { + # + # Encode text for a double-quoted HTML attribute containing a URL, + # applying the URL filter if set. Also generates the textual + # representation for the URL (removing mailto: or tel:) storing it in $text. + # This function is *not* suitable for attributes enclosed in single quotes. + # + if ($this->url_filter_func) + $url = call_user_func($this->url_filter_func, $url); + + if (preg_match('{^mailto:}i', $url)) + $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7); + else if (preg_match('{^tel:}i', $url)) + { + $url = $this->encodeAttribute($url); + $text = substr($url, 4); + } + else + { + $url = $this->encodeAttribute($url); + $text = $url; + } + + return $url; + } protected function encodeAmpsAndAngles($text) { @@ -1284,7 +1314,7 @@ protected function encodeAmpsAndAngles($text) { protected function doAutoLinks($text) { - $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', + $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i', array($this, '_doAutoLinks_url_callback'), $text); # Email addresses: @@ -1307,48 +1337,46 @@ protected function doAutoLinks($text) { > }xi', array($this, '_doAutoLinks_email_callback'), $text); - $text = preg_replace_callback('{<(tel:([^\'">\s]+))>}i',array($this, '_doAutoLinks_tel_callback'), $text); return $text; } - protected function _doAutoLinks_tel_callback($matches) { - $url = $this->encodeAttribute($matches[1]); - $tel = $this->encodeAttribute($matches[2]); - $link = "$tel"; - return $this->hashPart($link); - } protected function _doAutoLinks_url_callback($matches) { - $url = $this->encodeAttribute($matches[1]); - $link = "$url"; + $url = $this->encodeURLAttribute($matches[1], $text); + $link = "$text"; return $this->hashPart($link); } protected function _doAutoLinks_email_callback($matches) { - $address = $matches[1]; - $link = $this->encodeEmailAddress($address); + $addr = $matches[1]; + $url = $this->encodeURLAttribute("mailto:$addr", $text); + $link = "$text"; return $this->hashPart($link); } - protected function encodeEmailAddress($addr) { + protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) { # - # Input: an email address, e.g. "foo@example.com" + # Input: some text to obfuscate, e.g. "mailto:foo@example.com" # - # Output: the email address as a mailto link, with each character - # of the address encoded as either a decimal or hex entity, in - # the hopes of foiling most address harvesting spam bots. E.g.: + # Output: the same text but with most characters encoded as either a + # decimal or hex entity, in the hopes of foiling most address + # harvesting spam bots. E.g.: # - #

foo@exampl - # e.com

+ # m + # + # Note: the additional output $tail is assigned the same value as the + # ouput, minus the number of characters specified by $head_length. # # Based by a filter by Matthew Wickline, posted to BBEdit-Talk. - # With some optimizations by Milian Wolff. + # With some optimizations by Milian Wolff. Forced encoding of HTML + # attribute special characters by Allan Odgaard. # - $addr = "mailto:" . $addr; - $chars = preg_split('/(? $char) { $ord = ord($char); # Ignore non-ascii chars. @@ -1356,18 +1384,17 @@ protected function encodeEmailAddress($addr) { $r = ($seed * (1 + $key)) % 100; # Pseudo-random function. # roughly 10% raw, 45% hex, 45% dec # '@' *must* be encoded. I insist. - # '"' has to be encoded inside the attribute - if ($r > 90 && $char != '@' && $char != '"') /* do nothing */; + # '"' and '>' have to be encoded inside the attribute + if ($r > 90 && strpos('@"&>', $char) === false) /* do nothing */; else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';'; else $chars[$key] = '&#'.$ord.';'; } } - - $addr = implode('', $chars); - $text = implode('', array_slice($chars, 7)); # text without `mailto:` - $addr = "$text"; - return $addr; + $text = implode('', $chars); + $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text; + + return $text; } @@ -2296,7 +2323,7 @@ protected function _doAnchors_reference_callback($matches) { if (isset($this->urls[$link_id])) { $url = $this->urls[$link_id]; - $url = $this->encodeAttribute($url); + $url = $this->encodeURLAttribute($url); $result = "titles[$link_id] ) ) { @@ -2329,7 +2356,7 @@ protected function _doAnchors_inline_callback($matches) { if ($unhashed != $url) $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); - $url = $this->encodeAttribute($url); + $url = $this->encodeURLAttribute($url); $result = "encodeAttribute($alt_text); if (isset($this->urls[$link_id])) { - $url = $this->encodeAttribute($this->urls[$link_id]); + $url = $this->encodeURLAttribute($this->urls[$link_id]); $result = "\"$alt_text\"";titles[$link_id])) { $title = $this->titles[$link_id]; @@ -2439,7 +2466,7 @@ protected function _doImages_inline_callback($matches) { $attr = $this->doExtraAttributes("img", $dummy =& $matches[8]); $alt_text = $this->encodeAttribute($alt_text); - $url = $this->encodeAttribute($url); + $url = $this->encodeURLAttribute($url); $result = "\"$alt_text\"";encodeAttribute($title); @@ -2622,9 +2649,7 @@ protected function _doTable_callback($matches) { $attr = array_pad($attr, $col_count, ''); # Write column headers. - $text = "doExtraAttributes('', $matches[4]) : ''; - $text .= ">\n"; + $text = "\n"; $text .= "\n"; $text .= "\n"; foreach ($headers as $n => $header) @@ -2652,9 +2677,6 @@ protected function _doTable_callback($matches) { } $text .= "\n"; $text .= "
"; - - if ($attrs) - preg_replace('/(?:[ ]? '.$this->id_class_attr_catch_re.' )/', '', $text); return $this->hashBlock($text) . "\n"; } From 72fce7ce03216c384122298dc013702631c810bc Mon Sep 17 00:00:00 2001 From: Zachary Quintana Date: Wed, 19 Nov 2014 15:24:15 -0800 Subject: [PATCH 3/3] Rest of updates --- Michelf/Markdown.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Michelf/Markdown.php b/Michelf/Markdown.php index 6f37829d..2419a4fa 100644 --- a/Michelf/Markdown.php +++ b/Michelf/Markdown.php @@ -2649,7 +2649,9 @@ protected function _doTable_callback($matches) { $attr = array_pad($attr, $col_count, ''); # Write column headers. - $text = "\n"; + $text = "doExtraAttributes('', $matches[4]) : ''; + $text .= ">\n"; $text .= "\n"; $text .= "\n"; foreach ($headers as $n => $header)