From 49b1e2886150a9aa968d1f4fc804ba5c5c255e0b Mon Sep 17 00:00:00 2001 From: David Shanske Date: Thu, 21 Dec 2023 17:55:49 +0000 Subject: [PATCH 1/5] Move exit function to construct to try to fix issue reported using localhost --- includes/class-indieauth-client-discovery.php | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/includes/class-indieauth-client-discovery.php b/includes/class-indieauth-client-discovery.php index 20d083f..fb1ed0b 100644 --- a/includes/class-indieauth-client-discovery.php +++ b/includes/class-indieauth-client-discovery.php @@ -13,6 +13,18 @@ public function __construct( $client_id ) { if ( defined( 'INDIEAUTH_UNIT_TESTS' ) ) { return; } + // Validate if this is an IP address + $ip = filter_var( wp_parse_url( $client_id, PHP_URL_HOST ), FILTER_VALIDATE_IP, FILTER_FLAG_IPV4 | FILTER_FLAG_IPV6 ); + $donotfetch = array( + '127.0.0.1', + '0000:0000:0000:0000:0000:0000:0000:0001', + '::1', + ); + + // If this is an IP address on the donotfetch list then do not fetch. + if ( ( $ip && in_array( $ip, $donotfetch, true ) ) || 'localhost' === wp_parse_url( $client_id, PHP_URL_HOST ) ) { + return; + } $this->html = self::parse( $client_id ); if ( is_wp_error( $this->html ) ) { @@ -30,20 +42,6 @@ public function __construct( $client_id ) { } private function fetch( $url ) { - - // Validate if this is an IP address - $ip = filter_var( wp_parse_url( $url, PHP_URL_HOST ), FILTER_VALIDATE_IP, FILTER_FLAG_IPV4 | FILTER_FLAG_IPV6 ); - $donotfetch = array( - '127.0.0.1', - '0000:0000:0000:0000:0000:0000:0000:0001', - '::1', - ); - - // If this is an IP address ion the donotfetch list then do not fetch. - if ( $ip && ! in_array( $ip, $donotfetch ) ) { - return new WP_Error( 'do_not_fetch', __( 'Client Identifier is localhost', 'indieauth' ) ); - } - $wp_version = get_bloginfo( 'version' ); $user_agent = apply_filters( 'http_headers_useragent', 'WordPress/' . $wp_version . '; ' . 
get_bloginfo( 'url' ) ); $args = array( From 1d07c2ff17f466ef65e4a4a8cf367b17f651ce3c Mon Sep 17 00:00:00 2001 From: David Shanske Date: Fri, 22 Dec 2023 04:13:26 +0000 Subject: [PATCH 2/5] Incorporate MF2 Parser into IndieAuth for Discovery --- composer.json | 10 +- includes/class-indieauth-client-discovery.php | 196 +- includes/class-indieauth-token-ui.php | 33 +- lib/mf2/LICENSE.md | 36 + lib/mf2/Parser.php | 2343 +++++++++++++++++ lib/mf2/README.md | 660 +++++ 6 files changed, 3183 insertions(+), 95 deletions(-) create mode 100644 lib/mf2/LICENSE.md create mode 100644 lib/mf2/Parser.php create mode 100644 lib/mf2/README.md diff --git a/composer.json b/composer.json index 2f03eb9..90fceee 100644 --- a/composer.json +++ b/composer.json @@ -33,14 +33,16 @@ "dealerdirect/phpcodesniffer-composer-installer": "^1.0", "phpcompatibility/phpcompatibility-wp": "*", "sebastian/phpcpd": "^3.0 || ^4.0 || ^6.0", - "yoast/phpunit-polyfills": "^2.0" + "yoast/phpunit-polyfills": "^2.0", + "mf2/mf2": "^0.5.0" }, "scripts": { "install-codestandards": [ "Dealerdirect\\Composer\\Plugin\\Installers\\PHPCodeSniffer\\Plugin::run" ], "post-install-cmd": [ - "@install-codestandard" + "@install-codestandard", + "@copy-files" ], "setup-local-tests": "bash bin/install-wp-tests.sh wordpress_test root root 127.0.0.1 latest", "phpunit": "./vendor/bin/phpunit", @@ -49,6 +51,10 @@ "bin/install-wp-tests.sh wordpress wordpress wordpress", "vendor/bin/phpunit" ], + "copy-files": [ + "cp -u -r vendor/mf2/mf2/Mf2/Parser.php lib/mf2", + "cp -u -r vendor/mf2/mf2/*.md lib/mf2" + ], "lint": [ "./vendor/bin/phpcs -n -p", "@phpcpd" diff --git a/includes/class-indieauth-client-discovery.php b/includes/class-indieauth-client-discovery.php index fb1ed0b..52fe698 100644 --- a/includes/class-indieauth-client-discovery.php +++ b/includes/class-indieauth-client-discovery.php @@ -1,8 +1,10 @@ html = self::parse( $client_id ); - if ( is_wp_error( $this->html ) ) { - error_log( __( 'Failed to Retrieve IndieAuth 
Client Details ', 'indieauth' ) . wp_json_encode( $this->html ) ); // phpcs:ignore + $response = self::parse( $client_id ); + if ( is_wp_error( $response ) ) { + error_log( __( 'Failed to Retrieve IndieAuth Client Details ', 'indieauth' ) . wp_json_encode( $response ) ); // phpcs:ignore return; } - if ( isset( $this->html['manifest'] ) ) { - $this->manifest = self::get_manifest( $this->html['manifest'] ); - } - $this->client_icon = $this->determine_icon(); - $this->client_name = $this->ifset( $this->manifest, 'name', '' ); - if ( empty( $this->client_name ) ) { - $this->client_name = $this->ifset( $this->html, array( 'application-name', 'og:title', 'title' ), '' ); - } + } + + public function export() { + return array( + 'manifest' => $this->manifest, + 'rels' => $this->rels, + 'mf2' => $this->mf2, + 'html' => $this->html, + 'client_id' => $this->client_id, + 'client_name' => $this->client_name, + 'client_icon' => $this->client_icon, + ); } private function fetch( $url ) { @@ -57,33 +63,94 @@ private function fetch( $url ) { return new WP_Error( 'retrieval_error', __( 'Failed to Retrieve Client Details', 'indieauth' ), $code ); } } + return $response; } private function parse( $url ) { $response = self::fetch( $url ); + if ( is_wp_error( $response ) ) { return $response; } - $return = array(); - // check link header - $links = wp_remote_retrieve_header( $response, 'link' ); - if ( $links ) { - if ( is_string( $links ) ) { - $links = array( $links ); + $content = wp_remote_retrieve_body( $response ); + + if ( class_exists( 'Masterminds\\HTML5' ) ) { + $domdocument = new \Masterminds\HTML5( array( 'disable_html_ns' => true ) ); + $domdocument = $domdocument->loadHTML( $content ); + } else { + $domdocument = new DOMDocument(); + libxml_use_internal_errors( true ); + if ( function_exists( 'mb_convert_encoding' ) ) { + $content = mb_convert_encoding( $content, 'HTML-ENTITIES', mb_detect_encoding( $content ) ); + } + $domdocument->loadHTML( $content ); + 
libxml_use_internal_errors( false ); + } + + $this->get_mf2( $domdocument, $url ); + if ( ! empty( $this->mf2 ) ) { + if ( array_key_exists( 'name', $this->mf2 ) ) { + $this->client_name = $this->mf2['name'][0]; + } + if ( array_key_exists( 'logo', $this->mf2 ) ) { + if ( is_string( $this->mf2['logo'][0] ) ) { + $this->client_icon = $this->mf2['logo'][0]; + } else { + $this->client_icon = $this->mf2['logo'][0]['value']; + } + } + } elseif ( isset( $this->rels['manifest'] ) ) { + self::get_manifest( $this->rels['manifest'] ); + $this->client_icon = $this->determine_icon( $this->manifest ); + $this->client_name = isset( $this->manifest['name'] ) ? $this->manifest['name'] : ''; + } else { + $this->client_icon = $this->determine_icon( $this->rels ); + $this->get_html( $domdocument ); + $this->client_name = isset( $this->html['title'] ) ? $this->html['title'] : ''; + } + + if ( ! empty( $this->client_icon ) ) { + $this->client_icon = WP_Http::make_absolute_url( $this->client_icon, $url ); + } + } + + private function get_mf2( $input, $url ) { + if ( ! class_exists( 'Mf2\Parser' ) ) { + require_once plugin_dir_path( __DIR__ ) . 
'lib/mf2/Parser.php'; + } + $mf = Mf2\parse( $input, $url ); + if ( array_key_exists( 'rels', $mf ) ) { + $this->rels = wp_array_slice_assoc( $mf['rels'], array( 'apple-touch-icon', 'icon', 'mask-icon', 'manifest' ) ); + } + if ( array_key_exists( 'items', $mf ) ) { + foreach ( $mf['items'] as $item ) { + if ( in_array( 'h-app', $item['type'], true ) ) { + $this->mf2 = $item['properties']; + return; + } } - $return['links'] = parse_link_rels( $links, $url ); } - return array_merge( $return, self::extract_client_data_from_html( wp_remote_retrieve_body( $response ), $url ) ); } private function get_manifest( $url ) { + if ( is_array( $url ) ) { + $url = $url[0]; + } $response = self::fetch( $url ); if ( is_wp_error( $response ) ) { return $response; } - return json_decode( wp_remote_retrieve_body( $response ) ); + $this->manifest = json_decode( wp_remote_retrieve_body( $response ), true ); + } + + private function get_html( $input ) { + if ( ! $input ) { + return; + } + $xpath = new DOMXPath( $input ); + $this->html['title'] = $xpath->evaluate( 'string(//title)' ); } private function ifset( $array, $key, $default = false ) { @@ -106,26 +173,30 @@ public function get_name() { } // Separate function for possible improved size picking later - private function determine_icon() { - if ( is_wp_error( $this->html ) ) { + private function determine_icon( $input ) { + if ( ! is_array( $input ) || empty( $input ) ) { return ''; } + $icons = array(); - if ( is_array( $this->manifest ) && ! empty( $this->manifest ) && ! isset( $this->manifest['icons'] ) ) { - $icons = $this->manifest['icons']; - } elseif ( ! 
empty( $this->html ) ) { - if ( isset( $this->html['icon'] ) ) { - $icons = $this->html['icon']; - } elseif ( isset( $this->html['mask-icon'] ) ) { - $icons = $this->html['mask-icon']; - } elseif ( isset( $this->html['apple-touch-icon'] ) ) { - $icons = $this->html['apple-touch-icon']; - } + if ( isset( $input['icons'] ) ) { + $icons = $input['icons']; + } elseif ( isset( $input['mask-icon'] ) ) { + $icons = $input['mask-icon']; + } elseif ( isset( $input['apple-touch-icon'] ) ) { + $icons = $input['apple-touch-icon']; + } elseif ( isset( $input['icon'] ) ) { + $icons = $input['icon']; } + if ( is_array( $icons ) && ! wp_is_numeric_array( $icons ) && isset( $icons['url'] ) ) { return $icons['url']; + } elseif ( isset( $icons[0] ) && is_string( $icons[0] ) ) { + return $icons[0]; } elseif ( isset( $icons[0]['url'] ) ) { return $icons[0]['url']; + } elseif ( isset( $icons[0]['src'] ) ) { + return $icons[0]['src']; } else { return ''; } @@ -134,63 +205,4 @@ private function determine_icon() { public function get_icon() { return $this->client_icon; } - - /** - * @param array $contents HTML to parse for rel links - * @param string $url URL to use to make absolute - * @return array $rels rel values as indices to properties, empty array if no rels at all - */ - public static function extract_client_data_from_html( $contents, $url ) { - // unicode to HTML entities - $contents = mb_convert_encoding( $contents, 'HTML-ENTITIES', mb_detect_encoding( $contents ) ); - libxml_use_internal_errors( true ); - $doc = new DOMDocument(); - $doc->loadHTML( $contents ); - $xpath = new DOMXPath( $doc ); - $return = array(); - // check and elements - foreach ( $xpath->query( '//a[@rel and @href] | //link[@rel and @href]' ) as $hyperlink ) { - $rel = $hyperlink->getAttribute( 'rel' ); - $temp = array(); - // Try to extract icons just in case there isn't a manifest - switch ( $rel ) { - case 'icon': - case 'mask-icon': - case 'shortcut icon': - case 'apple-touch-icon-precomposed': - case 'apple-touch-icon': - 
$temp['url'] = WP_Http::make_absolute_url( $hyperlink->getAttribute( 'href' ), $url ); - $temp['sizes'] = $hyperlink->getAttribute( 'sizes' ); - $temp['type'] = $hyperlink->getAttribute( 'temp' ); - $temp = array_filter( $temp ); - break; - default: - $temp = WP_Http::make_absolute_url( $hyperlink->getAttribute( 'href' ), $url ); - } - if ( 'shortcut icon' === $rel ) { - $rel = 'icon'; - } - if ( isset( $return[ $rel ] ) ) { - if ( is_array( $return[ $rel ] ) ) { - $return[ $rel ] = $temp; - } - if ( is_string( $return[ $rel ] ) ) { - $return[ $rel ] = array( $return[ $rel ] ); - $return[ $rel ][] = $temp; - } - } else { - $return[ $rel ] = $temp; - } - } - // As a fallback also retrieve OpenGraph Title and Image Properties - foreach ( $xpath->query( '//meta[@property and @content]' ) as $meta ) { - $property = $meta->getAttribute( 'property' ); - if ( in_array( $property, array( 'og:title', 'og:image' ), true ) ) { - $return[ $property ] = $meta->getAttribute( 'content' ); - } - } - $return['title'] = $xpath->query( '//title' )->item( 0 )->textContent; - - return $return; - } } diff --git a/includes/class-indieauth-token-ui.php b/includes/class-indieauth-token-ui.php index 3cb6940..9855a0a 100644 --- a/includes/class-indieauth-token-ui.php +++ b/includes/class-indieauth-token-ui.php @@ -14,6 +14,7 @@ public function __construct() { add_action( 'admin_init', array( $this, 'admin_init' ) ); add_action( 'admin_menu', array( $this, 'admin_menu' ), 11 ); add_action( 'admin_action_indieauth_newtoken', array( $this, 'new_token' ) ); + add_action( 'admin_action_indieauth_client_discovery', array( $this, 'client_discovery' ) ); } /** @@ -41,6 +42,25 @@ public function admin_menu() { public function options_callback() { } + public function client_discovery() { + if ( ! isset( $_POST['indieauth_nonce'] ) + || ! 
wp_verify_nonce( $_POST['indieauth_nonce'], 'indieauth_client_discovery' ) + ) { + esc_html_e( 'Invalid Nonce', 'indieauth' ); + exit; + } + if ( empty( $_REQUEST['client_url'] ) ) { + $GLOBALS['title'] = esc_html__( 'Client Discovery', 'indieauth' ); // phpcs:ignore + esc_html_e( 'A URL must be provided', 'indieauth' ); + exit; + } + header( 'Content-Type: application/json' ); + $client_url = esc_url_raw( wp_unslash( $_REQUEST['client_url'] ) ); + $client = new IndieAuth_Client_Discovery( $client_url ); + echo wp_json_encode( $client->export(), JSON_PRETTY_PRINT ); + exit; + } + public function new_token() { if ( ! isset( $_POST['indieauth_nonce'] ) || ! wp_verify_nonce( $_POST['indieauth_nonce'], 'indieauth_newtoken' ) @@ -131,7 +151,18 @@ public function options_form() {

- +
+

+
+ + + +

+
+
+ CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER. + +### _Statement of Purpose_ + +The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. + +**1. 
Copyright and Related Rights.** A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: + +1. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; +2. moral rights retained by the original author(s) and/or performer(s); +3. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; +4. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; +5. rights protecting the extraction, dissemination, use and reuse of data in a Work; +6. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and +7. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. + +**2. Waiver.** To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). 
Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. + +**3. Public License Fallback.** Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. + +**4. Limitations and Disclaimers.** + +1. 
No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. +2. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. +3. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. +4. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. 
diff --git a/lib/mf2/Parser.php b/lib/mf2/Parser.php new file mode 100644 index 0000000..e8e6dda --- /dev/null +++ b/lib/mf2/Parser.php @@ -0,0 +1,2343 @@ +Barnaby Walters'); + * echo json_encode($output, JSON_PRETTY_PRINT); + * + * Produces: + * + * { + * "items": [ + * { + * "type": ["h-card"], + * "properties": { + * "name": ["Barnaby Walters"] + * } + * } + * ], + * "rels": {} + * } + * + * @param string|DOMDocument $input The HTML string or DOMDocument object to parse + * @param string $url The URL the input document was found at, for relative URL resolution + * @param bool $convertClassic whether or not to convert classic microformats + * @return array Canonical MF2 array structure + */ +function parse($input, $url = null, $convertClassic = true) { + $parser = new Parser($input, $url); + return $parser->parse($convertClassic); +} + +/** + * Fetch microformats2 + * + * Given a URL, fetches it (following up to 5 redirects) and, if the content-type appears to be HTML, returns the parsed + * microformats2 array structure. + * + * Not that even if the response code was a 4XX or 5XX error, if the content-type is HTML-like then it will be parsed + * all the same, as there are legitimate cases where error pages might contain useful microformats (for example a deleted + * h-entry resulting in a 410 Gone page with a stub h-entry explaining the reason for deletion). Look in $curlInfo['http_code'] + * for the actual value. 
+ * + * @param string $url The URL to fetch + * @param bool $convertClassic (optional, default true) whether or not to convert classic microformats + * @param &array $curlInfo (optional) the results of curl_getinfo will be placed in this variable for debugging + * @return array|null canonical microformats2 array structure on success, null on failure + */ +function fetch($url, $convertClassic = true, &$curlInfo=null) { + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); + curl_setopt($ch, CURLOPT_HEADER, 0); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); + curl_setopt($ch, CURLOPT_MAXREDIRS, 5); + curl_setopt($ch, CURLOPT_HTTPHEADER, array( + 'Accept: text/html' + )); + $html = curl_exec($ch); + $info = $curlInfo = curl_getinfo($ch); + curl_close($ch); + + if (strpos(strtolower($info['content_type']), 'html') === false) { + // The content was not delivered as HTML, do not attempt to parse it. + return null; + } + + # ensure the final URL is used to resolve relative URLs + $url = $info['url']; + + return parse($html, $url, $convertClassic); +} + +/** + * Unicode to HTML Entities + * @param string $input String containing characters to convert into HTML entities + * @return string + */ +function unicodeToHtmlEntities($input) { + return mb_convert_encoding($input, 'HTML-ENTITIES', mb_detect_encoding($input)); +} + +/** + * Collapse Whitespace + * + * Collapses any sequences of whitespace within a string into a single space + * character. + * + * @deprecated since v0.2.3 + * @param string $str + * @return string + */ +function collapseWhitespace($str) { + return preg_replace('/[\s|\n]+/', ' ', $str); +} + +function unicodeTrim($str) { + // this is cheating. 
TODO: find a better way if this causes any problems + $str = str_replace(mb_convert_encoding(' ', 'UTF-8', 'HTML-ENTITIES'), ' ', $str); + $str = preg_replace('/^\s+/', '', $str); + return preg_replace('/\s+$/', '', $str); +} + +/** + * Microformat Name From Class string + * + * Given the value of @class, get the relevant mf classnames (e.g. h-card, + * p-name). + * + * @param string $class A space delimited list of classnames + * @param string $prefix The prefix to look for + * @return string|array The prefixed name of the first microfomats class found or false + */ +function mfNamesFromClass($class, $prefix='h-') { + $class = str_replace(array(' ', ' ', "\n"), ' ', $class); + $classes = explode(' ', $class); + $classes = preg_grep('#^(h|p|u|dt|e)-([a-z0-9]+-)?[a-z]+(-[a-z]+)*$#', $classes); + $matches = array(); + + foreach ($classes as $classname) { + $compare_classname = ' ' . $classname; + $compare_prefix = ' ' . $prefix; + if (strstr($compare_classname, $compare_prefix) !== false && ($compare_classname != $compare_prefix)) { + $matches[] = ($prefix === 'h-') ? $classname : substr($classname, strlen($prefix)); + } + } + + return $matches; +} + +/** + * Get Nested µf Property Name From Class + * + * Returns all the p-, u-, dt- or e- prefixed classnames it finds in a + * space-separated string. + * + * @param string $class + * @return array + */ +function nestedMfPropertyNamesFromClass($class) { + $prefixes = array('p-', 'u-', 'dt-', 'e-'); + $propertyNames = array(); + + $class = str_replace(array(' ', ' ', "\n"), ' ', $class); + foreach (explode(' ', $class) as $classname) { + foreach ($prefixes as $prefix) { + // Check if $classname is a valid property classname for $prefix. 
+ if (mb_substr($classname, 0, mb_strlen($prefix)) == $prefix && $classname != $prefix) { + $propertyName = mb_substr($classname, mb_strlen($prefix)); + $propertyNames[$propertyName][] = $prefix; + } + } + } + + foreach ($propertyNames as $property => $prefixes) { + $propertyNames[$property] = array_unique($prefixes); + } + + return $propertyNames; +} + +/** + * Wraps mfNamesFromClass to handle an element as input (common) + * + * @param DOMElement $e The element to get the classname for + * @param string $prefix The prefix to look for + * @return mixed See return value of mf2\Parser::mfNameFromClass() + */ +function mfNamesFromElement(\DOMElement $e, $prefix = 'h-') { + $class = $e->getAttribute('class'); + return mfNamesFromClass($class, $prefix); +} + +/** + * Wraps nestedMfPropertyNamesFromClass to handle an element as input + */ +function nestedMfPropertyNamesFromElement(\DOMElement $e) { + $class = $e->getAttribute('class'); + return nestedMfPropertyNamesFromClass($class); +} + +/** + * Converts various time formats to HH:MM + * @param string $time The time to convert + * @return string + */ +function convertTimeFormat($time) { + $hh = $mm = $ss = ''; + preg_match('/(\d{1,2}):?(\d{2})?:?(\d{2})?(a\.?m\.?|p\.?m\.?)?/i', $time, $matches); + + // If no am/pm is specified: + if (empty($matches[4])) { + return $time; + } else { + // Otherwise, am/pm is specified. + $meridiem = strtolower(str_replace('.', '', $matches[4])); + + // Hours. + $hh = $matches[1]; + + // Add 12 to hours if pm applies. + if ($meridiem == 'pm' && ($hh < 12)) { + $hh += 12; + } + + $hh = str_pad($hh, 2, '0', STR_PAD_LEFT); + + // Minutes. + $mm = (empty($matches[2]) ) ? '00' : $matches[2]; + + // Seconds, only if supplied. 
+ if (!empty($matches[3])) { + $ss = $matches[3]; + } + + if (empty($ss)) { + return sprintf('%s:%s', $hh, $mm); + } + else { + return sprintf('%s:%s:%s', $hh, $mm, $ss); + } + } +} + +/** + * Normalize an ordinal date to YYYY-MM-DD + * This function should only be called after validating the $dtValue + * matches regex \d{4}-\d{2} + * @param string $dtValue + * @return string + */ +function normalizeOrdinalDate($dtValue) { + list($year, $day) = explode('-', $dtValue, 2); + $day = intval($day); + if ($day < 367 && $day > 0) { + $date = \DateTime::createFromFormat('Y-z', $dtValue); + $date->modify('-1 day'); # 'z' format is zero-based so need to adjust + if ($date->format('Y') === $year) { + return $date->format('Y-m-d'); + } + } + return ''; +} + +/** + * If a date value has a timezone offset, normalize it. + * @param string $dtValue + * @return string isolated, normalized TZ offset for implied TZ for other dt- properties + */ +function normalizeTimezoneOffset(&$dtValue) { + preg_match('/Z|[+-]\d{1,2}:?(\d{2})?$/i', $dtValue, $matches); + + if (empty($matches)) { + return null; + } + + $timezoneOffset = null; + + if ( $matches[0] != 'Z' ) { + $timezoneString = str_replace(':', '', $matches[0]); + $plus_minus = substr($timezoneString, 0, 1); + $timezoneOffset = substr($timezoneString, 1); + if ( strlen($timezoneOffset) <= 2 ) { + $timezoneOffset .= '00'; + } + $timezoneOffset = str_pad($timezoneOffset, 4, 0, STR_PAD_LEFT); + $timezoneOffset = $plus_minus . $timezoneOffset; + $dtValue = preg_replace('/Z?[+-]\d{1,2}:?(\d{2})?$/i', $timezoneOffset, $dtValue); + } + + return $timezoneOffset; +} + +function applySrcsetUrlTransformation($srcset, $transformation) { + return implode(', ', array_filter(array_map(function ($srcsetPart) use ($transformation) { + $parts = explode(" \t\n\r\0\x0B", trim($srcsetPart), 2); + $parts[0] = rtrim($parts[0]); + + if (empty($parts[0])) { return false; } + + $parts[0] = call_user_func($transformation, $parts[0]); + + return $parts[0] . 
(empty($parts[1]) ? '' : ' ' . $parts[1]); + }, explode(',', trim($srcset))))); +} + +/** + * Microformats2 Parser + * + * A class which holds state for parsing microformats2 from HTML. + * + * Example usage: + * + * use Mf2; + * $parser = new Mf2\Parser('

Barnaby Walters

'); + * $output = $parser->parse(); + */ +class Parser { + /** @var string The baseurl (if any) to use for this parse */ + public $baseurl; + + /** @var DOMXPath object which can be used to query over any fragment*/ + public $xpath; + + /** @var DOMDocument */ + public $doc; + + /** @var SplObjectStorage */ + protected $parsed; + + /** + * @var bool + */ + public $jsonMode; + + /** @var boolean Whether to include experimental language parsing in the result */ + public $lang = false; + + /** @var bool Whether to include alternates object (dropped from spec in favor of rel-urls) */ + public $enableAlternates = false; + + /** + * Elements upgraded to mf2 during backcompat + * @var SplObjectStorage + */ + protected $upgraded; + + /** + * Whether to convert classic microformats + * @var bool + */ + public $convertClassic; + + /** + * Constructor + * + * @param DOMDocument|string $input The data to parse. A string of HTML or a DOMDocument + * @param string $url The URL of the parsed document, for relative URL resolution + * @param boolean $jsonMode Whether or not to use a stdClass instance for an empty `rels` dictionary. This breaks PHP looping over rels, but allows the output to be correctly serialized as JSON. 
+ */ + public function __construct($input, $url = null, $jsonMode = false) { + libxml_use_internal_errors(true); + if (is_string($input)) { + if (class_exists('Masterminds\\HTML5')) { + $doc = new \Masterminds\HTML5(array('disable_html_ns' => true)); + $doc = $doc->loadHTML($input); + } else { + $doc = new DOMDocument(); + @$doc->loadHTML(unicodeToHtmlEntities($input), \LIBXML_NOWARNING); + } + } elseif (is_a($input, 'DOMDocument')) { + $doc = clone $input; + } else { + $doc = new DOMDocument(); + @$doc->loadHTML(''); + } + + $this->xpath = new DOMXPath($doc); + + $baseurl = $url; + foreach ($this->xpath->query('//base[@href]') as $base) { + $baseElementUrl = $base->getAttribute('href'); + + if (parse_url($baseElementUrl, PHP_URL_SCHEME) === null) { + /* The base element URL is relative to the document URL. + * + * :/ + * + * Perhaps the author was high? */ + + $baseurl = resolveUrl($url, $baseElementUrl); + } else { + $baseurl = $baseElementUrl; + } + break; + } + + // Ignore