Skip to content

Commit

Permalink
[Data Liberation] Move Markdown importer to a separate package (#2093)
Browse files Browse the repository at this point in the history
Moves the Markdown importer to a `data-liberation-markdown` package so
that it can be shipped as a separate `.phar` file and downloaded only
when needed.

## Testing instructions

This change only moves code around. To test, confirm that the PHP unit
tests still pass in CI.

A part of:

* #2080
* #1894
  • Loading branch information
adamziel authored Dec 17, 2024
1 parent 27d799a commit 869f6bd
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 352 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
<?php

use WordPress\Filesystem\WP_Filesystem;

/**
 * Stream importer specialization for directories of Markdown files.
 *
 * It wires a WP_Directory_Tree_Entity_Reader configured for `.md` files into
 * the parent importer and rewrites references to local Markdown assets into
 * file:// URLs.
 */
class WP_Markdown_Importer extends WP_Stream_Importer {

	/**
	 * Creates an importer that reads entities from a directory of Markdown files.
	 *
	 * @param string $markdown_directory Root directory handed to the entity reader as `root_dir`.
	 * @param array  $options            Importer options; validated by parse_options().
	 * @param mixed  $cursor             Forwarded as-is to create(). NOTE(review): presumably
	 *                                   a resume cursor – confirm against WP_Stream_Importer.
	 * @return mixed Whatever create() returns.
	 */
	public static function create_for_markdown_directory( $markdown_directory, $options = array(), $cursor = null ) {
		return WP_Markdown_Importer::create(
			function ( $cursor = null ) use ( $markdown_directory ) {
				// @TODO: Handle $cursor
				return new WP_Directory_Tree_Entity_Reader(
					new WP_Filesystem(),
					array(
						'root_dir'                 => $markdown_directory,
						'first_post_id'            => 1,
						'allowed_extensions'       => array( 'md' ),
						'index_file_patterns'      => array( '#^index\.md$#' ),
						'markup_converter_factory' => function ( $content ) {
							return new WP_Markdown_To_Blocks( $content );
						},
					)
				);
			},
			$options,
			$cursor
		);
	}

	/**
	 * Validates and normalizes the Markdown-specific importer options.
	 *
	 * Requires `source_site_url` and `local_markdown_assets_root` (which must be
	 * an existing directory). Copies `source_site_url` into
	 * `default_source_site_url` and strips trailing slashes from
	 * `local_markdown_assets_root` before delegating to the parent.
	 *
	 * @param array $options Raw options.
	 * @return mixed False when a required option is missing or invalid,
	 *               otherwise the result of parent::parse_options().
	 */
	protected static function parse_options( $options ) {
		if ( ! isset( $options['source_site_url'] ) ) {
			_doing_it_wrong( __METHOD__, 'The source_site_url option is required.', '__WP_VERSION__' );
			return false;
		}
		$options['default_source_site_url'] = $options['source_site_url'];

		// The messages below name the exact option key that is checked so the
		// developer knows which option to provide.
		if ( ! isset( $options['local_markdown_assets_root'] ) ) {
			_doing_it_wrong( __METHOD__, 'The local_markdown_assets_root option is required.', '__WP_VERSION__' );
			return false;
		}
		if ( ! is_dir( $options['local_markdown_assets_root'] ) ) {
			_doing_it_wrong( __METHOD__, 'The local_markdown_assets_root option must point to a directory.', '__WP_VERSION__' );
			return false;
		}
		// Drop trailing slashes so the root can be safely joined with a
		// "/"-prefixed relative path in rewrite_attachment_url().
		$options['local_markdown_assets_root'] = rtrim( $options['local_markdown_assets_root'], '/' );

		return parent::parse_options( $options );
	}

	/**
	 * Rewrites URLs pointing at local Markdown assets into file:// URLs.
	 *
	 * When the `local_markdown_assets_url_prefix` option is set and $raw_url
	 * starts with it, the prefix is replaced with
	 * `file://` + `local_markdown_assets_root`. The (possibly rewritten) URL is
	 * then passed on to the parent implementation.
	 *
	 * @param string $raw_url      URL as found in the imported content.
	 * @param mixed  $context_path Forwarded unchanged to the parent.
	 * @return mixed Whatever parent::rewrite_attachment_url() returns.
	 */
	protected function rewrite_attachment_url( string $raw_url, $context_path = null ) {
		/**
		 * URLs starting with the configured prefix refer to local files –
		 * e.g. in Docusaurus docs that prefix is `@site`. Let's convert
		 * them to file:// URLs.
		 */
		if (
			isset( $this->options['local_markdown_assets_url_prefix'] ) &&
			str_starts_with( $raw_url, $this->options['local_markdown_assets_url_prefix'] )
		) {
			// @TODO: Source the file from the current input stream if we can.
			//        This would allow stream-importing zipped Markdown and WXR directory
			//        structures.
			//        Maybe for v1 we could just support importing them from ZIP files
			//        that are already downloaded and available in a local directory just
			//        to avoid additional data transfer and the hurdle with implementing
			//        multiple range requests.
			$relative_asset_path = substr( $raw_url, strlen( $this->options['local_markdown_assets_url_prefix'] ) );
			// Normalize to exactly one leading slash before joining with the root.
			$relative_asset_path = '/' . ltrim( $relative_asset_path, '/' );
			$raw_url             = (
				'file://' .
				$this->options['local_markdown_assets_root'] .
				$relative_asset_path
			);
		}

		return parent::rewrite_attachment_url( $raw_url, $context_path );
	}

	/**
	 * When processing Markdown, we'll download all the images
	 * referenced in the image tags.
	 *
	 * @TODO: Actually, should we?
	 * @TODO: How can we process the videos?
	 * @TODO: What other asset types are there?
	 *
	 * @param WP_Block_Markup_Url_Processor $p Processor positioned at a matched URL.
	 * @return bool True only when the URL sits in the `src` attribute of an IMG tag.
	 */
	protected function url_processor_matched_asset_url( WP_Block_Markup_Url_Processor $p ) {
		return (
			$p->get_tag() === 'IMG' &&
			$p->get_inspected_attribute_name() === 'src'
		);
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@
use League\CommonMark\Extension\Table\TableRow;
use League\CommonMark\Extension\Table\TableSection;


class WP_Markdown_To_Blocks {
class WP_Markdown_To_Blocks implements WP_Block_Markup_Converter {
const STATE_READY = 'STATE_READY';
const STATE_COMPLETE = 'STATE_COMPLETE';

Expand All @@ -40,19 +39,26 @@ public function __construct( $markdown ) {
$this->markdown = $markdown;
}

public function parse() {
public function convert() {
if ( self::STATE_READY !== $this->state ) {
return false;
}
$this->convert_markdown_to_blocks();
$this->block_markup = self::convert_blocks_to_markup( $this->parsed_blocks );
$this->block_markup = WP_Import_Utils::convert_blocks_to_markup( $this->parsed_blocks );
return true;
}

public function get_frontmatter() {
public function get_all_metadata() {
return $this->frontmatter;
}

/**
 * Looks up a single frontmatter entry by key.
 *
 * @param string $key Frontmatter key.
 * @return mixed|null The first element stored under $key in the frontmatter
 *                    array, or null when the key is absent.
 */
public function get_meta_value( $key ) {
	$has_key = array_key_exists( $key, $this->frontmatter );

	return $has_key ? $this->frontmatter[ $key ][0] : null;
}

public function get_block_markup() {
return $this->block_markup;
}
Expand All @@ -74,7 +80,11 @@ private function convert_markdown_to_blocks() {
$parser = new MarkdownParser( $environment );

$document = $parser->parse( $this->markdown );
$this->frontmatter = $document->data;
$this->frontmatter = [];
foreach( $document->data as $key => $value ) {
// Use an array as a value to comply with the WP_Block_Markup_Converter interface.
$this->frontmatter[ $key ] = [$value];
}

$walker = $document->walker();
while ( true ) {
Expand Down Expand Up @@ -163,7 +173,7 @@ private function convert_markdown_to_blocks() {
'content' => '<pre class="wp-block-code"><code>' . trim( str_replace( "\n", '<br>', htmlspecialchars( $node->getLiteral() ) ) ) . '</code></pre>',
)
);
if ( $node->getInfo() ) {
if ( method_exists( $node, 'getInfo' ) && $node->getInfo() ) {
$this->current_block->attrs['language'] = preg_replace( '/[ \t\r\n\f].*/', '', $node->getInfo() );
}
break;
Expand Down Expand Up @@ -339,35 +349,6 @@ private function convert_markdown_to_blocks() {
$this->parsed_blocks = $this->root_block->inner_blocks;
}

/**
 * Serializes a list of block objects into block markup.
 *
 * For every block this emits an opening `<!-- wp:name {attrs} -->` comment,
 * the block's `content` attribute, the markup of its inner blocks (via
 * recursion), and a closing `<!-- /wp:name -->` comment, each followed by
 * a newline.
 *
 * @param iterable $blocks Objects exposing `block_name`, `attrs` and `inner_blocks`.
 * @return string The concatenated block markup; an empty string for no blocks.
 */
private static function convert_blocks_to_markup( $blocks ) {
$block_markup = '';

foreach ( $blocks as $block ) {
// Start of block comment
// Begin with an empty HTML comment and let the tag processor fill in its text.
// NOTE(review): presumably done so the processor handles escaping of the
// comment body – confirm against WP_HTML_Tag_Processor.
$comment = '<!-- -->';
$p = new WP_HTML_Tag_Processor( $comment );
$p->next_token();
$attrs = $block->attrs;
// The "content" attribute is emitted as the block body, not as a JSON attribute.
$content = $block->attrs['content'] ?? '';
unset( $attrs['content'] );
$encoded_attrs = json_encode( $attrs );
// An empty attribute array encodes as "[]"; leave the attributes out in that case.
if ( $encoded_attrs === '[]' ) {
$encoded_attrs = '';
}
$p->set_modifiable_text( " wp:{$block->block_name} " . $encoded_attrs . ' ' );
$open_comment = $p->get_updated_html();

$block_markup .= $open_comment . "\n";
$block_markup .= $content . "\n";
// Nested blocks are serialized between the opening and closing comments.
$block_markup .= self::convert_blocks_to_markup( $block->inner_blocks );

// End of block comment
$block_markup .= "<!-- /wp:{$block->block_name} -->\n";
}

return $block_markup;
}

private function append_content( $content ) {
if ( ! isset( $this->current_block->attrs['content'] ) ) {
$this->current_block->attrs['content'] = '';
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?php

require_once __DIR__ . '/WP_Markdown_Importer.php';
require_once __DIR__ . '/WP_Markdown_To_Blocks.php';

require_once __DIR__ . '/../vendor/autoload.php';
14 changes: 0 additions & 14 deletions packages/playground/data-liberation/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,6 @@

require_once __DIR__ . '/src/utf8_decoder.php';

/**
* Require conditionally – these files are missing from the data-liberation-core.phar
* to reduce the bundle size (we'd need to include a large markdown parser and its
* dependencies, too).
*
* @TODO: Build a separate "data-liberation-markdown" phar file plugin with the Markdown
* importing functionality.
*/
if ( file_exists( __DIR__ . '/src/markdown-api/WP_Markdown_To_Blocks.php' ) ) {
require_once __DIR__ . '/src/markdown-api/WP_Markdown_To_Blocks.php';
require_once __DIR__ . '/src/markdown-api/WP_Markdown_Directory_Tree_Reader.php';
require_once __DIR__ . '/src/markdown-api/WP_Markdown_HTML_Processor.php';
}

// When running in Playground, the composer autoloader script sees CLI SAPI and
// tries to use the STDERR, STDIN, and STDOUT constants.
// @TODO: Don't use the "cli" SAPI string and don't allow composer to run platform checks.
Expand Down
Loading

0 comments on commit 869f6bd

Please sign in to comment.