Skip to content

Commit

Permalink
[Data Liberation] Build markdown importer as phar (#2094)
Browse files Browse the repository at this point in the history
Builds data-liberation-markdown.phar.gz (200KB) to enable downloading the
Markdown importer only when needed instead of on every page load.

A part of:

* #2080
* #1894

 ## Testing instructions

Run `nx build playground-data-liberation-markdown`, confirm it finished
without errors. A smoke test of the built phar file is included in the
build command.
  • Loading branch information
adamziel authored Dec 17, 2024
1 parent 869f6bd commit b9f5edb
Show file tree
Hide file tree
Showing 20 changed files with 251 additions and 85 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?php

use KevinGH\Box\Compactor\Compactor;

class DataLiberationBoxCompactor implements Compactor
{
    /**
     * Decides whether a file keeps its contents.
     *
     * Returns `$contents` unchanged for files that pass every check below and
     * an empty string for all other files.
     *
     * {@inheritdoc}
     */
    public function compact(string $file, string $contents): string
    {
        // Only files ending in .php, .json or .lock keep their contents.
        $hasKeptExtension = preg_match('/\.(php|json|lock)$/', $file);
        if (!$hasKeptExtension) {
            return '';
        }

        // Any path containing one of these fragments is emptied.
        $excludedPathFragments = [
            'platform_check.php',
            '/tests/',
            '/.git/',
            '/.github/',
            '/bin/',
        ];
        foreach ($excludedPathFragments as $fragment) {
            if (str_contains($file, $fragment)) {
                return '';
            }
        }

        // Files carrying the Composer requirements message are emptied as well.
        return str_contains($contents, 'Your Composer dependencies require ')
            ? ''
            : $contents;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?php

/**
 * Runs the `box` script with the DataLiberationBoxCompactor class loaded first.
 *
 * The BOX_BASE_PATH environment variable must hold the path of the `box`
 * executable. Its directory is used to locate both `../autoload.php` and the
 * `box` script itself.
 */
$box_executable_path = getenv('BOX_BASE_PATH');
if (false === $box_executable_path || '' === $box_executable_path) {
    // Without this guard the failure surfaces as a confusing
    // "failed to open /../autoload.php" error further down.
    fwrite(STDERR, "The BOX_BASE_PATH environment variable must point to the box executable.\n");
    exit(1);
}

$box_base_path = dirname($box_executable_path);
require_once $box_base_path . '/../autoload.php';
require_once __DIR__ . '/DataLiberationBoxCompactor.php';
require_once $box_base_path . '/box';
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?php
// Loads the bootstrap.php file located two directories above this one.
require_once __DIR__ . '/../../bootstrap.php';
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?php

// Smoke test: loads both built archives and creates a Markdown importer.
require_once __DIR__ . '/../../../data-liberation/dist/data-liberation-core.phar.gz';
require_once __DIR__ . '/../../dist/data-liberation-markdown.phar';

/**
 * None of this will actually try to parse a file or import
 * any data. We're just making sure the importer can
 * be created without throwing an exception.
 */
$markdown_root = __DIR__ . '/markdown-test-data';
$importer      = WP_Markdown_Importer::create_for_markdown_directory(
    $markdown_root,
    array(
        'source_site_url'                  => 'file://' . $markdown_root,
        'local_markdown_assets_root'       => $markdown_root,
        'local_markdown_assets_url_prefix' => '@site/',
    ),
    // No previous import is being resumed here, so there is no cursor to pass.
    null
);

// A false/null result means nothing was created, so don't report success.
if ( false === $importer || null === $importer ) {
    fwrite( STDERR, "Markdown importer could not be created.\n" );
    exit( 1 );
}

echo 'Markdown importer created!';

Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?php

/**
 * Post-processes a Box-built .phar archive in place.
 *
 * Usage: php -d phar.readonly=0 <this script> <path-to-phar>
 *
 * 1. Gives the Box-generated autoloader class names a suffix.
 * 2. Truncates the platform/requirements check files.
 */
if (!isset($argv[1]) || '' === $argv[1]) {
    fwrite(STDERR, 'Usage: php -d phar.readonly=0 ' . basename(__FILE__) . " <path-to-phar>\n");
    exit(1);
}

$pharPath = $argv[1];
$phar = new Phar($pharPath);
$phar->startBuffering();

/**
 * Box includes an autoloader with a fixed name in every build.
 * However, we want to load two .phar files built with Box, not
 * one. Unfortunately this yields an error:
 *
 * Cannot declare class ComposerAutoloaderInitHumbugBox451
 *
 * Therefore, we're giving all the HumbugBox classes a suffix
 * derived from the path of this script.
 */
$autoloadSuffix = substr(md5(__FILE__), 0, 8);
$classNameFragments = [
    'InitHumbugBox',
];
$pharRootPrefix = $phar->getPath() . '/';

// Collect the rewritten files first and write them once the traversal is
// over, so the archive is never modified while it is being iterated.
$rewrittenFiles = [];
foreach (new RecursiveIteratorIterator($phar) as $entry) {
    if (!$entry->isFile()) {
        continue;
    }

    // Turn "phar://<archive path>/<relative path>" into "<relative path>".
    $relativePath = str_replace('phar://', '', $entry->getPathname());
    $relativePath = str_replace($pharRootPrefix, '', $relativePath);

    $contents = $entry->getContent();
    $updatedContents = $contents;
    foreach ($classNameFragments as $fragment) {
        $updatedContents = str_replace($fragment, $fragment . $autoloadSuffix, $updatedContents);
    }

    if ($updatedContents !== $contents) {
        $rewrittenFiles[$relativePath] = $updatedContents;
    }
}

foreach ($rewrittenFiles as $relativePath => $updatedContents) {
    // PHP turns numeric-looking array keys into integers; cast back to a path string.
    $phar[(string) $relativePath] = $updatedContents;
}

/**
 * Box, very annoyingly, force-adds a platform_check.php file
 * into the final built .phar archive. The vendor libraries
 * do work with a PHP version lower than the 8.1 enforced by that
 * platform_check.php file, so let's just truncate it.
 */
$phar['vendor/composer/platform_check.php'] = '';
$phar['.box/bin/check-requirements.php'] = '';
$phar->stopBuffering();

12 changes: 12 additions & 0 deletions packages/playground/data-liberation-markdown/box.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"$schema": "https://raw.githubusercontent.com/box-project/box/refs/heads/main/res/schema.json",
"main": "src/bootstrap.php",
"output": "dist/data-liberation-markdown.phar",
"force-autodiscovery": true,
"compactors": [
"KevinGH\\Box\\Compactor\\Php",
"DataLiberationBoxCompactor"
],
"annotations": false,
"directories": ["src/", "vendor/"]
}
1 change: 1 addition & 0 deletions packages/playground/data-liberation-markdown/composer.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"name": "wordpress/data-liberation-markdown",
"name": "wordpress/data-liberation-markdown",
"prefer-stable": true,
"require": {
Expand Down
Empty file.
Binary file not shown.
26 changes: 26 additions & 0 deletions packages/playground/data-liberation-markdown/phar-build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash

# Builds the standalone dist/data-liberation-markdown.phar.gz file so the
# Markdown importer can be downloaded only when it is needed.
#
# This is a temporary measure until we have a canonical way of distributing,
# versioning, and using the Data Liberation modules and their dependencies.
# Possible solutions might include composer packages, WordPress plugins, or
# tree-shaken zip files with each module and its composer deps.

set -e
echo "Building data liberation plugin"
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
DATA_LIBERATION_DIR="$SCRIPT_DIR"
BUILD_DIR="$DATA_LIBERATION_DIR/bin/build"
DIST_DIR="$DATA_LIBERATION_DIR/dist"

# Start from an empty dist directory; it is fine if there is nothing to remove.
rm "$DIST_DIR"/* > /dev/null 2>&1 || true

# Resolve the on-disk box executable. `type -P` searches PATH only, so aliases
# and shell functions named "box" are ignored and at most one path is returned.
BOX_BASE_PATH="$(type -P box || true)"
if [ -z "$BOX_BASE_PATH" ]; then
    echo "Could not find the box executable in PATH." >&2
    exit 1
fi
export BOX_BASE_PATH

# Compile the archive through the wrapper that preloads the custom compactor.
php "$BUILD_DIR/box.php" compile -d "$DATA_LIBERATION_DIR" -c "$DATA_LIBERATION_DIR/box.json"

# Post-process the archive; writing to a phar requires phar.readonly=0.
php -d 'phar.readonly=0' "$BUILD_DIR/truncate-composer-checks.php" "$DIST_DIR/data-liberation-markdown.phar"

# Smoke test the built archive with the local PHP and with php-wasm (PHP 8.0).
php "$BUILD_DIR/smoke-test.php"
PHP=8.0 bun "$DATA_LIBERATION_DIR/../../php-wasm/cli/src/main.ts" "$BUILD_DIR/smoke-test.php"

cd "$DIST_DIR"
gzip data-liberation-markdown.phar
ls -sgh "$DIST_DIR"
39 changes: 39 additions & 0 deletions packages/playground/data-liberation-markdown/phpcs.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<ruleset name="WordPressStandard">
<description>PHP 7.2 compatibility.</description>
<config name="testVersion" value="7.2"/>
<exclude-pattern>vendor/*</exclude-pattern>
<rule ref="PHPCompatibility">
<exclude name="PHPCompatibility.Keywords.ForbiddenNamesAsDeclared"/>
</rule>
<rule ref="WordPress-Core">
<exclude name="Generic.Commenting.DocComment.MissingShort"/>
<exclude name="Generic.PHP.DiscourageGoto.Found"/>
<exclude name="Generic.CodeAnalysis.EmptyStatement.DetectedIf"/>
<!-- Unused arguments are necessary when inheriting from classes and overriding methods. -->
<exclude name="Generic.CodeAnalysis.UnusedFunctionParameter.Found"/>
<exclude name="Squiz.PHP.NonExecutableCode.Unreachable"/>
<exclude name="Squiz.Commenting.BlockComment.CloserSameLine"/>
<exclude name="Squiz.Commenting.ClassComment.Missing"/>
<exclude name="Squiz.Commenting.FileComment.WrongStyle"/>
<exclude name="Squiz.Commenting.FileComment.Missing"/>
<exclude name="Squiz.Commenting.FunctionComment.Missing"/>
<exclude name="Squiz.Commenting.FunctionComment.MissingParamTag"/>
<exclude name="Squiz.Commenting.FunctionComment.MissingParamType"/>
<exclude name="Squiz.Commenting.FunctionComment.MissingParamComment"/>
<exclude name="Squiz.Commenting.VariableComment.Missing"/>
<exclude name="Squiz.PHP.CommentedOutCode.Found"/>
<!-- "Parameter comment must end with a full stop" is such a pebble in the shoe. -->
<exclude name="Squiz.Commenting.FunctionComment.ParamCommentFullStop"/>
<exclude name="Squiz.PHP.DisallowSizeFunctionsInLoops.Found"/>
<!-- Aligning the 1500 lines of public_suffix_list.php adds a lot of unnecessary noise and then
the actual indentation is not even correct because the rule seems to count bytes, not printable
UTF-8 characters. -->
<exclude name="WordPress.Arrays.MultipleStatementAlignment.DoubleArrowNotAligned"/>
<exclude name="WordPress.Files.FileName.InvalidClassFileName"/>
<exclude name="WordPress.Files.FileName.NotHyphenatedLowercase"/>
<exclude name="WordPress.PHP.YodaConditions.NotYoda"/>
<exclude name="WordPress.Security.EscapeOutput.OutputNotEscaped"/>
<exclude name="WordPress.WP.AlternativeFunctions"/>
<exclude name="WordPress.WP.AlternativeFunctions.file_system_operations_fclose"/>
</rule>
</ruleset>
45 changes: 45 additions & 0 deletions packages/playground/data-liberation-markdown/project.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"name": "playground-data-liberation-markdown",
"$schema": "../../../node_modules/nx/schemas/project-schema.json",
"sourceRoot": "packages/playground/data-liberation-markdown",
"projectType": "library",
"targets": {
"install": {
"executor": "nx:run-commands",
"options": {
"cwd": "packages/playground/data-liberation-markdown",
"commands": ["composer install"],
"parallel": false
}
},
"build:phar": {
"executor": "nx:run-commands",
"options": {
"cwd": "packages/playground/data-liberation-markdown",
"commands": ["bash ./phar-build.sh"],
"parallel": false
},
"dependsOn": ["playground-data-liberation:build:phar"]
},
"lint:php": {
"executor": "nx:run-commands",
"options": {
"cwd": "packages/playground/data-liberation-markdown",
"commands": [
"../data-liberation/vendor/bin/phpcs --standard=./phpcs.xml -s ./src ./*.php"
],
"parallel": false
}
},
"lint:php:fix": {
"executor": "nx:run-commands",
"options": {
"cwd": "packages/playground/data-liberation-markdown",
"commands": [
"../data-liberation/vendor/bin/phpcbf --standard=./phpcs.xml ./src"
],
"parallel": false
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ public static function create_for_markdown_directory( $markdown_directory, $opti
return WP_Markdown_Importer::create(
function ( $cursor = null ) use ( $markdown_directory ) {
// @TODO: Handle $cursor
return new WP_Directory_Tree_Entity_Reader(
return new WP_Directory_Tree_Entity_Reader(
new WP_Filesystem(),
array (
array(
'root_dir' => $markdown_directory,
'first_post_id' => 1,
'allowed_extensions' => array( 'md' ),
'index_file_patterns' => array( '#^index\.md$#' ),
'markup_converter_factory' => function( $content ) {
'markup_converter_factory' => function ( $content ) {
return new WP_Markdown_To_Blocks( $content );
},
)
Expand All @@ -42,7 +42,7 @@ protected static function parse_options( $options ) {
return false;
}
$options['local_markdown_assets_root'] = rtrim( $options['local_markdown_assets_root'], '/' );

return parent::parse_options( $options );
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,10 @@ private function convert_markdown_to_blocks() {
$parser = new MarkdownParser( $environment );

$document = $parser->parse( $this->markdown );
$this->frontmatter = [];
foreach( $document->data as $key => $value ) {
$this->frontmatter = array();
foreach ( $document->data as $key => $value ) {
// Use an array as a value to comply with the WP_Block_Markup_Converter interface.
$this->frontmatter[ $key ] = [$value];
$this->frontmatter[ $key ] = array( $value );
}

$walker = $document->walker();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
require_once __DIR__ . '/WP_Markdown_Importer.php';
require_once __DIR__ . '/WP_Markdown_To_Blocks.php';

require_once __DIR__ . '/../vendor/autoload.php';
require_once __DIR__ . '/../vendor/autoload.php';
2 changes: 1 addition & 1 deletion packages/playground/data-liberation/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
require_once __DIR__ . '/src/wordpress-core-html-api/html5-named-character-references.php';
}

require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Converter.php';
require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Processor.php';
require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Url_Processor.php';
require_once __DIR__ . '/src/block-markup/WP_URL_In_Text_Processor.php';
Expand All @@ -63,7 +64,6 @@
require_once __DIR__ . '/src/import/WP_Stream_Importer.php';
require_once __DIR__ . '/src/import/WP_Entity_Iterator_Chain.php';
require_once __DIR__ . '/src/import/WP_Retry_Frontloading_Iterator.php';
require_once __DIR__ . '/src/import/WP_Markdown_Importer.php';

require_once __DIR__ . '/src/utf8_decoder.php';

Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?php

/**
 * Contract for block markup converters.
 *
 * NOTE(review): the method descriptions below are inferred from the method
 * names only; confirm them against the implementing classes.
 */
interface WP_Block_Markup_Converter {
/**
 * Presumably performs the conversion; whether it must run before the
 * getters below are called is not visible here. TODO confirm.
 */
public function convert();
/**
 * Presumably returns the resulting block markup. TODO confirm return type.
 */
public function get_block_markup();
/**
 * Presumably returns all metadata found in the source document.
 * NOTE(review): WP_Markdown_To_Blocks wraps each frontmatter value in an
 * array to comply with this interface, so values are likely arrays. Confirm.
 */
public function get_all_metadata();
/**
 * Presumably returns the metadata value stored under $key. TODO confirm
 * what is returned when the key is missing.
 */
public function get_meta_value( $key );
}

This file was deleted.

0 comments on commit b9f5edb

Please sign in to comment.