Skip to content

Commit 4438d72

Browse files
authored
[Data Liberation] Add WXR import CLI script (#2012)
1 parent 349a179 commit 4438d72

File tree

12 files changed

+175
-47
lines changed

12 files changed

+175
-47
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"$schema": "../../../blueprints/public/blueprint-schema.json",
3+
"constants": {
4+
"WP_DEBUG": true,
5+
"WP_DEBUG_LOG": true
6+
},
7+
"login": true,
8+
"steps": [
9+
{
10+
"step": "activatePlugin",
11+
"pluginPath": "data-liberation/plugin.php"
12+
},
13+
{
14+
"step": "runPHP",
15+
"code": "<?php require_once 'wordpress/wp-load.php';\n$upload_dir = wp_upload_dir();\nforeach ( wp_visit_file_tree( $upload_dir['basedir'] . '/import-wxr' ) as $event ) {\nforeach ( $event->files as $file ) {\nif ( $file->isFile() && pathinfo( $file->getPathname(), PATHINFO_EXTENSION ) === 'xml' ) {\ndata_liberation_import( $file->getPathname() );\n}\n}\n};"
16+
}
17+
]
18+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#!/bin/bash
#
# A script that accepts a folder and imports all WXR files into a WordPress site.
#
# The folder is mounted at /wordpress/wp-content/uploads/import-wxr and the
# blueprint-import-wxr.json blueprint is handed to the CLI `server` command.
#
# NOTE: the CLI entry point, the plugin mount and the blueprint are all
# referenced with relative paths, so run this script from the directory it
# lives in.
#
# Usage:
#   ./import-wxr.sh [-h|--help] <folder-name>
#

# Display help message
show_help() {
	echo "Usage: $0 [-h|--help] <folder-name>"
	echo "Options:"
	echo " -h, --help Show this help message"
}

# Check if no arguments were provided. If so, display help message
if [ $# -eq 0 ]; then
	show_help
	exit 1
fi

# Parse leading options. Parsing stops at the first argument that does not
# start with "-" or at an explicit "--". Unknown options are rejected instead
# of being silently dropped.
while [[ "$1" =~ ^- && ! "$1" == "--" ]]; do
	case $1 in
		-h | --help )
			show_help
			exit 0
			;;
		* )
			echo "Error: Unknown option '$1'"
			show_help
			exit 1
			;;
	esac
	shift
done
if [[ "$1" == '--' ]]; then shift; fi

# Check if a folder is provided. If not, display error message.
if [ -z "$1" ]; then
	echo "Error: No folder provided"
	show_help
	exit 1
fi

# Check if the folder exists before starting the server.
if [ -d "$1" ]; then
	# "$1" is quoted so folder names containing spaces or glob characters
	# reach the CLI as a single --mount argument.
	bun ../../../cli/src/cli.ts \
		server \
		--mount=../../:/wordpress/wp-content/plugins/data-liberation \
		--mount="$1":/wordpress/wp-content/uploads/import-wxr \
		--blueprint=./blueprint-import-wxr.json
else
	echo "Error: Folder '$1' does not exist"
	exit 1
fi

packages/playground/data-liberation/bootstrap.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
require_once __DIR__ . '/src/import/WP_File_Visitor_Event.php';
5353
require_once __DIR__ . '/src/import/WP_Imported_Entity.php';
5454
require_once __DIR__ . '/src/import/WP_Attachment_Downloader.php';
55+
require_once __DIR__ . '/src/import/WP_Attachment_Downloader_Event.php';
5556
require_once __DIR__ . '/src/import/WP_Stream_Importer.php';
5657
require_once __DIR__ . '/src/import/WP_Markdown_Importer.php';
5758

packages/playground/data-liberation/phpunit.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
<file>tests/URLParserWHATWGComplianceTests.php</file>
1111
<file>tests/WPXMLProcessorTests.php</file>
1212
<file>tests/UrldecodeNTests.php</file>
13+
<file>tests/WPStreamImporterTests.php</file>
1314
</testsuite>
1415
</testsuites>
1516
</phpunit>

packages/playground/data-liberation/plugin.php

Lines changed: 15 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -25,39 +25,23 @@
2525
return [];
2626
});
2727

28-
/**
29-
* Development debug code to run the import manually.
30-
* @TODO: Remove this in favor of a CLI command.
31-
*/
3228
add_action('init', function() {
33-
return;
34-
$wxr_path = __DIR__ . '/tests/fixtures/wxr-simple.xml';
35-
$importer = WP_Stream_Importer::create_for_wxr_file(
36-
$wxr_path
37-
);
38-
while($importer->next_step()) {
39-
// ...
29+
if ( defined( 'WP_CLI' ) && WP_CLI ) {
30+
/**
31+
* Import a WXR file.
32+
*
33+
* <file>
34+
* : The WXR file to import.
35+
*/
36+
$command = function ( $args, $assoc_args ) {
37+
$file = $args[0];
38+
data_liberation_import( $file );
39+
};
40+
41+
// Register the WP-CLI import command.
42+
// Example usage: wp data-liberation /path/to/file.xml
43+
WP_CLI::add_command( 'data-liberation', $command );
4044
}
41-
return;
42-
$importer->next_step();
43-
$paused_importer_state = $importer->get_reentrancy_cursor();
44-
45-
echo "\n\n";
46-
echo "moving to importer2\n";
47-
echo "\n\n";
48-
49-
$importer2 = WP_Stream_Importer::create_for_wxr_file(
50-
$wxr_path,
51-
array(),
52-
$paused_importer_state
53-
);
54-
$importer2->next_step();
55-
$importer2->next_step();
56-
$importer2->next_step();
57-
// $importer2->next_step();
58-
// var_dump($importer2);
59-
60-
die("YAY");
6145
});
6246

6347
// Register admin menu

packages/playground/data-liberation/project.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,16 @@
5050
],
5151
"parallel": false
5252
}
53+
},
54+
"test:wp-phpunit": {
55+
"executor": "nx:run-commands",
56+
"options": {
57+
"cwd": "packages/playground/data-liberation",
58+
"commands": [
59+
"bun ../cli/src/cli.ts run-blueprint --quiet --mount=./:/wordpress/wp-content/plugins/data-liberation --blueprint=./tests/import/blueprint-import.json"
60+
],
61+
"parallel": false
62+
}
5363
}
5464
}
5565
}

packages/playground/data-liberation/src/functions.php

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,3 +191,37 @@ function wp_visit_file_tree( $dir ) {
191191
new SplFileInfo( $dir )
192192
);
193193
}
194+
195+
/**
 * Import a WXR file. Used by the CLI.
 *
 * Creates a WP_Stream_Importer for the file and calls next_step() on it
 * until that returns a falsy value. When the WP_CLI constant is defined and
 * truthy, progress is reported through WP_CLI::line() and WP_CLI::success();
 * otherwise nothing is printed.
 *
 * @param string $path The path to the WXR file.
 * @return bool False when no importer could be created for $path, true once
 *              the import loop has finished.
 */
function data_liberation_import( $path ): bool {
	$importer = WP_Stream_Importer::create_for_wxr_file( $path );

	if ( ! $importer ) {
		return false;
	}

	$is_wp_cli = defined( 'WP_CLI' ) && WP_CLI;

	if ( $is_wp_cli ) {
		WP_CLI::line( "Importing from {$path}" );
	}

	while ( $importer->next_step() ) {
		// Output the current stage if running in WP-CLI.
		if ( $is_wp_cli ) {
			$current_stage = $importer->get_current_stage();
			WP_CLI::line( "Import: stage {$current_stage}" );
		}
	}

	if ( $is_wp_cli ) {
		WP_CLI::success( 'Import ended' );
	}

	return true;
}

packages/playground/data-liberation/src/import/WP_Attachment_Downloader.php

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public function has_pending_requests() {
2626
public function enqueue_if_not_exists( $url, $output_path ) {
2727
$this->enqueued_resource_id = null;
2828

29-
$output_path = $this->output_root . '/' . ltrim( $output_path, '/' );
29+
$output_path = ltrim( $output_path, '/' );
3030
if ( file_exists( $output_path ) ) {
3131
// @TODO: Reconsider the return value. The enqueuing operation failed,
3232
// but overall already having a file seems like a success.
@@ -99,13 +99,14 @@ public function poll() {
9999
if ( ! $this->client->await_next_event() ) {
100100
return false;
101101
}
102-
$event = $this->client->get_event();
103-
$request = $this->client->get_request();
104-
// The request object we get from the client may be a redirect.
105-
// Let's keep referring to the original request.
106-
$original_request_id = $request->original_request()->id;
107102

108-
while ( true ) {
103+
do {
104+
$event = $this->client->get_event();
105+
$request = $this->client->get_request();
106+
// The request object we get from the client may be a redirect.
107+
// Let's keep referring to the original request.
108+
$original_request_id = $this->client->get_request()->original_request()->id;
109+
109110
switch ( $event ) {
110111
case Client::EVENT_GOT_HEADERS:
111112
if ( ! $request->is_redirected() ) {
@@ -129,7 +130,7 @@ public function poll() {
129130
fclose( $this->fps[ $original_request_id ] );
130131
}
131132
if ( isset( $this->output_paths[ $original_request_id ] ) ) {
132-
$partial_file = $this->output_root . '/' . $this->output_paths[ $original_request_id ] . '.partial';
133+
$partial_file = $this->output_paths[ $original_request_id ] . '.partial';
133134
if ( file_exists( $partial_file ) ) {
134135
unlink( $partial_file );
135136
}
@@ -162,7 +163,7 @@ public function poll() {
162163
}
163164
break;
164165
}
165-
}
166+
} while ( $this->client->await_next_event() );
166167

167168
return true;
168169
}

packages/playground/data-liberation/src/import/WP_Stream_Importer.php

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,15 @@ public function next_step() {
185185
}
186186
}
187187

188+
/**
189+
* Get the stage this importer is currently in, i.e. the value of $this->stage.
190+
*
191+
* @return string
192+
*/
193+
public function get_current_stage() {
194+
return $this->stage;
195+
}
196+
188197
/**
189198
* Advance the cursor to the oldest finished download. For example:
190199
*
@@ -400,7 +409,7 @@ private function enqueue_attachment_download( string $raw_url, $context_path = n
400409

401410
$enqueued = $this->downloader->enqueue_if_not_exists( $url, $output_path );
402411
if ( $enqueued ) {
403-
$resource_id = $this->downloader->get_last_enqueued_resource_id();
412+
$resource_id = $this->downloader->get_enqueued_resource_id();
404413
$entity_cursor = $this->entity_iterator->get_reentrancy_cursor();
405414
$this->active_downloads[ $entity_cursor ][ $resource_id ] = true;
406415
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<?php

use PHPUnit\Framework\TestCase;

/**
 * Tests for importing WXR files through data_liberation_import().
 */
class WPStreamImporterTests extends TestCase {

	/**
	 * Skip every test in this class unless SERVER_SOFTWARE reports PHP.wasm,
	 * i.e. unless the suite is running inside Playground.
	 */
	protected function setUp(): void {
		parent::setUp();

		$server_software = $_SERVER['SERVER_SOFTWARE'] ?? null;
		if ( 'PHP.wasm' !== $server_software ) {
			$this->markTestSkipped( 'Test only runs in Playground' );
		}
	}

	/**
	 * Importing the small WXR fixture must report success.
	 */
	public function test_import_simple_wxr(): void {
		$import = data_liberation_import( __DIR__ . '/wxr/small-export.xml' );

		$this->assertTrue( $import );
	}
}

0 commit comments

Comments
 (0)