Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Data Liberation] Topological sorter, entities remapping and add missing imports #2030

Draft
wants to merge 61 commits into
base: trunk
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
51232f1
First topological sorter draft
zaerl Nov 26, 2024
8496a1d
Move topological sort to separate function
zaerl Nov 26, 2024
2114bb7
Fix: missing importer initialization
zaerl Nov 26, 2024
0610abc
Add categories to the sorter
zaerl Nov 26, 2024
fa85f9c
Add new in-place sort
zaerl Nov 27, 2024
4667b93
Add memory-free functions
zaerl Nov 27, 2024
3060f11
Replace bin script with wp-cli command
zaerl Nov 27, 2024
f02b748
Add special cases
zaerl Nov 27, 2024
602f6db
Change the sorting algorithm to qsort
zaerl Nov 28, 2024
ed8ecc5
Add a TODO
zaerl Nov 28, 2024
2bb62b9
Update names
zaerl Nov 29, 2024
4596088
Fix: change variable name
zaerl Nov 29, 2024
9ce48f8
Add support for categories
zaerl Nov 29, 2024
7c95d2f
Fix: remove double slashes
zaerl Dec 4, 2024
7d45916
Add test check
zaerl Dec 4, 2024
435c37b
Add new hooks
zaerl Dec 4, 2024
4a2e746
Add new topo sorting query
zaerl Dec 4, 2024
6972c9a
Remove unused check
zaerl Dec 4, 2024
dbc4369
Temporary disable test
zaerl Dec 4, 2024
40c7db9
Remove debug code
zaerl Dec 4, 2024
7606e1a
Remove rebase artifacts
zaerl Dec 4, 2024
f4aa9fb
Change to new function signature
zaerl Dec 6, 2024
b1bbf5a
Add support for count
zaerl Dec 6, 2024
934fef4
Add session to CLI
zaerl Dec 6, 2024
c309f3b
Add start session
zaerl Dec 6, 2024
d2479f2
Add support for sessions
zaerl Dec 9, 2024
db96c3e
Add categories check
zaerl Dec 9, 2024
c719941
Fix: wrong name
zaerl Dec 9, 2024
e55bd57
Partial tests rework
zaerl Dec 9, 2024
5ec0f3a
Add comments test
zaerl Dec 10, 2024
623c571
New sorter indexing
zaerl Dec 11, 2024
5dedad0
Fix: missing key
zaerl Dec 11, 2024
b61d9a5
Remove useless code
zaerl Dec 11, 2024
a9f3220
Remove SQLite case
zaerl Dec 11, 2024
efcf1df
Move plugin methods outside class
zaerl Dec 11, 2024
5ee1e95
Create Playground base test class
zaerl Dec 11, 2024
431522c
Fix: wrong keys
zaerl Dec 11, 2024
ad9e1c1
Add core postmeta_no_cdata test
zaerl Dec 11, 2024
3b7aa6d
Add core importer tests
zaerl Dec 11, 2024
69e507a
Add new core importer tests
zaerl Dec 11, 2024
3bd2f16
Update WXR to last core importer
zaerl Dec 11, 2024
ad1bb52
Add support for PHPUnit filters
zaerl Dec 11, 2024
8c4f3f9
Remove old test
zaerl Dec 11, 2024
a5871cc
Fix: remove debug code
zaerl Dec 11, 2024
2b00dde
Fix: wrong check
zaerl Dec 11, 2024
a47d4a3
Add new unit tests and remove old one
zaerl Dec 11, 2024
84ebb2e
Add support for term meta
zaerl Dec 12, 2024
c02882c
Add comment
zaerl Dec 12, 2024
490f037
Rename "elements" to "entities" to match name convention
zaerl Dec 12, 2024
9970f77
Remove filters and actions and move mapping to WP_Entity_Importer
zaerl Dec 12, 2024
bb43fd8
Fix: remove NOT NULL
zaerl Dec 13, 2024
3d6ae29
Add post terms import
zaerl Dec 17, 2024
d286e6b
Fix: use slug instead of the description for categories
zaerl Dec 17, 2024
247f967
Add new unit tests
zaerl Dec 17, 2024
7047113
Fix: remove debug code
zaerl Dec 17, 2024
4de8b16
Add a set_session method
zaerl Dec 18, 2024
8c71591
Add support for sessions
zaerl Dec 18, 2024
1cc22ea
Fix: serialized term meta
zaerl Dec 18, 2024
69606a4
Fix: missing brace
zaerl Dec 18, 2024
1872705
Remove "count" parameter
zaerl Dec 18, 2024
2eafb34
Update blueprints library
zaerl Dec 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
"pluginPath": "data-liberation/plugin.php"
},
{
"step": "runPHP",
"code": "<?php require_once 'wordpress/wp-load.php';\n$upload_dir = wp_upload_dir();\nforeach ( wp_visit_file_tree( $upload_dir['basedir'] . '/import-wxr' ) as $event ) {\nforeach ( $event->files as $file ) {\nif ( $file->isFile() && pathinfo( $file->getPathname(), PATHINFO_EXTENSION ) === 'xml' ) {\ndata_liberation_import( $file->getPathname() );\n}\n}\n};"
"step": "wp-cli",
"command": "wp data-liberation import /wordpress/wp-content/uploads/import-wxr"
}
]
}
2 changes: 2 additions & 0 deletions packages/playground/data-liberation/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@
require_once __DIR__ . '/src/import/WP_Stream_Importer.php';
require_once __DIR__ . '/src/import/WP_Entity_Iterator_Chain.php';
require_once __DIR__ . '/src/import/WP_Retry_Frontloading_Iterator.php';
require_once __DIR__ . '/src/import/WP_Logger.php';
require_once __DIR__ . '/src/import/WP_Topological_Sorter.php';

require_once __DIR__ . '/src/utf8_decoder.php';

Expand Down
1 change: 1 addition & 0 deletions packages/playground/data-liberation/phpunit.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
<file>tests/WPXMLProcessorTests.php</file>
<file>tests/UrldecodeNTests.php</file>
<file>tests/WPStreamImporterTests.php</file>
<file>tests/WPTopologicalSorterTests.php</file>
</testsuite>
</testsuites>
</phpunit>
83 changes: 51 additions & 32 deletions packages/playground/data-liberation/plugin.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,40 +39,59 @@ function () {
}
);

add_action(
'init',
function () {
if ( defined( 'WP_CLI' ) && WP_CLI ) {
/**
* Import a WXR file.
*
* <file>
* : The WXR file to import.
*/
$command = function ( $args, $assoc_args ) {
$file = $args[0];
data_liberation_import( $file );
};

// Register the WP-CLI import command.
// Example usage: wp data-liberation /path/to/file.xml
WP_CLI::add_command( 'data-liberation', $command );
}
function data_liberation_init() {
if ( defined( 'WP_CLI' ) && WP_CLI ) {
require_once __DIR__ . '/src/cli/WP_Import_Command.php';

register_post_status(
'error',
array(
'label' => _x( 'Error', 'post' ), // Label name
'public' => false,
'exclude_from_search' => false,
'show_in_admin_all_list' => false,
'show_in_admin_status_list' => false,
// translators: %s is the number of errors
'label_count' => _n_noop( 'Error <span class="count">(%s)</span>', 'Error <span class="count">(%s)</span>' ),
)
);
// Register the WP-CLI import command.
WP_CLI::add_command( 'data-liberation', WP_Import_Command::class );
}
);

register_post_status(
'error',
array(
'label' => _x( 'Error', 'post' ), // Label name
'public' => false,
'exclude_from_search' => false,
'show_in_admin_all_list' => false,
'show_in_admin_status_list' => false,
// translators: %s is the number of errors
'label_count' => _n_noop( 'Error <span class="count">(%s)</span>', 'Error <span class="count">(%s)</span>' ),
)
);
}

add_action( 'init', 'data_liberation_init' );

/**
 * Activation routine for the Data Liberation plugin.
 *
 * Runs the topological sorter's own activation step and then records the
 * sorter's DB version in the options table. Per the original inline note,
 * the sorter activation is what creates its tables and options.
 *
 * @return void
 */
function data_liberation_activate() {
	WP_Topological_Sorter::activate();

	// Persist the schema version that was just activated.
	$version_option = WP_Topological_Sorter::OPTION_NAME;
	$schema_version = WP_Topological_Sorter::DB_VERSION;

	update_option( $version_option, $schema_version );
}

// Run when the plugin is activated.
register_activation_hook( __FILE__, 'data_liberation_activate' );

/**
 * Deactivation routine for the Data Liberation plugin.
 *
 * Delegates to the topological sorter's deactivation step. Cleanup of
 * import sessions is not implemented yet (see the TODO below).
 *
 * @return void
 */
function data_liberation_deactivate() {
// Deactivate the topological sorter. Flush away all data.
// NOTE(review): the "flush" behavior lives in WP_Topological_Sorter::deactivate(),
// which is not visible from this file — confirm what it actually removes.
WP_Topological_Sorter::deactivate();

// @TODO: Cancel any active import sessions and cleanup other data.
}

// Run when the plugin is deactivated.
register_deactivation_hook( __FILE__, 'data_liberation_deactivate' );

/**
 * Re-runs the topological sorter activation when the stored DB version does
 * not match the version shipped with the code.
 *
 * @return void
 */
function data_liberation_load() {
	// The version is written with update_option(), so it must be read back
	// with get_option(). get_site_option() reads the network-wide option on
	// multisite, which is never written here; the comparison would then fail
	// on every request and activate() would run on each load.
	// A missing option yields false, which casts to 0.
	$stored_version = (int) get_option( WP_Topological_Sorter::OPTION_NAME );

	if ( WP_Topological_Sorter::DB_VERSION !== $stored_version ) {
		// Update the database with dbDelta, if needed in the future.
		WP_Topological_Sorter::activate();
		update_option( WP_Topological_Sorter::OPTION_NAME, WP_Topological_Sorter::DB_VERSION );
	}
}

// Run when the plugin is loaded.
add_action( 'plugins_loaded', 'data_liberation_load' );

// Register admin menu
add_action(
Expand Down
246 changes: 246 additions & 0 deletions packages/playground/data-liberation/src/cli/WP_Import_Command.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
<?php

require_once __DIR__ . '/WP_Import_Logger.php';

/**
* Implements the `wp data-liberation` command.
*
* ## EXAMPLES
*
* # Import a WXR file.
* wp data-liberation import /path/to/file.xml
*
* # Import all files inside a folder.
* wp data-liberation import /path/to/folder
*
* # Import a WXR file from a URL.
* wp data-liberation import http://example.com/file.xml
*
* Success: Imported data.
*/
class WP_Import_Command {
	/**
	 * @var bool $dry_run Whether to perform a dry run.
	 */
	private $dry_run = false;

	/**
	 * @var WP_Stream_Importer $importer The importer instance.
	 */
	private $importer = null;

	/**
	 * @var string $wxr_path The path to the WXR file.
	 */
	private $wxr_path = '';

	/**
	 * @var int $count The number of items to import in one go.
	 *                 Currently only stored by import(); nothing in this class reads it.
	 */
	private $count;

	/**
	 * @var WP_Import_Session|null $import_session The import session.
	 *                                             Stays null during a dry run.
	 */
	private $import_session = null;

	/**
	 * Import a WXR file.
	 *
	 * ## OPTIONS
	 *
	 * <path>
	 * : The path to the WXR file. Either a file, a directory or a URL.
	 *
	 * [--count=<count>]
	 * : The number of items to import in one go. Default is 10,000.
	 *
	 * [--dry-run]
	 * : Perform a dry run if set.
	 *
	 * ## EXAMPLES
	 *
	 * wp data-liberation import /path/to/file.xml
	 *
	 * @param array $args
	 * @param array $assoc_args
	 * @return void
	 */
	public function import( $args, $assoc_args ) {
		$path          = $args[0];
		$this->dry_run = WP_CLI\Utils\get_flag_value( $assoc_args, 'dry-run', false );
		$this->count   = isset( $assoc_args['count'] ) ? (int) $assoc_args['count'] : 10000;
		$options       = array(
			'logger' => new WP_Import_Logger(),
		);

		if ( extension_loaded( 'pcntl' ) ) {
			// Set the signal handler.
			$this->register_handlers();
		}

		// Be sure Data Liberation is activated.
		data_liberation_activate();

		if ( filter_var( $path, FILTER_VALIDATE_URL ) ) {
			// Import URL.
			$this->import_wxr_url( $path, $options );
		} elseif ( is_dir( $path ) ) {
			$count = 0;
			// Get all the WXR files in the directory.
			foreach ( wp_visit_file_tree( $path ) as $event ) {
				foreach ( $event->files as $file ) {
					if ( $file->isFile() && 'xml' === pathinfo( $file->getPathname(), PATHINFO_EXTENSION ) ) {
						++$count;

						// Import the WXR file.
						$this->import_wxr_file( $file->getPathname(), $options );
					}
				}
			}

			if ( ! $count ) {
				WP_CLI::error( WP_CLI::colorize( "No WXR files found in the %R{$path}%n directory" ) );
			}
		} else {
			if ( ! is_file( $path ) ) {
				WP_CLI::error( WP_CLI::colorize( "File not found: %R{$path}%n" ) );
			}

			// Import the WXR file.
			$this->import_wxr_file( $path, $options );
		}
	}

	/**
	 * Attach to the active import session, or create a new one.
	 *
	 * During a dry run no session is created and $this->import_session is
	 * left untouched (null unless a previous call set it).
	 *
	 * @param array $args Arguments forwarded to WP_Import_Session::create().
	 * @return void
	 */
	private function start_session( $args ) {
		if ( $this->dry_run ) {
			WP_CLI::line( 'Dry run enabled. No session created.' );

			return;
		}

		$active_session = WP_Import_Session::get_active();

		if ( $active_session ) {
			$this->import_session = $active_session;

			$id = $this->import_session->get_id();
			WP_CLI::line( WP_CLI::colorize( "Current session: %g{$id}%n" ) );
		} else {
			$this->import_session = WP_Import_Session::create( $args );

			$id = $this->import_session->get_id();
			WP_CLI::line( WP_CLI::colorize( "New session: %g{$id}%n" ) );
		}
	}

	/**
	 * Add the current session ID to the importer options, when a session exists.
	 *
	 * A dry run never creates a session, so calling get_id() unconditionally
	 * would be a fatal "member function on null" error.
	 *
	 * @param array $options The importer options.
	 * @return array The options, with 'session_id' set if a session is available.
	 */
	private function add_session_id( $options ) {
		if ( $this->import_session ) {
			$options['session_id'] = $this->import_session->get_id();
		}

		return $options;
	}

	/**
	 * Import a WXR file.
	 *
	 * @param string $file_path The path to the WXR file.
	 * @param array  $options   Options passed to the importer factory.
	 * @return void
	 */
	private function import_wxr_file( $file_path, $options = array() ) {
		$this->wxr_path = $file_path;

		$this->start_session(
			array(
				'data_source' => 'wxr_file',
				'file_name'   => $file_path,
			)
		);

		// Pass the session ID (skipped on dry runs, which have no session).
		$options = $this->add_session_id( $options );

		$this->importer = WP_Stream_Importer::create_for_wxr_file( $file_path, $options );
		$this->import_wxr();
	}

	/**
	 * Import a WXR file from a URL.
	 *
	 * @param string $url     The URL to the WXR file.
	 * @param array  $options Options passed to the importer factory.
	 * @return void
	 */
	private function import_wxr_url( $url, $options = array() ) {
		$this->wxr_path = $url;

		$this->start_session(
			array(
				'data_source' => 'wxr_url',
				'file_name'   => $url,
			)
		);

		// Pass the session ID (skipped on dry runs, which have no session).
		$options = $this->add_session_id( $options );

		$this->importer = WP_Stream_Importer::create_for_wxr_url( $url, $options );
		$this->import_wxr();
	}

	/**
	 * Import the WXR file.
	 *
	 * Walks every stage of the importer, running all steps of a stage before
	 * advancing to the next one. On a dry run nothing is executed.
	 *
	 * @return void
	 */
	private function import_wxr() {
		if ( ! $this->importer ) {
			WP_CLI::error( 'Could not create importer' );
		}

		// A session is only mandatory for real imports; dry runs never create one.
		if ( ! $this->dry_run && ! $this->import_session ) {
			WP_CLI::error( 'Could not create session' );
		}

		WP_CLI::line( "Importing {$this->wxr_path}" );

		if ( $this->dry_run ) {
			// @TODO: do something with the dry run.
			WP_CLI::line( 'Dry run enabled.' );
		} else {
			do {
				$current_stage = $this->importer->get_stage();
				WP_CLI::line( WP_CLI::colorize( "Stage %g{$current_stage}%n" ) );
				$step_count = 0;

				while ( $this->importer->next_step() ) {
					++$step_count;
					WP_CLI::line( WP_CLI::colorize( "Step %g{$step_count}%n" ) );
				}
			} while ( $this->importer->advance_to_next_stage() );
		}

		WP_CLI::success( 'Import finished' );
	}

	/**
	 * Callback function registered to `pcntl_signal` to handle signals.
	 *
	 * Kept non-public on purpose: WP-CLI exposes public methods of a command
	 * class as subcommands.
	 *
	 * @param int $signal The signal number.
	 * @return void
	 */
	protected function signal_handler( $signal ) {
		switch ( $signal ) {
			case SIGINT:
				WP_CLI::line( 'Received SIGINT signal' );
				exit( 0 );

			case SIGTERM:
				WP_CLI::line( 'Received SIGTERM signal' );
				exit( 0 );
		}
	}

	/**
	 * Register signal handlers for the command.
	 *
	 * @return void
	 */
	private function register_handlers() {
		// Handlers installed with pcntl_signal() only run when signals are
		// dispatched (async signals, ticks, or pcntl_signal_dispatch()).
		// Without this, SIGINT/SIGTERM would be caught but never acted upon,
		// leaving the import impossible to interrupt.
		if ( function_exists( 'pcntl_async_signals' ) ) {
			pcntl_async_signals( true );
		}

		// Use a bound closure so the protected handler stays callable
		// regardless of which code is executing when the signal is dispatched.
		$handler = function ( $signal ) {
			$this->signal_handler( $signal );
		};

		// Handle the Ctrl + C signal to terminate the program.
		pcntl_signal( SIGINT, $handler );

		// Handle the `kill` command to terminate the program.
		pcntl_signal( SIGTERM, $handler );
	}
}
Loading
Loading