From 14dffc8e9e19982102e2ca6c6e491ba776d1fe9c Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Thu, 19 Dec 2024 11:01:11 +0100 Subject: [PATCH] New WP-CLI script --- .../bin/import/blueprint-import-wxr.json | 4 +- .../playground/data-liberation/plugin.php | 53 ++- .../src/cli/WP_Import_Command.php | 314 ++++++++++++++++++ .../data-liberation/src/functions.php | 45 +-- .../src/import/WP_Entity_Importer.php | 185 +---------- 5 files changed, 346 insertions(+), 255 deletions(-) create mode 100644 packages/playground/data-liberation/src/cli/WP_Import_Command.php diff --git a/packages/playground/data-liberation/bin/import/blueprint-import-wxr.json b/packages/playground/data-liberation/bin/import/blueprint-import-wxr.json index 55ab107921..b8ad517fae 100644 --- a/packages/playground/data-liberation/bin/import/blueprint-import-wxr.json +++ b/packages/playground/data-liberation/bin/import/blueprint-import-wxr.json @@ -11,8 +11,8 @@ "pluginPath": "data-liberation/plugin.php" }, { - "step": "runPHP", - "code": "files as $file ) {\nif ( $file->isFile() && pathinfo( $file->getPathname(), PATHINFO_EXTENSION ) === 'xml' ) {\ndata_liberation_import( $file->getPathname() );\n}\n}\n};" + "step": "wp-cli", + "command": "wp data-liberation import /wordpress/wp-content/uploads/import-wxr" } ] } diff --git a/packages/playground/data-liberation/plugin.php b/packages/playground/data-liberation/plugin.php index f17704ebcc..2a59f2c4ff 100644 --- a/packages/playground/data-liberation/plugin.php +++ b/packages/playground/data-liberation/plugin.php @@ -39,40 +39,29 @@ function () { } ); -add_action( - 'init', - function () { - if ( defined( 'WP_CLI' ) && WP_CLI ) { - /** - * Import a WXR file. - * - * - * : The WXR file to import. - */ - $command = function ( $args, $assoc_args ) { - $file = $args[0]; - data_liberation_import( $file ); - }; - - // Register the WP-CLI import command. 
- // Example usage: wp data-liberation /path/to/file.xml - WP_CLI::add_command( 'data-liberation', $command ); - } +function data_liberation_init() { + if ( defined( 'WP_CLI' ) && WP_CLI ) { + require_once __DIR__ . '/src/cli/WP_Import_Command.php'; - register_post_status( - 'error', - array( - 'label' => _x( 'Error', 'post' ), // Label name - 'public' => false, - 'exclude_from_search' => false, - 'show_in_admin_all_list' => false, - 'show_in_admin_status_list' => false, - // translators: %s is the number of errors - 'label_count' => _n_noop( 'Error (%s)', 'Error (%s)' ), - ) - ); + // Register the WP-CLI import command. + WP_CLI::add_command( 'data-liberation', WP_Import_Command::class ); } -); + + register_post_status( + 'error', + array( + 'label' => _x( 'Error', 'post' ), // Label name + 'public' => false, + 'exclude_from_search' => false, + 'show_in_admin_all_list' => false, + 'show_in_admin_status_list' => false, + // translators: %s is the number of errors + 'label_count' => _n_noop( 'Error (%s)', 'Error (%s)' ), + ) + ); +} + +add_action( 'init', 'data_liberation_init' ); // Register admin menu add_action( diff --git a/packages/playground/data-liberation/src/cli/WP_Import_Command.php b/packages/playground/data-liberation/src/cli/WP_Import_Command.php new file mode 100644 index 0000000000..025fa76feb --- /dev/null +++ b/packages/playground/data-liberation/src/cli/WP_Import_Command.php @@ -0,0 +1,314 @@ + + * : The path to the WXR file. Either a file, a directory or a URL. + * + * [--count=] + * : The number of items to import in one go. Default is 10,000. + * + * [--dry-run] + * : Perform a dry run if set. + * + * [--verbose] + * : Show more detailed output. 
+ * + * ## EXAMPLES + * + * wp data-liberation import /path/to/file.xml + * + * @param array $args + * @param array $assoc_args + * @return void + */ + public function import( $args, $assoc_args ) { + $path = $args[0]; + $this->dry_run = WP_CLI\Utils\get_flag_value( $assoc_args, 'dry-run', false ); + $this->verbose = WP_CLI\Utils\get_flag_value( $assoc_args, 'verbose', false ); + $this->count = isset( $assoc_args['count'] ) ? (int) $assoc_args['count'] : 10000; + + if ( extension_loaded( 'pcntl' ) ) { + // Set the signal handler. + $this->register_handlers(); + } + + if ( $this->verbose ) { + $this->register_verbose_callbacks(); + } + + if ( filter_var( $path, FILTER_VALIDATE_URL ) ) { + // Import URL. + $this->import_wxr_url( $path ); + } elseif ( is_dir( $path ) ) { + $count = 0; + // Get all the WXR files in the directory. + foreach ( wp_visit_file_tree( $path ) as $event ) { + foreach ( $event->files as $file ) { + if ( $file->isFile() && 'xml' === pathinfo( $file->getPathname(), PATHINFO_EXTENSION ) ) { + ++$count; + + // Import the WXR file. + $this->import_wxr_file( $file->getPathname() ); + } + } + } + + if ( ! $count ) { + WP_CLI::error( WP_CLI::colorize( "No WXR files found in the %R{$path}%n directory" ) ); + } + } else { + if ( ! is_file( $path ) ) { + WP_CLI::error( WP_CLI::colorize( "File not found: %R{$path}%n" ) ); + } + + // Import the WXR file. + $this->import_wxr_file( $path ); + } + } + + private function register_verbose_callbacks() { + // Add all the callbacks for the importer. 
+ add_action( + 'wxr_importer_process_failed_user', + function ( $error, $data ) { + WP_CLI::log( + sprintf( + /* translators: 1: user login, 2: error message */ + __( 'Failed to import user "%1$s": %2$s', 'wordpress-importer' ), + $data['user_login'], + $error->get_error_message() + ) + ); + }, + 10, + 2 + ); + + add_action( + 'wxr_importer_process_failed_term', + function ( $error, $data ) { + WP_CLI::log( + sprintf( + /* translators: 1: taxonomy name, 2: term name, 3: error message */ + __( 'Failed to import %1$s "%2$s": %3$s', 'wordpress-importer' ), + $data['taxonomy'], + $data['name'], + $error->get_error_message() + ) + ); + }, + 10, + 2 + ); + + add_action( + 'wxr_importer_process_failed_post', + function ( $error, $data, $meta, $post_type_object ) { + WP_CLI::log( + sprintf( + /* translators: 1: post title, 2: post type name, 3: error message */ + __( 'Failed to import post "%1$s" (%2$s): %3$s', 'wordpress-importer' ), + $data['post_title'], + $post_type_object->labels->singular_name, + $error->get_error_message() + ) + ); + }, + 10, + 4 + ); + } + + private function start_session( $args ) { + if ( $this->dry_run ) { + WP_CLI::line( 'Dry run enabled. No session created.' ); + + return; + } + + $active_session = WP_Import_Session::get_active(); + + if ( $active_session ) { + $this->import_session = $active_session; + + $id = $this->import_session->get_id(); + WP_CLI::line( WP_CLI::colorize( "Current session: %g{$id}%n" ) ); + } else { + $this->import_session = WP_Import_Session::create( $args ); + + $id = $this->import_session->get_id(); + WP_CLI::line( WP_CLI::colorize( "New session: %g{$id}%n" ) ); + } + } + + /** + * Import a WXR file after starting or resuming an import session (no session on dry runs). + * + * @param string $file_path The path to the WXR file. + * @return void + */ + private function import_wxr_file( $file_path, $options = array() ) { + $this->wxr_path = $file_path; + + $this->start_session( + array( + 'data_source' => 'wxr_file', + 'file_name' => $file_path, + ) + ); + + // Pass the session ID. 
+ $options['session_id'] = $this->import_session ? $this->import_session->get_id() : null; + + $this->importer = WP_Stream_Importer::create_for_wxr_file( $file_path, $options ); + $this->import_wxr(); + } + + /** + * Import a WXR file from a URL. + * + * @param string $url The URL to the WXR file. + * @param array $options Optional. Options forwarded to WP_Stream_Importer::create_for_wxr_url(). + * @return void + */ + private function import_wxr_url( $url, $options = array() ) { + $this->wxr_path = $url; + + $this->start_session( + array( + 'data_source' => 'wxr_url', + 'file_name' => $url, + ) + ); + + // Pass the session ID (null on a dry run, where start_session() creates no session). + $options['session_id'] = $this->import_session ? $this->import_session->get_id() : null; + + $this->importer = WP_Stream_Importer::create_for_wxr_url( $url, $options ); + $this->import_wxr(); + } + + /** + * Run every importer stage to completion, or only announce the file when --dry-run is set. + */ + private function import_wxr() { + if ( ! $this->importer ) { + WP_CLI::error( 'Could not create importer' ); + } + + if ( ! $this->dry_run && ! $this->import_session ) { + WP_CLI::error( 'Could not create session' ); + } + + WP_CLI::line( "Importing {$this->wxr_path}" ); + + if ( $this->dry_run ) { + // @TODO: do something with the dry run. + WP_CLI::line( 'Dry run enabled.' ); + } else { + do { + $current_stage = $this->importer->get_stage(); + WP_CLI::line( WP_CLI::colorize( "Stage %g{$current_stage}%n" ) ); + $step_count = 0; + + while ( $this->importer->next_step() ) { + ++$step_count; + + if ( $this->verbose ) { + WP_CLI::line( WP_CLI::colorize( "Step %g{$step_count}%n" ) ); + } + } + } while ( $this->importer->advance_to_next_stage() ); + } + + WP_CLI::success( 'Import finished' ); + } + + /** + * Callback function registered to `pcntl_signal` to handle signals. + * + * @param int $signal The signal number. + * @return void + */ + protected function signal_handler( $signal ) { + switch ( $signal ) { + case SIGINT: + WP_CLI::line( 'Received SIGINT signal' ); + exit( 0 ); + + case SIGTERM: + WP_CLI::line( 'Received SIGTERM signal' ); + exit( 0 ); + } + } + + /** + * Register signal handlers for the command. 
+ * + * @return void + */ + private function register_handlers() { + // Deliver signals asynchronously; without this (or ticks) the handlers below never run. + pcntl_async_signals( true ); + $handler = Closure::fromCallable( array( $this, 'signal_handler' ) ); // Resolved here, in class scope, because signal_handler() is protected. + pcntl_signal( SIGINT, $handler ); // Ctrl+C. + pcntl_signal( SIGTERM, $handler ); // The `kill` command. + } +} diff --git a/packages/playground/data-liberation/src/functions.php b/packages/playground/data-liberation/src/functions.php index 44166b0f2a..73111c7739 100644 --- a/packages/playground/data-liberation/src/functions.php +++ b/packages/playground/data-liberation/src/functions.php @@ -167,7 +167,7 @@ function wp_visit_file_tree( $dir ) { if ( '.' === $file || '..' === $file ) { continue; } - $file_path = $dir . '/' . $file; + $file_path = rtrim( $dir, '/' ) . '/' . $file; if ( is_dir( $file_path ) ) { $directories[] = $file_path; continue; @@ -193,49 +193,6 @@ function wp_visit_file_tree( $dir ) { ); } -/** - * Import a WXR file. Used by the CLI. - * - * @param string $path The path to the WXR file. - * @return void - */ -function data_liberation_import( $path ): bool { - $importer = WP_Stream_Importer::create_for_wxr_file( $path ); - - if ( ! $importer ) { - return false; - } - - $is_wp_cli = defined( 'WP_CLI' ) && WP_CLI; - - if ( $is_wp_cli ) { - WP_CLI::line( "Importing from {$path}" ); - } - - while ( $importer->next_step() ) { - // Output the current stage if running in WP-CLI. - if ( $is_wp_cli ) { - $current_stage = $importer->get_current_stage(); - WP_CLI::line( "Import: stage {$current_stage}" ); - } - } - - if ( $is_wp_cli ) { - WP_CLI::success( 'Import ended' ); - } - - return true; -} - -function get_all_post_meta_flat( $post_id ) { - return array_map( - function ( $value ) { - return $value[0]; - }, - get_post_meta( $post_id ) - ); -} - /** * Polyfill the mb_str_split function used by Rowbot\URL\URL. 
* diff --git a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php index 95ff593f6f..b889d8e6f2 100644 --- a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php @@ -57,7 +57,6 @@ class=[\'"].*?\b(wp-image-\d+|attachment-[\w\-]+)\b protected $tags = array(); protected $base_url = ''; - protected $logger; protected $options = array(); // NEW STYLE @@ -95,7 +94,6 @@ public function __construct( $options = array() ) { $this->mapping['term_id'] = array(); $this->requires_remapping = $empty_types; $this->exists = $empty_types; - $this->logger = new Logger(); $this->options = wp_parse_args( $options, @@ -136,15 +134,8 @@ public function import_entity( WP_Imported_Entity $entity ) { } public function import_site_option( $data ) { - $this->logger->info( - sprintf( - /* translators: %s: option name */ - __( 'Imported site option "%s"', 'wordpress-importer' ), - $data['option_name'] - ) - ); - update_option( $data['option_name'], $data['option_value'] ); + do_action( 'wxr_importer_processed_site_option', $data ); } public function import_user( $data ) { @@ -209,15 +200,6 @@ public function import_user( $data ) { $user_id = wp_insert_user( wp_slash( $userdata ) ); if ( is_wp_error( $user_id ) ) { - $this->logger->error( - sprintf( - /* translators: %s: user login */ - __( 'Failed to import user "%s"', 'wordpress-importer' ), - $userdata['user_login'] - ) - ); - $this->logger->debug( $user_id->get_error_message() ); - /** * User processing failed. 
* @@ -233,22 +215,6 @@ public function import_user( $data ) { } $this->mapping['user_slug'][ $original_slug ] = $user_id; - $this->logger->info( - sprintf( - /* translators: %s: user login */ - __( 'Imported user "%s"', 'wordpress-importer' ), - $userdata['user_login'] - ) - ); - $this->logger->debug( - sprintf( - /* translators: 1: original user ID, 2: new user ID */ - __( 'User %1$d remapped to %2$d', 'wordpress-importer' ), - $original_id, - $user_id - ) - ); - // TODO: Implement meta handling once WXR includes it /** * User processing completed. @@ -327,17 +293,6 @@ public function import_term( $data ) { $result = wp_insert_term( $data['name'], $data['taxonomy'], $termdata ); if ( is_wp_error( $result ) ) { - $this->logger->warning( - sprintf( - /* translators: 1: taxonomy name, 2: term name */ - __( 'Failed to import %1$s %2$s', 'wordpress-importer' ), - $data['taxonomy'], - $data['name'] - ) - ); - $this->logger->debug( $result->get_error_message() ); - do_action( 'wp_import_insert_term_failed', $result, $data ); - /** * Term processing failed. * @@ -354,25 +309,6 @@ public function import_term( $data ) { $this->mapping['term'][ $mapping_key ] = $term_id; $this->mapping['term_id'][ $original_id ] = $term_id; - $this->logger->info( - sprintf( - /* translators: 1: term name, 2: taxonomy name */ - __( 'Imported "%1$s" (%2$s)', 'wordpress-importer' ), - $data['name'], - $data['taxonomy'] - ) - ); - $this->logger->debug( - sprintf( - /* translators: 1: original term ID, 2: new term ID */ - __( 'Term %1$d remapped to %2$d', 'wordpress-importer' ), - $original_id, - $term_id - ) - ); - - do_action( 'wp_import_insert_term', $term_id, $data ); - /** * Term processing completed. 
* @@ -449,7 +385,6 @@ public function import_post( $data ) { */ $data = apply_filters( 'wxr_importer_pre_process_post', $data ); if ( empty( $data ) ) { - $this->logger->debug( 'Skipping post, empty data' ); return false; } @@ -458,7 +393,6 @@ public function import_post( $data ) { // Have we already processed this? if ( isset( $this->mapping['post'][ $original_id ] ) ) { - $this->logger->debug( 'Skipping post, already processed' ); return; } @@ -467,28 +401,17 @@ public function import_post( $data ) { // Is this type even valid? if ( ! $post_type_object ) { - $this->logger->warning( - sprintf( - /* translators: 1: post title, 2: post type */ - __( 'Failed to import "%1$s": Invalid post type %2$s', 'wordpress-importer' ), - $data['post_title'], - $post_type - ) - ); + /** + * Post type not found. + * + * @param array $data Raw data imported for the post. + */ + do_action( 'wxr_importer_process_post_type_not_found', $data ); return false; } $post_exists = $this->post_exists( $data ); if ( $post_exists ) { - $this->logger->info( - sprintf( - /* translators: 1: post type name, 2: post title */ - __( '%1$s "%2$s" already exists.', 'wordpress-importer' ), - $post_type_object->labels->singular_name, - $data['post_title'] - ) - ); - /** * Post processing already imported. * @@ -557,13 +480,6 @@ public function import_post( $data ) { // @TODO: Do not download any attachments here. We're just inserting the data // at this point. All the downloads have already been processed by now. if ( ! $this->options['fetch_attachments'] ) { - $this->logger->notice( - sprintf( - /* translators: %s: post title */ - __( 'Skipping attachment "%s", fetching attachments disabled' ), - $data['post_title'] - ) - ); /** * Post processing skipped. 
* @@ -581,16 +497,6 @@ public function import_post( $data ) { } if ( is_wp_error( $post_id ) ) { - $this->logger->error( - sprintf( - /* translators: 1: post title, 2: post type name */ - __( 'Failed to import "%1$s" (%2$s)', 'wordpress-importer' ), - $data['post_title'], - $post_type_object->labels->singular_name - ) - ); - $this->logger->debug( $post_id->get_error_message() ); - /** * Post processing failed. * @@ -600,7 +506,7 @@ public function import_post( $data ) { * @param array $comments Raw comment data, already processed by {@see process_comments}. * @param array $terms Raw term data, already processed. */ - do_action( 'wxr_importer_process_failed_post', $post_id, $data, $meta, $comments, $terms ); + do_action( 'wxr_importer_process_failed_post', $post_id, $data, $meta, $post_type_object ); return false; } @@ -617,23 +523,6 @@ public function import_post( $data ) { } $this->mark_post_exists( $data, $post_id ); - $this->logger->info( - sprintf( - /* translators: 1: post title, 2: post type name */ - __( 'Imported "%1$s" (%2$s)', 'wordpress-importer' ), - $data['post_title'] ?? '', - $post_type_object->labels->singular_name - ) - ); - $this->logger->debug( - sprintf( - /* translators: 1: original post ID, 2: new post ID */ - __( 'Post %1$d remapped to %2$d', 'wordpress-importer' ), - $original_id, - $post_id - ) - ); - /** * Post processing completed. 
* @@ -703,8 +592,6 @@ protected function process_menu_item_meta( $post_id, $data, $meta ) { $original_object_id = get_post_meta( $post_id, '_menu_item_object_id', true ); $object_id = null; - $this->logger->debug( sprintf( 'Processing menu item %s', $item_type ) ); - $requires_remapping = false; switch ( $item_type ) { case 'taxonomy': @@ -733,7 +620,6 @@ protected function process_menu_item_meta( $post_id, $data, $meta ) { default: // associated object is missing or not imported yet, we'll retry later $this->missing_menu_items[] = $item; - $this->logger->debug( 'Unknown menu item type' ); break; } @@ -746,7 +632,6 @@ protected function process_menu_item_meta( $post_id, $data, $meta ) { return; } - $this->logger->debug( sprintf( 'Menu item %d mapped to %d', $original_object_id, $object_id ) ); update_post_meta( $post_id, '_menu_item_object_id', wp_slash( $object_id ) ); } @@ -1193,57 +1078,3 @@ public static function sort_comments_by_id( $a, $b ) { return $a['comment_id'] - $b['comment_id']; } } - -/** - * @TODO how to treat this? Should this class even exist? - * how does WordPress handle different levels? It - * seems useful for usage in wp-cli, Blueprints, - * and other non-web environments. - */ -// phpcs:ignore Generic.Files.OneObjectStructurePerFile.MultipleFound -class Logger { - /** - * Log a debug message. - * - * @param string $message Message to log - */ - public function debug( $message ) { - // echo( '[DEBUG] ' . $message ); - } - - /** - * Log an info message. - * - * @param string $message Message to log - */ - public function info( $message ) { - // echo( '[INFO] ' . $message ); - } - - /** - * Log a warning message. - * - * @param string $message Message to log - */ - public function warning( $message ) { - echo( '[WARNING] ' . $message ); - } - - /** - * Log an error message. - * - * @param string $message Message to log - */ - public function error( $message ) { - echo( '[ERROR] ' . $message ); - } - - /** - * Log a notice message. 
- * - * @param string $message Message to log - */ - public function notice( $message ) { - // echo( '[NOTICE] ' . $message ); - } -}