Skip to content

Commit

Permalink
Add new topo sorting query
Browse files Browse the repository at this point in the history
  • Loading branch information
zaerl committed Dec 4, 2024
1 parent d197de6 commit e95618e
Showing 1 changed file with 207 additions and 79 deletions.
286 changes: 207 additions & 79 deletions packages/playground/data-liberation/src/import/WP_Topological_Sorter.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,24 @@
*/
class WP_Topological_Sorter {

public $posts = array();
public $categories = array();
/**
* The base name of the table.
*/
const TABLE_NAME = 'data_liberation_index';

/**
* The option name for the database version.
*/
const OPTION_NAME = 'data_liberation_db_version';

/**
* The current database version, to be used with dbDelta.
*/
const DB_VERSION = 1;

// Element types.
const ELEMENT_TYPE_POST = 1;
const ELEMENT_TYPE_CATEGORY = 2;

/**
* Variable for keeping count of orphaned posts/attachments; it is also assigned as a temporary post ID.
Expand All @@ -34,27 +50,135 @@ class WP_Topological_Sorter {
*/
protected $sorted = false;

public static function get_table_name() {
global $wpdb;

// Default is wp_{TABLE_NAME}
return $wpdb->prefix . self::TABLE_NAME;
}

/**
* Run by register_activation_hook.
*/
public static function activate() {
global $wpdb;

// See wp_get_db_schema
$max_index_length = 191;
$table_name = self::get_table_name();

// Create the table if it doesn't exist.
// @TODO: remove this custom SQLite declaration after first phase of unit tests is done.
if ( self::is_sqlite() ) {
$sql = $wpdb->prepare(
'CREATE TABLE IF NOT EXISTS %i (
id INTEGER PRIMARY KEY AUTOINCREMENT,
element_type INTEGER NOT NULL default %d,
element_id INTEGER NOT NULL,
parent_id INTEGER,
parent TEXT NOT NULL default "",
byte_offset INTEGER NOT NULL,
hierarchy_level INTEGER DEFAULT NULL
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_element_id ON %i (element_id);
CREATE INDEX IF NOT EXISTS idx_element_parent ON %i (parent);
CREATE INDEX IF NOT EXISTS idx_byte_offset ON %i (byte_offset);',
$table_name,
self::ELEMENT_TYPE_POST,
$table_name,
$table_name,
$table_name
);
} else {
// MySQL, MariaDB.
$sql = $wpdb->prepare(
'CREATE TABLE IF NOT EXISTS %i (
id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
element_type tinyint(1) NOT NULL default %d,
element_id unsigned bigint(20) NOT NULL,
parent_id unsigned bigint(20) DEFAULT NULL,
parent varchar(200) NOT NULL default "",
byte_offset bigint(20) unsigned NOT NULL,
hierarchy_level INT DEFAULT NULL,
PRIMARY KEY (id),
UNIQUE KEY element_id (element_id(%d))
KEY element_parent (element_parent(%d))
KEY byte_offset (byte_offset(%d))
) ' . $wpdb->get_charset_collate(),
self::get_table_name(),
self::ELEMENT_TYPE_POST,
$max_index_length,
$max_index_length,
$max_index_length
);
}

require_once ABSPATH . 'wp-admin/includes/upgrade.php';
dbDelta( $sql );

update_option( self::OPTION_NAME, self::DB_VERSION );
}

public static function is_sqlite() {
return defined( 'DB_ENGINE' ) || 'sqlite' === DB_ENGINE;
}

/**
* Run in the 'plugins_loaded' action.
*/
public static function load() {
if ( self::DB_VERSION !== (int) get_site_option( self::OPTION_NAME ) ) {
// Used to update the database with dbDelta, if needed in the future.
self::activate();
}
}

/**
* Run by register_deactivation_hook.
*/
public static function deactivate() {
global $wpdb;
$table_name = self::get_table_name();

// Drop the table.
$wpdb->query( $wpdb->prepare( 'DROP TABLE IF EXISTS %s', $table_name ) );

// Delete the option.
delete_option( self::OPTION_NAME );
}

/**
* Run by register_uninstall_hook.
*/
public function reset() {
$this->posts = array();
$this->categories = array();
$this->category_index = array();
$this->orphan_post_counter = 0;
$this->last_post_id = 0;
$this->sorted = false;
}

public function map_category( $byte_offset, $data ) {
global $wpdb;

if ( empty( $data ) ) {
return false;
}

$this->categories[ $data['slug'] ] = array(
array_key_exists( 'parent', $data ) ? $data['parent'] : '',
$byte_offset,
$wpdb->insert(
self::get_table_name(),
array(
'element_type' => self::ELEMENT_TYPE_CATEGORY,
'element_id' => $data['term_id'],
'parent_id' => $data['parent_id'],
'parent' => array_key_exists( 'parent', $data ) ? $data['parent'] : '',
'byte_offset' => $byte_offset,
)
);
}

public function map_post( $byte_offset, $data ) {
global $wpdb;

if ( empty( $data ) ) {
return false;
}
Expand All @@ -70,11 +194,15 @@ public function map_post( $byte_offset, $data ) {
--$this->orphan_post_counter;
}

// This is an array saved as: [ parent, byte_offset ], to save
// space and not using an associative one.
$this->posts[ $data['post_id'] ] = array(
$data['post_parent'],
$byte_offset,
$wpdb->insert(
self::get_table_name(),
array(
'element_type' => self::ELEMENT_TYPE_POST,
'element_id' => $data['post_id'],
'parent_id' => $data['post_parent'],
'parent' => '',
'byte_offset' => $byte_offset,
)
);
}

Expand All @@ -89,25 +217,20 @@ public function map_post( $byte_offset, $data ) {
* @return int|bool The byte offset of the post, or false if the post is not found.
*/
public function get_post_byte_offset( $id ) {
global $wpdb;

if ( ! $this->sorted ) {
return false;
}

if ( isset( $this->posts[ $id ] ) ) {
$ret = $this->posts[ $id ];

// Remove the element from the array.
unset( $this->posts[ $id ] );

if ( 0 === count( $this->categories ) && 0 === count( $this->posts ) ) {
// All posts have been processed.
$this->reset();
}

return $ret;
}

return false;
return $wpdb->get_var(
$wpdb->prepare(
'SELECT byte_offset FROM %s WHERE element_id = %d AND element_type = %d',
self::get_table_name(),
$id,
self::ELEMENT_TYPE_POST
)
);
}

/**
Expand All @@ -118,25 +241,20 @@ public function get_post_byte_offset( $id ) {
* @return int|bool The byte offset of the category, or false if the category is not found.
*/
public function get_category_byte_offset( $slug ) {
global $wpdb;

if ( ! $this->sorted ) {
return false;
}

if ( isset( $this->categories[ $slug ] ) ) {
$ret = $this->categories[ $slug ];

// Remove the element from the array.
unset( $this->categories[ $slug ] );

if ( 0 === count( $this->categories ) && 0 === count( $this->posts ) ) {
// All categories have been processed.
$this->reset();
}

return $ret;
}

return false;
return $wpdb->get_var(
$wpdb->prepare(
'SELECT byte_offset FROM %s WHERE element_id = %d AND element_type = %d',
self::get_table_name(),
$id,
self::ELEMENT_TYPE_CATEGORY
)
);
}

public function is_sorted() {
Expand All @@ -150,30 +268,30 @@ public function is_sorted() {
* This method sorts the elements in the order they should be processed.
*/
public function sort_topologically( $free_space = true ) {
foreach ( $this->categories as $slug => $category ) {
$this->topological_category_sort( $slug, $category );
}
/*foreach ( $this->categories as $slug => $category ) {
// $this->topological_category_sort( $slug, $category );
}*/

$this->sort_elements( $this->posts );
$this->sort_elements( $this->categories );
$this->sort_elements( self::ELEMENT_TYPE_POST );
$this->sort_elements( self::ELEMENT_TYPE_CATEGORY );

// Free some space.
if ( $free_space ) {
/**
/*
* @TODO: all the elements that have not been moved can be flushed away.
*/
*
foreach ( $this->posts as $id => $element ) {
// Save only the byte offset.
$this->posts[ $id ] = $element[1];
}
/**
/*
* @TODO: all the elements that have not been moved can be flushed away.
*/
*
foreach ( $this->categories as $slug => $element ) {
// Save only the byte offset.
$this->categories[ $slug ] = $element[1];
}
}*/
}

$this->sorted = true;
Expand All @@ -182,34 +300,44 @@ public function sort_topologically( $free_space = true ) {
/**
* Recursive sort elements. Posts with parents will be moved to the correct position.
*
* @param int $type The type of element to sort.
* @return true
*/
private function sort_elements( &$elements ) {
$sort_callback = function ( $a, $b ) use ( &$elements ) {
$parent_a = $elements[ $a ][0];
$parent_b = $elements[ $b ][0];

if ( ! $parent_a && ! $parent_b ) {
// No parents.
return 0;
} elseif ( $a === $parent_b ) {
// A is the parent of B.
return -1;
} elseif ( $b === $parent_a ) {
// B is the parent of A.
return 1;
}

return 0;
};

/**
* @TODO: PHP uses quicksort: https://github.com/php/php-src/blob/master/Zend/zend_sort.c
* WordPress export posts by ID and so are likely to be already in order.
* Quicksort performs badly on already sorted arrays, O(n^2) is the worst case.
* Let's consider using a different sorting algorithm.
*/
uksort( $elements, $sort_callback );
private function sort_elements( $type ) {
global $wpdb;
$table_name = self::get_table_name();

return $wpdb->query(
$wpdb->prepare(
// Perform a topological sort CTE.
'WITH RECURSIVE hierarchy_cte AS (
-- Select all root nodes (where parent_id is NULL)
SELECT id, parent_id, 1 AS hierarchy_level
FROM %i
WHERE parent_id IS NULL AND element_type = %d
UNION ALL
-- Recursive member: Join the CTE with the table to find children
SELECT yt.id, yt.parent_id, hc.hierarchy_level + 1
FROM %i yt
WHERE element_type = %d
INNER JOIN hierarchy_cte hc ON yt.parent_id = hc.id
)
-- Update the hierarchy_level based on the computed hierarchy_level
UPDATE %i
SET hierarchy_level = hc.hierarchy_level
FROM hierarchy_cte hc
WHERE %i.id = hc.id;',
$table_name,
$type,
$table_name,
$type,
$table_name,
$table_name
)
);
}

/**
Expand Down

0 comments on commit e95618e

Please sign in to comment.