391 lines
17 KiB
PHP
391 lines
17 KiB
PHP
<?php
|
|
|
|
if (defined('WP_CLI') && WP_CLI) {
|
|
|
|
// Once WordPress has finished loading, register the hierarchical
// `page_category` taxonomy on pages so the import command can attach terms.
WP_CLI::add_hook('after_wp_load', function () {
    $page_category_args = [
        'label'        => 'Page Categories',
        'public'       => true,
        'hierarchical' => true,
        'show_ui'      => true,
        'show_in_rest' => true,
    ];

    register_taxonomy('page_category', 'page', $page_category_args);
});
|
|
|
|
/**
 * WP-CLI command that copies Drupal nodes into WordPress pages.
 *
 * Each imported page stores its source node ID in post meta so later runs can
 * replace or remove it. Node bodies are converted from HTML into block markup.
 * The Drupal connection settings are read from the DRUPAL_DB_USER,
 * DRUPAL_DB_PASS, DRUPAL_DB_NAME and DRUPAL_DB_HOST environment variables.
 */
class Drupal_Import_Command extends WP_CLI_Command
{
    /** Taxonomy that receives the node-type term and the Drupal vocabulary terms. */
    const TAXONOMY = 'page_category';

    /** Post meta key holding the originating Drupal node ID. */
    const NID_META_KEY = '_drupal_nid';

    /**
     * Imports Drupal content into WordPress.
     *
     * ## OPTIONS
     *
     * --type=<type>
     * : (Required) Drupal content type(s) to import or delete. Comma-separated if multiple. Example: --type=story,hidden_content,vidcast,news_article,resources
     *
     * [--delete-only]
     * : Only delete existing imported WordPress pages for the specified type.
     *
     * ## EXAMPLES
     *
     *     wp drupal-import --type=story,news_article
     *     wp drupal-import --type=story --delete-only
     *
     * @param array $args       Positional arguments (unused).
     * @param array $assoc_args Associative arguments: `type` and, optionally, `delete-only`.
     */
    public function __invoke($args, $assoc_args)
    {
        // Validate the arguments first so a bad invocation fails before any DB work.
        $types = $this->parse_types($assoc_args['type'] ?? '');
        if (empty($types)) {
            WP_CLI::error("You must provide a --type=TYPE argument (e.g., --type=story).");
        }

        $drupal_db = $this->connect_to_drupal();

        // One %s placeholder per requested type; values are bound by prepare().
        $placeholders = implode(',', array_fill(0, count($types), '%s'));

        if (! empty($assoc_args['delete-only'])) {
            $nids = $drupal_db->get_col(
                $drupal_db->prepare(
                    "SELECT nid FROM node_field_data WHERE type IN ($placeholders)",
                    $types
                )
            );
            $this->delete_imported_pages($nids);
            return;
        }

        // NOTE(review): on multilingual Drupal sites these tables may hold several
        // rows per nid; each row would be imported in turn — confirm against the data.
        $nodes = $drupal_db->get_results(
            $drupal_db->prepare(
                "SELECT n.nid, n.title, b.body_value, b.body_summary, n.status, n.created, n.changed, n.uid, n.type,
                        u.name AS author_name, u.mail AS author_email
                 FROM node_field_data n
                 JOIN node__body b ON n.nid = b.entity_id
                 LEFT JOIN users_field_data u ON n.uid = u.uid
                 WHERE n.type IN ($placeholders)",
                $types
            )
        );

        // Bail out before the taxonomy lookup: an empty nid list would produce `IN ()`.
        if (empty($nodes)) {
            WP_CLI::warning("No Drupal content found to import.");
            return;
        }

        $taxonomy_by_nid = $this->fetch_taxonomy_by_nid($drupal_db, $nodes);

        $imported_nids = [];
        foreach ($nodes as $node) {
            $imported_nids[] = (string) $node->nid;
            $this->import_node($node, $taxonomy_by_nid[$node->nid] ?? []);
        }

        $this->unpublish_orphans($imported_nids, $types);
    }

    /**
     * Restores the caller-supplied modification timestamps on insert.
     *
     * Hooked on `wp_insert_post_data` around the import's wp_insert_post() call
     * so the values passed as `post_modified` / `post_modified_gmt` end up in
     * the data that gets saved.
     *
     * @param array $data    Post data about to be saved.
     * @param array $postarr Post array as passed to wp_insert_post().
     * @return array Post data, with the modification dates copied from $postarr when both are set.
     */
    public function alter_post_modification_time($data, $postarr)
    {
        if (!empty($postarr['post_modified']) && !empty($postarr['post_modified_gmt'])) {
            $data['post_modified']     = $postarr['post_modified'];
            $data['post_modified_gmt'] = $postarr['post_modified_gmt'];
        }

        return $data;
    }

    /**
     * Splits the raw --type value into a clean list of content types.
     *
     * @param mixed $raw Raw value of the --type argument.
     * @return string[] Trimmed, non-empty, de-duplicated type names.
     */
    private function parse_types($raw)
    {
        $types = array_map('trim', explode(',', (string) $raw));

        return array_values(array_unique(array_filter($types, 'strlen')));
    }

    /**
     * Opens the connection to the Drupal database, aborting the command on failure.
     *
     * @return wpdb Connected database handle.
     */
    private function connect_to_drupal()
    {
        $drupal_db = new wpdb(
            getenv('DRUPAL_DB_USER'),
            getenv('DRUPAL_DB_PASS'),
            getenv('DRUPAL_DB_NAME'),
            getenv('DRUPAL_DB_HOST')
        );

        // The constructor always returns a wpdb object; a failed connection is
        // reported through the `ready` flag and the `error` property instead.
        if (! $drupal_db->ready) {
            $reason = (! empty($drupal_db->error) && is_wp_error($drupal_db->error))
                ? wp_strip_all_tags($drupal_db->error->get_error_message())
                : 'check the DRUPAL_DB_* environment variables';
            WP_CLI::error("Could not connect to Drupal DB: " . $reason);
        }

        return $drupal_db;
    }

    /**
     * Permanently deletes every page linked to one of the given Drupal node IDs.
     *
     * @param array $nids Drupal node IDs whose imported pages should be removed.
     * @return void
     */
    private function delete_imported_pages(array $nids)
    {
        if (empty($nids)) {
            WP_CLI::warning("No Drupal NIDs found—nothing to delete.");
            return;
        }

        $posts_to_delete = get_posts([
            'post_type'   => 'page',
            'post_status' => 'any',
            'meta_key'    => self::NID_META_KEY,
            'meta_value'  => $nids,
            'numberposts' => -1,
        ]);

        $deleted = 0;
        foreach ($posts_to_delete as $post) {
            // Read the NID before deleting: a forced delete removes the post meta too.
            $nid = get_post_meta($post->ID, self::NID_META_KEY, true);

            if (! wp_delete_post($post->ID, true)) {
                WP_CLI::warning("Could not delete post ID {$post->ID} (NID: {$nid})");
                continue;
            }

            $deleted++;
            WP_CLI::log("Deleted post ID {$post->ID} (NID: {$nid})");
        }

        WP_CLI::success("Deleted " . $deleted . " Drupal-linked pages.");
    }

    /**
     * Loads the taxonomy terms attached to the given nodes, grouped by node ID.
     *
     * @param wpdb  $drupal_db Drupal database handle.
     * @param array $nodes     Node rows; only the `nid` property is read.
     * @return array Map of nid => vocabulary slug => list of ['parent' => vocabulary id, 'child' => term name].
     */
    private function fetch_taxonomy_by_nid($drupal_db, array $nodes)
    {
        $nids = array_unique(array_map(function ($node) {
            return (int) $node->nid;
        }, $nodes));

        if (empty($nids)) {
            return [];
        }

        // The vocabulary id (`vid`) is used as the parent term name.
        $rows = $drupal_db->get_results(
            "SELECT ti.nid, td.vid AS vocabulary, ttd.name AS term_name
             FROM taxonomy_index ti
             JOIN taxonomy_term_data td ON ti.tid = td.tid
             JOIN taxonomy_term_field_data ttd ON ti.tid = ttd.tid
             WHERE ti.nid IN (" . implode(',', $nids) . ")"
        );

        $taxonomy_by_nid = [];
        foreach ((array) $rows as $row) {
            $vocab_slug = sanitize_title($row->vocabulary);
            $taxonomy_by_nid[$row->nid][$vocab_slug][] = [
                'parent' => $row->vocabulary,
                'child'  => $row->term_name,
            ];
        }

        return $taxonomy_by_nid;
    }

    /**
     * Imports a single node as a page, replacing any page imported earlier for the same NID.
     *
     * @param object $node         Node row as selected in __invoke().
     * @param array  $vocabularies Vocabulary slug => list of parent/child term pairs for this node.
     * @return bool True when the page was created, false when the insert failed.
     */
    private function import_node($node, array $vocabularies)
    {
        // Remove any page created for this NID by a previous run.
        $existing = get_posts([
            'meta_key'    => self::NID_META_KEY,
            'meta_value'  => $node->nid,
            'post_type'   => 'page',
            'post_status' => 'any',
            'numberposts' => -1,
        ]);
        foreach ($existing as $post) {
            wp_delete_post($post->ID, true);
        }

        $author    = $this->resolve_author($node);
        $wp_status = (int) $node->status === 1 ? 'publish' : 'draft';

        // `created` / `changed` are formatted as GMT first; the site-local values
        // are then derived from them instead of treating the GMT string as local time.
        $post_date_gmt     = gmdate('Y-m-d H:i:s', (int) $node->created);
        $post_modified_gmt = gmdate('Y-m-d H:i:s', (int) $node->changed);

        add_filter('wp_insert_post_data', [$this, 'alter_post_modification_time'], 99, 2);
        // wp_insert_post() unslashes its input, so slash it first to keep backslashes intact.
        $post_id = wp_insert_post(wp_slash([
            'post_title'        => (string) $node->title,
            'post_content'      => $this->convert_to_blocks($node->body_value),
            'post_status'       => $wp_status,
            'post_type'         => 'page',
            'post_author'       => $author['id'],
            'post_date'         => get_date_from_gmt($post_date_gmt),
            'post_date_gmt'     => $post_date_gmt,
            'post_modified'     => get_date_from_gmt($post_modified_gmt),
            'post_modified_gmt' => $post_modified_gmt,
        ]), true);
        remove_filter('wp_insert_post_data', [$this, 'alter_post_modification_time'], 99);

        if (is_wp_error($post_id) || ! $post_id) {
            $reason = is_wp_error($post_id) ? $post_id->get_error_message() : 'unknown error';
            WP_CLI::warning("Failed to import NID {$node->nid} ({$node->title}): {$reason}");
            return false;
        }

        $term_ids = [];

        // Tag the page with its Drupal content type; the orphan pass relies on this term.
        $type_term_id = $this->ensure_term($node->type);
        if ($type_term_id) {
            $term_ids[] = $type_term_id;
            WP_CLI::log("✅ Assigned term '{$node->type}' (ID: {$type_term_id}) to post {$post_id}");
        }

        // Vocabulary becomes the parent term, the Drupal term its child.
        foreach ($vocabularies as $terms) {
            foreach ($terms as $pair) {
                $parent_id = $this->ensure_term($pair['parent']);
                $child_id  = $this->ensure_term($pair['child'], $parent_id);

                foreach ([$parent_id, $child_id] as $term_id) {
                    if ($term_id) {
                        $term_ids[] = $term_id;
                    }
                }

                WP_CLI::log("✔ Assigned '{$pair['child']}' under '{$pair['parent']}' to post {$post_id} in page_category");
            }
        }

        // Assign everything in one call so earlier terms are not overwritten.
        wp_set_post_terms($post_id, array_unique($term_ids), self::TAXONOMY, false);

        update_post_meta($post_id, self::NID_META_KEY, $node->nid);

        WP_CLI::log("Imported: {$node->title} (NID: {$node->nid}, Author: {$author['login']}, Status: {$wp_status})");

        return true;
    }

    /**
     * Finds or creates the WordPress user matching the node's author.
     *
     * @param object $node Node row; reads `author_name` and `author_email`.
     * @return array ['id' => int user ID (0 when none could be resolved), 'login' => string login used].
     */
    private function resolve_author($node)
    {
        $user_login = sanitize_user((string) ($node->author_name ?? ''));
        if ($user_login === '') {
            // The LEFT JOIN can yield no user, and a blank name sanitizes to '';
            // wp_create_user() rejects an empty login, so use a placeholder account.
            $user_login = 'drupal_user';
        }

        $author_id = username_exists($user_login);
        if ($author_id) {
            return ['id' => (int) $author_id, 'login' => $user_login];
        }

        $email = ! empty($node->author_email) ? $node->author_email : "{$user_login}@example.com";

        // Reuse an account that already owns this address; creating a second one would fail.
        $by_email = get_user_by('email', $email);
        if ($by_email) {
            return ['id' => (int) $by_email->ID, 'login' => $user_login];
        }

        $created = wp_create_user($user_login, wp_generate_password(), $email);
        if (is_wp_error($created)) {
            WP_CLI::warning("Could not create user '{$user_login}': " . $created->get_error_message());
            return ['id' => 0, 'login' => $user_login];
        }

        return ['id' => (int) $created, 'login' => $user_login];
    }

    /**
     * Returns the ID of the named term in the import taxonomy, creating it when missing.
     *
     * @param string $name      Term name.
     * @param int    $parent_id Parent term ID used only when the term has to be created.
     * @return int Term ID, or 0 when the term could not be found or created.
     */
    private function ensure_term($name, $parent_id = 0)
    {
        $term = term_exists($name, self::TAXONOMY);
        if (! $term) {
            $term = wp_insert_term($name, self::TAXONOMY, ['parent' => (int) $parent_id]);
        }

        if (is_wp_error($term)) {
            WP_CLI::warning("Could not create term '{$name}': " . $term->get_error_message());
            return 0;
        }

        return (int) (is_array($term) ? $term['term_id'] : $term);
    }

    /**
     * Moves published, previously imported pages back to draft when their node
     * was not part of this run and they carry one of the requested type terms.
     *
     * @param string[] $imported_nids Node IDs handled in this run.
     * @param string[] $types         Content types passed via --type.
     * @return void
     */
    private function unpublish_orphans(array $imported_nids, array $types)
    {
        $imported = array_flip($imported_nids);

        // Only published pages need unpublishing; drafts are already in the target state.
        $all_synced = get_posts([
            'post_type'   => 'page',
            'meta_key'    => self::NID_META_KEY,
            'post_status' => 'publish',
            'numberposts' => -1,
        ]);

        foreach ($all_synced as $page) {
            $nid = (string) get_post_meta($page->ID, self::NID_META_KEY, true);
            if (isset($imported[$nid])) {
                continue;
            }

            $assigned_terms = wp_get_post_terms($page->ID, self::TAXONOMY, ['fields' => 'names']);
            if (is_wp_error($assigned_terms)) {
                continue;
            }

            // Restrict the clean-up to pages tagged with a type requested in this run.
            $matched = array_values(array_intersect($assigned_terms, $types));
            if (empty($matched)) {
                continue;
            }

            wp_update_post(['ID' => $page->ID, 'post_status' => 'draft']);
            WP_CLI::log("📉 Unpublished orphaned page ID {$page->ID} with term '{$matched[0]}'");
        }
    }

    /**
     * Converts an HTML fragment into serialized block markup.
     *
     * @param string|null $html HTML body of a node.
     * @return string Block markup; empty string when there is nothing to convert.
     */
    private function convert_to_blocks($html)
    {
        $html = trim((string) $html);
        if ($html === '') {
            // loadHTML() rejects an empty string, and there is nothing to convert anyway.
            return '';
        }

        $doc      = new DOMDocument();
        $previous = libxml_use_internal_errors(true);
        // The XML encoding hint makes libxml read the fragment as UTF-8 without
        // relying on the deprecated 'HTML-ENTITIES' mbstring conversion.
        $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        $body = $doc->getElementsByTagName('body')->item(0);
        if (! $body) {
            return '';
        }

        $output = '';
        foreach ($body->childNodes as $node) {
            $output .= $this->convert_node_to_block($node);
        }

        return $output;
    }

    /**
     * Converts one DOM node (and, for <div>, its children) into block markup.
     *
     * @param DOMNode $node Node taken from the parsed body.
     * @return string Block markup for the node, or '' for nodes that produce no output.
     */
    private function convert_node_to_block($node)
    {
        if ($node instanceof DOMText) {
            $text = trim($node->wholeText);
            if ($text === '') {
                return '';
            }

            // DOM text is entity-decoded, so re-escape it before writing it back as HTML.
            $escaped = htmlspecialchars($text, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');

            return $this->wrap_block('paragraph', "<p>{$escaped}</p>");
        }

        if (!($node instanceof DOMElement)) {
            return '';
        }

        $tag = strtolower($node->tagName);

        // Bare image.
        if ($tag === 'img') {
            $img_html = '<img src="' . esc_url($node->getAttribute('src')) . '" alt="' . esc_attr($node->getAttribute('alt') ?: '') . '" />';

            return $this->wrap_block('image', "<figure class='wp-block-image'>{$img_html}</figure>");
        }

        // Linked image: keep the link and the first image inside it.
        if ($tag === 'a') {
            $images = $node->getElementsByTagName('img');
            if ($images->length > 0) {
                $img      = $images->item(0);
                $img_html = '<a href="' . esc_url($node->getAttribute('href')) . '"><img src="' . esc_url($img->getAttribute('src')) . '" alt="' . esc_attr($img->getAttribute('alt') ?: '') . '" /></a>';

                return $this->wrap_block('image', "<figure class='wp-block-image'>{$img_html}</figure>");
            }
        }

        // A <div> is only a container: convert its children and drop the wrapper.
        if ($tag === 'div') {
            $content = '';
            foreach ($node->childNodes as $child) {
                $content .= $this->convert_node_to_block($child);
            }

            return $content;
        }

        // Serialized element including its own tag.
        $outer_html = $node->ownerDocument->saveHTML($node);

        // Headings carry their level as a block attribute; level 2 is the block default.
        if (preg_match('/^h([1-6])$/', $tag, $heading)) {
            $level = (int) $heading[1];

            return $this->wrap_block('heading', $outer_html, $level === 2 ? '' : '{"level":' . $level . '}');
        }

        switch ($tag) {
            case 'p':
                return $this->wrap_block('paragraph', $outer_html);
            case 'table':
                return $this->wrap_block('table', $outer_html);
            case 'blockquote':
                return $this->wrap_block('quote', $outer_html);
            case 'ol':
                return $this->wrap_block('list', $outer_html, '{"ordered":true}');
            case 'ul':
                return $this->wrap_block('list', $outer_html);
        }

        // Stray inline markup becomes a paragraph, which needs a <p> wrapper.
        $inline_tags = ['a', 'abbr', 'b', 'cite', 'code', 'em', 'i', 'mark', 's', 'small', 'span', 'strong', 'sub', 'sup', 'u'];
        if (in_array($tag, $inline_tags, true)) {
            return $this->wrap_block('paragraph', "<p>{$outer_html}</p>");
        }

        // Anything else is preserved verbatim in a Custom HTML block.
        return $this->wrap_block('html', $outer_html);
    }

    /**
     * Wraps markup in the opening and closing comment delimiters of a core block.
     *
     * @param string $name  Block name without the `core/` prefix, e.g. `paragraph`.
     * @param string $inner Markup placed between the delimiters.
     * @param string $attrs Optional JSON attribute object for the opening delimiter.
     * @return string Serialized block followed by a blank line.
     */
    private function wrap_block($name, $inner, $attrs = '')
    {
        $open = $attrs === '' ? "<!-- wp:{$name} -->" : "<!-- wp:{$name} {$attrs} -->";

        return "{$open}\n{$inner}\n<!-- /wp:{$name} -->\n\n";
    }
}
|
|
|
|
// Expose the importer as `wp drupal-import`.
WP_CLI::add_command('drupal-import', Drupal_Import_Command::class);
|
|
}
|