Drupal2WP_Toolkit/drupal2wp-toolkit.php

391 lines
17 KiB
PHP

<?php
if (defined('WP_CLI') && WP_CLI) {
// Register the hierarchical 'page_category' taxonomy for pages on WP-CLI's
// 'after_wp_load' hook; the import command assigns its terms to imported pages.
WP_CLI::add_hook('after_wp_load', static function () {
    $page_category_args = [
        'label'        => 'Page Categories',
        'hierarchical' => true,
        'public'       => true,
        'show_ui'      => true,
        'show_in_rest' => true,
    ];

    register_taxonomy('page_category', 'page', $page_category_args);
});
class Drupal_Import_Command extends WP_CLI_Command
{
/**
 * Imports Drupal content into WordPress.
 *
 * ## OPTIONS
 *
 * --type=<type>
 * : (Required) Drupal content type(s) to import or delete. Comma-separated if multiple. Example: --type=story,hidden_content,vidcast,news_article,resources
 *
 * [--delete-only]
 * : Only delete existing imported WordPress pages for the specified type.
 *
 * ## EXAMPLES
 *
 *     wp drupal-import --type=story
 *     wp drupal-import --type=story,news_article --delete-only
 *
 * @param array $args       Positional arguments (unused).
 * @param array $assoc_args Associative arguments: `type` (required) and the `delete-only` flag.
 */
public function __invoke($args, $assoc_args)
{
    // Validate --type before opening any connection. Values are trimmed so
    // "--type=story, news" behaves the same as "--type=story,news".
    $raw_types = isset($assoc_args['type']) && is_string($assoc_args['type']) ? $assoc_args['type'] : '';
    $types     = array_values(array_unique(array_filter(array_map('trim', explode(',', $raw_types)), 'strlen')));
    if (empty($types)) {
        WP_CLI::error("You must provide a --type=TYPE argument (e.g., --type=story).");
    }

    // 📡 Connect to Drupal DB. The wpdb constructor never returns a WP_Error,
    // so the connection state is read from the instance instead.
    $drupal_db = new wpdb(
        (string) getenv('DRUPAL_DB_USER'),
        (string) getenv('DRUPAL_DB_PASS'),
        (string) getenv('DRUPAL_DB_NAME'),
        (string) getenv('DRUPAL_DB_HOST')
    );
    if (! $drupal_db->ready) {
        $detail = $drupal_db->last_error ?: 'check the DRUPAL_DB_* environment variables.';
        WP_CLI::error("Could not connect to Drupal DB: " . $detail);
    }

    // One %s placeholder per requested type, filled in by wpdb::prepare().
    $type_placeholders = implode(',', array_fill(0, count($types), '%s'));

    // 🧹 --delete-only logic
    if (! empty($assoc_args['delete-only'])) {
        $nids = $drupal_db->get_col($drupal_db->prepare(
            "SELECT nid FROM node_field_data WHERE type IN ({$type_placeholders})",
            $types
        ));
        $this->delete_pages_for_nids((array) $nids);
        return;
    }

    // 🧾 Fetch content with user & timestamps
    $nodes = $drupal_db->get_results($drupal_db->prepare(
        "SELECT n.nid, n.title, b.body_value, b.body_summary, n.status, n.created, n.changed, n.uid, n.type,
                u.name AS author_name, u.mail AS author_email
         FROM node_field_data n
         JOIN node__body b ON n.nid = b.entity_id
         LEFT JOIN users_field_data u ON n.uid = u.uid
         WHERE n.type IN ({$type_placeholders})",
        $types
    ));

    // Bail out before the taxonomy query: an empty NID list would otherwise
    // produce an invalid "IN ()" clause.
    if (empty($nodes)) {
        WP_CLI::warning("No Drupal content found to import.");
        return;
    }

    $nids = array_values(array_unique(array_map(function ($node) {
        return (int) $node->nid;
    }, $nodes)));
    $taxonomy_by_nid = $this->fetch_taxonomy_by_nid($drupal_db, $nids);

    $imported_nids = [];
    foreach ($nodes as $node) {
        // Recorded even when the insert fails: the node still exists in
        // Drupal, so its page must not be treated as an orphan below.
        $imported_nids[] = $node->nid;
        $this->import_node($node, $taxonomy_by_nid[$node->nid] ?? []);
    }

    // 🧽 Unpublish orphans
    $this->unpublish_orphans($imported_nids, $types);
}

/**
 * Permanently deletes every page whose `_drupal_nid` meta matches one of the given NIDs.
 *
 * @param array $nids Drupal node IDs.
 */
private function delete_pages_for_nids(array $nids)
{
    if (empty($nids)) {
        WP_CLI::warning("No Drupal NIDs found—nothing to delete.");
        return;
    }

    $pages = get_posts([
        'post_type'    => 'page',
        'post_status'  => 'any',
        'meta_key'     => '_drupal_nid',
        'meta_value'   => $nids,
        'meta_compare' => 'IN',
        'numberposts'  => -1,
    ]);

    $deleted = 0;
    foreach ($pages as $page) {
        // Read the NID first: a forced delete removes the post meta as well,
        // so reading it afterwards would always log an empty value.
        $nid = get_post_meta($page->ID, '_drupal_nid', true);
        if (wp_delete_post($page->ID, true)) {
            $deleted++;
            WP_CLI::log("Deleted post ID {$page->ID} (NID: {$nid})");
        } else {
            WP_CLI::warning("Could not delete post ID {$page->ID} (NID: {$nid})");
        }
    }

    WP_CLI::success("Deleted " . $deleted . " Drupal-linked pages.");
}

/**
 * Loads the taxonomy terms attached to the given nodes, grouped by NID.
 *
 * The vocabulary machine name (`vid`) is used as the parent term name and
 * the Drupal term name as the child.
 *
 * @param wpdb  $drupal_db Connection to the Drupal database.
 * @param array $nids      Integer node IDs.
 * @return array Map of NID => list of ['parent' => string, 'child' => string].
 */
private function fetch_taxonomy_by_nid($drupal_db, array $nids)
{
    if (empty($nids)) {
        return [];
    }

    $nid_list = implode(',', array_map('intval', $nids));
    $rows     = $drupal_db->get_results(
        "SELECT ti.nid, td.vid AS vocabulary, ttd.name AS term_name
         FROM taxonomy_index ti
         JOIN taxonomy_term_data td ON ti.tid = td.tid
         JOIN taxonomy_term_field_data ttd ON ti.tid = ttd.tid
         WHERE ti.nid IN ({$nid_list})"
    );

    $by_nid = [];
    // get_results() yields null when the query fails; treat that as "no terms".
    foreach ((array) $rows as $row) {
        $by_nid[$row->nid][] = ['parent' => $row->vocabulary, 'child' => $row->term_name];
    }

    return $by_nid;
}

/**
 * Creates the WordPress page for one Drupal node, replacing any earlier import of it.
 *
 * @param object $node       Row from the node query (nid, title, body_value, status, created, changed, type, author_*).
 * @param array  $term_pairs List of ['parent' => string, 'child' => string] for this node.
 * @return int New post ID, or 0 when the insert failed.
 */
private function import_node($node, array $term_pairs)
{
    // 🗑 Remove any existing post with this NID
    $existing = get_posts([
        'meta_key'    => '_drupal_nid',
        'meta_value'  => $node->nid,
        'post_type'   => 'page',
        'post_status' => 'any',
        'numberposts' => -1,
    ]);
    foreach ($existing as $old_page) {
        wp_delete_post($old_page->ID, true);
    }

    // 👤 Ensure author exists
    list($author_id, $user_login) = $this->resolve_author($node);

    // 🪄 Convert to Gutenberg blocks (an empty body needs no conversion)
    $body          = (string) $node->body_value;
    $block_content = '' === trim($body) ? '' : $this->convert_to_blocks($body);
    $wp_status     = 1 === (int) $node->status ? 'publish' : 'draft';

    // Drupal stores Unix timestamps, so the GMT value is exact and the
    // site-local value is derived from it. Formatting with date() and then
    // converting "to GMT" would shift the time twice on non-UTC sites.
    $post_date_gmt     = gmdate('Y-m-d H:i:s', (int) $node->created);
    $post_modified_gmt = gmdate('Y-m-d H:i:s', (int) $node->changed);
    $post_date         = get_date_from_gmt($post_date_gmt);
    $post_modified     = get_date_from_gmt($post_modified_gmt);

    // The filter carries the Drupal "changed" time through the insert.
    add_filter('wp_insert_post_data', [$this, 'alter_post_modification_time'], 99, 2);
    $post_id = wp_insert_post([
        'post_title'        => $node->title,
        'post_content'      => $block_content,
        'post_status'       => $wp_status,
        'post_type'         => 'page',
        'post_author'       => $author_id,
        'post_date'         => $post_date,
        'post_date_gmt'     => $post_date_gmt,
        'post_modified'     => $post_modified,
        'post_modified_gmt' => $post_modified_gmt,
    ], true);
    remove_filter('wp_insert_post_data', [$this, 'alter_post_modification_time'], 99);

    if (is_wp_error($post_id) || ! $post_id) {
        $reason = is_wp_error($post_id) ? $post_id->get_error_message() : 'unknown error';
        WP_CLI::warning("Could not import NID {$node->nid} ({$node->title}): {$reason}");
        return 0;
    }

    // 🏷 Tag the page with its Drupal content type, creating the term if missing.
    // The orphan pass relies on this term to scope itself to the current --type.
    $term_ids     = [];
    $type_term_id = $this->ensure_page_category_term($node->type);
    if ($type_term_id) {
        $term_ids[] = $type_term_id;
        WP_CLI::log("✅ Assigned term '{$node->type}' (ID: {$type_term_id}) to post {$post_id}");
    }

    // Gather parent + child terms
    foreach ($term_pairs as $pair) {
        $parent_id = $this->ensure_page_category_term($pair['parent']);
        $child_id  = $this->ensure_page_category_term($pair['child'], $parent_id);
        if ($parent_id) {
            $term_ids[] = $parent_id;
        }
        if ($child_id) {
            $term_ids[] = $child_id;
        }
        WP_CLI::log("✔ Assigned '{$pair['child']}' under '{$pair['parent']}' to post {$post_id} in page_category");
    }

    // ✅ Assign all terms in one go (prevent overwriting)
    wp_set_post_terms($post_id, array_unique($term_ids), 'page_category', false);

    // 🧩 Save original Drupal NID
    update_post_meta($post_id, '_drupal_nid', $node->nid);

    WP_CLI::log("Imported: {$node->title} (NID: {$node->nid}, Author: {$user_login}, Status: {$wp_status})");

    return (int) $post_id;
}

/**
 * Finds or creates the WordPress user matching a node's Drupal author.
 *
 * @param object $node Row exposing `author_name` and `author_email` (either may be null/empty).
 * @return array [int user ID (0 when no user could be resolved), string login used].
 */
private function resolve_author($node)
{
    $user_login = sanitize_user((string) ($node->author_name ?? ''));
    // A node may have no usable author name; fall back to a shared login
    // instead of trying to create a user with an empty one.
    if ('' === $user_login) {
        $user_login = 'drupal_user';
    }

    $author_id = username_exists($user_login);
    if ($author_id) {
        return [(int) $author_id, $user_login];
    }

    $email = $node->author_email ?: "{$user_login}@example.com";

    // The address may already belong to an account under another login;
    // reuse it rather than letting wp_create_user() fail on the duplicate.
    $author_id = email_exists($email);
    if ($author_id) {
        return [(int) $author_id, $user_login];
    }

    $created = wp_create_user($user_login, wp_generate_password(), $email);
    if (is_wp_error($created)) {
        WP_CLI::warning("Could not create user '{$user_login}': " . $created->get_error_message());
        return [0, $user_login];
    }

    return [(int) $created, $user_login];
}

/**
 * Returns the ID of a `page_category` term, creating the term when it does not exist.
 *
 * @param string $name      Term name.
 * @param int    $parent_id Parent term ID used only when the term has to be created (0 for none).
 * @return int Term ID, or 0 when the term could not be created.
 */
private function ensure_page_category_term($name, $parent_id = 0)
{
    $term = term_exists($name, 'page_category');
    if (! $term) {
        $term = wp_insert_term($name, 'page_category', $parent_id ? ['parent' => $parent_id] : []);
    }

    if (is_wp_error($term)) {
        WP_CLI::warning("Could not create term '{$name}' in page_category: " . $term->get_error_message());
        return 0;
    }

    return (int) (is_array($term) ? $term['term_id'] : $term);
}

/**
 * Sets published Drupal-linked pages back to draft when their node was not part of this run.
 *
 * Only pages carrying a `page_category` term named after one of the
 * requested types are touched, so a run for one type leaves others alone.
 *
 * @param array $imported_nids NIDs returned by the Drupal query in this run.
 * @param array $types         Content types passed via --type.
 */
private function unpublish_orphans(array $imported_nids, array $types)
{
    // Flip for constant-time membership checks.
    $imported = array_flip(array_map('strval', $imported_nids));

    // Pages that are already drafts need no update, so only published ones are fetched.
    $synced_pages = get_posts([
        'post_type'   => 'page',
        'meta_key'    => '_drupal_nid',
        'post_status' => 'publish',
        'numberposts' => -1,
    ]);

    foreach ($synced_pages as $page) {
        $nid = (string) get_post_meta($page->ID, '_drupal_nid', true);
        if (isset($imported[$nid])) {
            continue;
        }

        $assigned_names = wp_get_post_terms($page->ID, 'page_category', ['fields' => 'names']);
        if (is_wp_error($assigned_names)) {
            continue;
        }

        // Only unpublish if the post matches the current --type context.
        $matched = array_values(array_intersect($assigned_names, $types));
        if (empty($matched)) {
            continue;
        }

        wp_update_post(['ID' => $page->ID, 'post_status' => 'draft']);
        WP_CLI::log("📉 Unpublished orphaned page ID {$page->ID} with term '{$matched[0]}'");
    }
}
/**
 * `wp_insert_post_data` filter callback that keeps caller-supplied modification times.
 *
 * When the raw post array carries non-empty values for both `post_modified`
 * and `post_modified_gmt`, they are copied over the prepared data; otherwise
 * the prepared data is returned untouched.
 *
 * @param array $data    Post data prepared for the database.
 * @param array $postarr Raw post array as handed to the insert call.
 * @return array The post data, possibly with both modification times overridden.
 */
public function alter_post_modification_time($data, $postarr)
{
    // Both values must be present; a partial override is never applied.
    if (empty($postarr['post_modified']) || empty($postarr['post_modified_gmt'])) {
        return $data;
    }

    foreach (['post_modified', 'post_modified_gmt'] as $field) {
        $data[$field] = $postarr[$field];
    }

    return $data;
}
/**
 * Converts an HTML fragment into serialized block markup.
 *
 * The fragment is parsed with DOMDocument and every top-level node of the
 * resulting <body> is handed to convert_node_to_block(); the pieces are
 * concatenated in document order.
 *
 * @param string $html UTF-8 HTML fragment (e.g. a Drupal body field).
 * @return string Block markup, or an empty string when there is nothing to convert.
 */
private function convert_to_blocks($html)
{
    $html = (string) $html;
    // DOMDocument::loadHTML() rejects an empty source, so short-circuit.
    if ('' === trim($html)) {
        return '';
    }

    // Encode non-ASCII characters as numeric entities so the parser does not
    // misread UTF-8 bytes. This replaces the 'HTML-ENTITIES' conversion of
    // mb_convert_encoding(), which is deprecated as of PHP 8.2.
    $encoded = mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

    $doc = new DOMDocument();
    // Silence parser warnings for sloppy markup, then restore the caller's
    // libxml error mode instead of leaving it changed globally.
    $previous_mode = libxml_use_internal_errors(true);
    $doc->loadHTML($encoded);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_mode);

    $body = $doc->getElementsByTagName('body')->item(0);
    if (null === $body) {
        return '';
    }

    $output = '';
    foreach ($body->childNodes as $node) {
        $output .= $this->convert_node_to_block($node);
    }

    return $output;
}
/**
 * Serializes a single DOM node as block markup.
 *
 * Handling by node kind:
 *  - text node: trimmed and wrapped in a paragraph block (empty text yields '')
 *  - <img>, or <a> containing an <img>: image block
 *  - <div>: transparent; its children are converted recursively
 *  - <table>: table block
 *  - <h1>-<h6>: heading block carrying the matching level
 *  - <p>, <blockquote>, <ol>, <ul>: paragraph / quote / list blocks
 *  - any other element: wrapped in paragraph block delimiters
 *  - any other node type (e.g. comments): dropped
 *
 * @param DOMNode $node Node taken from the parsed document.
 * @return string Block markup for the node, or an empty string.
 */
private function convert_node_to_block($node)
{
    if ($node instanceof DOMText) {
        $text = trim($node->wholeText);
        if ('' === $text) {
            return '';
        }
        // The DOM hands back entity-decoded text; re-encode it so characters
        // such as "<" and "&" stay literal text instead of becoming markup.
        $text = htmlspecialchars($text, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
        return "<!-- wp:paragraph -->\n<p>{$text}</p>\n<!-- /wp:paragraph -->\n\n";
    }

    if (!($node instanceof DOMElement)) {
        return '';
    }

    $tag = strtolower($node->tagName);

    // 🖼 Image handler
    if ($tag === 'img') {
        $src  = $node->getAttribute('src');
        $alt  = $node->getAttribute('alt');
        $html = '<img src="' . esc_url($src) . '" alt="' . esc_attr($alt) . '" />';
        return "<!-- wp:image -->\n<figure class='wp-block-image'>{$html}</figure>\n<!-- /wp:image -->\n\n";
    }

    // 🔗 Image wrapped in <a>: only the first image is kept, linked to the anchor's href
    if ($tag === 'a' && $node->getElementsByTagName('img')->length > 0) {
        $img  = $node->getElementsByTagName('img')->item(0);
        $src  = $img->getAttribute('src');
        $alt  = $img->getAttribute('alt');
        $href = $node->getAttribute('href');
        $html = '<a href="' . esc_url($href) . '"><img src="' . esc_url($src) . '" alt="' . esc_attr($alt) . '" /></a>';
        return "<!-- wp:image -->\n<figure class='wp-block-image'>{$html}</figure>\n<!-- /wp:image -->\n\n";
    }

    // 🧱 Recognize div and process its children recursively
    if ($tag === 'div') {
        $content = '';
        foreach ($node->childNodes as $child) {
            $content .= $this->convert_node_to_block($child);
        }
        return $content;
    }

    // Serialized markup of the element itself, including its own tag.
    $outer_html = $node->ownerDocument->saveHTML($node);

    if ($tag === 'table') {
        return "<!-- wp:table -->\n{$outer_html}\n<!-- /wp:table -->\n\n";
    }

    // Headings: level 2 is the heading block's default and is written without
    // attributes; every other level must be declared to match its tag.
    if (preg_match('/^h([1-6])$/', $tag, $heading)) {
        $level = (int) $heading[1];
        $attrs = 2 === $level ? '' : ' {"level":' . $level . '}';
        return "<!-- wp:heading{$attrs} -->\n{$outer_html}\n<!-- /wp:heading -->\n\n";
    }

    // 🧾 Generic block mapping
    switch ($tag) {
        case 'blockquote':
            return "<!-- wp:quote -->\n{$outer_html}\n<!-- /wp:quote -->\n\n";
        case 'ol':
            return "<!-- wp:list {\"ordered\":true} -->\n{$outer_html}\n<!-- /wp:list -->\n\n";
        case 'ul':
            return "<!-- wp:list -->\n{$outer_html}\n<!-- /wp:list -->\n\n";
        case 'p':
        default:
            return "<!-- wp:paragraph -->\n{$outer_html}\n<!-- /wp:paragraph -->\n\n";
    }
}
}
// Expose the importer as `wp drupal-import`.
WP_CLI::add_command('drupal-import', Drupal_Import_Command::class);
}