'Page Categories', 'hierarchical' => true, 'public' => true, 'show_ui' => true, 'show_in_rest' => true, ]); });

    /**
     * WP-CLI command that copies Drupal nodes into WordPress pages.
     *
     * Each imported page stores the originating Drupal node id in the
     * `_drupal_nid` post meta key; that key is what later runs use to find,
     * replace, delete or unpublish previously imported pages.
     */
    class Drupal_Import_Command extends WP_CLI_Command {

        /** Taxonomy that receives the node-type, vocabulary and term names. */
        const TAXONOMY = 'page_category';

        /** Post meta key holding the Drupal node id of an imported page. */
        const NID_META_KEY = '_drupal_nid';

        /**
         * Imports Drupal content into WordPress.
         *
         * ## OPTIONS
         *
         * --type=<type>
         * : (Required) Drupal content type(s) to import or delete. Comma-separated if multiple.
         *
         * [--delete-only]
         * : Only delete existing imported WordPress pages for the specified type.
         *
         * ## EXAMPLES
         *
         *     wp drupal-import --type=story,hidden_content,vidcast,news_article,resources
         *
         * @param array $args       Positional arguments (unused).
         * @param array $assoc_args Associative arguments: `type`, optional `delete-only`.
         */
        public function __invoke($args, $assoc_args) {
            // Validate --type before touching the database so a typo fails fast.
            $types = $this->parse_types($assoc_args['type'] ?? '');
            if (empty($types)) {
                WP_CLI::error("You must provide a --type=TYPE argument (e.g., --type=story).");
            }

            $drupal_db = $this->connect_to_drupal();
            $type_list = "'" . implode("','", array_map('esc_sql', $types)) . "'";

            if (! empty($assoc_args['delete-only'])) {
                $nids = $drupal_db->get_col("
                    SELECT nid
                    FROM node_field_data
                    WHERE type IN ({$type_list})
                ");
                $this->delete_imported_pages((array) $nids);
                return;
            }

            // Fetch content together with author details and timestamps.
            $nodes = $drupal_db->get_results("
                SELECT n.nid, n.title, b.body_value, b.body_summary, n.status,
                       n.created, n.changed, n.uid, n.type,
                       u.name AS author_name, u.mail AS author_email
                FROM node_field_data n
                JOIN node__body b ON n.nid = b.entity_id
                LEFT JOIN users_field_data u ON n.uid = u.uid
                WHERE n.type IN ({$type_list})
            ");

            if (! empty($drupal_db->last_error)) {
                WP_CLI::error("Drupal query failed: " . $drupal_db->last_error);
            }

            // Bail out before the taxonomy lookup: an empty id list would
            // otherwise produce the invalid SQL fragment "IN ()".
            if (empty($nodes)) {
                WP_CLI::warning("No Drupal content found to import.");
                return;
            }

            $node_ids = [];
            foreach ($nodes as $node) {
                $node_ids[] = (int) $node->nid;
            }
            $terms_by_nid = $this->fetch_terms_by_nid($drupal_db, $node_ids);

            $imported_nids = [];
            foreach ($nodes as $node) {
                $node_terms = $terms_by_nid[(int) $node->nid] ?? [];
                if ($this->import_node($node, $node_terms)) {
                    $imported_nids[] = (string) $node->nid;
                }
            }

            $this->unpublish_orphans($imported_nids, $types);

            WP_CLI::success("Imported " . count($imported_nids) . " of " . count($nodes) . " Drupal nodes.");
        }

        /**
         * Filter callback for `wp_insert_post_data` that keeps the modification
         * timestamps supplied by the caller.
         *
         * @param array $data    Post data about to be written.
         * @param array $postarr Post data as passed to wp_insert_post().
         * @return array $data, with both modified fields copied from $postarr
         *               when $postarr provides non-empty values for both.
         */
        public function alter_post_modification_time($data, $postarr) {
            if (! empty($postarr['post_modified']) && ! empty($postarr['post_modified_gmt'])) {
                $data['post_modified']     = $postarr['post_modified'];
                $data['post_modified_gmt'] = $postarr['post_modified_gmt'];
            }

            return $data;
        }

        /**
         * Splits the raw --type value into a clean list of content types.
         *
         * @param mixed $raw Value of the --type argument.
         * @return string[] Trimmed, non-empty, de-duplicated type names.
         */
        private function parse_types($raw) {
            $parts = array_map('trim', explode(',', (string) $raw));

            return array_values(array_unique(array_filter($parts, 'strlen')));
        }

        /**
         * Opens the Drupal database connection described by the DRUPAL_DB_*
         * environment variables and aborts the command when it is unusable.
         *
         * @return wpdb Connection handle for the Drupal database.
         */
        private function connect_to_drupal() {
            foreach (['DRUPAL_DB_USER', 'DRUPAL_DB_NAME', 'DRUPAL_DB_HOST'] as $variable) {
                $value = getenv($variable);
                if (false === $value || '' === $value) {
                    WP_CLI::error("Could not connect to Drupal DB: environment variable {$variable} is not set.");
                }
            }

            $drupal_db = new wpdb(
                getenv('DRUPAL_DB_USER'),
                (string) getenv('DRUPAL_DB_PASS'),
                getenv('DRUPAL_DB_NAME'),
                getenv('DRUPAL_DB_HOST')
            );

            // `new wpdb()` always yields an object, never a WP_Error, so the
            // failure has to be read from the instance itself.
            if (! empty($drupal_db->error) && is_wp_error($drupal_db->error)) {
                WP_CLI::error("Could not connect to Drupal DB: " . $drupal_db->error->get_error_message());
            }
            if (empty($drupal_db->ready)) {
                WP_CLI::error("Could not connect to Drupal DB: connection is not ready.");
            }

            return $drupal_db;
        }

        /**
         * Permanently deletes every page whose `_drupal_nid` meta matches one
         * of the given node ids and reports what was removed.
         *
         * @param array $nids Drupal node ids.
         */
        private function delete_imported_pages(array $nids) {
            if (empty($nids)) {
                WP_CLI::warning("No Drupal NIDs found—nothing to delete.");
                return;
            }

            $page_ids = get_posts([
                'post_type'   => 'page',
                'post_status' => 'any',
                'numberposts' => -1,
                'fields'      => 'ids',
                'meta_query'  => [
                    [
                        'key'     => self::NID_META_KEY,
                        'value'   => $nids,
                        'compare' => 'IN',
                    ],
                ],
            ]);

            $deleted = 0;
            foreach ($page_ids as $page_id) {
                // Read the meta first; it is no longer available once the
                // post has been force-deleted.
                $nid = get_post_meta($page_id, self::NID_META_KEY, true);

                if (! wp_delete_post($page_id, true)) {
                    WP_CLI::warning("Could not delete post ID {$page_id} (NID: {$nid})");
                    continue;
                }

                $deleted++;
                WP_CLI::log("Deleted post ID {$page_id} (NID: {$nid})");
            }

            WP_CLI::success("Deleted " . $deleted . " Drupal-linked pages.");
        }

        /**
         * Loads the taxonomy terms attached to the given nodes.
         *
         * The vocabulary machine name (`vid`) is used as the parent label.
         *
         * @param wpdb  $drupal_db Drupal database connection.
         * @param int[] $nids      Drupal node ids.
         * @return array Map of node id => list of ['parent' => vocabulary, 'child' => term name].
         */
        private function fetch_terms_by_nid($drupal_db, array $nids) {
            $nid_list = implode(',', array_filter(array_map('intval', $nids)));
            if ('' === $nid_list) {
                return [];
            }

            $rows = $drupal_db->get_results("
                SELECT ti.nid, td.vid AS vocabulary, ttd.name AS term_name
                FROM taxonomy_index ti
                JOIN taxonomy_term_data td ON ti.tid = td.tid
                JOIN taxonomy_term_field_data ttd ON ti.tid = ttd.tid
                WHERE ti.nid IN ({$nid_list})
            ");

            $terms_by_nid = [];
            foreach ((array) $rows as $row) {
                $terms_by_nid[(int) $row->nid][] = [
                    'parent' => $row->vocabulary,
                    'child'  => $row->term_name,
                ];
            }

            return $terms_by_nid;
        }

        /**
         * Replaces any previously imported page for the node with a fresh one,
         * then assigns its terms and records the Drupal node id.
         *
         * @param object $node  Row from the node query.
         * @param array  $terms List of ['parent' => ..., 'child' => ...] pairs for this node.
         * @return bool True when the page was created, false when the insert failed.
         */
        private function import_node($node, array $terms) {
            // Remove any existing page for this NID so re-runs do not duplicate.
            $stale_ids = get_posts([
                'post_type'   => 'page',
                'post_status' => 'any',
                'numberposts' => -1,
                'fields'      => 'ids',
                'meta_key'    => self::NID_META_KEY,
                'meta_value'  => $node->nid,
            ]);
            foreach ($stale_ids as $stale_id) {
                wp_delete_post($stale_id, true);
            }

            list($author_id, $user_login) = $this->ensure_author($node);

            $block_content = $this->convert_to_blocks($node->body_value);
            $wp_status     = 1 === (int) $node->status ? 'publish' : 'draft';

            // Drupal stores Unix timestamps: derive the GMT value directly and
            // let WordPress translate it into the site's timezone.
            $created_gmt  = gmdate('Y-m-d H:i:s', (int) $node->created);
            $modified_gmt = gmdate('Y-m-d H:i:s', (int) $node->changed);

            add_filter('wp_insert_post_data', [$this, 'alter_post_modification_time'], 99, 2);
            $post_id = wp_insert_post([
                // wp_insert_post() unslashes its input, so slash free text first.
                'post_title'        => wp_slash((string) $node->title),
                'post_content'      => wp_slash($block_content),
                'post_status'       => $wp_status,
                'post_type'         => 'page',
                'post_author'       => $author_id,
                'post_date'         => get_date_from_gmt($created_gmt),
                'post_date_gmt'     => $created_gmt,
                'post_modified'     => get_date_from_gmt($modified_gmt),
                'post_modified_gmt' => $modified_gmt,
            ], true);
            remove_filter('wp_insert_post_data', [$this, 'alter_post_modification_time'], 99);

            if (is_wp_error($post_id) || ! $post_id) {
                $reason = is_wp_error($post_id) ? $post_id->get_error_message() : 'unknown error';
                WP_CLI::warning("Could not import '{$node->title}' (NID: {$node->nid}): {$reason}");
                return false;
            }

            $term_ids = [];

            // Tag the page with its Drupal content type; the orphan pass
            // relies on this term to recognise pages of a given type.
            $type_term_id = $this->resolve_term_id($node->type);
            if ($type_term_id) {
                $term_ids[] = $type_term_id;
                WP_CLI::log("✅ Assigned term '{$node->type}' (ID: {$type_term_id}) to post {$post_id}");
            }

            // Vocabulary becomes the parent term, the Drupal term its child.
            foreach ($terms as $pair) {
                $parent_id = $this->resolve_term_id($pair['parent']);
                if (! $parent_id) {
                    continue;
                }

                $term_ids[] = $parent_id;

                $child_id = $this->resolve_term_id($pair['child'], $parent_id);
                if ($child_id) {
                    $term_ids[] = $child_id;
                    WP_CLI::log("✔ Assigned '{$pair['child']}' under '{$pair['parent']}' to post {$post_id} in page_category");
                }
            }

            // Assign all terms in one call so earlier ones are not overwritten.
            wp_set_post_terms($post_id, array_values(array_unique($term_ids)), self::TAXONOMY, false);

            // Remember which Drupal node this page came from.
            update_post_meta($post_id, self::NID_META_KEY, $node->nid);

            WP_CLI::log("Imported: {$node->title} (NID: {$node->nid}, Author: {$user_login}, Status: {$wp_status})");

            return true;
        }

        /**
         * Finds or creates the WordPress user matching the node's Drupal author.
         *
         * @param object $node Row from the node query (reads author_name, author_email).
         * @return array Two items: the user id (0 when no user could be resolved) and the login used.
         */
        private function ensure_author($node) {
            $user_login = sanitize_user((string) ($node->author_name ?? ''));
            if ('' === $user_login) {
                // Covers both a missing user row (NULL) and an empty name.
                $user_login = 'drupal_user';
            }

            $author_id = username_exists($user_login);
            if (! $author_id) {
                $email     = $node->author_email ?: "{$user_login}@example.com";
                $author_id = email_exists($email);

                if (! $author_id) {
                    $author_id = wp_create_user($user_login, wp_generate_password(), $email);
                }
            }

            if (is_wp_error($author_id)) {
                WP_CLI::warning("Could not create user '{$user_login}': " . $author_id->get_error_message());
                $author_id = 0;
            }

            return [(int) $author_id, $user_login];
        }

        /**
         * Returns the id of a page_category term, creating the term if needed.
         *
         * @param string $name      Term name.
         * @param int    $parent_id Optional parent term id; when given, the lookup
         *                          and the creation are both scoped to that parent.
         * @return int Term id, or 0 when the name is empty or the term could not be created.
         */
        private function resolve_term_id($name, $parent_id = 0) {
            $name = trim((string) $name);
            if ('' === $name) {
                return 0;
            }

            $term = term_exists($name, self::TAXONOMY, $parent_id ? (int) $parent_id : null);
            if (! $term) {
                $term = wp_insert_term($name, self::TAXONOMY, $parent_id ? ['parent' => (int) $parent_id] : []);
            }

            if (is_wp_error($term)) {
                // A duplicate-term error carries the id of the existing term.
                $existing_id = $term->get_error_data('term_exists');
                if ($existing_id) {
                    return (int) $existing_id;
                }

                WP_CLI::warning("Could not create term '{$name}': " . $term->get_error_message());
                return 0;
            }

            return (int) (is_array($term) ? $term['term_id'] : $term);
        }

        /**
         * Moves published, previously imported pages back to draft when their
         * node was not imported in this run and they carry one of the
         * requested content-type terms.
         *
         * @param string[] $imported_nids Node ids imported during this run.
         * @param string[] $types         Content types passed via --type.
         */
        private function unpublish_orphans(array $imported_nids, array $types) {
            $imported = array_flip(array_map('strval', $imported_nids));

            // Only published pages are queried: re-saving a page that is
            // already a draft would change nothing but its modified date.
            $pages = get_posts([
                'post_type'   => 'page',
                'post_status' => 'publish',
                'numberposts' => -1,
                'meta_key'    => self::NID_META_KEY,
            ]);

            foreach ($pages as $page) {
                $nid = (string) get_post_meta($page->ID, self::NID_META_KEY, true);
                if (isset($imported[$nid])) {
                    continue;
                }

                $assigned = wp_get_post_terms($page->ID, self::TAXONOMY, ['fields' => 'names']);
                if (is_wp_error($assigned)) {
                    continue;
                }

                // Only touch pages that belong to a type handled by this run.
                $matches = array_values(array_intersect($assigned, $types));
                if (empty($matches)) {
                    continue;
                }

                wp_update_post(['ID' => $page->ID, 'post_status' => 'draft']);
                WP_CLI::log("📉 Unpublished orphaned page ID {$page->ID} with term '{$matches[0]}'");
            }
        }

        /**
         * Converts a Drupal HTML body into serialized Gutenberg block markup.
         *
         * @param string|null $html Raw body HTML.
         * @return string Block markup; empty string for an empty body.
         */
        private function convert_to_blocks($html) {
            $html = trim((string) $html);
            if ('' === $html) {
                // DOMDocument::loadHTML() rejects an empty string.
                return '';
            }

            $doc      = new DOMDocument();
            $previous = libxml_use_internal_errors(true);

            // Encode non-ASCII characters as numeric entities so the parser
            // does not misread UTF-8 input.
            $doc->loadHTML(mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));

            libxml_clear_errors();
            libxml_use_internal_errors($previous);

            $body = $doc->getElementsByTagName('body')->item(0);
            if (! $body) {
                return '';
            }

            $output = '';
            foreach ($body->childNodes as $child) {
                $output .= $this->convert_node_to_block($child);
            }

            return $output;
        }

        /**
         * Converts a single DOM node into block markup.
         *
         * Text becomes a paragraph, images (optionally wrapped in a link)
         * become image blocks, `div` is unwrapped recursively, known tags map
         * to their core block and everything else falls back to an HTML block.
         *
         * @param DOMNode $node Node to convert.
         * @return string Block markup, or an empty string when nothing is emitted.
         */
        private function convert_node_to_block($node) {
            if ($node instanceof DOMText) {
                $text = trim($node->wholeText);

                // The DOM holds decoded text, so it must be re-escaped.
                return '' !== $text ? $this->wrap_block('paragraph', '<p>' . esc_html($text) . '</p>') : '';
            }

            if (! ($node instanceof DOMElement)) {
                return '';
            }

            $tag = strtolower($node->tagName);

            // Stand-alone image.
            if ('img' === $tag) {
                return $this->wrap_block('image', $this->image_figure($node));
            }

            // Image wrapped in a link.
            if ('a' === $tag && $node->getElementsByTagName('img')->length > 0) {
                $img = $node->getElementsByTagName('img')->item(0);

                return $this->wrap_block('image', $this->image_figure($img, $node->getAttribute('href')));
            }

            // A div is only a container: convert its children in place.
            if ('div' === $tag) {
                $content = '';
                foreach ($node->childNodes as $child) {
                    $content .= $this->convert_node_to_block($child);
                }

                return $content;
            }

            $outer_html = $node->ownerDocument->saveHTML($node);

            switch ($tag) {
                case 'table':
                    return $this->wrap_block('table', '<figure class="wp-block-table">' . $outer_html . '</figure>');
                case 'p':
                    return $this->wrap_block('paragraph', $outer_html);
                case 'h2':
                    return $this->wrap_block('heading', $outer_html);
                case 'h3':
                    return $this->wrap_block('heading', $outer_html, ['level' => 3]);
                case 'blockquote':
                    return $this->wrap_block('quote', $outer_html);
                case 'ol':
                    return $this->wrap_block('list', $outer_html, ['ordered' => true]);
                case 'ul':
                    return $this->wrap_block('list', $outer_html);
                default:
                    return $this->wrap_block('html', $outer_html);
            }
        }

        /**
         * Builds the `<figure>` markup of an image block.
         *
         * @param DOMElement $img  The `img` element.
         * @param string     $href Optional link target wrapped around the image.
         * @return string Figure markup with escaped attributes.
         */
        private function image_figure($img, $href = '') {
            $markup = sprintf(
                '<img src="%s" alt="%s"/>',
                esc_url($img->getAttribute('src')),
                esc_attr($img->getAttribute('alt') ?: '')
            );

            if ('' !== $href) {
                $markup = sprintf('<a href="%s">%s</a>', esc_url($href), $markup);
            }

            return '<figure class="wp-block-image">' . $markup . '</figure>';
        }

        /**
         * Surrounds markup with the comment delimiters of a core block.
         *
         * @param string $name       Block name without the `wp:` prefix.
         * @param string $inner_html Markup placed between the delimiters.
         * @param array  $attrs      Optional block attributes, serialized as JSON.
         * @return string Delimited block followed by a blank line.
         */
        private function wrap_block($name, $inner_html, array $attrs = []) {
            $attr_json = empty($attrs) ? '' : ' ' . json_encode($attrs);

            return "<!-- wp:{$name}{$attr_json} -->\n{$inner_html}\n<!-- /wp:{$name} -->\n\n";
        }
    }

    WP_CLI::add_command('drupal-import', 'Drupal_Import_Command'); }