From dcd704ec57d3e1dd788b9d77f38e4966113c90a6 Mon Sep 17 00:00:00 2001 From: Jonathan Rosenbaum Date: Sun, 20 Jul 2025 20:15:26 +0000 Subject: [PATCH] Initial commit! --- .gitignore | 1 + README.md | 77 ++++++ drupal2wp-toolkit.php | 528 ++++++++++++++++++++++++++++++++++++++++++ page-taxonomy.php | 183 +++++++++++++++ 4 files changed, 789 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 drupal2wp-toolkit.php create mode 100644 page-taxonomy.php diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2eea525 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.env \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..95bf734 --- /dev/null +++ b/README.md @@ -0,0 +1,77 @@ +# Drupal2WP Toolkit + +Flexible content migration and taxonomy structuring for modern WordPress sites. + +Drupal2WP Toolkit provides an extensible command-line and plugin-based bridge between Drupal 8 and WordPress 6, optimized for developers handling structured content migration. It includes taxonomy mapping, sortable categories, UI enhancements, and future-ready plans for block-based template integration. 
+ +--- + +## ⚙️ Features + +- **Drupal → WordPress Migration** + - Imports all Drupal nodes as WordPress **Pages** by default + - Vocabulary terms are converted into **top-level categories** + - Associated Drupal terms become **sub-categories** under their vocabularies + - Drupal content types are added as **top-level categories** for structural clarity + +- **Admin UI Enhancements** + - Adds a sortable **Last Modified** column to Pages view + - Top-level categories are also **sortable** + - Category filters available directly in admin for Pages + +- **WP-CLI Utilities** + - Run `wp drupal2wp_toolkit import` to initiate Drupal content ingestion + - Easily convert migrated Pages into **Posts**, retaining taxonomy mapping + +--- + +## 🚀 Future Plans + +- **Block Theme Editor Integration** + - Assign categories to custom templates or reusable block layouts + - Enable GUI-based taxonomy-driven theming inside WordPress's Site Editor + +- **Advanced Taxonomy Mapping** + - Support for custom post types and multi-taxonomy assignment + +- **Content Blueprinting** + - Define reusable structures based on imported vocabularies and content types + +--- + +## 🧰 Installation + +1. Clone the repository into your WordPress plugins directory: + + ```bash + git clone https://git.bikeshopi.dev/bike/drupal2wp_toolkit.git wp-content/plugins/drupal2wp_toolkit + ``` + +2. Activate the plugin: + + ```bash + wp plugin activate drupal2wp_toolkit + ``` + +3. Run the import command: + ```bash + wp drupal2wp_toolkit import + ``` + +## 📁 Repo Structure +``` + drupal2wp_toolkit/ + ├── page-taxonomy/ + │ └── page-taxonomy.php + ├── drupal2wp_toolkit.php + └── README.md +``` + +## 🙌 Credits +Created and maintained by developers at BikeShopi Dev. Contributions and feedback welcome — this is a toolkit built for extensibility. + +## 📄 License +MIT License. See LICENSE.md for details. 
+ + +Let me know if you want version badges, contributor guidelines, or sample config diff --git a/drupal2wp-toolkit.php b/drupal2wp-toolkit.php new file mode 100644 index 0000000..4e63db2 --- /dev/null +++ b/drupal2wp-toolkit.php @@ -0,0 +1,528 @@ + 'Page Categories', + 'hierarchical' => true, + 'public' => true, + 'show_ui' => true, + 'show_in_rest' => true, + ]); + }); + + class Drupal_Import_Command extends WP_CLI_Command + { + /** + * Imports Drupal content into WordPress. + * + * ## OPTIONS + * + * --type= + * : (Required) Drupal content type(s) to import or delete. Comma-separated if multiple. Example: --type=story,hidden_content,vidcast,news_article,resources + * + * [--delete-only] + * : Only delete existing imported WordPress pages for the specified type. + * + */ + + public function __invoke($args, $assoc_args) + { + global $wpdb; + + // πŸ“‘ Connect to Drupal DB + $drupal_db = new wpdb( + getenv('DRUPAL_DB_USER'), + getenv('DRUPAL_DB_PASS'), + getenv('DRUPAL_DB_NAME'), + getenv('DRUPAL_DB_HOST') + ); + + if (is_wp_error($drupal_db)) { + WP_CLI::error("Could not connect to Drupal DB: " . $drupal_db->get_error_message()); + } + + // Make --type a required arguement + if (! 
isset($assoc_args['type']) || trim($assoc_args['type']) === '') { + WP_CLI::error("You must provide a --type=TYPE argument (e.g., --type=story)."); + } + + + $type_arg = explode(',', $assoc_args['type']); + $escaped_types = implode("','", array_map('esc_sql', $type_arg)); + + // πŸ” Fetch valid Drupal NIDs + $nids = $drupal_db->get_col(" + SELECT nid FROM node_field_data WHERE type IN ('$escaped_types') + "); + + // 🧹 --delete-only logic + if (isset($assoc_args['delete-only'])) { + if (empty($nids)) { + WP_CLI::warning("No Drupal NIDs foundβ€”nothing to delete."); + return; + } + + $posts_to_delete = get_posts([ + 'post_type' => 'page', + 'post_status' => 'any', + 'meta_key' => '_drupal_nid', + 'meta_value' => $nids, + 'numberposts' => -1, + ]); + + foreach ($posts_to_delete as $post) { + wp_delete_post($post->ID, true); + WP_CLI::log("Deleted post ID {$post->ID} (NID: " . get_post_meta($post->ID, '_drupal_nid', true) . ")"); + } + + WP_CLI::success("Deleted " . count($posts_to_delete) . " Drupal-linked pages."); + return; + } + + // 🧾 Fetch content with user & timestamps + $nodes = $drupal_db->get_results(" + SELECT n.nid, n.title, b.body_value, b.body_summary, n.status, n.created, n.changed, n.uid, n.type, + u.name AS author_name, u.mail AS author_email + FROM node_field_data n + JOIN node__body b ON n.nid = b.entity_id + LEFT JOIN users_field_data u ON n.uid = u.uid + WHERE n.type IN ('$escaped_types') + "); + + + $nids_only = array_map(function ($node) { + return intval($node->nid); + }, $nodes); + + // Taxonomy + $term_data = $drupal_db->get_results(" + SELECT + ti.nid, + td.vid AS vocabulary, -- Use vid instead of joining missing table + ttd.name AS term_name + FROM taxonomy_index ti + JOIN taxonomy_term_data td ON ti.tid = td.tid + JOIN taxonomy_term_field_data ttd ON ti.tid = ttd.tid + WHERE ti.nid IN (" . implode(',', array_map('intval', $nids_only)) . 
") + "); + + // foreach ($nodes as $node) { + // WP_CLI::log("Node Title: {$node->title}"); + // WP_CLI::log("Node ID: {$node->nid}"); + // WP_CLI::log("Created: " . date('Y-m-d H:i:s', $node->created)); + // WP_CLI::log("Changed: " . date('Y-m-d H:i:s', $node->changed)); + // break; // Exit after first iteration + // } + // exit; // End the script + + $taxonomy_by_nid = []; + foreach ($term_data as $row) { + $nid = $row->nid; + $vocab = sanitize_title($row->vocabulary); // safe WP slug + $parent = $row->vocabulary; + $child = $row->term_name; + + $taxonomy_by_nid[$nid][$vocab][] = ['parent' => $parent, 'child' => $child]; + } + + if (empty($nodes)) { + WP_CLI::warning("No Drupal content found to import."); + return; + } + + $imported_nids = []; + + foreach ($nodes as $node) { + $imported_nids[] = $node->nid; + + // TEST a node + // if ( $node->nid == 498 ) { + // WP_CLI::log( "Raw body for nid 498:\n" . $node->body_value ); + // } + + // πŸ—‘ Remove any existing post with this NID + $existing = get_posts([ + 'meta_key' => '_drupal_nid', + 'meta_value' => $node->nid, + 'post_type' => 'page', + 'post_status' => 'any', + 'numberposts' => -1, + ]); + foreach ($existing as $post) { + wp_delete_post($post->ID, true); + } + + // πŸ‘€ Ensure author exists + $user_login = sanitize_user($node->author_name ?? 'drupal_user'); + $author_id = username_exists($user_login); + if (! $author_id) { + $author_id = wp_create_user($user_login, wp_generate_password(), $node->author_email ?: "{$user_login}@example.com"); + } + + // πŸͺ„ Convert to Gutenberg blocks + $block_content = $this->convert_to_blocks($node->body_value); + $wp_status = $node->status == 1 ? 'publish' : 'draft'; + $post_date = date('Y-m-d H:i:s', $node->created); + $post_modified = date('Y-m-d H:i:s', $node->changed); + + + // WP_CLI::log("{$node->title} POST DATE: {$post_date} POST MODIFED: {$post_modified}"); + // exit; + + // if ($node->nid == 1005) { + // WP_CLI::log("Raw body for nid 498:\n" . 
$block_content); + // } + + add_filter('wp_insert_post_data', [$this, 'alter_post_modification_time'], 99, 2); + $post_id = wp_insert_post([ + 'post_title' => $node->title, + 'post_content' => $block_content, + 'post_status' => $wp_status, + 'post_type' => 'page', + 'post_author' => $author_id, + 'post_date' => $post_date, + 'post_date_gmt' => get_gmt_from_date($post_date), + 'post_modified' => $post_modified, + 'post_modified_gmt' => get_gmt_from_date($post_modified), + ]); + remove_filter('wp_insert_post_data', [$this, 'alter_post_modification_time'], 99, 2); + + + // 🏷 Assign tags based on each --type value passed to the script + $term_name = $node->type; + $term_ids = []; + + // 🌱 Create the term if missing + $term_info = term_exists($term_name, 'page_category'); + + // WP_CLI::log("Vocabulary {$term_name}: " . json_encode($term_info)); + // exit; + + if (! is_wp_error($term_info)) { + $term_id = is_array($term_info) ? $term_info['term_id'] : $term_info; + $term_ids[] = $term_id; // πŸ‘ˆ Add node->type term to final list + WP_CLI::log("βœ… Assigned term '{$term_name}' (ID: {$term_id}) to post {$post_id}"); + } + + // Gather parent + child terms + if (isset($taxonomy_by_nid[$node->nid])) { + foreach ($taxonomy_by_nid[$node->nid] as $vocab_slug => $terms) { + foreach ($terms as $pair) { + $parent_name = $pair['parent']; + $child_name = $pair['child']; + + $parent = term_exists($parent_name, 'page_category'); + if (! $parent) { + $parent = wp_insert_term($parent_name, 'page_category'); + } + $parent_id = is_array($parent) ? $parent['term_id'] : $parent; + + $child = term_exists($child_name, 'page_category'); + if (! $child) { + $child = wp_insert_term($child_name, 'page_category', ['parent' => $parent_id]); + } + $child_id = is_array($child) ? 
$child['term_id'] : $child; + + $term_ids[] = $parent_id; + $term_ids[] = $child_id; + + WP_CLI::log("βœ” Assigned '{$child_name}' under '{$parent_name}' to post {$post_id} in page_category"); + } + } + } + + // βœ… Assign all terms in one go (prevent overwriting) + wp_set_post_terms($post_id, array_unique($term_ids), 'page_category', false); + + + + // 🧩 Save original Drupal NID + update_post_meta($post_id, '_drupal_nid', $node->nid); + + WP_CLI::log("Imported: {$node->title} (NID: {$node->nid}, Author: {$user_login}, Status: {$wp_status})"); + } + + // 🧽 Unpublish orphans + $all_synced = get_posts([ + 'post_type' => 'page', + 'meta_key' => '_drupal_nid', + 'post_status' => ['publish', 'draft'], + 'numberposts' => -1, + ]); + + + // foreach ($all_synced as $page) { + // $nid = get_post_meta($page->ID, '_drupal_nid', true); + // if (! in_array($nid, $imported_nids)) { + // wp_update_post(['ID' => $page->ID, 'post_status' => 'draft']); + // // WP_CLI::log("Unpublished orphaned page ID {$page->ID} (Drupal NID {$nid})"); + // } + // } + + foreach ($all_synced as $page) { + $nid = get_post_meta($page->ID, '_drupal_nid', true); + + // Skip if the nid was imported this run + if (in_array($nid, $imported_nids)) { + continue; + } + + // Get assigned terms + $assigned_terms = wp_get_post_terms($page->ID, 'page_category', ['fields' => 'names']); + + // Check if any assigned term matches current types passed in --type + $matched_vocab = false; + foreach ($assigned_terms as $term_name) { + if (in_array($term_name, $type_arg)) { + $matched_vocab = true; + break; + } + } + + // Only unpublish if the post matches current vocab context + if ($matched_vocab) { + wp_update_post(['ID' => $page->ID, 'post_status' => 'draft']); + WP_CLI::log("πŸ“‰ Unpublished orphaned page ID {$page->ID} with term '{$term_name}'"); + } + } + } + + public function alter_post_modification_time($data, $postarr) + { + if (!empty($postarr['post_modified']) && !empty($postarr['post_modified_gmt'])) { + 
$data['post_modified'] = $postarr['post_modified']; + $data['post_modified_gmt'] = $postarr['post_modified_gmt']; + } + + return $data; + } + + + private function convert_to_blocks($html) + { + $doc = new DOMDocument(); + libxml_use_internal_errors(true); + $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8')); + libxml_clear_errors(); + + $body = $doc->getElementsByTagName('body')->item(0); + $output = ''; + + foreach ($body->childNodes as $node) { + $output .= $this->convert_node_to_block($node); + } + + return $output; + } + + private function convert_node_to_block($node) + { + if ($node instanceof DOMText) { + $text = trim($node->wholeText); + return $text !== '' ? "\n

{$text}

\n\n\n" : ''; + } + + if (!($node instanceof DOMElement)) { + return ''; + } + + $tag = strtolower($node->tagName); + + // πŸ–Ό Image handler + if ($tag === 'img') { + $src = $node->getAttribute('src'); + $alt = $node->getAttribute('alt') ?: ''; + $html = '' . esc_attr($alt) . ''; + return "\n
{$html}
\n\n\n"; + } + + // πŸ”— Image wrapped in + if ($tag === 'a' && $node->getElementsByTagName('img')->length > 0) { + $img = $node->getElementsByTagName('img')->item(0); + $src = $img->getAttribute('src'); + $alt = $img->getAttribute('alt') ?: ''; + $href = $node->getAttribute('href'); + $html = '' . esc_attr($alt) . ''; + return "\n
{$html}
\n\n\n"; + } + + // 🧱 Recognize div and process its children recursively + if ($tag === 'div') { + $content = ''; + foreach ($node->childNodes as $child) { + $content .= $this->convert_node_to_block($child); + } + return $content; + } + + if ($tag === 'table') { + $innerHTML = $node->ownerDocument->saveHTML($node); + + // Optional: sanitize or restructure table HTML here + + return "\n{$innerHTML}\n\n\n"; + } + + + // 🧾 Generic block mapping + $innerHTML = $node->ownerDocument->saveHTML($node); + switch ($tag) { + case 'p': + return "\n{$innerHTML}\n\n\n"; + case 'h2': + case 'h3': + return "\n{$innerHTML}\n\n\n"; + case 'blockquote': + return "\n{$innerHTML}\n\n\n"; + case 'ol': + return "\n{$innerHTML}\n\n\n"; + case 'ul': + return "\n{$innerHTML}\n\n\n"; + default: + return "\n{$innerHTML}\n\n\n"; + } + } + + + // private function convert_to_blocks($html) + // { + // $doc = new DOMDocument(); + // libxml_use_internal_errors(true); + // $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8')); + // libxml_clear_errors(); + + // $body = $doc->getElementsByTagName('body')->item(0); + // $output = ''; + + // // foreach ($body->childNodes as $node) { + // // if (!($node instanceof DOMElement)) { + // // continue; + // // } + + // // // Check if this node (or its children) contain an + // // $img_tag = $node->getElementsByTagName('img')->item(0); + + // // if ($img_tag) { + // // // Extract src and alt + // // $src = $img_tag->getAttribute('src'); + // // $alt = $img_tag->getAttribute('alt') ?: ''; + + // // // Check for a wrapping + // // $link = $node->getElementsByTagName('a')->item(0); + // // $image_html = '' . esc_attr($alt) . ''; + + // // if ($link) { + // // $href = $link->getAttribute('href'); + // // $image_html = '' . $image_html . ''; + // // } + + // // // Wrap in figure block + // // $figure = '
' . $image_html . '
'; + + // // $output .= "\n" . $figure . "\n\n\n"; + // // continue; + // // } + + // // // Render other HTML blocks + // // $innerHTML = $doc->saveHTML($node); + + // // // Fix insecure YouTube embeds + // // $innerHTML = preg_replace( + // // '#src=["\']http://(www\.)?youtube\.com#i', + // // 'src="https://www.youtube.com', + // // $innerHTML + // // ); + + // // switch ($node->nodeName) { + // // case 'p': + // // $output .= "\n{$innerHTML}\n\n\n"; + // // break; + // // case 'ol': + // // $output .= "\n{$innerHTML}\n\n\n"; + // // break; + // // case 'ul': + // // $output .= "\n{$innerHTML}\n\n\n"; + // // break; + // // case 'h2': + // // case 'h3': + // // $output .= "\n{$innerHTML}\n\n\n"; + // // break; + // // case 'blockquote': + // // $output .= "\n{$innerHTML}\n\n\n"; + // // break; + // // default: + // // $output .= "\n{$innerHTML}\n\n\n"; + // // break; + // // } + // // } + + // foreach ($body->childNodes as $node) { + // if (!($node instanceof DOMElement)) { + // continue; + // } + + // // πŸ–Ό Render all images first + // $img_tags = $node->getElementsByTagName('img'); + // if ($img_tags->length > 0) { + // foreach ($img_tags as $img_tag) { + // $src = $img_tag->getAttribute('src'); + // $alt = $img_tag->getAttribute('alt') ?: ''; + + // // Check for wrapping + // $link = $img_tag->parentNode instanceof DOMElement && $img_tag->parentNode->nodeName === 'a' + // ? $img_tag->parentNode + // : null; + + // $image_html = '' . esc_attr($alt) . ''; + // if ($link) { + // $href = $link->getAttribute('href'); + // $image_html = '' . $image_html . ''; + // } + + // $figure = '
' . $image_html . '
'; + // $output .= "\n" . $figure . "\n\n\n"; + // } + // } + + // // πŸ’‘ Continue with other content + // $innerHTML = $doc->saveHTML($node); + + // // Fix insecure embeds + // $innerHTML = preg_replace( + // '#src=["\']http://(www\.)?youtube\.com#i', + // 'src="https://www.youtube.com', + // $innerHTML + // ); + + // switch ($node->nodeName) { + // case 'p': + // $output .= "\n{$innerHTML}\n\n\n"; + // break; + // case 'ol': + // $output .= "\n{$innerHTML}\n\n\n"; + // break; + // case 'ul': + // $output .= "\n{$innerHTML}\n\n\n"; + // break; + // case 'h2': + // case 'h3': + // $output .= "\n{$innerHTML}\n\n\n"; + // break; + // case 'blockquote': + // $output .= "\n{$innerHTML}\n\n\n"; + // break; + // default: + // $output .= "\n{$innerHTML}\n\n\n"; + // break; + // } + // } + + + // return $output; + // } + } + + WP_CLI::add_command('drupal-import', 'Drupal_Import_Command'); + } diff --git a/page-taxonomy.php b/page-taxonomy.php new file mode 100644 index 0000000..41c0490 --- /dev/null +++ b/page-taxonomy.php @@ -0,0 +1,183 @@ + 'Page Categories', + 'hierarchical' => true, + 'public' => true, + 'show_ui' => true, + 'show_in_rest' => true, + ]); +} +add_action('init', 'add_page_categories_taxonomy'); + +// For WP-CLI (where 'init' may not trigger reliably) +if (defined('WP_CLI') && WP_CLI) { + WP_CLI::add_hook('after_wp_load', function () { + register_page_category_taxonomy(); + }); +} + +// Add a new column to the Pages list +add_filter('manage_page_posts_columns', 'add_page_category_column'); +function add_page_category_column($columns) +{ + $new_columns = array(); + foreach ($columns as $key => $value) { + $new_columns[$key] = $value; + if ('author' === $key) { + $new_columns['page_category'] = 'Categories'; + } + } + return $new_columns; +} + + +// Populate the custom column with taxonomy terms +add_action('manage_page_posts_custom_column', 'show_page_category_column', 10, 2); +function show_page_category_column($column, $post_id) +{ + if ('page_category' 
=== $column) { + $terms = get_the_terms($post_id, 'page_category'); + if (!empty($terms) && !is_wp_error($terms)) { + $links = array(); + foreach ($terms as $term) { + $url = admin_url('edit.php?post_type=page&page_category=' . $term->slug); + $links[] = '' . esc_html($term->name) . ''; + } + echo implode(', ', $links); + } else { + echo 'β€”'; + } + } +} + +// filter taxonomy +add_action('restrict_manage_posts', 'filter_page_by_top_level_category'); +function filter_page_by_top_level_category() +{ + global $typenow; + + if ($typenow !== 'page') return; + + $taxonomy = 'page_category'; + $terms = get_terms([ + 'taxonomy' => $taxonomy, + 'parent' => 0, // πŸ‘ˆ Only top-level categories + 'hide_empty' => false, + ]); + + if (!empty($terms)) { + echo ''; + } +} + +add_filter('parse_query', 'apply_top_level_category_filter'); +function apply_top_level_category_filter($query) +{ + global $pagenow; + + if ( + $pagenow === 'edit.php' && + $query->is_main_query() && + isset($_GET['page_category']) && + !empty($_GET['page_category']) && + $query->get('post_type') === 'page' + ) { + $taxonomy = 'page_category'; + $term_slug = sanitize_text_field($_GET['page_category']); + + // Get full term object by slug + $term = get_term_by('slug', $term_slug, $taxonomy); + + if ($term && !is_wp_error($term)) { + $query->set('tax_query', [[ + 'taxonomy' => $taxonomy, + 'field' => 'term_id', + 'terms' => [$term->term_id], + 'include_children' => true, // πŸ‘ˆ Includes all subcategories too + 'operator' => 'IN' // Matches pages with this term, even with others + ]]); + } + } +} + + +// 1️⃣ Add the Last Modified column +add_filter('manage_page_posts_columns', function($columns) { + $columns['modified_date'] = 'Last Modified'; + return $columns; +}); + +// 2️⃣ Display the modified time +add_action('manage_page_posts_custom_column', function($column, $post_id) { + if ($column === 'modified_date') { + echo get_post_modified_time('F j, Y g:i a', false, $post_id); + } +}, 10, 2); + +// 3️⃣ Make 
it sortable +add_filter('manage_edit-page_sortable_columns', function($columns) { + $columns['modified_date'] = 'modified_date'; + return $columns; +}); + +// 4️⃣ Handle the sorting logic +add_action('pre_get_posts', function($query) { + if (!is_admin() || !$query->is_main_query()) { + return; + } + + if ($query->get('orderby') === 'modified_date') { + $query->set('orderby', 'modified'); + } +}); + +// Taxonomy based templates +add_action('admin_init', 'generate_page_category_templates'); + function generate_page_category_templates() + { + // Define plugin directory and template folder + $plugin_dir = plugin_dir_path(__FILE__); + $template_dir = $plugin_dir . 'templates/'; + + if (!is_dir($template_dir)) { + mkdir($template_dir); // Create if not exists + } + + // Get all top-level terms + $terms = get_terms([ + 'taxonomy' => 'page_category', + 'parent' => 0, + 'hide_empty' => false, + ]); + + foreach ($terms as $term) { + $slug = $term->slug; + $filename = "taxonomy-page_category-{$slug}.php"; + $filepath = $template_dir . $filename; + + if (!file_exists($filepath)) { + // Create a basic scaffold file + file_put_contents($filepath, "name}'\nget_header();\n?> +

Category: name}'); ?>

+ +

\">

+

+ + + "); + } + } + }