580 lines
18 KiB
PHP
580 lines
18 KiB
PHP
<?php
|
|
/**
|
|
* WordPress Site Data Collection
|
|
*
|
|
* Collects WordPress posts, taxonomies, and site data for IGNY8
|
|
* Follows WORDPRESS-PLUGIN-INTEGRATION.md guidelines
|
|
*
|
|
* @package Igny8Bridge
|
|
*/
|
|
|
|
// Stop immediately when this file is requested directly instead of being loaded by WordPress.
defined('ABSPATH') || exit;
|
|
|
|
/**
 * Fetch posts of one post type through this site's own REST API.
 *
 * Requests `/wp-json/wp/v2/{rest_base}` page by page, ordered by modification
 * date (newest first), and converts every item into a flat array. Paging stops
 * at the first transport error, non-2xx response, undecodable or empty page,
 * short page, or after `max_pages` requests; whatever was collected up to that
 * point is returned.
 *
 * @param string $post_type Post type name. The registered `rest_base` is used for
 *                          the route when available, otherwise the name itself.
 * @param int    $per_page  Items per request, clamped to the range 1-100.
 * @param array  $args      {
 *     Optional query settings.
 *
 *     @type string          $status    Value sent as the `status` query argument. Default 'publish'.
 *     @type int|string|null $after     Only return posts after this moment. A Unix timestamp,
 *                                      or a date string understood by strtotime(). Default null.
 *     @type int             $max_pages Maximum number of pages to request (at least one
 *                                      page is always requested). Default 5.
 * }
 * @return array List of formatted posts; empty when nothing could be fetched.
 */
function igny8_fetch_wordpress_posts($post_type = 'post', $per_page = 100, $args = array()) {
    $defaults = array(
        'status' => 'publish',
        'after' => null,
        'max_pages' => 5,
    );
    $args = wp_parse_args($args, $defaults);

    $post_type_object = get_post_type_object($post_type);
    $rest_base = ($post_type_object && !empty($post_type_object->rest_base)) ? $post_type_object->rest_base : $post_type;

    $base_url = sprintf('%s/wp-json/wp/v2/%s', get_site_url(), $rest_base);

    $query_args = array(
        // A page size below 1 is rejected by the REST API, so clamp on both ends.
        'per_page' => max(1, min((int) $per_page, 100)),
        'status' => $args['status'],
        'orderby' => 'modified',
        'order' => 'desc',
    );

    if (!empty($args['after'])) {
        // gmdate() needs an integer timestamp; tolerate date strings instead of failing on them.
        $after = is_numeric($args['after']) ? (int) $args['after'] : strtotime((string) $args['after']);
        if ($after) {
            $query_args['after'] = gmdate('c', $after);
        }
    }

    $max_pages = max(1, (int) $args['max_pages']);
    $formatted_posts = array();
    $page = 1;

    do {
        $query_args['page'] = $page;
        $response = wp_remote_get(add_query_arg($query_args, $base_url));

        if (is_wp_error($response)) {
            break;
        }

        // Error responses carry a JSON object ({code, message, data}) that would
        // otherwise be iterated as if it were a list of posts.
        $status_code = (int) wp_remote_retrieve_response_code($response);
        if ($status_code < 200 || $status_code >= 300) {
            break;
        }

        $posts = json_decode(wp_remote_retrieve_body($response), true);

        if (!is_array($posts) || empty($posts)) {
            break;
        }

        foreach ($posts as $post) {
            // Skip anything that is not a post record.
            if (!is_array($post) || !isset($post['id'])) {
                continue;
            }

            $content = $post['content']['rendered'] ?? '';
            $word_count = str_word_count(strip_tags($content));

            $formatted_posts[] = array(
                'id' => $post['id'],
                'title' => html_entity_decode($post['title']['rendered'] ?? ''),
                'content' => $content,
                'excerpt' => $post['excerpt']['rendered'] ?? '',
                'status' => $post['status'] ?? 'draft',
                'url' => $post['link'] ?? '',
                'published' => $post['date'] ?? '',
                'modified' => $post['modified'] ?? '',
                'author' => $post['author'] ?? 0,
                'post_type' => $post['type'] ?? $post_type,
                'taxonomies' => array(
                    'categories' => $post['categories'] ?? array(),
                    'tags' => $post['tags'] ?? array(),
                ),
                'meta' => array(
                    'word_count' => $word_count,
                    // Reading time in minutes at 200 words per minute, rounded up.
                    'reading_time' => $word_count ? ceil($word_count / 200) : 0,
                    'featured_media' => $post['featured_media'] ?? 0,
                ),
            );
        }

        // A short page means there is nothing left to fetch.
        if (count($posts) < $query_args['per_page']) {
            break;
        }

        $page++;
    } while ($page <= $max_pages);

    return $formatted_posts;
}
|
|
|
|
/**
 * Fetch the public post types exposed by this site's REST API.
 *
 * Reads `/wp-json/wp/v2/types` and returns one entry per public post type.
 *
 * @return array|false List of arrays with the keys `name`, `label`, `description`
 *                     and `rest_base`, or false when the request fails, returns a
 *                     non-2xx status, or the body is not a JSON array/object.
 */
function igny8_fetch_all_post_types() {
    $wp_response = wp_remote_get(get_site_url() . '/wp-json/wp/v2/types');

    if (is_wp_error($wp_response)) {
        return false;
    }

    // An error response decodes to a {code, message, data} object, not a type map.
    $status_code = (int) wp_remote_retrieve_response_code($wp_response);
    if ($status_code < 200 || $status_code >= 300) {
        return false;
    }

    $types = json_decode(wp_remote_retrieve_body($wp_response), true);

    if (!is_array($types)) {
        return false;
    }

    $post_types = array();
    foreach ($types as $type_name => $type_data) {
        if (!is_array($type_data)) {
            continue;
        }

        if (array_key_exists('public', $type_data)) {
            $is_public = !empty($type_data['public']);
        } else {
            // NOTE(review): the core types endpoint does not appear to include a
            // top-level `public` key in its default response - confirm. When it is
            // absent, use the locally registered post type, which is the same site.
            $type_object = get_post_type_object($type_name);
            $is_public = $type_object && !empty($type_object->public);
        }

        if (!$is_public) {
            continue;
        }

        $post_types[] = array(
            'name' => $type_name,
            'label' => $type_data['name'] ?? $type_name,
            'description' => $type_data['description'] ?? '',
            'rest_base' => $type_data['rest_base'] ?? $type_name,
        );
    }

    return $post_types;
}
|
|
|
|
/**
 * Gather posts for every post type reported by igny8_fetch_all_post_types().
 *
 * Each type is fetched with igny8_fetch_wordpress_posts() using its default
 * query settings, and the results are appended in the order the types are listed.
 *
 * @param int $per_page Page size forwarded to igny8_fetch_wordpress_posts().
 * @return array Combined list of posts; empty when no post types are available.
 */
function igny8_fetch_all_wordpress_posts($per_page = 100) {
    $collected = array();

    $available_types = igny8_fetch_all_post_types();
    if (empty($available_types)) {
        return $collected;
    }

    foreach ($available_types as $type_info) {
        $batch = igny8_fetch_wordpress_posts($type_info['name'], $per_page);

        // Nothing came back for this type; move on to the next one.
        if (empty($batch)) {
            continue;
        }

        $collected = array_merge($collected, $batch);
    }

    return $collected;
}
|
|
|
|
/**
 * Fetch the public taxonomies exposed by this site's REST API.
 *
 * Reads `/wp-json/wp/v2/taxonomies` and returns one entry per public taxonomy.
 *
 * @return array|false List of arrays with the keys `name`, `label`, `description`,
 *                     `hierarchical`, `rest_base` and `object_types`, or false when
 *                     the request fails, returns a non-2xx status, or the body is
 *                     not a JSON array/object.
 */
function igny8_fetch_wordpress_taxonomies() {
    $wp_response = wp_remote_get(get_site_url() . '/wp-json/wp/v2/taxonomies');

    if (is_wp_error($wp_response)) {
        return false;
    }

    // An error response decodes to a {code, message, data} object, not a taxonomy map.
    $status_code = (int) wp_remote_retrieve_response_code($wp_response);
    if ($status_code < 200 || $status_code >= 300) {
        return false;
    }

    $taxonomies = json_decode(wp_remote_retrieve_body($wp_response), true);

    if (!is_array($taxonomies)) {
        return false;
    }

    $formatted_taxonomies = array();
    foreach ($taxonomies as $tax_name => $tax_data) {
        if (!is_array($tax_data)) {
            continue;
        }

        if (array_key_exists('public', $tax_data)) {
            $is_public = !empty($tax_data['public']);
        } else {
            // NOTE(review): the core taxonomies endpoint does not appear to include
            // a top-level `public` key in its default response - confirm. When it is
            // absent, use the locally registered taxonomy, which is the same site.
            $taxonomy_object = get_taxonomy($tax_name);
            $is_public = $taxonomy_object && !empty($taxonomy_object->public);
        }

        if (!$is_public) {
            continue;
        }

        $formatted_taxonomies[] = array(
            'name' => $tax_name,
            'label' => $tax_data['name'] ?? $tax_name,
            'description' => $tax_data['description'] ?? '',
            'hierarchical' => !empty($tax_data['hierarchical']),
            'rest_base' => $tax_data['rest_base'] ?? $tax_name,
            'object_types' => $tax_data['types'] ?? array(),
        );
    }

    return $formatted_taxonomies;
}
|
|
|
|
/**
 * Fetch every term of one taxonomy through this site's own REST API.
 *
 * Requests `/wp-json/wp/v2/{rest_base}` page by page until a transport error,
 * a non-2xx response, an undecodable or empty page, or a short page is seen.
 * Terms collected before the stop condition are returned.
 *
 * @param string $taxonomy Taxonomy name. The registered `rest_base` is used for the
 *                         route when available, otherwise the name itself. The value
 *                         is copied into the `taxonomy` key of every returned term.
 * @param int    $per_page Terms per request, clamped to the range 1-100.
 * @return array List of formatted terms; empty when nothing could be fetched.
 */
function igny8_fetch_taxonomy_terms($taxonomy, $per_page = 100) {
    $taxonomy_obj = get_taxonomy($taxonomy);
    $rest_base = ($taxonomy_obj && !empty($taxonomy_obj->rest_base)) ? $taxonomy_obj->rest_base : $taxonomy;

    $base_url = sprintf('%s/wp-json/wp/v2/%s', get_site_url(), $rest_base);

    // A page size below 1 is rejected by the REST API, so clamp on both ends.
    $page_size = max(1, min((int) $per_page, 100));

    $formatted_terms = array();
    $page = 1;

    do {
        $response = wp_remote_get(add_query_arg(array(
            'per_page' => $page_size,
            'page' => $page,
        ), $base_url));

        if (is_wp_error($response)) {
            break;
        }

        // Error responses carry a JSON object ({code, message, data}) that would
        // otherwise be iterated as if it were a list of terms.
        $status_code = (int) wp_remote_retrieve_response_code($response);
        if ($status_code < 200 || $status_code >= 300) {
            break;
        }

        $terms = json_decode(wp_remote_retrieve_body($response), true);

        if (!is_array($terms) || empty($terms)) {
            break;
        }

        foreach ($terms as $term) {
            // Skip anything that is not a term record.
            if (!is_array($term) || !isset($term['id'])) {
                continue;
            }

            $formatted_terms[] = array(
                'id' => $term['id'],
                'name' => $term['name'] ?? '',
                'slug' => $term['slug'] ?? '',
                'description' => $term['description'] ?? '',
                'count' => $term['count'] ?? 0,
                'parent' => $term['parent'] ?? 0,
                'taxonomy' => $taxonomy,
                'url' => $term['link'] ?? '',
            );
        }

        // A short page means there is nothing left to fetch.
        if (count($terms) < $page_size) {
            break;
        }

        $page++;
    } while (true);

    return $formatted_terms;
}
|
|
|
|
/**
 * Fetch the terms of every taxonomy reported by igny8_fetch_wordpress_taxonomies().
 *
 * Taxonomies without any terms are left out of the result.
 *
 * @param int $per_page Page size forwarded to igny8_fetch_taxonomy_terms().
 * @return array Map of taxonomy name => list of formatted terms; empty when no
 *               taxonomies are available.
 */
function igny8_fetch_all_taxonomy_terms($per_page = 100) {
    $taxonomies = igny8_fetch_wordpress_taxonomies();

    if (!$taxonomies) {
        return array();
    }

    $all_terms = array();
    foreach ($taxonomies as $taxonomy) {
        // igny8_fetch_taxonomy_terms() expects the taxonomy NAME: it resolves the
        // REST base itself via get_taxonomy() and stamps its argument onto every
        // term as `taxonomy`. Passing the REST base (e.g. "categories") made that
        // lookup miss and labelled terms with the route instead of the taxonomy.
        $terms = igny8_fetch_taxonomy_terms($taxonomy['name'], $per_page);

        if ($terms) {
            $all_terms[$taxonomy['name']] = $terms;
        }
    }

    return $all_terms;
}
|
|
|
|
/**
 * Build a snapshot of the site for IGNY8 semantic mapping.
 *
 * The snapshot contains site identity, the resolved scan settings, published
 * posts of every enabled post type, terms of the tracked taxonomies, optional
 * WooCommerce data and, when the Linker module is enabled, the link graph.
 * A summary of the snapshot is stored in the `igny8_last_site_snapshot` option.
 *
 * @param array $args Scan arguments handed to igny8_get_site_scan_settings().
 * @return array The site snapshot, or an array with `disabled => true` when the
 *               connection or the "sites" module is switched off.
 */
function igny8_collect_site_data($args = array()) {
    // The whole integration is switched off.
    if (!igny8_is_connection_enabled()) {
        return array('disabled' => true, 'reason' => 'connection_disabled');
    }

    // The "sites" module is switched off (only checked when the helper exists).
    $has_module_api = function_exists('igny8_is_module_enabled');
    if ($has_module_api && !igny8_is_module_enabled('sites')) {
        return array('disabled' => true);
    }

    $settings = igny8_get_site_scan_settings($args);

    $site_data = array(
        'site_url' => get_site_url(),
        'site_name' => get_bloginfo('name'),
        'site_description' => get_bloginfo('description'),
        'collected_at' => current_time('mysql'),
        'settings' => $settings,
        'posts' => array(),
        'taxonomies' => array(),
        'products' => array(),
        'product_categories' => array(),
        'product_attributes' => array(),
    );

    // Published posts of each registered and enabled post type.
    foreach ((array) $settings['post_types'] as $type_slug) {
        $is_usable = post_type_exists($type_slug) && igny8_is_post_type_enabled($type_slug);
        if (!$is_usable) {
            continue;
        }

        $fetched = igny8_fetch_wordpress_posts(
            $type_slug,
            $settings['per_page'],
            array(
                'after' => $settings['since'],
                'status' => 'publish',
            )
        );

        if ($fetched) {
            $site_data['posts'] = array_merge($site_data['posts'], $fetched);
        }
    }

    // Terms of the taxonomies IGNY8 tracks; unregistered or empty ones are skipped.
    foreach (array('category', 'post_tag', 'igny8_sectors', 'igny8_clusters') as $tax_slug) {
        if (!taxonomy_exists($tax_slug)) {
            continue;
        }

        $term_list = igny8_fetch_taxonomy_terms($tax_slug, 100);
        if (!$term_list) {
            continue;
        }

        $tax_obj = get_taxonomy($tax_slug);
        $site_data['taxonomies'][$tax_slug] = array(
            'taxonomy' => array(
                'name' => $tax_slug,
                'label' => $tax_obj ? $tax_obj->label : $tax_slug,
                'description' => $tax_obj->description ?? '',
                'hierarchical' => $tax_obj ? $tax_obj->hierarchical : false,
            ),
            'terms' => $term_list,
        );
    }

    // WooCommerce data, only when requested and WooCommerce is active.
    $wants_products = !empty($settings['include_products'])
        && function_exists('igny8_is_woocommerce_active')
        && igny8_is_woocommerce_active();

    if ($wants_products) {
        require_once IGNY8_BRIDGE_PLUGIN_DIR . 'data/woocommerce.php';

        $catalog = igny8_fetch_woocommerce_products(100);
        if ($catalog) {
            $site_data['products'] = $catalog;
        }

        $catalog_categories = igny8_fetch_product_categories(100);
        if ($catalog_categories) {
            $site_data['product_categories'] = $catalog_categories;
        }

        $catalog_attributes = igny8_fetch_product_attributes();
        if ($catalog_attributes) {
            $site_data['product_attributes'] = $catalog_attributes;
        }
    }

    // Link graph of the collected posts, only when the Linker module is enabled.
    if (function_exists('igny8_is_module_enabled') && igny8_is_module_enabled('linker')) {
        $collected_ids = wp_list_pluck($site_data['posts'], 'id');
        $graph = igny8_extract_link_graph($collected_ids);

        if (!empty($graph)) {
            $site_data['link_graph'] = $graph;
        }
    }

    $link_total = 0;
    if (isset($site_data['link_graph'])) {
        $link_total = count($site_data['link_graph']);
    }

    $site_data['summary'] = array(
        'posts' => count($site_data['posts']),
        'taxonomies' => count($site_data['taxonomies']),
        'products' => count($site_data['products']),
        'links' => $link_total,
    );

    // Remember when the last snapshot was taken and how large it was.
    update_option('igny8_last_site_snapshot', array(
        'timestamp' => current_time('timestamp'),
        'summary' => $site_data['summary'],
    ));

    return $site_data;
}
|
|
|
|
/**
 * Send a site snapshot to the IGNY8 import endpoint.
 *
 * On success the returned import ID and the sync time are stored in options,
 * and the link graph (when present and the Linker module is enabled) is sent
 * through igny8_send_link_graph_to_igny8().
 *
 * @param int        $site_id   IGNY8 site ID, used in the endpoint path.
 * @param array|null $site_data Snapshot to send. When empty, it is collected with
 *                              igny8_collect_site_data($args).
 * @param array      $args      Optional. `mode` is sent as `import_type`
 *                              (default 'full_site_scan'); the whole array is
 *                              passed on when the snapshot has to be collected.
 * @return array|false The `data` part of the API response, or false when the
 *                     connection is disabled, the API is not authenticated, no
 *                     snapshot is available, or the API reports failure.
 */
function igny8_send_site_data_to_igny8($site_id, $site_data = null, $args = array()) {
    // Skip if connection is disabled
    if (!igny8_is_connection_enabled()) {
        return false;
    }

    $api = new Igny8API();

    if (!$api->is_authenticated()) {
        return false;
    }

    // Collect all site data if not provided
    if (empty($site_data)) {
        $site_data = igny8_collect_site_data($args);
    }

    if (empty($site_data) || isset($site_data['disabled'])) {
        return false;
    }

    // Send to IGNY8 API
    $response = $api->post("/system/sites/{$site_id}/import/", array(
        'site_data' => $site_data,
        'import_type' => $args['mode'] ?? 'full_site_scan',
    ));

    // empty() also covers a response without a `success` key.
    if (empty($response['success'])) {
        error_log("IGNY8: Failed to send site data: " . ($response['error'] ?? 'Unknown error'));
        return false;
    }

    // Store import ID for tracking
    update_option('igny8_last_site_import_id', $response['data']['import_id'] ?? null);

    // igny8_sync_incremental_site_data() feeds this value to gmdate() and compares
    // it with post_modified_gmt, so it must be a real UTC Unix timestamp.
    // current_time('timestamp') is shifted by the site's timezone offset.
    // NOTE(review): confirm no other consumer expects the shifted value.
    update_option('igny8_last_site_sync', time());

    // Send link graph separately to Linker module if available
    if (!empty($site_data['link_graph']) && function_exists('igny8_is_module_enabled') && igny8_is_module_enabled('linker')) {
        $link_result = igny8_send_link_graph_to_igny8($site_id, $site_data['link_graph']);
        if ($link_result) {
            error_log(sprintf('IGNY8: Sent %d links to Linker module', $link_result['links_sent'] ?? 0));
        }
    }

    return $response['data'] ?? array();
}
|
|
|
|
/**
 * Send posts modified since the last sync to the IGNY8 sync endpoint.
 *
 * Posts with status publish, pending, draft or future are read directly from the
 * database for every enabled post type. The cutoff is `$settings['since']` when
 * set, otherwise the `igny8_last_site_sync` option; with no cutoff, all posts of
 * those statuses are sent.
 *
 * @param int   $site_id  IGNY8 site ID, used in the endpoint path.
 * @param array $settings Optional scan settings; merged with `mode => incremental`
 *                        and resolved through igny8_get_site_scan_settings().
 * @return array|false Array with `synced` (count) and `message`, or false when the
 *                     API is not authenticated or reports failure.
 */
function igny8_sync_incremental_site_data($site_id, $settings = array()) {
    // Skip if connection is disabled
    if (!igny8_is_connection_enabled()) {
        return array('synced' => 0, 'message' => 'Connection disabled');
    }

    $api = new Igny8API();

    if (!$api->is_authenticated()) {
        return false;
    }

    $settings = igny8_get_site_scan_settings(wp_parse_args($settings, array('mode' => 'incremental')));
    $since = $settings['since'] ?? intval(get_option('igny8_last_site_sync', 0));

    // Take the next cutoff BEFORE querying, as a real UTC timestamp. Recording it
    // after the upload would skip posts edited while the sync was running, and
    // current_time('timestamp') is shifted by the site's timezone offset while the
    // cutoff is compared against post_modified_gmt below.
    $sync_started_at = time();

    $formatted_posts = array();

    foreach ((array) $settings['post_types'] as $post_type) {
        if (!post_type_exists($post_type) || !igny8_is_post_type_enabled($post_type)) {
            continue;
        }

        $query_args = array(
            'post_type' => $post_type,
            'post_status' => array('publish', 'pending', 'draft', 'future'),
            'posts_per_page' => -1,
            'orderby' => 'modified',
            'order' => 'DESC',
            'suppress_filters' => true,
        );

        if ($since) {
            $query_args['date_query'] = array(
                array(
                    'column' => 'post_modified_gmt',
                    'after' => gmdate('Y-m-d H:i:s', $since),
                ),
            );
        }

        $posts = get_posts($query_args);

        foreach ($posts as $post) {
            $word_count = str_word_count(strip_tags($post->post_content));

            // wp_get_post_terms() can return a WP_Error; never put that in the payload.
            $category_ids = wp_get_post_terms($post->ID, 'category', array('fields' => 'ids'));
            $tag_ids = wp_get_post_terms($post->ID, 'post_tag', array('fields' => 'ids'));

            $formatted_posts[] = array(
                'id' => $post->ID,
                'title' => get_the_title($post),
                'content' => $post->post_content,
                'status' => $post->post_status,
                'modified' => $post->post_modified_gmt,
                'post_type' => $post->post_type,
                'url' => get_permalink($post),
                'taxonomies' => array(
                    'categories' => is_wp_error($category_ids) ? array() : $category_ids,
                    'tags' => is_wp_error($tag_ids) ? array() : $tag_ids,
                ),
                'meta' => array(
                    'task_id' => get_post_meta($post->ID, '_igny8_task_id', true),
                    'cluster_id' => get_post_meta($post->ID, '_igny8_cluster_id', true),
                    'sector_id' => get_post_meta($post->ID, '_igny8_sector_id', true),
                    'word_count' => $word_count,
                ),
            );
        }
    }

    if (empty($formatted_posts)) {
        return array('synced' => 0, 'message' => 'No changes since last sync');
    }

    $response = $api->post("/system/sites/{$site_id}/sync/", array(
        'posts' => $formatted_posts,
        'sync_type' => 'incremental',
        'last_sync' => $since,
        'post_types' => $settings['post_types'],
    ));

    // empty() also covers a response without a `success` key.
    if (empty($response['success'])) {
        error_log("IGNY8: Incremental site sync failed: " . ($response['error'] ?? 'Unknown error'));
        return false;
    }

    update_option('igny8_last_site_sync', $sync_started_at);

    // This record is not read back anywhere in this file, so its timestamp format
    // is left unchanged. NOTE(review): confirm what its consumers expect.
    update_option('igny8_last_incremental_site_sync', array(
        'timestamp' => current_time('timestamp'),
        'count' => count($formatted_posts),
    ));

    return array(
        'synced' => count($formatted_posts),
        'message' => 'Incremental sync completed',
    );
}
|
|
|
|
/**
 * Run a full site scan: collect the snapshot, import it, then map it.
 *
 * After a successful import the scan time is stored in `igny8_last_full_site_scan`.
 * When the Planner module is enabled (or the module helper is unavailable) the
 * snapshot is also passed to igny8_map_site_to_semantic_strategy(), and the time
 * and size of a successful mapping are stored in options.
 *
 * @param int   $site_id  IGNY8 site ID.
 * @param array $settings Scan settings handed to igny8_collect_site_data().
 * @return array|false The import result from igny8_send_site_data_to_igny8(), or
 *                     false when collection is disabled or the import fails.
 */
function igny8_perform_full_site_scan($site_id, $settings = array()) {
    $site_data = igny8_collect_site_data($settings);

    if (empty($site_data) || isset($site_data['disabled'])) {
        return false;
    }

    // The import call also forwards the link graph to the Linker module on
    // success, so it is not sent again here; doing so delivered it twice per scan.
    $import = igny8_send_site_data_to_igny8($site_id, $site_data, array('mode' => 'full_site_scan'));

    if (!$import) {
        return false;
    }

    update_option('igny8_last_full_site_scan', current_time('timestamp'));

    // Map to semantic strategy (requires Planner module)
    if (!function_exists('igny8_is_module_enabled') || igny8_is_module_enabled('planner')) {
        $map_response = igny8_map_site_to_semantic_strategy($site_id, $site_data);

        if (!empty($map_response['success'])) {
            // count() fails on non-countable values, so only count real arrays.
            $sectors = $map_response['data']['sectors'] ?? array();
            $keywords = $map_response['data']['keywords'] ?? array();

            update_option('igny8_last_semantic_map', current_time('timestamp'));
            update_option('igny8_last_semantic_map_summary', array(
                'sectors' => is_array($sectors) ? count($sectors) : 0,
                'keywords' => is_array($keywords) ? count($keywords) : 0,
            ));
        }
    }

    return $import;
}
|
|
|