From 12164bb700ec4d5fb5b1fdf72467cfed1eafa84b Mon Sep 17 00:00:00 2001 From: Chris Huber Date: Thu, 19 Mar 2026 02:55:59 +0000 Subject: [PATCH] feat: consolidate SiteContext into SITE.md with auto-refresh (#871) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SITE.md was meant to replace SiteContext but both systems remained, injecting site context into every AI call twice (SITE.md at priority 20 as static markdown, SiteContextDirective at priority 80 as live JSON). This consolidation: - Enriches SITE.md scaffold with all SiteContext data: post counts per type, taxonomy term counts with hierarchy/associations, language, timezone, permalink structure — rendered as markdown tables - Adds datamachine_regenerate_site_md() with 60-second debounce transient to auto-refresh SITE.md on structural changes (same hooks SiteContext used: save_post, switch_theme, activated_plugin, etc.) - Preserves user-added content below marker - Deprecates SiteContext class and SiteContextDirective to no-ops (kept for backward compat, will be removed in a future major version) - Removes SiteContextDirective from the directive chain — no more duplicate site context in AI prompts - Repurposes site_context_enabled setting to control SITE.md auto-refresh - Cleans up legacy SiteContext transient on activation Single source of truth: SITE.md, injected once via CoreMemoryFilesDirective. Closes #871. --- data-machine.php | 4 + inc/Core/WordPress/SiteContext.php | 165 ++--------------- .../AI/Directives/SiteContextDirective.php | 89 ++++------ inc/bootstrap.php | 7 +- inc/migrations.php | 168 +++++++++++++++++- 5 files changed, 216 insertions(+), 217 deletions(-) diff --git a/data-machine.php b/data-machine.php index d1a567057..ce8879dc4 100644 --- a/data-machine.php +++ b/data-machine.php @@ -500,6 +500,10 @@ function datamachine_activate_for_site() { // Migrate USER.md to network-scoped paths and create NETWORK.md on multisite (idempotent). 
datamachine_migrate_user_md_to_network_scope(); + // Regenerate SITE.md with enriched content and clean up legacy SiteContext transient. + datamachine_regenerate_site_md(); + delete_transient( 'datamachine_site_context_data' ); + // Clean up legacy per-agent-type log level options (idempotent). foreach ( array( 'pipeline', 'chat', 'system' ) as $legacy_agent_type ) { delete_option( "datamachine_log_level_{$legacy_agent_type}" ); diff --git a/inc/Core/WordPress/SiteContext.php b/inc/Core/WordPress/SiteContext.php index 175ca8e2f..36ee75a06 100644 --- a/inc/Core/WordPress/SiteContext.php +++ b/inc/Core/WordPress/SiteContext.php @@ -1,6 +1,14 @@ self::get_site_metadata(), - 'post_types' => self::get_post_types_data(), - 'taxonomies' => self::get_taxonomies_data(), - ); - - /** - * Filter site context data before caching. - * - * Plugins can use this hook to inject custom context data (e.g., events, - * analytics, custom post type summaries). Note: When this filter is used, - * caching is bypassed to ensure dynamic data remains fresh. - * - * @param array $context Site context data with 'site', 'post_types', 'taxonomies' keys - * @return array Modified context data - */ - $context = apply_filters( 'datamachine_site_context', $context ); - - set_transient( self::CACHE_KEY, $context, 0 ); // 0 = permanent until invalidated - - return $context; - } - - /** - * Get site metadata. - * - * @return array Site name, URL, language, timezone, current_date - */ - private static function get_site_metadata(): array { - return array( - 'name' => get_bloginfo( 'name' ), - 'tagline' => get_bloginfo( 'description' ), - 'url' => home_url(), - 'admin_url' => admin_url(), - 'language' => get_locale(), - 'timezone' => wp_timezone_string(), - 'current_date' => wp_date( 'Y-m-d' ), - ); - } - - /** - * Get public post types with published counts. 
- * - * @return array Post type labels, counts, and hierarchy status - */ - private static function get_post_types_data(): array { - $post_types_data = array(); - $post_types = get_post_types( array( 'public' => true ), 'objects' ); - - foreach ( $post_types as $post_type ) { - $count = wp_count_posts( $post_type->name ); - $published_count = $count->publish ?? 0; - - $post_types_data[ $post_type->name ] = array( - 'label' => $post_type->label, - 'singular_label' => ( is_object( $post_type->labels ) && isset( $post_type->labels->singular_name ) ) - ? $post_type->labels->singular_name - : $post_type->label, - 'count' => (int) $published_count, - 'hierarchical' => $post_type->hierarchical, - ); - } - - return $post_types_data; - } - - /** - * Get public taxonomies with metadata and term counts. - * - * Returns taxonomy structure without individual term listings to keep - * context payload small. Use search_taxonomy_terms tool for term discovery. - * - * @return array Taxonomy labels, term counts, hierarchy, post type associations - */ - private static function get_taxonomies_data(): array { - $taxonomies_data = array(); - $taxonomies = get_taxonomies( array( 'public' => true ), 'objects' ); - - foreach ( $taxonomies as $taxonomy ) { - if ( \DataMachine\Core\WordPress\TaxonomyHandler::shouldSkipTaxonomy( $taxonomy->name ) ) { - continue; - } - - $term_count = wp_count_terms( - array( - 'taxonomy' => $taxonomy->name, - 'hide_empty' => false, - ) - ); - if ( is_wp_error( $term_count ) ) { - $term_count = 0; - } - - $taxonomies_data[ $taxonomy->name ] = array( - 'label' => $taxonomy->label, - 'singular_label' => ( is_object( $taxonomy->labels ) && isset( $taxonomy->labels->singular_name ) ) - ? $taxonomy->labels->singular_name - : $taxonomy->label, - 'term_count' => (int) $term_count, - 'hierarchical' => $taxonomy->hierarchical, - 'post_types' => $taxonomy->object_type ?? array(), - ); - } - - return $taxonomies_data; + return array(); } /** * Clear site context cache. 
+ * + * @deprecated 0.48.0 SITE.md regeneration handles freshness. Cleans up legacy transient. */ public static function clear_cache(): void { delete_transient( self::CACHE_KEY ); @@ -151,29 +41,10 @@ public static function clear_cache(): void { /** * Register automatic cache invalidation hooks. * - * Clears cache when posts, terms, or site options change. - * Comprehensive invalidation hooks eliminate need for time-based expiration. + * @deprecated 0.48.0 Use datamachine_register_site_md_invalidation() instead. */ public static function register_cache_invalidation(): void { - add_action( 'save_post', array( __CLASS__, 'clear_cache' ) ); - add_action( 'delete_post', array( __CLASS__, 'clear_cache' ) ); - add_action( 'wp_trash_post', array( __CLASS__, 'clear_cache' ) ); - add_action( 'untrash_post', array( __CLASS__, 'clear_cache' ) ); - - add_action( 'create_term', array( __CLASS__, 'clear_cache' ) ); - add_action( 'edit_term', array( __CLASS__, 'clear_cache' ) ); - add_action( 'delete_term', array( __CLASS__, 'clear_cache' ) ); - add_action( 'set_object_terms', array( __CLASS__, 'clear_cache' ) ); - - add_action( 'user_register', array( __CLASS__, 'clear_cache' ) ); - add_action( 'delete_user', array( __CLASS__, 'clear_cache' ) ); - add_action( 'set_user_role', array( __CLASS__, 'clear_cache' ) ); - - add_action( 'switch_theme', array( __CLASS__, 'clear_cache' ) ); - - add_action( 'update_option_blogname', array( __CLASS__, 'clear_cache' ) ); - add_action( 'update_option_blogdescription', array( __CLASS__, 'clear_cache' ) ); - add_action( 'update_option_home', array( __CLASS__, 'clear_cache' ) ); - add_action( 'update_option_siteurl', array( __CLASS__, 'clear_cache' ) ); + // No-op. SITE.md invalidation hooks are registered in bootstrap.php. + // This method remains to avoid fatal errors from code calling it directly. 
} } diff --git a/inc/Engine/AI/Directives/SiteContextDirective.php b/inc/Engine/AI/Directives/SiteContextDirective.php index 331aae087..f8a7939f3 100644 --- a/inc/Engine/AI/Directives/SiteContextDirective.php +++ b/inc/Engine/AI/Directives/SiteContextDirective.php @@ -1,81 +1,52 @@ 'system_json', - 'label' => 'WORDPRESS SITE CONTEXT', - 'data' => $context_data, - ), - ); + return array(); } /** - * Check if site context injection is enabled in plugin settings. + * Check if site context injection is enabled. * - * @return bool True if enabled, false otherwise + * @deprecated 0.48.0 Use the site_context_enabled setting directly. Controls SITE.md auto-refresh. + * @return bool */ public static function is_site_context_enabled(): bool { - return PluginSettings::get( 'site_context_enabled', true ); + return \DataMachine\Core\PluginSettings::get( 'site_context_enabled', true ); } } -/** - * Allow plugins to override the site context directive class. - * datamachine-multisite uses this to replace single-site context with multisite context. - * - * @param string $directive_class The directive class to use for site context - * @return string The filtered directive class - */ -$datamachine_site_context_directive = apply_filters( 'datamachine_site_context_directive', SiteContextDirective::class ); - -// Register the filtered directive for global context (applies to all AI agents - allows replacement by multisite plugin) -if ( $datamachine_site_context_directive ) { - add_filter( - 'datamachine_directives', - function ( $directives ) use ( $datamachine_site_context_directive ) { - $directives[] = array( - 'class' => $datamachine_site_context_directive, - 'priority' => 80, - 'contexts' => array( 'all' ), - ); - return $directives; - } - ); -} +// The directive is no longer registered in the directive system. +// The class exists purely for backward compatibility with code that +// references it by name (e.g., datamachine_site_context_directive filter). 
diff --git a/inc/bootstrap.php b/inc/bootstrap.php index 8ba52b646..33f0f1dd0 100644 --- a/inc/bootstrap.php +++ b/inc/bootstrap.php @@ -104,9 +104,10 @@ 'label' => 'Network Context', 'description' => 'WordPress multisite network topology and shared resources.', ) ); -// SiteContext is autoloaded (Core\WordPress\SiteContext) — register its cache invalidation hook here. -add_action( 'init', array( \DataMachine\Core\WordPress\SiteContext::class, 'register_cache_invalidation' ) ); -require_once __DIR__ . '/Engine/AI/Directives/SiteContextDirective.php'; +// SITE.md auto-regeneration — replaces the former SiteContext + SiteContextDirective system. +// SITE.md is now the single source of truth for site context, auto-refreshing on structural changes. +add_action( 'init', 'datamachine_register_site_md_invalidation' ); + require_once __DIR__ . '/Engine/AI/Directives/DailyMemorySelectorDirective.php'; require_once __DIR__ . '/Api/Chat/ChatContextDirective.php'; require_once __DIR__ . '/Api/System/SystemContextDirective.php'; diff --git a/inc/migrations.php b/inc/migrations.php index 41534ccb7..f8502bc83 100644 --- a/inc/migrations.php +++ b/inc/migrations.php @@ -336,19 +336,28 @@ function datamachine_get_scaffold_defaults(): array { } /** - * Build shared SITE.md scaffold content from WordPress site data. + * Build shared SITE.md content from WordPress site data. + * + * This is the single source of truth for site context injected into AI calls. + * Replaces the former SiteContext class + SiteContextDirective which injected + * a duplicate JSON blob at priority 80. Now SITE.md contains all the same + * data in markdown format, injected once via CoreMemoryFilesDirective. * * @since 0.36.1 + * @since 0.48.0 Enriched with post counts, taxonomy details, language, timezone. * @return string */ function datamachine_get_site_scaffold_content(): string { $site_name = get_bloginfo( 'name' ) ? 
get_bloginfo( 'name' ) : 'WordPress Site'; $site_description = get_bloginfo( 'description' ) ? get_bloginfo( 'description' ) : ''; $site_url = home_url(); - $post_types = get_post_types( array( 'public' => true ), 'names' ); - $taxonomies = get_taxonomies( array( 'public' => true ), 'names' ); - $active_plugins = get_option( 'active_plugins', array() ); + $language = get_locale(); + $timezone = wp_timezone_string(); $theme_name = wp_get_theme()->get( 'Name' ) ? wp_get_theme()->get( 'Name' ) : 'Unknown'; + $permalink = get_option( 'permalink_structure', '' ); + + // --- Active plugins (exclude Data Machine) --- + $active_plugins = get_option( 'active_plugins', array() ); if ( is_multisite() ) { $network_plugins = array_keys( get_site_option( 'active_sitewide_plugins', array() ) ); @@ -373,6 +382,33 @@ function datamachine_get_site_scaffold_content(): string { $plugin_names[] = $plugin_name; } + // --- Post types with counts --- + $post_types = get_post_types( array( 'public' => true ), 'objects' ); + $post_type_lines = array(); + foreach ( $post_types as $pt ) { + $count = wp_count_posts( $pt->name ); + $published = isset( $count->publish ) ? (int) $count->publish : 0; + $hier = $pt->hierarchical ? 'hierarchical' : 'flat'; + $post_type_lines[] = sprintf( '| %s | %s | %d | %s |', $pt->label, $pt->name, $published, $hier ); + } + + // --- Taxonomies with term counts --- + $taxonomies = get_taxonomies( array( 'public' => true ), 'objects' ); + $taxonomy_lines = array(); + foreach ( $taxonomies as $tax ) { + $term_count = wp_count_terms( array( + 'taxonomy' => $tax->name, + 'hide_empty' => false, + ) ); + if ( is_wp_error( $term_count ) ) { + $term_count = 0; + } + $hier = $tax->hierarchical ? 'hierarchical' : 'flat'; + $associated = implode( ', ', $tax->object_type ?? 
array() ); + $taxonomy_lines[] = sprintf( '| %s | %s | %d | %s | %s |', $tax->label, $tax->name, (int) $term_count, $hier, $associated ); + } + + // --- Build SITE.md --- $lines = array(); $lines[] = '# SITE'; $lines[] = ''; @@ -383,15 +419,35 @@ function datamachine_get_site_scaffold_content(): string { } $lines[] = '- **url:** ' . $site_url; $lines[] = '- **theme:** ' . $theme_name; + $lines[] = '- **language:** ' . $language; + $lines[] = '- **timezone:** ' . $timezone; + if ( ! empty( $permalink ) ) { + $lines[] = '- **permalinks:** ' . $permalink; + } $lines[] = '- **multisite:** ' . ( is_multisite() ? 'true' : 'false' ); $lines[] = ''; - $lines[] = '## Content Model'; - $lines[] = '- **post_types:** ' . implode( ', ', $post_types ); - $lines[] = '- **taxonomies:** ' . implode( ', ', $taxonomies ); + + $lines[] = '## Post Types'; + $lines[] = '| Label | Slug | Published | Type |'; + $lines[] = '|-------|------|-----------|------|'; + foreach ( $post_type_lines as $line ) { + $lines[] = $line; + } + $lines[] = ''; + + $lines[] = '## Taxonomies'; + $lines[] = '| Label | Slug | Terms | Type | Post Types |'; + $lines[] = '|-------|------|-------|------|------------|'; + foreach ( $taxonomy_lines as $line ) { + $lines[] = $line; + } $lines[] = ''; + $lines[] = '## Active Plugins'; if ( ! empty( $plugin_names ) ) { - $lines[] = '- ' . implode( "\n- ", $plugin_names ); + foreach ( $plugin_names as $name ) { + $lines[] = '- ' . $name; + } } else { $lines[] = '- (none)'; } @@ -399,6 +455,102 @@ function datamachine_get_site_scaffold_content(): string { return implode( "\n", $lines ) . "\n"; } +/** + * Regenerate SITE.md on disk from current WordPress state. + * + * Called by invalidation hooks when site structure changes (plugins, + * themes, post types, taxonomies, options). Debounced via a short-lived + * transient to avoid excessive writes during bulk operations. + * + * Preserves user-added content below the auto-generated section by + * looking for a marker. 
+ *
+ * @since 0.48.0
+ * @return void
+ */
+function datamachine_regenerate_site_md(): void {
+	// Leading-edge debounce: skip if we regenerated in the last 60 seconds (changes inside the window wait for the next trigger).
+	if ( get_transient( 'datamachine_site_md_regenerating' ) ) {
+		return;
+	}
+	set_transient( 'datamachine_site_md_regenerating', 1, 60 );
+
+	// Check the setting — if disabled, skip regeneration.
+	if ( ! \DataMachine\Core\PluginSettings::get( 'site_context_enabled', true ) ) {
+		return;
+	}
+
+	$directory_manager = new \DataMachine\Core\FilesRepository\DirectoryManager();
+	$shared_dir        = $directory_manager->get_shared_directory();
+	$site_md_path      = trailingslashit( $shared_dir ) . 'SITE.md';
+
+	$fs = \DataMachine\Core\FilesRepository\FilesystemHelper::get();
+	if ( ! $fs ) {
+		return;
+	}
+
+	// Preserve user-added content from the marker onward (the marker line itself is kept so it survives the next run).
+	$custom_content = '';
+	if ( file_exists( $site_md_path ) ) {
+		$existing = $fs->get_contents( $site_md_path );
+		$marker   = '<!-- CUSTOM -->'; // NOTE(review): this literal was empty in the patch as received (an empty needle matches at offset 0 on PHP 8+ and re-appends the whole file); confirm the intended marker text.
+		$pos      = is_string( $existing ) ? strpos( $existing, $marker ) : false; // Only search when the read actually produced a string.
+		if ( false !== $pos ) {
+			$custom_content = substr( $existing, $pos );
+		}
+	}
+
+	$content = datamachine_get_site_scaffold_content();
+
+	if ( ! empty( $custom_content ) ) {
+		$content .= "\n" . $custom_content;
+	}
+
+	if ( ! is_dir( $shared_dir ) ) {
+		wp_mkdir_p( $shared_dir );
+	}
+
+	$fs->put_contents( $site_md_path, $content, FS_CHMOD_FILE );
+	\DataMachine\Core\FilesRepository\FilesystemHelper::make_group_writable( $site_md_path );
+}
+
+/**
+ * Register hooks that trigger SITE.md regeneration on structural changes.
+ *
+ * Largely the hooks SiteContext used for cache invalidation (user and set_object_terms hooks dropped; plugin activation and permalink hooks added),
+ * but now they regenerate the actual file on disk. The debounce in
+ * datamachine_regenerate_site_md() prevents excessive writes.
+ *
+ * @since 0.48.0
+ * @return void
+ */
+function datamachine_register_site_md_invalidation(): void {
+	$callback = 'datamachine_regenerate_site_md';
+
+	// Plugin/theme structural changes (subject to the same debounce as every other hook below).
+	add_action( 'switch_theme', $callback );
+	add_action( 'activated_plugin', $callback );
+	add_action( 'deactivated_plugin', $callback );
+
+	// Post lifecycle — updates published counts.
+	add_action( 'save_post', $callback );
+	add_action( 'delete_post', $callback );
+	add_action( 'wp_trash_post', $callback );
+	add_action( 'untrash_post', $callback );
+
+	// Term lifecycle — updates term counts.
+	add_action( 'create_term', $callback );
+	add_action( 'edit_term', $callback );
+	add_action( 'delete_term', $callback );
+
+	// Site identity changes.
+	add_action( 'update_option_blogname', $callback );
+	add_action( 'update_option_blogdescription', $callback );
+	add_action( 'update_option_home', $callback );
+	add_action( 'update_option_siteurl', $callback );
+	add_action( 'update_option_permalink_structure', $callback );
+}
+
 /**
  * Migrate existing user_id-scoped agent files to layered architecture.
  *