diff --git a/codex-rs/app-server/tests/common/models_cache.rs b/codex-rs/app-server/tests/common/models_cache.rs index 56a9378662c..666f6ed7cda 100644 --- a/codex-rs/app-server/tests/common/models_cache.rs +++ b/codex-rs/app-server/tests/common/models_cache.rs @@ -54,6 +54,7 @@ fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo { supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, } } diff --git a/codex-rs/codex-api/tests/models_integration.rs b/codex-rs/codex-api/tests/models_integration.rs index 742b39d5fd6..9ff273211ff 100644 --- a/codex-rs/codex-api/tests/models_integration.rs +++ b/codex-rs/codex-api/tests/models_integration.rs @@ -100,6 +100,7 @@ async fn models_client_hits_models_endpoint() { supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, }], }; diff --git a/codex-rs/core/src/guardian/tests.rs b/codex-rs/core/src/guardian/tests.rs index 40a07d38023..85fcbf005fd 100644 --- a/codex-rs/core/src/guardian/tests.rs +++ b/codex-rs/core/src/guardian/tests.rs @@ -25,6 +25,8 @@ use codex_model_provider::create_model_provider; use codex_model_provider_info::AMAZON_BEDROCK_GPT_5_4_MODEL_ID; use codex_model_provider_info::AMAZON_BEDROCK_PROVIDER_ID; use codex_model_provider_info::ModelProviderInfo; +use codex_models_manager::manager::RefreshStrategy; +use codex_models_manager::model_info::model_info_from_slug; use codex_network_proxy::NetworkProxyConfig; use codex_protocol::ThreadId; use codex_protocol::approvals::NetworkApprovalProtocol; @@ -32,6 +34,8 @@ use codex_protocol::config_types::ApprovalsReviewer; use codex_protocol::models::ContentItem; use codex_protocol::models::PermissionProfile; use codex_protocol::models::ResponseItem; +use codex_protocol::openai_models::ModelVisibility; +use codex_protocol::openai_models::ModelsResponse; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::permissions::FileSystemAccessMode; use codex_protocol::permissions::FileSystemPath; @@ -45,6 +49,7 @@ use codex_protocol::protocol::GranularApprovalConfig; use codex_protocol::protocol::GuardianAssessmentStatus; use codex_protocol::protocol::GuardianRiskLevel; use codex_protocol::protocol::GuardianUserAuthorization; +use codex_protocol::protocol::MultiAgentVersion; use codex_protocol::protocol::ReviewDecision; use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::TurnCompleteEvent; @@ -55,6 +60,7 @@ use core_test_support::context_snapshot::ContextSnapshotOptions; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_response_created; +use core_test_support::responses::mount_models_once; use core_test_support::responses::mount_response_once; use core_test_support::responses::mount_sse_once; use core_test_support::responses::mount_sse_sequence; @@ -67,6 +73,7 @@ use core_test_support::test_path_buf; use insta::Settings; use insta::assert_snapshot; use pretty_assertions::assert_eq; +use serde_json::Value; use std::collections::BTreeMap; use std::collections::HashMap; use std::sync::Arc; @@ -1377,6 +1384,86 @@ async fn guardian_review_uses_preferred_review_model_without_model_catalog_overr Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn guardian_stays_disabled_when_model_selects_multi_agent_v2() -> anyhow::Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let guardian_model = "guardian-multi-agent-v2"; + let mut model = model_info_from_slug(guardian_model); + model.visibility = ModelVisibility::List; + model.used_fallback_model_metadata = false; + model.multi_agent_version = Some(MultiAgentVersion::V2); + let models_mock = mount_models_once( + &server, + ModelsResponse { + models: vec![model], + }, + ) + .await; + let request_log = mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-guardian"), + ev_assistant_message("msg-guardian", "{\"outcome\":\"allow\"}"), + ev_completed("resp-guardian"), + ]), + ) + .await; + + let (session, mut turn) = guardian_test_session_and_turn(&server).await; + let _ = session + .services + .models_manager + .list_models(RefreshStrategy::Online) + .await; + Arc::get_mut(&mut turn) + .expect("turn should be unique") + .model_info + .auto_review_model_override = Some(guardian_model.to_string()); + seed_guardian_parent_history(&session, &turn).await; + + let outcome = run_guardian_review_session_for_test( + Arc::clone(&session), + turn, + GuardianApprovalRequest::Shell { + id: "shell-1".to_string(), + command: vec!["git".to_string(), "push".to_string()], + cwd: test_path_buf("/repo/codex-rs/core").abs(), + sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault, + additional_permissions: None, + justification: None, + }, + Some("Sandbox denied outbound git push to github.com.".to_string()), + guardian_output_schema(), + /*external_cancel*/ None, + ) + .await; + let (GuardianReviewOutcome::Completed(_), _) = outcome else { + panic!("expected guardian assessment"); + }; + let request_body = request_log.single_request().body_json(); + let has_spawn_agent = request_body + .get("tools") + .and_then(Value::as_array) + .is_some_and(|tools| { + tools + .iter() + .any(|tool| tool.get("name").and_then(Value::as_str) == Some("spawn_agent")) + }); + + assert_eq!( + ( + models_mock.requests().len(), + request_body.get("model").and_then(Value::as_str), + has_spawn_agent, + ), + (1, Some(guardian_model), false) + ); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn guardian_review_request_layout_matches_model_visible_request_snapshot() -> anyhow::Result<()> { diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs index 8d471c2a6fe..ba31bf79eb7 100644 --- a/codex-rs/core/src/session/mod.rs +++ b/codex-rs/core/src/session/mod.rs @@ -428,11 +428,13 @@ pub(crate) struct CodexSpawnArgs { fn resolve_multi_agent_version( conversation_history: &InitialHistory, inherited_multi_agent_version: Option, + model_info: &ModelInfo, config: &Config, ) -> Option { conversation_history .get_multi_agent_version() .or(inherited_multi_agent_version) + .or(model_info.multi_agent_version) .or_else(|| config.multi_agent_version_from_features()) } @@ -554,6 +556,7 @@ impl Codex { let multi_agent_version = resolve_multi_agent_version( &conversation_history, inherited_multi_agent_version, + &model_info, &config, ); let _ = config diff --git a/codex-rs/core/tests/suite/auto_review.rs b/codex-rs/core/tests/suite/auto_review.rs index b3574cf5e2e..570f97008f9 100644 --- a/codex-rs/core/tests/suite/auto_review.rs +++ b/codex-rs/core/tests/suite/auto_review.rs @@ -233,6 +233,7 @@ fn remote_model_with_auto_review_override(slug: &str, review_model: &str) -> Mod supports_search_tool: false, auto_review_model_override: Some(review_model.to_string()), tool_mode: None, + multi_agent_version: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), diff --git a/codex-rs/core/tests/suite/model_runtime_selectors.rs b/codex-rs/core/tests/suite/model_runtime_selectors.rs index b6f2ea3c7df..6b17a4628fa 100644 --- a/codex-rs/core/tests/suite/model_runtime_selectors.rs +++ b/codex-rs/core/tests/suite/model_runtime_selectors.rs @@ -1,4 +1,5 @@ use anyhow::Result; +use anyhow::bail; use codex_core::config::Config; use codex_features::Feature; use codex_login::CodexAuth; @@ -11,15 +12,18 @@ use codex_protocol::openai_models::ModelVisibility; use codex_protocol::openai_models::ModelsResponse; use codex_protocol::openai_models::ToolMode; use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::MultiAgentVersion; use codex_protocol::protocol::Op; use codex_protocol::protocol::ThreadSettingsOverrides; use codex_protocol::user_input::UserInput; use core_test_support::responses; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; +use core_test_support::responses::ev_function_call; use core_test_support::responses::ev_response_created; use core_test_support::responses::mount_models_once; use core_test_support::responses::mount_sse_once; +use core_test_support::responses::mount_sse_once_match; use core_test_support::responses::sse; use core_test_support::skip_if_no_network; use core_test_support::submit_thread_settings; @@ -27,9 +31,18 @@ use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; use pretty_assertions::assert_eq; use serde_json::Value; +use serde_json::json; +use std::io::Cursor; use tokio::time::Duration; use tokio::time::Instant; use tokio::time::sleep; +use wiremock::Request; + +const CHILD_PROMPT: &str = "inspect the child runtime"; +const CHILD_MODEL: &str = "test-multi-agent-child"; +const ROOT_MODEL: &str = "test-multi-agent-root"; +const ROOT_PROMPT: &str = "spawn a child"; +const SPAWN_CALL_ID: &str = "spawn-call-1"; fn remote_model(slug: &str) -> ModelInfo { ModelInfo { @@ -39,6 +52,26 @@ fn remote_model(slug: &str) -> ModelInfo { } } +fn body_contains(req: &Request, text: &str) -> bool { + let is_zstd = req + .headers + .get("content-encoding") + .and_then(|value| value.to_str().ok()) + .is_some_and(|value| { + value + .split(',') + .any(|entry| entry.trim().eq_ignore_ascii_case("zstd")) + }); + let bytes = if is_zstd { + zstd::stream::decode_all(Cursor::new(&req.body)).ok() + } else { + Some(req.body.clone()) + }; + bytes + .and_then(|body| String::from_utf8(body).ok()) + .is_some_and(|body| body.contains(text)) +} + fn tool_names(body: &Value) -> Vec { body.get("tools") .and_then(Value::as_array) @@ -171,3 +204,114 @@ async fn remote_tool_mode_selector_overrides_feature_flags() -> Result<()> { Ok(()) } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn remote_multi_agent_selector_overrides_features_and_child_model_info() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = wiremock::MockServer::start().await; + let mut root_model = remote_model(ROOT_MODEL); + root_model.multi_agent_version = Some(MultiAgentVersion::V2); + let mut child_model = remote_model(CHILD_MODEL); + child_model.multi_agent_version = Some(MultiAgentVersion::V1); + let models_mock = mount_models_once( + &server, + ModelsResponse { + models: vec![root_model, child_model], + }, + ) + .await; + let spawn_args = serde_json::to_string(&json!({ + "message": CHILD_PROMPT, + "task_name": "worker", + "model": CHILD_MODEL, + "fork_turns": "none", + }))?; + mount_sse_once_match( + &server, + |req: &Request| body_contains(req, ROOT_PROMPT), + sse(vec![ + ev_response_created("resp-root-1"), + ev_function_call(SPAWN_CALL_ID, "spawn_agent", &spawn_args), + ev_completed("resp-root-1"), + ]), + ) + .await; + mount_sse_once_match( + &server, + |req: &Request| body_contains(req, CHILD_PROMPT) && !body_contains(req, SPAWN_CALL_ID), + sse(vec![ + ev_response_created("resp-child-1"), + ev_assistant_message("msg-child-1", "child done"), + ev_completed("resp-child-1"), + ]), + ) + .await; + let root_followup_mock = mount_sse_once_match( + &server, + |req: &Request| body_contains(req, SPAWN_CALL_ID), + sse(vec![ + ev_response_created("resp-root-2"), + ev_assistant_message("msg-root-2", "root done"), + ev_completed("resp-root-2"), + ]), + ) + .await; + + let mut builder = test_codex() + .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) + .with_config(|config| { + config + .features + .enable(Feature::Collab) + .expect("test config should allow feature update"); + config.model = Some(ROOT_MODEL.to_string()); + }); + let test = builder.build(&server).await?; + assert_eq!( + ( + models_mock.requests().len(), + test.codex.multi_agent_version(), + ), + (1, Some(MultiAgentVersion::V2)) + ); + test.submit_turn(ROOT_PROMPT).await?; + let deadline = Instant::now() + Duration::from_secs(2); + let child_id = loop { + if let Some(child_id) = test + .thread_manager + .list_thread_ids() + .await + .into_iter() + .find(|thread_id| *thread_id != test.session_configured.thread_id) + { + break child_id; + } + if Instant::now() >= deadline { + bail!( + "timed out waiting for spawn_agent to create a child thread: root lock {:?}, spawn output {:?}", + test.codex.multi_agent_version(), + root_followup_mock.function_call_output_text(SPAWN_CALL_ID), + ); + } + sleep(Duration::from_millis(10)).await; + }; + let child = test.thread_manager.get_thread(child_id).await?; + + assert_eq!( + ( + models_mock.requests().len(), + test.codex.multi_agent_version(), + child.config_snapshot().await.model, + child.multi_agent_version(), + ), + ( + 1, + Some(MultiAgentVersion::V2), + CHILD_MODEL.to_string(), + Some(MultiAgentVersion::V2), + ) + ); + + Ok(()) +} diff --git a/codex-rs/core/tests/suite/model_switching.rs b/codex-rs/core/tests/suite/model_switching.rs index f7d2bf578c1..b76a3522ec5 100644 --- a/codex-rs/core/tests/suite/model_switching.rs +++ b/codex-rs/core/tests/suite/model_switching.rs @@ -114,6 +114,7 @@ fn test_model_info( supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), @@ -933,6 +934,7 @@ async fn model_switch_to_smaller_model_updates_token_context_window() -> Result< supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), diff --git a/codex-rs/core/tests/suite/models_cache_ttl.rs b/codex-rs/core/tests/suite/models_cache_ttl.rs index 5244a178a49..34fc98b59f9 100644 --- a/codex-rs/core/tests/suite/models_cache_ttl.rs +++ b/codex-rs/core/tests/suite/models_cache_ttl.rs @@ -372,5 +372,6 @@ fn test_remote_model(slug: &str, priority: i32) -> ModelInfo { supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, } } diff --git a/codex-rs/core/tests/suite/personality.rs b/codex-rs/core/tests/suite/personality.rs index 4219576d4ff..231a6d826f6 100644 --- a/codex-rs/core/tests/suite/personality.rs +++ b/codex-rs/core/tests/suite/personality.rs @@ -594,6 +594,7 @@ async fn remote_model_friendly_personality_instructions_with_feature() -> anyhow supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, }; let _models_mock = mount_models_once( @@ -706,6 +707,7 @@ async fn user_turn_personality_remote_model_template_includes_update_message() - supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, }; let _models_mock = mount_models_once( diff --git a/codex-rs/core/tests/suite/remote_models.rs b/codex-rs/core/tests/suite/remote_models.rs index 00222a3a3ac..fd6e01c0755 100644 --- a/codex-rs/core/tests/suite/remote_models.rs +++ b/codex-rs/core/tests/suite/remote_models.rs @@ -479,6 +479,7 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), @@ -730,6 +731,7 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> { supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), @@ -1215,6 +1217,7 @@ fn test_remote_model_with_policy( supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, priority, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), diff --git a/codex-rs/core/tests/suite/rmcp_client.rs b/codex-rs/core/tests/suite/rmcp_client.rs index c3d598c7ba3..36ea7d4b2c1 100644 --- a/codex-rs/core/tests/suite/rmcp_client.rs +++ b/codex-rs/core/tests/suite/rmcp_client.rs @@ -1351,6 +1351,7 @@ async fn stdio_image_responses_are_sanitized_for_text_only_model() -> anyhow::Re supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, }], }, ) diff --git a/codex-rs/core/tests/suite/spawn_agent_description.rs b/codex-rs/core/tests/suite/spawn_agent_description.rs index 787ba10501d..c1f0d522c71 100644 --- a/codex-rs/core/tests/suite/spawn_agent_description.rs +++ b/codex-rs/core/tests/suite/spawn_agent_description.rs @@ -62,6 +62,7 @@ fn test_model_info( supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers, diff --git a/codex-rs/core/tests/suite/view_image.rs b/codex-rs/core/tests/suite/view_image.rs index 0c6cdf6d8b6..2e34475387c 100644 --- a/codex-rs/core/tests/suite/view_image.rs +++ b/codex-rs/core/tests/suite/view_image.rs @@ -1357,6 +1357,7 @@ async fn view_image_tool_returns_unsupported_message_for_text_only_model() -> an supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), diff --git a/codex-rs/models-manager/src/model_info.rs b/codex-rs/models-manager/src/model_info.rs index 379d173e4e4..58137a3f500 100644 --- a/codex-rs/models-manager/src/model_info.rs +++ b/codex-rs/models-manager/src/model_info.rs @@ -101,6 +101,7 @@ pub fn model_info_from_slug(slug: &str) -> ModelInfo { supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, } } diff --git a/codex-rs/protocol/src/openai_models.rs b/codex-rs/protocol/src/openai_models.rs index 0214f450fd0..4b678a92090 100644 --- a/codex-rs/protocol/src/openai_models.rs +++ b/codex-rs/protocol/src/openai_models.rs @@ -22,6 +22,7 @@ use crate::config_types::ReasoningSummary; use crate::config_types::SERVICE_TIER_DEFAULT_REQUEST_VALUE; use crate::config_types::ServiceTier; use crate::config_types::Verbosity; +use crate::protocol::MultiAgentVersion; const PERSONALITY_PLACEHOLDER: &str = "{{ personality }}"; pub const SPEED_TIER_FAST: &str = "fast"; @@ -347,6 +348,12 @@ pub struct ModelInfo { deserialize_with = "deserialize_optional_model_selector" )] pub tool_mode: Option, + #[serde( + default, + skip_serializing_if = "Option::is_none", + deserialize_with = "deserialize_optional_model_selector" + )] + pub multi_agent_version: Option, } impl ModelInfo { @@ -643,6 +650,7 @@ mod tests { supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, } } @@ -901,6 +909,22 @@ mod tests { assert!(!object.contains_key("tool_mode")); } + #[test] + fn model_info_treats_unknown_multi_agent_version_as_omitted() { + let mut value = + serde_json::to_value(test_model(/*spec*/ None)).expect("serialize test model"); + let object = value + .as_object_mut() + .expect("model info should be an object"); + object.insert( + "multi_agent_version".to_string(), + serde_json::Value::String("future_multi_agent_version".to_string()), + ); + let model = serde_json::from_value::(value).expect("deserialize model info"); + + assert_eq!(model.multi_agent_version, None); + } + #[test] fn resolved_context_window_prefers_context_window() { let model = ModelInfo { diff --git a/codex-rs/tools/src/tool_config_tests.rs b/codex-rs/tools/src/tool_config_tests.rs index 165d6efa20e..1b4d789394a 100644 --- a/codex-rs/tools/src/tool_config_tests.rs +++ b/codex-rs/tools/src/tool_config_tests.rs @@ -46,6 +46,7 @@ fn model_with_shell_type(shell_type: ConfigShellToolType) -> ModelInfo { supports_search_tool: false, auto_review_model_override: None, tool_mode: None, + multi_agent_version: None, } }