From f32ec83dc26b1b8f140262ad70ca21b3a33982c0 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Fri, 29 May 2026 15:38:50 -0700 Subject: [PATCH 1/3] codex: add model multi-agent system overlay --- .../app-server/tests/common/models_cache.rs | 1 + .../codex-api/tests/models_integration.rs | 1 + codex-rs/core/src/agent/control.rs | 10 +- codex-rs/core/src/session/mod.rs | 1 + codex-rs/core/src/session/tests.rs | 5 + codex-rs/core/src/session/turn_context.rs | 21 +- codex-rs/core/src/thread_manager.rs | 1 + codex-rs/core/src/tools/spec_plan_tests.rs | 3 + .../tests/suite/model_runtime_selectors.rs | 261 +++++++++++++----- codex-rs/core/tests/suite/model_switching.rs | 2 + codex-rs/core/tests/suite/models_cache_ttl.rs | 1 + codex-rs/core/tests/suite/personality.rs | 2 + codex-rs/core/tests/suite/remote_models.rs | 3 + codex-rs/core/tests/suite/rmcp_client.rs | 1 + .../tests/suite/spawn_agent_description.rs | 1 + codex-rs/core/tests/suite/view_image.rs | 1 + codex-rs/models-manager/src/model_info.rs | 1 + codex-rs/protocol/src/openai_models.rs | 45 +++ codex-rs/tools/src/tool_config_tests.rs | 1 + 19 files changed, 289 insertions(+), 73 deletions(-) diff --git a/codex-rs/app-server/tests/common/models_cache.rs b/codex-rs/app-server/tests/common/models_cache.rs index aaf9f694776..252d95e7e4a 100644 --- a/codex-rs/app-server/tests/common/models_cache.rs +++ b/codex-rs/app-server/tests/common/models_cache.rs @@ -53,6 +53,7 @@ fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo { used_fallback_model_metadata: false, supports_search_tool: false, tool_mode: None, + multi_agent_version: None, } } diff --git a/codex-rs/codex-api/tests/models_integration.rs b/codex-rs/codex-api/tests/models_integration.rs index 38d8dc98620..4bc6d1c335f 100644 --- a/codex-rs/codex-api/tests/models_integration.rs +++ b/codex-rs/codex-api/tests/models_integration.rs @@ -99,6 +99,7 @@ async fn models_client_hits_models_endpoint() { used_fallback_model_metadata: false, supports_search_tool: false, tool_mode: None, + multi_agent_version: None, }], }; diff --git a/codex-rs/core/src/agent/control.rs b/codex-rs/core/src/agent/control.rs index 38237d8f6f3..6a9ee978387 100644 --- a/codex-rs/core/src/agent/control.rs +++ b/codex-rs/core/src/agent/control.rs @@ -258,6 +258,7 @@ impl AgentControl { &config, session_source, options.multi_agent_version, + /*model_info*/ None, ) }); let notification_source = session_source.clone(); @@ -384,8 +385,12 @@ impl AgentControl { inherited_shell_snapshot: Option>, inherited_exec_policy: Option>, ) -> CodexResult { - let multi_agent_version = - resolve_multi_agent_version(&config, &session_source, options.multi_agent_version); + let multi_agent_version = resolve_multi_agent_version( + &config, + &session_source, + options.multi_agent_version, + /*model_info*/ None, + ); if options.fork_parent_spawn_call_id.is_none() { return Err(CodexErr::Fatal( "spawn_agent fork requires a parent spawn call id".to_string(), @@ -618,6 +623,7 @@ impl AgentControl { &config, &session_source, stored_thread.multi_agent_version, + /*model_info*/ None, ); if let SessionSource::SubAgent(SubAgentSource::ThreadSpawn { depth, .. }) = &session_source && *depth >= config.agent_max_depth diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs index 6abf5da5e19..555bdecf3d6 100644 --- a/codex-rs/core/src/session/mod.rs +++ b/codex-rs/core/src/session/mod.rs @@ -544,6 +544,7 @@ impl Codex { conversation_history .get_multi_agent_version() .or(parent_multi_agent_version), + Some(&model_info), ); if let SessionSource::SubAgent(SubAgentSource::ThreadSpawn { depth, .. }) = session_source && depth >= config.agent_max_depth diff --git a/codex-rs/core/src/session/tests.rs b/codex-rs/core/src/session/tests.rs index d724b30c4e4..8f8c9287b17 100644 --- a/codex-rs/core/src/session/tests.rs +++ b/codex-rs/core/src/session/tests.rs @@ -7237,6 +7237,7 @@ async fn build_initial_context_adds_multi_agent_v2_usage_hint_when_selector_is_v #[tokio::test] async fn spawned_child_multi_agent_version_follows_parent_system() { let (_session, turn_context) = make_session_and_context().await; + let mut model_info = turn_context.model_info.clone(); let child_source = SessionSource::SubAgent(SubAgentSource::ThreadSpawn { parent_thread_id: ThreadId::new(), depth: 1, @@ -7244,16 +7245,20 @@ async fn spawned_child_multi_agent_version_follows_parent_system() { agent_nickname: None, agent_role: None, }); + model_info.multi_agent_version = Some(MultiAgentVersion::V1); let resolved_from_v2_parent = resolve_multi_agent_version( turn_context.config.as_ref(), &child_source, Some(MultiAgentVersion::V2), + Some(&model_info), ); + model_info.multi_agent_version = Some(MultiAgentVersion::V2); let resolved_from_v1_parent = resolve_multi_agent_version( turn_context.config.as_ref(), &child_source, Some(MultiAgentVersion::V1), + Some(&model_info), ); assert_eq!( diff --git a/codex-rs/core/src/session/turn_context.rs b/codex-rs/core/src/session/turn_context.rs index b5ecc4d4f0e..5e292964dce 100644 --- a/codex-rs/core/src/session/turn_context.rs +++ b/codex-rs/core/src/session/turn_context.rs @@ -110,6 +110,7 @@ pub(crate) fn resolve_multi_agent_version( config: &Config, session_source: &SessionSource, inherited_multi_agent_version: Option, + model_info: Option<&ModelInfo>, ) -> Option { if is_guardian_reviewer_source(session_source) || matches!( @@ -119,15 +120,17 @@ pub(crate) fn resolve_multi_agent_version( { return None; } - inherited_multi_agent_version.or_else(|| { - if config.features.enabled(Feature::MultiAgentV2) { - Some(MultiAgentVersion::V2) - } else if config.features.enabled(Feature::Collab) { - Some(MultiAgentVersion::V1) - } else { - None - } - }) + inherited_multi_agent_version + .or_else(|| model_info.and_then(|model_info| model_info.multi_agent_version)) + .or_else(|| { + if config.features.enabled(Feature::MultiAgentV2) { + Some(MultiAgentVersion::V2) + } else if config.features.enabled(Feature::Collab) { + Some(MultiAgentVersion::V1) + } else { + None + } + }) } impl TurnContext { diff --git a/codex-rs/core/src/thread_manager.rs b/codex-rs/core/src/thread_manager.rs index ffd99a1514c..12bbbec06ca 100644 --- a/codex-rs/core/src/thread_manager.rs +++ b/codex-rs/core/src/thread_manager.rs @@ -922,6 +922,7 @@ impl ThreadManager { &config, &session_source, history.get_multi_agent_version(), + /*model_info*/ None, ); let interrupted_marker = InterruptedTurnHistoryMarker::from_config(&config, multi_agent_version); diff --git a/codex-rs/core/src/tools/spec_plan_tests.rs b/codex-rs/core/src/tools/spec_plan_tests.rs index 5faaa81b446..7cc8456dc62 100644 --- a/codex-rs/core/src/tools/spec_plan_tests.rs +++ b/codex-rs/core/src/tools/spec_plan_tests.rs @@ -886,6 +886,7 @@ async fn resolved_multi_agent_version_controls_runtime_behavior() { turn.config.as_ref(), &turn.session_source, /*inherited_multi_agent_version*/ None, + Some(&turn.model_info), ); }) .await; @@ -906,6 +907,7 @@ async fn resolved_multi_agent_version_controls_runtime_behavior() { turn.config.as_ref(), &turn.session_source, /*inherited_multi_agent_version*/ None, + Some(&turn.model_info), ); }) .await; @@ -931,6 +933,7 @@ async fn resolved_multi_agent_version_controls_runtime_behavior() { turn.config.as_ref(), &turn.session_source, Some(MultiAgentVersion::V1), + Some(&turn.model_info), ); }) .await; diff --git a/codex-rs/core/tests/suite/model_runtime_selectors.rs b/codex-rs/core/tests/suite/model_runtime_selectors.rs index b6f2ea3c7df..e6ad052631a 100644 --- a/codex-rs/core/tests/suite/model_runtime_selectors.rs +++ b/codex-rs/core/tests/suite/model_runtime_selectors.rs @@ -1,40 +1,31 @@ use anyhow::Result; use codex_core::config::Config; use codex_features::Feature; -use codex_login::CodexAuth; -use codex_models_manager::manager::RefreshStrategy; -use codex_models_manager::manager::SharedModelsManager; use codex_models_manager::model_info::model_info_from_slug; use codex_protocol::openai_models::ModelInfo; -use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ModelVisibility; use codex_protocol::openai_models::ModelsResponse; +use codex_protocol::openai_models::MultiAgentVersion; use codex_protocol::openai_models::ToolMode; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::Op; -use codex_protocol::protocol::ThreadSettingsOverrides; use codex_protocol::user_input::UserInput; use core_test_support::responses; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_response_created; -use core_test_support::responses::mount_models_once; use core_test_support::responses::mount_sse_once; use core_test_support::responses::sse; -use core_test_support::skip_if_no_network; -use core_test_support::submit_thread_settings; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; use pretty_assertions::assert_eq; use serde_json::Value; -use tokio::time::Duration; -use tokio::time::Instant; -use tokio::time::sleep; -fn remote_model(slug: &str) -> ModelInfo { +fn catalog_model(slug: &str) -> ModelInfo { ModelInfo { visibility: ModelVisibility::List, used_fallback_model_metadata: false, + supports_search_tool: false, ..model_info_from_slug(slug) } } @@ -56,38 +47,31 @@ fn tool_names(body: &Value) -> Vec { .unwrap_or_default() } -async fn wait_for_model_available(manager: &SharedModelsManager, slug: &str) -> ModelPreset { - let deadline = Instant::now() + Duration::from_secs(2); - loop { - if let Some(model) = manager - .list_models(RefreshStrategy::Online) - .await - .iter() - .find(|model| model.model == slug) - .cloned() - { - return model; - } - if Instant::now() >= deadline { - panic!("timed out waiting for the remote model {slug} to appear"); - } - sleep(Duration::from_millis(25)).await; - } +fn selected_tool_names(body: &Value, selected: &[&str]) -> Vec { + tool_names(body) + .into_iter() + .filter(|name| selected.contains(&name.as_str())) + .collect() +} + +fn tool_description<'a>(body: &'a Value, name: &str) -> Option<&'a str> { + body.get("tools") + .and_then(Value::as_array) + .and_then(|tools| { + tools + .iter() + .find(|tool| tool.get("name").and_then(Value::as_str) == Some(name)) + }) + .and_then(|tool| tool.get("description")) + .and_then(Value::as_str) } -async fn response_body_for_remote_model( - remote_model: ModelInfo, +async fn response_body_for_catalog_model( + catalog_model: ModelInfo, configure: impl FnOnce(&mut Config) + Send + 'static, ) -> Result { let server = responses::start_mock_server().await; - let model_slug = remote_model.slug.clone(); - let models_mock = mount_models_once( - &server, - ModelsResponse { - models: vec![remote_model], - }, - ) - .await; + let model_slug = catalog_model.slug.clone(); let response_mock = mount_sse_once( &server, sse(vec![ @@ -98,23 +82,14 @@ async fn response_body_for_remote_model( ) .await; - let mut builder = test_codex() - .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) - .with_config(configure); + let mut builder = test_codex().with_config(move |config| { + config.model = Some(model_slug); + config.model_catalog = Some(ModelsResponse { + models: vec![catalog_model], + }); + configure(config); + }); let test = builder.build(&server).await?; - let models_manager = test.thread_manager.get_models_manager(); - let available_model = wait_for_model_available(&models_manager, &model_slug).await; - assert_eq!(available_model.model, model_slug); - assert_eq!(models_mock.requests().len(), 1); - - submit_thread_settings( - &test.codex, - ThreadSettingsOverrides { - model: Some(model_slug), - ..Default::default() - }, - ) - .await?; test.codex .submit(Op::UserInput { items: vec![UserInput::Text { @@ -137,12 +112,10 @@ async fn response_body_for_remote_model( } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn remote_tool_mode_selector_overrides_feature_flags() -> Result<()> { - skip_if_no_network!(Ok(())); - - let mut direct_model = remote_model("test-tool-mode-direct"); +async fn catalog_tool_mode_selector_overrides_feature_flags() -> Result<()> { + let mut direct_model = catalog_model("test-tool-mode-direct"); direct_model.tool_mode = Some(ToolMode::Direct); - let direct_body = response_body_for_remote_model(direct_model, |config| { + let direct_body = response_body_for_catalog_model(direct_model, |config| { config .features .enable(Feature::CodeModeOnly) @@ -158,9 +131,9 @@ async fn remote_tool_mode_selector_overrides_feature_flags() -> Result<()> { "direct mode should override enabled code mode flags: {direct_tools:?}" ); - let mut code_mode_only_model = remote_model("test-tool-mode-code-mode-only"); + let mut code_mode_only_model = catalog_model("test-tool-mode-code-mode-only"); code_mode_only_model.tool_mode = Some(ToolMode::CodeModeOnly); - let code_mode_only_body = response_body_for_remote_model(code_mode_only_model, |_| {}).await?; + let code_mode_only_body = response_body_for_catalog_model(code_mode_only_model, |_| {}).await?; assert_eq!( tool_names(&code_mode_only_body), vec![ @@ -171,3 +144,167 @@ async fn remote_tool_mode_selector_overrides_feature_flags() -> Result<()> { Ok(()) } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn catalog_multi_agent_version_selector_overrides_feature_flags() -> Result<()> { + let mut v1_model = catalog_model("test-multi-agent-v1"); + v1_model.multi_agent_version = Some(MultiAgentVersion::V1); + let v1_body = response_body_for_catalog_model(v1_model, |config| { + config + .features + .enable(Feature::MultiAgentV2) + .expect("test config should allow feature update"); + config.multi_agent_v2.root_agent_usage_hint_text = + Some("V2 guidance must not reach v1 models.".to_string()); + }) + .await?; + assert_eq!( + selected_tool_names( + &v1_body, + &[ + "spawn_agent", + "send_input", + "resume_agent", + "wait_agent", + "close_agent", + "send_message", + "followup_task", + "list_agents", + ], + ), + vec![ + "spawn_agent".to_string(), + "send_input".to_string(), + "resume_agent".to_string(), + "wait_agent".to_string(), + "close_agent".to_string(), + ] + ); + assert!( + !v1_body + .to_string() + .contains("V2 guidance must not reach v1 models."), + "v1 models should not receive v2 usage hints: {v1_body:?}" + ); + + let mut v2_model = catalog_model("test-multi-agent-v2"); + v2_model.multi_agent_version = Some(MultiAgentVersion::V2); + let v2_body = response_body_for_catalog_model(v2_model, |config| { + config + .features + .disable(Feature::Collab) + .expect("test config should allow feature update"); + config + .features + .disable(Feature::MultiAgentV2) + .expect("test config should allow feature update"); + config.multi_agent_v2.max_concurrent_threads_per_session = 17; + config.multi_agent_v2.root_agent_usage_hint_text = + Some("V2 guidance should reach v2 models.".to_string()); + }) + .await?; + assert_eq!( + selected_tool_names( + &v2_body, + &[ + "spawn_agent", + "send_input", + "resume_agent", + "wait_agent", + "close_agent", + "send_message", + "followup_task", + "list_agents", + ], + ), + vec![ + "spawn_agent".to_string(), + "send_message".to_string(), + "followup_task".to_string(), + "wait_agent".to_string(), + "close_agent".to_string(), + "list_agents".to_string(), + ] + ); + assert!( + tool_description(&v2_body, "spawn_agent").is_some_and( + |description| description.contains("max_concurrent_threads_per_session = 17") + ), + "v2 spawn_agent should advertise the configured concurrency cap: {v2_body:?}" + ); + assert!( + v2_body + .to_string() + .contains("V2 guidance should reach v2 models."), + "v2 models should receive v2 usage hints: {v2_body:?}" + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn catalog_omitted_and_unknown_multi_agent_versions_follow_feature_flags() -> Result<()> { + let omitted_body = + response_body_for_catalog_model(catalog_model("test-multi-agent-omitted"), |config| { + config + .features + .enable(Feature::Collab) + .expect("test config should allow feature update"); + config + .features + .disable(Feature::MultiAgentV2) + .expect("test config should allow feature update"); + }) + .await?; + + let mut unknown_model = + serde_json::to_value(catalog_model("test-multi-agent-unknown-version"))?; + unknown_model["multi_agent_version"] = Value::String("future_multi_agent_version".to_string()); + let unknown_model = serde_json::from_value::(unknown_model)?; + let unknown_body = response_body_for_catalog_model(unknown_model, |config| { + config + .features + .enable(Feature::Collab) + .expect("test config should allow feature update"); + config + .features + .disable(Feature::MultiAgentV2) + .expect("test config should allow feature update"); + }) + .await?; + + let expected_v1_tools = vec![ + "spawn_agent".to_string(), + "send_input".to_string(), + "resume_agent".to_string(), + "wait_agent".to_string(), + "close_agent".to_string(), + ]; + assert_eq!( + ( + selected_tool_names( + &omitted_body, + &[ + "spawn_agent", + "send_input", + "resume_agent", + "wait_agent", + "close_agent", + ], + ), + selected_tool_names( + &unknown_body, + &[ + "spawn_agent", + "send_input", + "resume_agent", + "wait_agent", + "close_agent", + ], + ), + ), + (expected_v1_tools.clone(), expected_v1_tools) + ); + + Ok(()) +} diff --git a/codex-rs/core/tests/suite/model_switching.rs b/codex-rs/core/tests/suite/model_switching.rs index 90bb741f05b..b8c62917cd5 100644 --- a/codex-rs/core/tests/suite/model_switching.rs +++ b/codex-rs/core/tests/suite/model_switching.rs @@ -113,6 +113,7 @@ fn test_model_info( used_fallback_model_metadata: false, supports_search_tool: false, tool_mode: None, + multi_agent_version: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), @@ -931,6 +932,7 @@ async fn model_switch_to_smaller_model_updates_token_context_window() -> Result< used_fallback_model_metadata: false, supports_search_tool: false, tool_mode: None, + multi_agent_version: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), diff --git a/codex-rs/core/tests/suite/models_cache_ttl.rs b/codex-rs/core/tests/suite/models_cache_ttl.rs index 76ebb4529cf..06aad67e2f4 100644 --- a/codex-rs/core/tests/suite/models_cache_ttl.rs +++ b/codex-rs/core/tests/suite/models_cache_ttl.rs @@ -371,5 +371,6 @@ fn test_remote_model(slug: &str, priority: i32) -> ModelInfo { used_fallback_model_metadata: false, supports_search_tool: false, tool_mode: None, + multi_agent_version: None, } } diff --git a/codex-rs/core/tests/suite/personality.rs b/codex-rs/core/tests/suite/personality.rs index 4cb8b63a098..dcb780a21a2 100644 --- a/codex-rs/core/tests/suite/personality.rs +++ b/codex-rs/core/tests/suite/personality.rs @@ -593,6 +593,7 @@ async fn remote_model_friendly_personality_instructions_with_feature() -> anyhow used_fallback_model_metadata: false, supports_search_tool: false, tool_mode: None, + multi_agent_version: None, }; let _models_mock = mount_models_once( @@ -704,6 +705,7 @@ async fn user_turn_personality_remote_model_template_includes_update_message() - used_fallback_model_metadata: false, supports_search_tool: false, tool_mode: None, + multi_agent_version: None, }; let _models_mock = mount_models_once( diff --git a/codex-rs/core/tests/suite/remote_models.rs b/codex-rs/core/tests/suite/remote_models.rs index 82db097f392..582972cb13f 100644 --- a/codex-rs/core/tests/suite/remote_models.rs +++ b/codex-rs/core/tests/suite/remote_models.rs @@ -478,6 +478,7 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { used_fallback_model_metadata: false, supports_search_tool: false, tool_mode: None, + multi_agent_version: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), @@ -728,6 +729,7 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> { used_fallback_model_metadata: false, supports_search_tool: false, tool_mode: None, + multi_agent_version: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), @@ -1212,6 +1214,7 @@ fn test_remote_model_with_policy( used_fallback_model_metadata: false, supports_search_tool: false, tool_mode: None, + multi_agent_version: None, priority, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), diff --git a/codex-rs/core/tests/suite/rmcp_client.rs b/codex-rs/core/tests/suite/rmcp_client.rs index 0d669e5dd7e..eb37bfcf7a4 100644 --- a/codex-rs/core/tests/suite/rmcp_client.rs +++ b/codex-rs/core/tests/suite/rmcp_client.rs @@ -1350,6 +1350,7 @@ async fn stdio_image_responses_are_sanitized_for_text_only_model() -> anyhow::Re used_fallback_model_metadata: false, supports_search_tool: false, tool_mode: None, + multi_agent_version: None, }], }, ) diff --git a/codex-rs/core/tests/suite/spawn_agent_description.rs b/codex-rs/core/tests/suite/spawn_agent_description.rs index f9be02349ce..6be7cfe67cb 100644 --- a/codex-rs/core/tests/suite/spawn_agent_description.rs +++ b/codex-rs/core/tests/suite/spawn_agent_description.rs @@ -61,6 +61,7 @@ fn test_model_info( used_fallback_model_metadata: false, supports_search_tool: false, tool_mode: None, + multi_agent_version: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers, diff --git a/codex-rs/core/tests/suite/view_image.rs b/codex-rs/core/tests/suite/view_image.rs index 6062228de07..d071a83907d 100644 --- a/codex-rs/core/tests/suite/view_image.rs +++ b/codex-rs/core/tests/suite/view_image.rs @@ -1356,6 +1356,7 @@ async fn view_image_tool_returns_unsupported_message_for_text_only_model() -> an used_fallback_model_metadata: false, supports_search_tool: false, tool_mode: None, + multi_agent_version: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), diff --git a/codex-rs/models-manager/src/model_info.rs b/codex-rs/models-manager/src/model_info.rs index e57b4e186ec..af9218a187a 100644 --- a/codex-rs/models-manager/src/model_info.rs +++ b/codex-rs/models-manager/src/model_info.rs @@ -100,6 +100,7 @@ pub fn model_info_from_slug(slug: &str) -> ModelInfo { used_fallback_model_metadata: true, // this is the fallback model metadata supports_search_tool: false, tool_mode: None, + multi_agent_version: None, } } diff --git a/codex-rs/protocol/src/openai_models.rs b/codex-rs/protocol/src/openai_models.rs index 087d9f57855..d1429e4b108 100644 --- a/codex-rs/protocol/src/openai_models.rs +++ b/codex-rs/protocol/src/openai_models.rs @@ -352,6 +352,12 @@ pub struct ModelInfo { deserialize_with = "deserialize_optional_model_selector" )] pub tool_mode: Option, + #[serde( + default, + skip_serializing_if = "Option::is_none", + deserialize_with = "deserialize_optional_model_selector" + )] + pub multi_agent_version: Option, } impl ModelInfo { @@ -647,6 +653,7 @@ mod tests { used_fallback_model_metadata: false, supports_search_tool: false, tool_mode: None, + multi_agent_version: None, } } @@ -865,6 +872,7 @@ mod tests { assert_eq!(model.web_search_tool_type, WebSearchToolType::Text); assert!(!model.supports_search_tool); assert_eq!(model.tool_mode, None); + assert_eq!(model.multi_agent_version, None); } #[test] @@ -904,6 +912,43 @@ mod tests { assert!(!object.contains_key("tool_mode")); } + #[test] + fn model_info_deserializes_known_multi_agent_version() { + let mut value = + serde_json::to_value(test_model(/*spec*/ None)).expect("serialize test model"); + let object = value + .as_object_mut() + .expect("model info should be an object"); + object.insert( + "multi_agent_version".to_string(), + serde_json::Value::String("v2".to_string()), + ); + let model = serde_json::from_value::(value).expect("deserialize model info"); + + assert_eq!(model.multi_agent_version, Some(MultiAgentVersion::V2)); + } + + #[test] + fn model_info_treats_unknown_multi_agent_version_as_omitted() { + let mut value = + serde_json::to_value(test_model(/*spec*/ None)).expect("serialize test model"); + let object = value + .as_object_mut() + .expect("model info should be an object"); + object.insert( + "multi_agent_version".to_string(), + serde_json::Value::String("future_multi_agent_version".to_string()), + ); + let model = serde_json::from_value::(value).expect("deserialize model info"); + + assert_eq!(model.multi_agent_version, None); + let serialized = serde_json::to_value(model).expect("serialize model info"); + let object = serialized + .as_object() + .expect("model info should be an object"); + assert!(!object.contains_key("multi_agent_version")); + } + #[test] fn resolved_context_window_prefers_context_window() { let model = ModelInfo { diff --git a/codex-rs/tools/src/tool_config_tests.rs b/codex-rs/tools/src/tool_config_tests.rs index 2e49c155847..57bd0bcec61 100644 --- a/codex-rs/tools/src/tool_config_tests.rs +++ b/codex-rs/tools/src/tool_config_tests.rs @@ -45,6 +45,7 @@ fn model_with_shell_type(shell_type: ConfigShellToolType) -> ModelInfo { used_fallback_model_metadata: false, supports_search_tool: false, tool_mode: None, + multi_agent_version: None, } } From 5e59751c7e50d3c8f1e6fb5b5cecf9ced579237a Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Fri, 29 May 2026 15:53:27 -0700 Subject: [PATCH 2/3] codex: fix model runtime selector integration assertions --- .../tests/suite/model_runtime_selectors.rs | 101 ++++++++++-------- 1 file changed, 57 insertions(+), 44 deletions(-) diff --git a/codex-rs/core/tests/suite/model_runtime_selectors.rs b/codex-rs/core/tests/suite/model_runtime_selectors.rs index e6ad052631a..73a4e4a614e 100644 --- a/codex-rs/core/tests/suite/model_runtime_selectors.rs +++ b/codex-rs/core/tests/suite/model_runtime_selectors.rs @@ -47,6 +47,33 @@ fn tool_names(body: &Value) -> Vec { .unwrap_or_default() } +fn namespace_child_tool_names(body: &Value, namespace: &str) -> Vec { + body.get("tools") + .and_then(Value::as_array) + .and_then(|tools| { + tools.iter().find_map(|tool| { + if tool.get("type").and_then(Value::as_str) == Some("namespace") + && tool.get("name").and_then(Value::as_str) == Some(namespace) + { + tool.get("tools").and_then(Value::as_array).map(|children| { + children + .iter() + .filter_map(|child| { + child + .get("name") + .and_then(Value::as_str) + .map(str::to_string) + }) + .collect() + }) + } else { + None + } + }) + }) + .unwrap_or_default() +} + fn selected_tool_names(body: &Value, selected: &[&str]) -> Vec { tool_names(body) .into_iter() @@ -159,27 +186,19 @@ async fn catalog_multi_agent_version_selector_overrides_feature_flags() -> Resul }) .await?; assert_eq!( - selected_tool_names( - &v1_body, - &[ - "spawn_agent", - "send_input", - "resume_agent", - "wait_agent", - "close_agent", - "send_message", - "followup_task", - "list_agents", - ], - ), + namespace_child_tool_names(&v1_body, "multi_agent_v1"), vec![ - "spawn_agent".to_string(), - "send_input".to_string(), + "close_agent".to_string(), "resume_agent".to_string(), + "send_input".to_string(), + "spawn_agent".to_string(), "wait_agent".to_string(), - "close_agent".to_string(), ] ); + assert_eq!( + selected_tool_names(&v1_body, &["send_message", "followup_task", "list_agents"]), + Vec::::new() + ); assert!( !v1_body .to_string() @@ -226,6 +245,10 @@ async fn catalog_multi_agent_version_selector_overrides_feature_flags() -> Resul "list_agents".to_string(), ] ); + assert_eq!( + namespace_child_tool_names(&v2_body, "multi_agent_v1"), + Vec::::new() + ); assert!( tool_description(&v2_body, "spawn_agent").is_some_and( |description| description.contains("max_concurrent_threads_per_session = 17") @@ -273,37 +296,27 @@ async fn catalog_omitted_and_unknown_multi_agent_versions_follow_feature_flags() }) .await?; - let expected_v1_tools = vec![ - "spawn_agent".to_string(), - "send_input".to_string(), - "resume_agent".to_string(), - "wait_agent".to_string(), - "close_agent".to_string(), - ]; assert_eq!( ( - selected_tool_names( - &omitted_body, - &[ - "spawn_agent", - "send_input", - "resume_agent", - "wait_agent", - "close_agent", - ], - ), - selected_tool_names( - &unknown_body, - &[ - "spawn_agent", - "send_input", - "resume_agent", - "wait_agent", - "close_agent", - ], - ), + namespace_child_tool_names(&omitted_body, "multi_agent_v1"), + namespace_child_tool_names(&unknown_body, "multi_agent_v1"), ), - (expected_v1_tools.clone(), expected_v1_tools) + ( + vec![ + "close_agent".to_string(), + "resume_agent".to_string(), + "send_input".to_string(), + "spawn_agent".to_string(), + "wait_agent".to_string(), + ], + vec![ + "close_agent".to_string(), + "resume_agent".to_string(), + "send_input".to_string(), + "spawn_agent".to_string(), + "wait_agent".to_string(), + ], + ) ); Ok(()) From 151d815493d783df3a96ee89dac93d6cb39f7384 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Fri, 29 May 2026 18:35:39 -0700 Subject: [PATCH 3/3] codex: resolve model multi-agent overlay once per session --- codex-rs/core/src/agent/control.rs | 10 ++-------- codex-rs/core/src/session/mod.rs | 4 ++-- codex-rs/core/src/session/tests.rs | 5 ----- codex-rs/core/src/session/turn_context.rs | 21 +++++++++------------ codex-rs/core/src/thread_manager.rs | 1 - codex-rs/core/src/tools/spec_plan_tests.rs | 3 --- 6 files changed, 13 insertions(+), 31 deletions(-) diff --git a/codex-rs/core/src/agent/control.rs b/codex-rs/core/src/agent/control.rs index 6a9ee978387..38237d8f6f3 100644 --- a/codex-rs/core/src/agent/control.rs +++ b/codex-rs/core/src/agent/control.rs @@ -258,7 +258,6 @@ impl AgentControl { &config, session_source, options.multi_agent_version, - /*model_info*/ None, ) }); let notification_source = session_source.clone(); @@ -385,12 +384,8 @@ impl AgentControl { inherited_shell_snapshot: Option>, inherited_exec_policy: Option>, ) -> CodexResult { - let multi_agent_version = resolve_multi_agent_version( - &config, - &session_source, - options.multi_agent_version, - /*model_info*/ None, - ); + let multi_agent_version = + resolve_multi_agent_version(&config, &session_source, options.multi_agent_version); if options.fork_parent_spawn_call_id.is_none() { return Err(CodexErr::Fatal( "spawn_agent fork requires a parent spawn call id".to_string(), @@ -623,7 +618,6 @@ impl AgentControl { &config, &session_source, stored_thread.multi_agent_version, - /*model_info*/ None, ); if let SessionSource::SubAgent(SubAgentSource::ThreadSpawn { depth, .. }) = &session_source && *depth >= config.agent_max_depth diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs index 555bdecf3d6..c8a408d4c29 100644 --- a/codex-rs/core/src/session/mod.rs +++ b/codex-rs/core/src/session/mod.rs @@ -543,8 +543,8 @@ impl Codex { &session_source, conversation_history .get_multi_agent_version() - .or(parent_multi_agent_version), - Some(&model_info), + .or(parent_multi_agent_version) + .or(model_info.multi_agent_version), ); if let SessionSource::SubAgent(SubAgentSource::ThreadSpawn { depth, .. }) = session_source && depth >= config.agent_max_depth diff --git a/codex-rs/core/src/session/tests.rs b/codex-rs/core/src/session/tests.rs index 8f8c9287b17..d724b30c4e4 100644 --- a/codex-rs/core/src/session/tests.rs +++ b/codex-rs/core/src/session/tests.rs @@ -7237,7 +7237,6 @@ async fn build_initial_context_adds_multi_agent_v2_usage_hint_when_selector_is_v #[tokio::test] async fn spawned_child_multi_agent_version_follows_parent_system() { let (_session, turn_context) = make_session_and_context().await; - let mut model_info = turn_context.model_info.clone(); let child_source = SessionSource::SubAgent(SubAgentSource::ThreadSpawn { parent_thread_id: ThreadId::new(), depth: 1, @@ -7245,20 +7244,16 @@ async fn spawned_child_multi_agent_version_follows_parent_system() { agent_nickname: None, agent_role: None, }); - model_info.multi_agent_version = Some(MultiAgentVersion::V1); let resolved_from_v2_parent = resolve_multi_agent_version( turn_context.config.as_ref(), &child_source, Some(MultiAgentVersion::V2), - Some(&model_info), ); - model_info.multi_agent_version = Some(MultiAgentVersion::V2); let resolved_from_v1_parent = resolve_multi_agent_version( turn_context.config.as_ref(), &child_source, Some(MultiAgentVersion::V1), - Some(&model_info), ); assert_eq!( diff --git a/codex-rs/core/src/session/turn_context.rs b/codex-rs/core/src/session/turn_context.rs index 5e292964dce..b5ecc4d4f0e 100644 --- a/codex-rs/core/src/session/turn_context.rs +++ b/codex-rs/core/src/session/turn_context.rs @@ -110,7 +110,6 @@ pub(crate) fn resolve_multi_agent_version( config: &Config, session_source: &SessionSource, inherited_multi_agent_version: Option, - model_info: Option<&ModelInfo>, ) -> Option { if is_guardian_reviewer_source(session_source) || matches!( @@ -120,17 +119,15 @@ pub(crate) fn resolve_multi_agent_version( { return None; } - inherited_multi_agent_version - .or_else(|| model_info.and_then(|model_info| model_info.multi_agent_version)) - .or_else(|| { - if config.features.enabled(Feature::MultiAgentV2) { - Some(MultiAgentVersion::V2) - } else if config.features.enabled(Feature::Collab) { - Some(MultiAgentVersion::V1) - } else { - None - } - }) + inherited_multi_agent_version.or_else(|| { + if config.features.enabled(Feature::MultiAgentV2) { + Some(MultiAgentVersion::V2) + } else if config.features.enabled(Feature::Collab) { + Some(MultiAgentVersion::V1) + } else { + None + } + }) } impl TurnContext { diff --git a/codex-rs/core/src/thread_manager.rs b/codex-rs/core/src/thread_manager.rs index 12bbbec06ca..ffd99a1514c 100644 --- a/codex-rs/core/src/thread_manager.rs +++ b/codex-rs/core/src/thread_manager.rs @@ -922,7 +922,6 @@ impl ThreadManager { &config, &session_source, history.get_multi_agent_version(), - /*model_info*/ None, ); let interrupted_marker = InterruptedTurnHistoryMarker::from_config(&config, multi_agent_version); diff --git a/codex-rs/core/src/tools/spec_plan_tests.rs b/codex-rs/core/src/tools/spec_plan_tests.rs index 7cc8456dc62..5faaa81b446 100644 --- a/codex-rs/core/src/tools/spec_plan_tests.rs +++ b/codex-rs/core/src/tools/spec_plan_tests.rs @@ -886,7 +886,6 @@ async fn resolved_multi_agent_version_controls_runtime_behavior() { turn.config.as_ref(), &turn.session_source, /*inherited_multi_agent_version*/ None, - Some(&turn.model_info), ); }) .await; @@ -907,7 +906,6 @@ async fn resolved_multi_agent_version_controls_runtime_behavior() { turn.config.as_ref(), &turn.session_source, /*inherited_multi_agent_version*/ None, - Some(&turn.model_info), ); }) .await; @@ -933,7 +931,6 @@ async fn resolved_multi_agent_version_controls_runtime_behavior() { turn.config.as_ref(), &turn.session_source, Some(MultiAgentVersion::V1), - Some(&turn.model_info), ); }) .await;