Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions codex-rs/app-server/tests/common/models_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo {
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
multi_agent_version: None,
}
}

Expand Down
1 change: 1 addition & 0 deletions codex-rs/codex-api/tests/models_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ async fn models_client_hits_models_endpoint() {
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
multi_agent_version: None,
}],
};

Expand Down
3 changes: 2 additions & 1 deletion codex-rs/core/src/session/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,8 @@ impl Codex {
&session_source,
conversation_history
.get_multi_agent_version()
.or(parent_multi_agent_version),
.or(parent_multi_agent_version)
.or(model_info.multi_agent_version),
);
if let SessionSource::SubAgent(SubAgentSource::ThreadSpawn { depth, .. }) = session_source
&& depth >= config.agent_max_depth
Expand Down
274 changes: 212 additions & 62 deletions codex-rs/core/tests/suite/model_runtime_selectors.rs
Original file line number Diff line number Diff line change
@@ -1,40 +1,31 @@
use anyhow::Result;
use codex_core::config::Config;
use codex_features::Feature;
use codex_login::CodexAuth;
use codex_models_manager::manager::RefreshStrategy;
use codex_models_manager::manager::SharedModelsManager;
use codex_models_manager::model_info::model_info_from_slug;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ModelPreset;
use codex_protocol::openai_models::ModelVisibility;
use codex_protocol::openai_models::ModelsResponse;
use codex_protocol::openai_models::MultiAgentVersion;
use codex_protocol::openai_models::ToolMode;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::ThreadSettingsOverrides;
use codex_protocol::user_input::UserInput;
use core_test_support::responses;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_models_once;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
use core_test_support::submit_thread_settings;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use pretty_assertions::assert_eq;
use serde_json::Value;
use tokio::time::Duration;
use tokio::time::Instant;
use tokio::time::sleep;

fn remote_model(slug: &str) -> ModelInfo {
fn catalog_model(slug: &str) -> ModelInfo {
ModelInfo {
visibility: ModelVisibility::List,
used_fallback_model_metadata: false,
supports_search_tool: false,
..model_info_from_slug(slug)
}
}
Expand All @@ -56,38 +47,58 @@ fn tool_names(body: &Value) -> Vec<String> {
.unwrap_or_default()
}

async fn wait_for_model_available(manager: &SharedModelsManager, slug: &str) -> ModelPreset {
let deadline = Instant::now() + Duration::from_secs(2);
loop {
if let Some(model) = manager
.list_models(RefreshStrategy::Online)
.await
.iter()
.find(|model| model.model == slug)
.cloned()
{
return model;
}
if Instant::now() >= deadline {
panic!("timed out waiting for the remote model {slug} to appear");
}
sleep(Duration::from_millis(25)).await;
}
/// Collects the names of the tools nested under a namespace entry of the request body.
///
/// A top-level tool is considered a match when its `type` is `"namespace"` and its `name`
/// equals `namespace`. The first matching entry that carries a nested `tools` array wins;
/// children without a string `name` are ignored.
///
/// Returns an empty list when `body` has no `tools` array or no such namespace entry exists.
fn namespace_child_tool_names(body: &Value, namespace: &str) -> Vec<String> {
    let Some(top_level_tools) = body.get("tools").and_then(Value::as_array) else {
        return Vec::new();
    };

    top_level_tools
        .iter()
        .filter(|tool| {
            tool.get("type").and_then(Value::as_str) == Some("namespace")
                && tool.get("name").and_then(Value::as_str) == Some(namespace)
        })
        // A matching namespace without a nested `tools` array is skipped, not treated as empty.
        .find_map(|tool| tool.get("tools").and_then(Value::as_array))
        .map(|children| {
            children
                .iter()
                .filter_map(|child| child.get("name").and_then(Value::as_str))
                .map(str::to_string)
                .collect()
        })
        .unwrap_or_default()
}

/// Returns the top-level tool names from `body` that also appear in `selected`.
///
/// The result keeps the order in which `tool_names` reports the tools, not the order of
/// `selected`.
fn selected_tool_names(body: &Value, selected: &[&str]) -> Vec<String> {
    let mut names = tool_names(body);
    names.retain(|name| selected.contains(&name.as_str()));
    names
}

/// Looks up the `description` string of the first top-level tool whose `name` equals `name`.
///
/// Returns `None` when `body` has no `tools` array, when no tool has that name, or when the
/// matching tool has no string `description`.
fn tool_description<'a>(body: &'a Value, name: &str) -> Option<&'a str> {
    let tools = body.get("tools")?.as_array()?;
    let tool = tools
        .iter()
        .find(|tool| tool.get("name").and_then(Value::as_str) == Some(name))?;
    tool.get("description")?.as_str()
}

async fn response_body_for_remote_model(
remote_model: ModelInfo,
async fn response_body_for_catalog_model(
catalog_model: ModelInfo,
configure: impl FnOnce(&mut Config) + Send + 'static,
) -> Result<Value> {
let server = responses::start_mock_server().await;
let model_slug = remote_model.slug.clone();
let models_mock = mount_models_once(
&server,
ModelsResponse {
models: vec![remote_model],
},
)
.await;
let model_slug = catalog_model.slug.clone();
let response_mock = mount_sse_once(
&server,
sse(vec![
Expand All @@ -98,23 +109,14 @@ async fn response_body_for_remote_model(
)
.await;

let mut builder = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(configure);
let mut builder = test_codex().with_config(move |config| {
config.model = Some(model_slug);
config.model_catalog = Some(ModelsResponse {
models: vec![catalog_model],
});
configure(config);
});
let test = builder.build(&server).await?;
let models_manager = test.thread_manager.get_models_manager();
let available_model = wait_for_model_available(&models_manager, &model_slug).await;
assert_eq!(available_model.model, model_slug);
assert_eq!(models_mock.requests().len(), 1);

submit_thread_settings(
&test.codex,
ThreadSettingsOverrides {
model: Some(model_slug),
..Default::default()
},
)
.await?;
test.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
Expand All @@ -137,12 +139,10 @@ async fn response_body_for_remote_model(
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_tool_mode_selector_overrides_feature_flags() -> Result<()> {
skip_if_no_network!(Ok(()));

let mut direct_model = remote_model("test-tool-mode-direct");
async fn catalog_tool_mode_selector_overrides_feature_flags() -> Result<()> {
let mut direct_model = catalog_model("test-tool-mode-direct");
direct_model.tool_mode = Some(ToolMode::Direct);
let direct_body = response_body_for_remote_model(direct_model, |config| {
let direct_body = response_body_for_catalog_model(direct_model, |config| {
config
.features
.enable(Feature::CodeModeOnly)
Expand All @@ -158,9 +158,9 @@ async fn remote_tool_mode_selector_overrides_feature_flags() -> Result<()> {
"direct mode should override enabled code mode flags: {direct_tools:?}"
);

let mut code_mode_only_model = remote_model("test-tool-mode-code-mode-only");
let mut code_mode_only_model = catalog_model("test-tool-mode-code-mode-only");
code_mode_only_model.tool_mode = Some(ToolMode::CodeModeOnly);
let code_mode_only_body = response_body_for_remote_model(code_mode_only_model, |_| {}).await?;
let code_mode_only_body = response_body_for_catalog_model(code_mode_only_model, |_| {}).await?;
assert_eq!(
tool_names(&code_mode_only_body),
vec![
Expand All @@ -171,3 +171,153 @@ async fn remote_tool_mode_selector_overrides_feature_flags() -> Result<()> {

Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn catalog_multi_agent_version_selector_overrides_feature_flags() -> Result<()> {
let mut v1_model = catalog_model("test-multi-agent-v1");
v1_model.multi_agent_version = Some(MultiAgentVersion::V1);
let v1_body = response_body_for_catalog_model(v1_model, |config| {
config
.features
.enable(Feature::MultiAgentV2)
.expect("test config should allow feature update");
config.multi_agent_v2.root_agent_usage_hint_text =
Some("V2 guidance must not reach v1 models.".to_string());
})
.await?;
assert_eq!(
namespace_child_tool_names(&v1_body, "multi_agent_v1"),
vec![
"close_agent".to_string(),
"resume_agent".to_string(),
"send_input".to_string(),
"spawn_agent".to_string(),
"wait_agent".to_string(),
]
);
assert_eq!(
selected_tool_names(&v1_body, &["send_message", "followup_task", "list_agents"]),
Vec::<String>::new()
);
assert!(
!v1_body
.to_string()
.contains("V2 guidance must not reach v1 models."),
"v1 models should not receive v2 usage hints: {v1_body:?}"
);

let mut v2_model = catalog_model("test-multi-agent-v2");
v2_model.multi_agent_version = Some(MultiAgentVersion::V2);
let v2_body = response_body_for_catalog_model(v2_model, |config| {
config
.features
.disable(Feature::Collab)
.expect("test config should allow feature update");
config
.features
.disable(Feature::MultiAgentV2)
.expect("test config should allow feature update");
config.multi_agent_v2.max_concurrent_threads_per_session = 17;
config.multi_agent_v2.root_agent_usage_hint_text =
Some("V2 guidance should reach v2 models.".to_string());
})
.await?;
assert_eq!(
selected_tool_names(
&v2_body,
&[
"spawn_agent",
"send_input",
"resume_agent",
"wait_agent",
"close_agent",
"send_message",
"followup_task",
"list_agents",
],
),
vec![
"spawn_agent".to_string(),
"send_message".to_string(),
"followup_task".to_string(),
"wait_agent".to_string(),
"close_agent".to_string(),
"list_agents".to_string(),
]
);
assert_eq!(
namespace_child_tool_names(&v2_body, "multi_agent_v1"),
Vec::<String>::new()
);
assert!(
tool_description(&v2_body, "spawn_agent").is_some_and(
|description| description.contains("max_concurrent_threads_per_session = 17")
),
"v2 spawn_agent should advertise the configured concurrency cap: {v2_body:?}"
);
assert!(
v2_body
.to_string()
.contains("V2 guidance should reach v2 models."),
"v2 models should receive v2 usage hints: {v2_body:?}"
);

Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn catalog_omitted_and_unknown_multi_agent_versions_follow_feature_flags() -> Result<()> {
let omitted_body =
response_body_for_catalog_model(catalog_model("test-multi-agent-omitted"), |config| {
config
.features
.enable(Feature::Collab)
.expect("test config should allow feature update");
config
.features
.disable(Feature::MultiAgentV2)
.expect("test config should allow feature update");
})
.await?;

let mut unknown_model =
serde_json::to_value(catalog_model("test-multi-agent-unknown-version"))?;
unknown_model["multi_agent_version"] = Value::String("future_multi_agent_version".to_string());
let unknown_model = serde_json::from_value::<ModelInfo>(unknown_model)?;
let unknown_body = response_body_for_catalog_model(unknown_model, |config| {
config
.features
.enable(Feature::Collab)
.expect("test config should allow feature update");
config
.features
.disable(Feature::MultiAgentV2)
.expect("test config should allow feature update");
})
.await?;

assert_eq!(
(
namespace_child_tool_names(&omitted_body, "multi_agent_v1"),
namespace_child_tool_names(&unknown_body, "multi_agent_v1"),
),
(
vec![
"close_agent".to_string(),
"resume_agent".to_string(),
"send_input".to_string(),
"spawn_agent".to_string(),
"wait_agent".to_string(),
],
vec![
"close_agent".to_string(),
"resume_agent".to_string(),
"send_input".to_string(),
"spawn_agent".to_string(),
"wait_agent".to_string(),
],
)
);

Ok(())
}
2 changes: 2 additions & 0 deletions codex-rs/core/tests/suite/model_switching.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ fn test_model_info(
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
multi_agent_version: None,
priority: 1,
additional_speed_tiers: Vec::new(),
service_tiers: Vec::new(),
Expand Down Expand Up @@ -931,6 +932,7 @@ async fn model_switch_to_smaller_model_updates_token_context_window() -> Result<
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
multi_agent_version: None,
priority: 1,
additional_speed_tiers: Vec::new(),
service_tiers: Vec::new(),
Expand Down
1 change: 1 addition & 0 deletions codex-rs/core/tests/suite/models_cache_ttl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -371,5 +371,6 @@ fn test_remote_model(slug: &str, priority: i32) -> ModelInfo {
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
multi_agent_version: None,
}
}
Loading
Loading