diff --git a/src/ucode/agents/codex.py b/src/ucode/agents/codex.py index 7c3ef73..3719eea 100644 --- a/src/ucode/agents/codex.py +++ b/src/ucode/agents/codex.py @@ -56,6 +56,15 @@ ["model_providers", CODEX_MODEL_PROVIDER_NAME, "http_headers"], ] +_GPT_RE = re.compile(r"(?:databricks-)?gpt-(\d+)(?:[.-](\d+))?(?:[.-](\d+))?(-.+|[a-z].*)?") + +# These models should use the Databricks ID, not the OpenAI ID, as the OpenAI +# ID is incompatible with Codex. +CODEX_OPENAI_ID_INCOMPATIBLE_MODELS = { + "databricks-gpt-5-2-codex", + "databricks-gpt-5-4-nano", +} + def is_update_available() -> tuple[str, str] | None: return available_npm_package_update(SPEC["package"]) @@ -177,9 +186,44 @@ def _remove_legacy_ucode_profile() -> None: write_toml_file(path, doc) +def _openai_model_id(model: str | None) -> str | None: + """Map Databricks GPT endpoint ids to OpenAI model ids for Codex metadata.""" + parsed = _parse_gpt(model) + if parsed is None: + return model + major, minor, patch, suffix = parsed + version = str(major) + if minor is not None: + version += f".{minor}" + if patch is not None: + version += f".{patch}" + return f"gpt-{version}{suffix}" + + +def _codex_model_id(model: str | None) -> str | None: + if model in CODEX_OPENAI_ID_INCOMPATIBLE_MODELS: + return model + return _openai_model_id(model) + + +def _parse_gpt(model: str | None) -> tuple[int, int | None, int | None, str] | None: + if not model: + return None + match = _GPT_RE.fullmatch(model.split("/")[-1]) + if not match: + return None + major, minor, patch, suffix = match.groups() + return ( + int(major), + int(minor) if minor is not None else None, + int(patch) if patch is not None else None, + suffix or "", + ) + + def write_tool_config(state: dict, model: str | None = None) -> dict: workspace = state["workspace"] - chosen_model = model or default_model(state) + chosen_model = _codex_model_id(model or default_model(state)) databricks_profile = state.get("profile") if _use_legacy_layout(): @@ -208,8 +252,26 @@ def write_tool_config(state: dict, model: str | None = None) -> dict: def default_model(state: dict) -> str | None: + """Pick the newest GPT model when multiple are available. + + The discovery list is alphabetically sorted, which can put + "databricks-gpt-5" ahead of "databricks-gpt-5-5". Prefer the + highest semantic version instead. Falls back to the first + discovered entry when parsing fails. + """ codex_models = state.get("codex_models") or [] - return codex_models[0] if codex_models else None + if not codex_models: + return None + + def _gpt_version_key(mid: str) -> tuple[int, int, int, int]: + parsed = _parse_gpt(mid) + if parsed is None: + return (0, 0, 0, 0) + major, minor, patch, suffix = parsed + base_bonus = 1 if not suffix else 0 + return (major, minor or 0, patch or 0, base_bonus) + + return max(codex_models, key=_gpt_version_key) def launch(state: dict, tool_args: list[str]) -> None: diff --git a/tests/test_agent_codex.py b/tests/test_agent_codex.py index 3f6ac35..6c2d64a 100644 --- a/tests/test_agent_codex.py +++ b/tests/test_agent_codex.py @@ -91,6 +91,39 @@ def test_writes_ucode_profile_config_file(self, tmp_path, monkeypatch): assert doc["model"] == "gpt-5" assert "profiles" not in doc + def test_writes_openai_model_id_for_databricks_gpt_endpoint(self, tmp_path, monkeypatch): + config_path = tmp_path / ".codex" / "ucode.config.toml" + backup_path = tmp_path / "codex-ucode-config.backup.toml" + monkeypatch.setattr(codex, "CODEX_CONFIG_PATH", config_path) + monkeypatch.setattr(codex, "CODEX_BACKUP_PATH", backup_path) + monkeypatch.setattr(codex, "agent_version", lambda binary: "0.134.0") + monkeypatch.setattr(codex, "save_state", lambda state: None) + + codex.write_tool_config( + {"workspace": WS, "codex_models": ["databricks-gpt-5", "databricks-gpt-5-5"]} + ) + + doc = read_toml_safe(config_path) + assert doc["model"] == "gpt-5.5" + + def test_preserves_databricks_model_id_when_openai_id_is_incompatible( + self, tmp_path, monkeypatch + ): + config_path = tmp_path / ".codex" / "ucode.config.toml" + backup_path = tmp_path / "codex-ucode-config.backup.toml" + monkeypatch.setattr(codex, "CODEX_CONFIG_PATH", config_path) + monkeypatch.setattr(codex, "CODEX_BACKUP_PATH", backup_path) + monkeypatch.setattr(codex, "agent_version", lambda binary: "0.134.0") + monkeypatch.setattr(codex, "save_state", lambda state: None) + + codex.write_tool_config( + {"workspace": WS, "codex_models": ["databricks-gpt-5-2-codex"]}, + "databricks-gpt-5-2-codex", + ) + + doc = read_toml_safe(config_path) + assert doc["model"] == "databricks-gpt-5-2-codex" + def test_removes_legacy_ucode_profile_from_shared_config(self, tmp_path, monkeypatch): config_dir = tmp_path / ".codex" config_dir.mkdir() @@ -187,12 +220,36 @@ def test_unknown_version_uses_modern_layout(self, monkeypatch): class TestCodexDefaultModel: - def test_returns_first_codex_model(self): - assert codex.default_model({"codex_models": ["gpt-5", "gpt-4o"]}) == "gpt-5" + def test_picks_highest_semver_over_alpha(self): + state = {"codex_models": ["databricks-gpt-5", "databricks-gpt-5-5"]} + + assert codex.default_model(state) == "databricks-gpt-5-5" def test_none_when_no_models(self): assert codex.default_model({}) is None + def test_prefers_base_over_suffixed_same_version(self): + models = ["gpt-5-5-mini", "gpt-5-5", "gpt-5"] + + assert codex.default_model({"codex_models": models}) == "gpt-5-5" + + def test_namespaced_models_use_same_version_parser(self): + models = ["served-models/databricks-gpt-5", "served-models/databricks-gpt-5-5"] + + assert codex.default_model({"codex_models": models}) == "served-models/databricks-gpt-5-5" + + def test_openai_model_id_maps_databricks_naming(self): + assert codex._openai_model_id("databricks-gpt-5-5") == "gpt-5.5" + assert codex._openai_model_id("databricks-gpt-5-5-mini") == "gpt-5.5-mini" + assert codex._openai_model_id("databricks-gpt-4o") == "gpt-4o" + assert codex._openai_model_id("served-models/databricks-gpt-5-5") == "gpt-5.5" + assert codex._openai_model_id("gpt-5.5") == "gpt-5.5" + + def test_codex_model_id_preserves_openai_incompatible_models(self): + assert codex._codex_model_id("databricks-gpt-5-2-codex") == "databricks-gpt-5-2-codex" + assert codex._codex_model_id("databricks-gpt-5-4-nano") == "databricks-gpt-5-4-nano" + assert codex._codex_model_id("databricks-gpt-5-5") == "gpt-5.5" + class TestCodexValidateCmd: def test_starts_with_binary(self):