diff --git a/oximeter/instruments/src/kstat/mod.rs b/oximeter/instruments/src/kstat/mod.rs index e010545329e..daba5547c1f 100644 --- a/oximeter/instruments/src/kstat/mod.rs +++ b/oximeter/instruments/src/kstat/mod.rs @@ -94,6 +94,7 @@ mod sampler; pub use sampler::CollectionDetails; pub use sampler::ExpirationBehavior; pub use sampler::KstatSampler; +pub use sampler::KstatSemaphore; pub use sampler::TargetId; pub use sampler::TargetStatus; diff --git a/oximeter/instruments/src/kstat/sampler.rs b/oximeter/instruments/src/kstat/sampler.rs index bce4dd7a514..8b2a05fb17d 100644 --- a/oximeter/instruments/src/kstat/sampler.rs +++ b/oximeter/instruments/src/kstat/sampler.rs @@ -313,6 +313,37 @@ impl<'a> From> for KstatPath { pub(crate) const CREATION_TIME_PRUNE_INTERVAL: Duration = Duration::from_secs(60); +/// A semaphore to prevent XDE creations/deletions and kstat reads from +/// happening in parallel. +/// +/// This is a semaphore with one permit, and is cheaply cloneable. +/// +/// This is a temporary workaround for +/// https://github.com/oxidecomputer/opte/issues/758. See +/// https://github.com/oxidecomputer/omicron/issues/9211 for more details. +#[derive(Clone, Debug)] +pub struct KstatSemaphore { + semaphore: Arc>, +} + +impl KstatSemaphore { + /// Create a new semaphore with one permit. + pub fn new() -> Self { + Self { semaphore: Arc::new(Mutex::new(())) } + } + + /// Run some code guarded by the semaphore. + pub fn run(&self, f: F) -> T + where + F: FnOnce() -> T, + { + let permit = self.semaphore.lock().unwrap(); + let ret = f(); + drop(permit); + ret + } +} + /// Type which owns the `kstat` chain and samples each target on an interval. /// /// This type runs in a separate tokio task. As targets are added, it schedules @@ -324,6 +355,9 @@ pub(crate) const CREATION_TIME_PRUNE_INTERVAL: Duration = struct KstatSamplerWorker { log: Logger, + /// The semaphore used to prevent a deadlock within the illumos kernel. + semaphore: KstatSemaphore, + /// The kstat chain. ctl: Option, @@ -394,6 +428,7 @@ impl KstatSamplerWorker { /// Create a new sampler worker. fn new( log: Logger, + semaphore: KstatSemaphore, inbox: mpsc::Receiver, self_stat_queue: broadcast::Sender, samples: Arc>>>, @@ -402,6 +437,7 @@ impl KstatSamplerWorker { let ctl = Some(Ctl::new().map_err(Error::Kstat)?); let self_stats = hostname().map(self_stats::SelfStats::new); Ok(Self { + semaphore, ctl, log, targets: BTreeMap::new(), @@ -840,19 +876,20 @@ impl KstatSamplerWorker { // Fetch each interested kstat, and include the data and creation times // for each of them. - let kstats = ctl - .iter() - .filter(|kstat| sampled_kstat.target.interested(kstat)) - .map(|mut kstat| { - let data = ctl.read(&mut kstat).map_err(Error::Kstat)?; - let creation_time = Self::ensure_kstat_creation_time( - &self.log, - kstat, - &mut self.creation_times, - )?; - Ok((creation_time, kstat, data)) - }) - .collect::, _>>(); + let kstats = self.semaphore.run(|| { + ctl.iter() + .filter(|kstat| sampled_kstat.target.interested(kstat)) + .map(|mut kstat| { + let data = ctl.read(&mut kstat).map_err(Error::Kstat)?; + let creation_time = Self::ensure_kstat_creation_time( + &self.log, + kstat, + &mut self.creation_times, + )?; + Ok((creation_time, kstat, data)) + }) + .collect::, _>>() + }); match kstats { Ok(k) if !k.is_empty() => { sampled_kstat.time_of_last_collection = Some(now()); @@ -1230,13 +1267,14 @@ impl KstatSampler { pub const DEFAULT_SAMPLE_LIMIT: usize = 500; /// Create a new sampler. - pub fn new(log: &Logger) -> Result { - Self::with_sample_limit(log, Self::DEFAULT_SAMPLE_LIMIT) + pub fn new(log: &Logger, semaphore: KstatSemaphore) -> Result { + Self::with_sample_limit(log, semaphore, Self::DEFAULT_SAMPLE_LIMIT) } /// Create a new sampler with a sample limit. pub fn with_sample_limit( log: &Logger, + semaphore: KstatSemaphore, limit: usize, ) -> Result { let samples = Arc::new(Mutex::new(BTreeMap::new())); @@ -1245,6 +1283,7 @@ impl KstatSampler { let (outbox, inbox) = mpsc::channel(1); let worker = KstatSamplerWorker::new( log.new(o!("component" => "kstat-sampler-worker")), + semaphore, inbox, self_stat_tx, samples.clone(), diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index bd689156f88..57b7cff4374 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -35,6 +35,7 @@ use illumos_utils::zone::Api; use illumos_utils::zone::Zones; use omicron_common::FileKv; use omicron_common::address::Ipv6Subnet; +use oximeter_instruments::kstat::KstatSemaphore; use sled_agent_config_reconciler::ConfigReconcilerSpawnToken; use sled_hardware::DendriteAsic; use sled_hardware::SledMode; @@ -48,6 +49,7 @@ use tokio::sync::oneshot; pub(super) struct BootstrapAgentStartup { pub(super) config: Config, + pub(super) semaphore: KstatSemaphore, pub(super) global_zone_bootstrap_ip: Ipv6Addr, pub(super) base_log: Logger, pub(super) startup_log: Logger, @@ -136,6 +138,8 @@ impl BootstrapAgentStartup { let global_zone_bootstrap_ip = startup_networking.global_zone_bootstrap_ip; + let semaphore = KstatSemaphore::new(); + let service_manager = ServiceManager::new( &base_log, ddm_reconciler, @@ -155,6 +159,7 @@ impl BootstrapAgentStartup { Ok(Self { config, + semaphore, global_zone_bootstrap_ip, base_log, startup_log: log, diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 6b9ca4b890d..732f781b3dd 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -39,6 +39,7 @@ use omicron_ddm_admin_client::DdmError; use omicron_ddm_admin_client::types::EnableStatsRequest; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::RackInitUuid; +use oximeter_instruments::kstat::KstatSemaphore; use sled_agent_config_reconciler::ConfigReconcilerSpawnToken; use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::rack_init::RackInitializeRequestParams; @@ -175,6 +176,7 @@ impl Server { // fail to start. let BootstrapAgentStartup { config, + semaphore, global_zone_bootstrap_ip, base_log, startup_log, @@ -246,6 +248,7 @@ impl Server { let start_sled_agent_request = ledger.into_inner(); let sled_agent_server = start_sled_agent( &config, + semaphore.clone(), start_sled_agent_request, long_running_task_handles.clone(), config_reconciler_spawn_token, @@ -275,6 +278,7 @@ impl Server { // agent state. let inner = Inner { config, + semaphore, state, sled_init_rx, sled_reset_rx, @@ -356,6 +360,7 @@ impl From for StartError { #[allow(clippy::too_many_arguments)] async fn start_sled_agent( config: &SledConfig, + semaphore: KstatSemaphore, request: StartSledAgentRequest, long_running_task_handles: LongRunningTaskHandles, config_reconciler_spawn_token: ConfigReconcilerSpawnToken, @@ -407,6 +412,7 @@ async fn start_sled_agent( // Server does not exist, initialize it. let server = SledAgentServer::start( config, + semaphore, base_log.clone(), request.clone(), long_running_task_handles.clone(), @@ -493,6 +499,7 @@ async fn sled_config_paths( struct Inner { config: SledConfig, + semaphore: KstatSemaphore, state: SledAgentState, sled_init_rx: mpsc::Receiver<( StartSledAgentRequest, @@ -560,6 +567,7 @@ impl Inner { let response = match start_sled_agent( &self.config, + self.semaphore.clone(), request, self.long_running_task_handles.clone(), config_reconciler_spawn_token, @@ -686,7 +694,9 @@ impl Inner { sled_hardware::cleanup::delete_omicron_vnics(&log) .await .map_err(BootstrapError::Cleanup)?; - illumos_utils::opte::delete_all_xde_devices(&log)?; + // XDE deletions need to be protected by the semaphore. + self.semaphore + .run(|| illumos_utils::opte::delete_all_xde_devices(&log))?; Ok(()) } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 12e1c39adf1..60c4c7d01b2 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -12,12 +12,13 @@ use crate::instance_manager::{ }; use crate::metrics::MetricsRequestQueue; use crate::nexus::NexusClient; +use crate::port_manager::SledAgentPortManager; use crate::profile::*; use crate::zone_bundle::ZoneBundler; use chrono::Utc; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; -use illumos_utils::opte::{DhcpCfg, PortCreateParams, PortManager}; +use illumos_utils::opte::{DhcpCfg, PortCreateParams}; use illumos_utils::running_zone::{RunningZone, ZoneBuilderFactory}; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; use illumos_utils::zpool::ZpoolOrRamdisk; @@ -513,7 +514,7 @@ struct InstanceRunner { // Reference to the port manager for creating OPTE ports when starting the // instance - port_manager: PortManager, + port_manager: SledAgentPortManager, // Guest NIC and OPTE port information requested_nics: Vec, @@ -2274,6 +2275,7 @@ mod tests { use omicron_common::api::internal::shared::{DhcpConfig, SledIdentifiers}; use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::InternalZpoolUuid; + use oximeter_instruments::kstat::KstatSemaphore; use propolis_client::types::{ InstanceMigrateStatusResponse, InstanceStateMonitorResponse, }; @@ -2521,8 +2523,9 @@ mod tests { Etherstub("mystub".to_string()), illumos_utils::fakes::dladm::Dladm::new(), ); - let port_manager = PortManager::new( + let port_manager = SledAgentPortManager::new( log.new(o!("component" => "PortManager")), + KstatSemaphore::new(), Ipv6Addr::new(0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01), ); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index fa8a11c89d8..c7d9bf68ec7 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -8,13 +8,13 @@ use crate::instance::Instance; use crate::instance::VmmStateOwner; use crate::metrics::MetricsRequestQueue; use crate::nexus::NexusClient; +use crate::port_manager::SledAgentPortManager; use crate::vmm_reservoir::VmmReservoirManagerHandle; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; -use illumos_utils::opte::PortManager; use illumos_utils::running_zone::ZoneBuilderFactory; use omicron_common::api::external::ByteCount; use omicron_common::api::internal::nexus::SledVmmState; @@ -64,7 +64,7 @@ pub enum Error { pub(crate) struct InstanceManagerServices { pub nexus_client: NexusClient, pub vnic_allocator: VnicAllocator, - pub port_manager: PortManager, + pub port_manager: SledAgentPortManager, pub zone_bundler: ZoneBundler, pub zone_builder_factory: ZoneBuilderFactory, pub metrics_queue: MetricsRequestQueue, @@ -93,7 +93,7 @@ impl InstanceManager { log: Logger, nexus_client: NexusClient, vnic_allocator: VnicAllocator, - port_manager: PortManager, + port_manager: SledAgentPortManager, currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, available_datasets_rx: AvailableDatasetsReceiver, zone_bundler: ZoneBundler, @@ -123,7 +123,7 @@ impl InstanceManager { log: Logger, nexus_client: NexusClient, vnic_allocator: VnicAllocator, - port_manager: PortManager, + port_manager: SledAgentPortManager, currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, available_datasets_rx: AvailableDatasetsReceiver, zone_bundler: ZoneBundler, @@ -406,7 +406,7 @@ struct InstanceManagerRunner { jobs: BTreeMap, vnic_allocator: VnicAllocator, - port_manager: PortManager, + port_manager: SledAgentPortManager, currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, available_datasets_rx: AvailableDatasetsReceiver, zone_bundler: ZoneBundler, diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 2a808be4a29..8c588640845 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -27,6 +27,7 @@ mod instance_manager; mod long_running_tasks; mod metrics; mod nexus; +mod port_manager; mod probe_manager; mod profile; pub mod rack_setup; diff --git a/sled-agent/src/metrics.rs b/sled-agent/src/metrics.rs index e6342b27bec..99091779d09 100644 --- a/sled-agent/src/metrics.rs +++ b/sled-agent/src/metrics.rs @@ -11,6 +11,7 @@ use omicron_common::api::internal::shared::SledIdentifiers; use oximeter_instruments::kstat::CollectionDetails; use oximeter_instruments::kstat::Error as KstatError; use oximeter_instruments::kstat::KstatSampler; +use oximeter_instruments::kstat::KstatSemaphore; use oximeter_instruments::kstat::TargetId; use oximeter_instruments::kstat::link::SledDataLink; use oximeter_instruments::kstat::link::SledDataLinkTarget; @@ -358,10 +359,12 @@ impl MetricsManager { /// Construct a new metrics manager. pub fn new( log: &Logger, + semaphore: KstatSemaphore, identifiers: SledIdentifiers, address: Ipv6Addr, ) -> Result { - let sampler = KstatSampler::new(log).map_err(Error::Kstat)?; + let sampler = + KstatSampler::new(log, semaphore).map_err(Error::Kstat)?; let server = start_producer_server(&log, identifiers.sled_id, address)?; server .registry() diff --git a/sled-agent/src/port_manager.rs b/sled-agent/src/port_manager.rs new file mode 100644 index 00000000000..bf3b766085a --- /dev/null +++ b/sled-agent/src/port_manager.rs @@ -0,0 +1,131 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::net::{IpAddr, Ipv6Addr}; + +use illumos_utils::opte::{ + Error, Port, PortCreateParams, PortManager, PortTicket, +}; +use nexus_types::inventory::{NetworkInterfaceKind, SourceNatConfig}; +use omicron_common::api::{ + external, + internal::shared::{ + ExternalIpGatewayMap, ResolvedVpcFirewallRule, ResolvedVpcRouteSet, + ResolvedVpcRouteState, VirtualNetworkInterfaceHost, + }, +}; +use oximeter_instruments::kstat::KstatSemaphore; +use uuid::Uuid; + +/// A wrapper around illumos-utils's PortManager to allow for accesses to be +/// protected by a semaphore. +/// +/// All operations that result in an ioctl to XDE (i.e. that result in an upcall +/// to the XDE driver) are guarded by the semaphore. +/// +/// This is a temporary workaround for oxidecomputer/opte#758. +#[derive(Clone, Debug)] +pub(crate) struct SledAgentPortManager { + semaphore: KstatSemaphore, + port_manager: PortManager, +} + +impl SledAgentPortManager { + pub(crate) fn new( + log: slog::Logger, + semaphore: KstatSemaphore, + underlay_ip: Ipv6Addr, + ) -> Self { + let port_manager = PortManager::new(log, underlay_ip); + Self { semaphore, port_manager } + } + + pub(crate) fn underlay_ip(&self) -> &Ipv6Addr { + // no ioctl (just reads in-memory state) + self.port_manager.underlay_ip() + } + + pub(crate) fn create_port( + &self, + params: PortCreateParams, + ) -> Result<(Port, PortTicket), Error> { + // ioctl => must be guarded by the semaphore + self.semaphore.run(|| self.port_manager.create_port(params)) + } + + pub(crate) fn vpc_routes_list(&self) -> Vec { + // no ioctl (just reads in-memory state) + self.port_manager.vpc_routes_list() + } + + pub(crate) fn vpc_routes_ensure( + &self, + new_routes: Vec, + ) -> Result<(), Error> { + // ioctl => must be guarded by the semaphore + self.semaphore.run(|| self.port_manager.vpc_routes_ensure(new_routes)) + } + + pub(crate) fn set_eip_gateways( + &self, + mappings: ExternalIpGatewayMap, + ) -> bool { + // no ioctl (just an in-memory set), no need to be guarded + self.port_manager.set_eip_gateways(mappings) + } + + pub(crate) fn external_ips_ensure( + &self, + nic_id: Uuid, + nic_kind: NetworkInterfaceKind, + source_nat: Option, + ephemeral_ip: Option, + floating_ips: &[IpAddr], + ) -> Result<(), Error> { + // ioctl => must be guarded by the semaphore + self.semaphore.run(|| { + self.port_manager.external_ips_ensure( + nic_id, + nic_kind, + source_nat, + ephemeral_ip, + floating_ips, + ) + }) + } + + pub(crate) fn firewall_rules_ensure( + &self, + vni: external::Vni, + rules: &[ResolvedVpcFirewallRule], + ) -> Result<(), Error> { + // ioctl => must be guarded by the semaphore + self.semaphore + .run(|| self.port_manager.firewall_rules_ensure(vni, rules)) + } + + pub(crate) fn list_virtual_nics( + &self, + ) -> Result, Error> { + // ioctl => must be guarded by the semaphore (unfortunately, because + // this is a read operation) + self.semaphore.run(|| self.port_manager.list_virtual_nics()) + } + + pub(crate) fn set_virtual_nic_host( + &self, + mapping: &VirtualNetworkInterfaceHost, + ) -> Result<(), Error> { + // ioctl => must be guarded by the semaphore + self.semaphore.run(|| self.port_manager.set_virtual_nic_host(mapping)) + } + + pub(crate) fn unset_virtual_nic_host( + &self, + mapping: &VirtualNetworkInterfaceHost, + ) -> Result<(), Error> { + // ioctl => must be guarded by the semaphore + self.semaphore.run(|| self.port_manager.unset_virtual_nic_host(mapping)) + } +} diff --git a/sled-agent/src/probe_manager.rs b/sled-agent/src/probe_manager.rs index afef7e84652..8a51e1361a7 100644 --- a/sled-agent/src/probe_manager.rs +++ b/sled-agent/src/probe_manager.rs @@ -1,9 +1,10 @@ use crate::metrics::MetricsRequestQueue; use crate::nexus::NexusClient; +use crate::port_manager::SledAgentPortManager; use anyhow::{Result, anyhow}; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; -use illumos_utils::opte::{DhcpCfg, PortCreateParams, PortManager}; +use illumos_utils::opte::{DhcpCfg, PortCreateParams}; use illumos_utils::running_zone::{RunningZone, ZoneBuilderFactory}; use illumos_utils::zpool::ZpoolOrRamdisk; use nexus_client::types::{ProbeExternalIp, ProbeInfo}; @@ -59,7 +60,7 @@ pub(crate) struct ProbeManagerInner { log: Logger, sled_id: Uuid, vnic_allocator: VnicAllocator, - port_manager: PortManager, + port_manager: SledAgentPortManager, metrics_queue: MetricsRequestQueue, running_probes: Mutex, available_datasets_rx: AvailableDatasetsReceiver, @@ -72,7 +73,7 @@ impl ProbeManager { sled_id: Uuid, nexus_client: NexusClient, etherstub: Etherstub, - port_manager: PortManager, + port_manager: SledAgentPortManager, metrics_queue: MetricsRequestQueue, available_datasets_rx: AvailableDatasetsReceiver, log: Logger, diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 60d6d9f0fb8..cf62569642c 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -12,6 +12,7 @@ use crate::nexus::make_nexus_client; use crate::services::ServiceManager; use internal_dns_resolver::Resolver; use omicron_uuid_kinds::SledUuid; +use oximeter_instruments::kstat::KstatSemaphore; use sled_agent_config_reconciler::ConfigReconcilerSpawnToken; use sled_agent_types::sled::StartSledAgentRequest; use slog::Logger; @@ -37,6 +38,7 @@ impl Server { /// Starts a SledAgent server pub async fn start( config: &Config, + semaphore: KstatSemaphore, log: Logger, request: StartSledAgentRequest, long_running_tasks_handles: LongRunningTaskHandles, @@ -58,6 +60,7 @@ impl Server { let sled_agent = SledAgent::new( &config, + semaphore, log.clone(), nexus_client, request, diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 680c3ac40ba..4936073ef8e 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -32,6 +32,7 @@ use crate::bootstrap::early_networking::{ use crate::config::SidecarRevision; use crate::ddm_reconciler::DdmReconciler; use crate::metrics::MetricsRequestQueue; +use crate::port_manager::SledAgentPortManager; use crate::profile::*; use anyhow::anyhow; use camino::{Utf8Path, Utf8PathBuf}; @@ -47,9 +48,7 @@ use illumos_utils::dladm::{ Dladm, Etherstub, EtherstubVnic, GetSimnetError, PhysicalLink, }; use illumos_utils::link::{Link, VnicAllocator}; -use illumos_utils::opte::{ - DhcpCfg, Port, PortCreateParams, PortManager, PortTicket, -}; +use illumos_utils::opte::{DhcpCfg, Port, PortCreateParams, PortTicket}; use illumos_utils::running_zone::{ EnsureAddressError, InstalledZone, RunCommandError, RunningZone, ZoneBuilderFactory, @@ -588,7 +587,7 @@ pub struct ServiceManagerInner { // operational. struct SledAgentInfo { config: Config, - port_manager: PortManager, + port_manager: SledAgentPortManager, resolver: Resolver, underlay_address: Ipv6Addr, rack_id: Uuid, @@ -787,10 +786,10 @@ impl ServiceManager { /// Sets up "Sled Agent" information, including underlay info. /// /// Any subsequent calls after the first invocation return an error. - pub async fn sled_agent_started( + pub(crate) async fn sled_agent_started( &self, config: Config, - port_manager: PortManager, + port_manager: SledAgentPortManager, underlay_address: Ipv6Addr, rack_id: Uuid, rack_network_config: Option, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index f1aab5c64e0..4fecbcefde5 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -15,6 +15,7 @@ use crate::metrics::{MetricsManager, MetricsRequestQueue}; use crate::nexus::{ NexusClient, NexusNotifierHandle, NexusNotifierInput, NexusNotifierTask, }; +use crate::port_manager::SledAgentPortManager; use crate::probe_manager::ProbeManager; use crate::services::{self, ServiceManager, UnderlayInfo}; use crate::support_bundle::logs::SupportBundleLogs; @@ -29,7 +30,6 @@ use derive_more::From; use dropshot::HttpError; use futures::StreamExt; use futures::stream::FuturesUnordered; -use illumos_utils::opte::PortManager; use illumos_utils::running_zone::RunningZone; use illumos_utils::zpool::PathInPool; use itertools::Itertools as _; @@ -53,6 +53,7 @@ use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{ GenericUuid, MupdateOverrideUuid, PropolisUuid, SledUuid, }; +use oximeter_instruments::kstat::KstatSemaphore; use sled_agent_config_reconciler::{ ConfigReconcilerHandle, ConfigReconcilerSpawnToken, InternalDisks, InternalDisksReceiver, LedgerNewConfigError, LedgerTaskError, @@ -346,7 +347,7 @@ struct SledAgentInner { hardware: HardwareManager, // Component of Sled Agent responsible for managing OPTE ports. - port_manager: PortManager, + port_manager: SledAgentPortManager, // Other Oxide-controlled services running on this Sled. services: ServiceManager, @@ -395,8 +396,10 @@ pub struct SledAgent { impl SledAgent { /// Initializes a new [`SledAgent`] object. + #[expect(clippy::too_many_arguments)] pub async fn new( config: &Config, + semaphore: KstatSemaphore, log: Logger, nexus_client: NexusClient, request: StartSledAgentRequest, @@ -476,7 +479,9 @@ impl SledAgent { // Initialize the xde kernel driver with the underlay devices. let underlay_nics = underlay::find_nics(&config.data_links).await?; - illumos_utils::opte::initialize_xde_driver(&log, &underlay_nics)?; + semaphore.run(|| { + illumos_utils::opte::initialize_xde_driver(&log, &underlay_nics) + })?; // Start collecting metric data. let baseboard = long_running_task_handles.hardware_manager.baseboard(); @@ -487,8 +492,12 @@ impl SledAgent { revision: baseboard.revision(), serial: baseboard.identifier().to_string(), }; - let metrics_manager = - MetricsManager::new(&log, identifiers.clone(), *sled_address.ip())?; + let metrics_manager = MetricsManager::new( + &log, + semaphore.clone(), + identifiers.clone(), + *sled_address.ip(), + )?; // Start tracking the underlay physical links. for link in underlay::find_chelsio_links(&config.data_links).await? { @@ -508,8 +517,9 @@ impl SledAgent { } // Create the PortManager to manage all the OPTE ports on the sled. - let port_manager = PortManager::new( + let port_manager = SledAgentPortManager::new( parent_log.new(o!("component" => "PortManager")), + semaphore, *sled_address.ip(), );