From 0aa3d773ab64a193df0c64d799f46638818e19fa Mon Sep 17 00:00:00 2001
From: Dario Gonzalez
Date: Mon, 22 Jul 2024 16:13:34 -0700
Subject: [PATCH] Add record keeping for debugging

This PR has the enclave runner keep track of all of the usercalls that
enclaves make over their lifetimes. This includes:
- The total number of times each async usercall has been made
- How many of each sync usercall each enclave TCS has made
- For `wait`s and `send`s, additional grouping by event mask
---
 intel-sgx/enclave-runner/src/lib.rs           |   5 +-
 intel-sgx/enclave-runner/src/stats.rs         | 211 ++++++++++++++++++
 intel-sgx/enclave-runner/src/usercalls/abi.rs |  10 +
 intel-sgx/enclave-runner/src/usercalls/mod.rs |  53 +++--
 4 files changed, 262 insertions(+), 17 deletions(-)
 create mode 100644 intel-sgx/enclave-runner/src/stats.rs

diff --git a/intel-sgx/enclave-runner/src/lib.rs b/intel-sgx/enclave-runner/src/lib.rs
index 6145c627..29dedb6e 100644
--- a/intel-sgx/enclave-runner/src/lib.rs
+++ b/intel-sgx/enclave-runner/src/lib.rs
@@ -4,8 +4,7 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-#![allow(non_local_definitions)] // Required by failure
-#![deny(warnings)]
+#![feature(macro_metavar_expr)]
 #![doc(
     html_logo_url = "https://edp.fortanix.com/img/docs/edp-logo.svg",
     html_favicon_url = "https://edp.fortanix.com/favicon.ico",
@@ -16,6 +15,8 @@ mod command;
 mod library;
 mod loader;
 mod tcs;
+
+pub mod stats;
 pub mod usercalls;
 
 pub use crate::command::Command;
diff --git a/intel-sgx/enclave-runner/src/stats.rs b/intel-sgx/enclave-runner/src/stats.rs
new file mode 100644
index 00000000..fea5ac6b
--- /dev/null
+++ b/intel-sgx/enclave-runner/src/stats.rs
@@ -0,0 +1,211 @@
+use std::collections::HashMap;
+use std::convert::TryInto;
+use std::fmt::{Write, Error as FmtError};
+use std::result::Result as StdResult;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::{Arc, Mutex};
+
+use crate::usercalls::*;
+
+use fortanix_sgx_abi::*;
+
+use lazy_static::lazy_static;
+
+lazy_static! {
+    static ref USERCALL_COUNTERS: [AtomicUsize; 17] = [
+        AtomicUsize::default(),
+        AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(),
+        AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(),
+        AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(),
+        AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(),
+    ];
+    static ref TCS_MAPPINGS: Arc<Mutex<HashMap<usize, TcsStats>>> = Arc::new(Mutex::new(HashMap::new()));
+}
+
+#[derive(Clone, Default, Debug)]
+pub struct TcsStats {
+    // Each index corresponds to that usercall number
+    sync_calls: [usize; 17],
+    // Indexed by the event mask waited on; there are 16 entries because there are 4 possible events.
+    // Each tuple counts waits with WAIT_NO, WAIT_INDEFINITE, and any other timeout, respectively.
+    waits: [(usize, usize, usize); 16],
+    // Indexed by the event mask that was sent
+    sends: [usize; 16],
+}
+
+pub struct RunnerStats {
+    pub sync_calls: HashMap<usize, TcsStats>,
+    pub async_calls: [usize; 17],
+}
+
+impl RunnerStats {
+    /// The total number of sync usercalls that have been handled as of this snapshot
+    pub fn total_sync_calls(&self) -> usize {
+        self.sync_calls.iter()
+            .map(|(_, stats)| stats.sync_calls.iter().sum::<usize>())
+            .sum()
+    }
+
+    /// The total number of async usercalls that have been handled as of this snapshot
+    pub fn total_async_calls(&self) -> usize {
+        self.async_calls.iter().sum::<usize>()
+    }
+
+    /// The total number of usercalls that have been handled as of this snapshot
+    pub fn total_calls(&self) -> usize {
+        self.total_sync_calls() + self.total_async_calls()
+    }
+
+    // A default, human-readable formatting of this information
+    pub fn pretty_format(&self) -> StdResult<String, FmtError> {
+        let mut out = String::new();
+        let mut counts = USERCALL_COUNTERS.iter()
+            .enumerate()
+            .map(|(i, counter)| (i, counter.load(Ordering::Relaxed)))
+            .filter(|(_, counter)| *counter > 0)
+            .map(|(i, counter)| format!("{:?}: {}", abi::UsercallList::from_u64(i as _), counter))
+            .collect::<Vec<_>>()
+            .join(", ");
+
+        if counts.is_empty() {
+            counts = "None".to_owned();
+        }
+
+        writeln!(out, "Async usercall counts: {}", counts)?;
+        writeln!(out, "Sync usercall count mappings:")?;
+        for (addr, stats) in TCS_MAPPINGS.lock().map_err(|_| FmtError)?.iter() {
+            if stats.should_print() {
+                writeln!(out, "Address: 0x{:0>16x}", addr)?;
+                write!(out, "{}", stats.format()?)?;
+            }
+        }
+
+        Ok(out)
+    }
+}
+
+fn mask_to_str(ev: usize) -> String {
+    let mut events = vec![];
+    let ev = ev as u64;
+    if ev & EV_CANCELQ_NOT_FULL != 0 {
+        events.push("CANCELQ_NOT_FULL");
+    }
+    if ev & EV_RETURNQ_NOT_EMPTY != 0 {
+        events.push("RETURNQ_NOT_EMPTY");
+    }
+    if ev & EV_USERCALLQ_NOT_FULL != 0 {
+        events.push("USERCALLQ_NOT_FULL");
+    }
+    if ev & EV_UNPARK != 0 {
+        events.push("UNPARK");
+    }
+    if events.is_empty() {
+        events.push("NONE");
+    }
+    events.join(" | ")
+}
+
+pub(crate) fn record_usercall(
+    tcs_address: Option<TcsAddress>,
+    p1: u64,
+    p2: u64,
+    p3: u64
+) {
+    // Map sync usercalls to the TCS that made them
+    if let Some(tcs_address) = tcs_address {
+        let mut mappings = TCS_MAPPINGS.lock().expect("poisoned mutex");
+        let entry = mappings.entry(tcs_address.0).or_default();
+        // Count the usercall by its number
+        entry.sync_calls[p1 as usize] += 1;
+        if p1 == 11 {
+            // `wait` (usercall 11): group by event mask and timeout
+            let mask = &mut entry.waits[p2 as usize];
+            match p3 {
+                WAIT_NO => mask.0 += 1,
+                WAIT_INDEFINITE => mask.1 += 1,
+                _ => mask.2 += 1,
+            }
+        } else if p1 == 12 {
+            // `send` (usercall 12): group by event mask
+            entry.sends[p2 as usize] += 1;
+        }
+    } else {
+        // For async calls where we don't know the TCS, just store aggregates
+        USERCALL_COUNTERS[p1 as usize].fetch_add(1, Ordering::Relaxed);
+    }
+}
+
+impl TcsStats {
+    fn should_print(&self) -> bool {
+        self.sync_calls.iter().sum::<usize>() > 10
+    }
+
+    pub fn format(&self) -> StdResult<String, FmtError> {
+        let mut out = String::new();
+        writeln!(out,
+            " Sync Totals: {}",
+            self.sync_calls.iter()
+                .enumerate()
+                .filter(|(_, cnt)| **cnt > 0)
+                .map(|(idx, cnt)| {
+                    format!("{:?}: {}", abi::UsercallList::from_u64(idx as u64), cnt)
+                })
+                .collect::<Vec<_>>()
+                .join(", ")
+        )?;
+        writeln!(out,
+            " Wait Totals: {}",
+            self.waits.iter()
+                .enumerate()
+                .filter(|(_, (a, b, c))| a + b + c > 0)
+                .map(|(idx, cnt)| {
+                    let mut out = format!("{}: ", mask_to_str(idx));
+                    let mut masks = Vec::new();
+                    if cnt.0 > 0 {
+                        masks.push(format!("WAIT_NO: {}", cnt.0));
+                    }
+                    if cnt.1 > 0 {
+                        masks.push(format!("WAIT_INDEFINITE: {}", cnt.1));
+                    }
+                    if cnt.2 > 0 {
+                        masks.push(format!("OTHER: {}", cnt.2));
+                    }
+                    out.push_str(&masks.join(", "));
+                    out
+                })
+                .collect::<Vec<_>>()
+                .join("\n ")
+        )?;
+        writeln!(out,
+            " Send Totals: {}",
+            self.sends.iter()
+                .enumerate()
+                .filter(|(_, cnt)| **cnt > 0)
+                .map(|(idx, cnt)| {
+                    format!("{}: {}", mask_to_str(idx), cnt)
+                })
+                .collect::<Vec<_>>()
+                .join(", ")
+        )?;
+        Ok(out)
+    }
+}
+
+pub fn get_stats() -> RunnerStats {
+    let async_calls: [usize; 17] = USERCALL_COUNTERS.iter()
+        .map(|c| c.load(Ordering::Relaxed))
+        .collect::<Vec<usize>>()
+        .try_into()
+        .unwrap();
+
+    assert!(async_calls.len() == 17);
+
+    let sync_calls = {
+        TCS_MAPPINGS.lock().expect("poison error").clone()
+    };
+
+    RunnerStats {
+        sync_calls,
+        async_calls,
+    }
+}
diff --git a/intel-sgx/enclave-runner/src/usercalls/abi.rs b/intel-sgx/enclave-runner/src/usercalls/abi.rs
index e36a292e..6725e37d 100644
--- a/intel-sgx/enclave-runner/src/usercalls/abi.rs
+++ b/intel-sgx/enclave-runner/src/usercalls/abi.rs
@@ -36,6 +36,7 @@ pub(super) trait ReturnValue {
 macro_rules! define_usercalls {
     // Using `$r:tt` because `$r:ty` doesn't match ! in `dispatch_return_type`
     ($(fn $f:ident($($n:ident: $t:ty),*) $(-> $r:tt)*; )*) => {
+        #[derive(Debug)]
         #[repr(C)]
         #[allow(non_camel_case_types)]
         pub(crate) enum UsercallList {
@@ -43,6 +44,15 @@ macro_rules! define_usercalls {
             $($f,)*
         }
 
+        impl UsercallList {
+            pub(crate) fn from_u64(a: u64) -> Self {
+                match a.wrapping_sub(1) {
+                    $( ${index()} => UsercallList::$f, )*
+                    _ => Self::__enclave_usercalls_invalid,
+                }
+            }
+        }
+
         pub(super) trait Usercalls <'future>: Sized {
             $(fn $f (self, $($n: $t),*) -> dispatch_return_type!($(-> $r )* 'future);)*
             fn other(self, n: u64, a1: u64, a2: u64, a3: u64, a4: u64) -> (Self, DispatchResult) {
diff --git a/intel-sgx/enclave-runner/src/usercalls/mod.rs b/intel-sgx/enclave-runner/src/usercalls/mod.rs
index a1340ddc..a9d3f33c 100644
--- a/intel-sgx/enclave-runner/src/usercalls/mod.rs
+++ b/intel-sgx/enclave-runner/src/usercalls/mod.rs
@@ -37,6 +37,7 @@ use sgxs::loader::Tcs as SgxsTcs;
 use crate::loader::{EnclavePanic, ErasedTcs};
 use crate::tcs::{self, CoResult, ThreadResult};
+use crate::stats::record_usercall;
 use self::abi::dispatch;
 use self::abi::ReturnValue;
 use self::abi::UsercallList;
@@ -56,9 +57,9 @@ lazy_static! {
     // is not public.
const EV_ABORT: u64 = 0b0000_0000_0001_0000; -const USERCALL_QUEUE_SIZE: usize = 16; -const RETURN_QUEUE_SIZE: usize = 1024; -const CANCEL_QUEUE_SIZE: usize = USERCALL_QUEUE_SIZE * 2; +const USERCALL_QUEUE_SIZE: usize = 32768; +const RETURN_QUEUE_SIZE: usize = 32768; +const CANCEL_QUEUE_SIZE: usize = USERCALL_QUEUE_SIZE; enum UsercallSendData { Sync(ThreadResult, RunningTcs, RefCell<[u8; 1024]>), @@ -470,7 +471,7 @@ impl EnclaveAbort { } #[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Ord, PartialOrd)] -struct TcsAddress(usize); +pub(crate) struct TcsAddress(pub usize); impl ErasedTcs { fn address(&self) -> TcsAddress { @@ -551,8 +552,16 @@ impl PendingEvents { } let it = std::iter::once((EV_ABORT, &self.abort)) - .chain(self.counts.iter().enumerate().map(|(ev, sem)| (ev as u64, sem)).filter(|&(ev, _)| ev & event_mask != 0)) - .map(|(ev, sem)| sem.acquire().map(move |permit| (ev, permit)).boxed()); + .chain( + self.counts.iter() + .enumerate() + .map(|(ev, sem)| (ev as u64, sem)) + .filter(|&(ev, _)| ev & event_mask != 0) + ).map(|(ev, sem)| { + sem.acquire() + .map(move |permit| (ev, permit)) + .boxed() + }); let ((ev, permit), _, _) = futures::future::select_all(it).await; @@ -773,14 +782,16 @@ impl EnclaveState { UsercallHandleData::Async(_, ref mut notifier_rx, _) => notifier_rx.take(), _ => None, }; - let (parameters, mode, tcs) = match handle_data { + let ((p1, p2, p3, p4, p5), mode, tcs) = match handle_data { UsercallHandleData::Sync(ref usercall, ref mut tcs, _) => (usercall.parameters(), tcs.mode.into(), Some(tcs)), UsercallHandleData::Async(ref usercall, _, _) => (usercall.data.into(), ReturnSource::AsyncUsercall, None), }; + + record_usercall(tcs.as_ref().map(|tcs| tcs.tcs_address), p1, p2, p3); + let mut input = IOHandlerInput { enclave: enclave.clone(), tcs, work_sender: &work_sender }; let handler = Handler(&mut input); let result = { - let (p1, p2, p3, p4, p5) = parameters; match notifier_rx { None => dispatch(handler, p1, p2, p3, p4, p5).await.1, Some(notifier_rx) => { @@ -932,7 +943,10 @@ impl EnclaveState { None } else { let (notifier_tx, notifier_rx) = oneshot::channel(); - usercall_event_tx_clone.send(UsercallEvent::Started(usercall.id, notifier_tx)).ok().expect("failed to send usercall event"); + usercall_event_tx_clone + .send(UsercallEvent::Started(usercall.id, notifier_tx)) + .ok() + .expect("failed to send usercall event"); Some(notifier_rx) }; let _ = io_queue_send.send(UsercallSendData::Async(usercall, notifier_rx)); @@ -1068,15 +1082,24 @@ impl EnclaveState { .send(start_work) .expect("Work sender couldn't send data to receiver"); - let join_handlers = - create_worker_threads(num_of_worker_threads, work_receiver, io_queue_send.clone()); + let join_handles = create_worker_threads( + num_of_worker_threads, + work_receiver, + io_queue_send.clone() + ); + // main syscall polling loop - let main_result = - EnclaveState::syscall_loop(enclave.clone(), io_queue_receive, io_queue_send, work_sender); + let main_result = EnclaveState::syscall_loop( + enclave.clone(), + io_queue_receive, + io_queue_send, + work_sender + ); - for handler in join_handlers { - let _ = handler.join(); + for handle in join_handles { + let _ = handle.join(); } + return main_result; }
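
For reviewers who want to try the patch out, here is a minimal sketch (not part of the diff) of how an embedder might consume the new `stats` module. The background thread, one-minute interval, use of `eprintln!`, and the `spawn_stats_dumper` helper are illustrative assumptions; only `enclave_runner::stats::get_stats`, `RunnerStats::total_calls`, and `RunnerStats::pretty_format` come from this patch.

    // Illustrative only: periodically dump the usercall statistics that the
    // enclave runner collects while an enclave is running.
    use std::thread;
    use std::time::Duration;

    use enclave_runner::stats::get_stats;

    fn spawn_stats_dumper() {
        thread::spawn(|| loop {
            thread::sleep(Duration::from_secs(60));
            // Take a snapshot of the counters accumulated so far.
            let stats = get_stats();
            eprintln!("usercalls so far: {}", stats.total_calls());
            // pretty_format() returns the "stock" report defined in stats.rs.
            if let Ok(report) = stats.pretty_format() {
                eprintln!("{}", report);
            }
        });
    }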