Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

revive: Rework the instruction benchmark #7721

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions prdoc/pr_7721.prdoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
title: 'revive: Rework the instruction benchmark'
doc:
- audience: Runtime Dev
description: |-
Fixes https://github.com/paritytech/polkadot-sdk/issues/6157

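This fixes the last remaining benchmark that was not correct since it was too low level to be written in Rust. Instead, we opted to write the contract directly in PolkaVM assembly, which is assembled when the benchmark is set up.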

This PR changes the benchmark that determines the scaling from `ref_time` to PolkaVM `Gas` by benchmarking the absolute worst case of an instruction: One that causes two cache misses by touching two cache lines.

The contract itself is designed to be as simple as possible. It does random unaligned reads in a loop until the `r` (repetition) number is reached. The randomness is fully generated by the host and written to the guest's memory before the benchmark is run. This allows the benchmark to determine the influence of one loop iteration via linear regression.
crates:
- name: pallet-revive
bump: patch
- name: pallet-revive-fixtures
bump: major
5 changes: 5 additions & 0 deletions substrate/frame/revive/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ log = { workspace = true }
paste = { workspace = true }
polkavm = { version = "0.21.0", default-features = false }
polkavm-common = { version = "0.21.0", default-features = false, optional = true }
rand = { workspace = true, optional = true }
rand_pcg = { workspace = true, optional = true }
rlp = { workspace = true }
scale-info = { features = ["derive"], workspace = true }
serde = { features = [
Expand Down Expand Up @@ -96,6 +98,7 @@ std = [
"pallet-utility/std",
"polkavm-common?/std",
"polkavm/std",
"rand?/std",
"rlp/std",
"scale-info/std",
"secp256k1/std",
Expand Down Expand Up @@ -125,6 +128,8 @@ runtime-benchmarks = [
"pallet-transaction-payment/runtime-benchmarks",
"pallet-utility/runtime-benchmarks",
"polkavm-common/alloc",
"rand",
"rand_pcg",
"sp-consensus-aura",
"sp-consensus-babe",
"sp-consensus-slots",
Expand Down
43 changes: 0 additions & 43 deletions substrate/frame/revive/fixtures/contracts/instr_benchmark.rs

This file was deleted.

1 change: 0 additions & 1 deletion substrate/frame/revive/fixtures/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ pub mod bench {
use alloc::vec::Vec;
pub const DUMMY: &[u8] = fixture!("dummy");
pub const NOOP: &[u8] = fixture!("noop");
pub const INSTR: &[u8] = fixture!("instr_benchmark");

pub fn dummy_unique(replace_with: u32) -> Vec<u8> {
let mut dummy = DUMMY.to_vec();
Expand Down
5 changes: 4 additions & 1 deletion substrate/frame/revive/src/benchmarking/call_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,11 @@ where
ext: &'a mut StackExt<'a, T>,
module: WasmBlob<T>,
input: Vec<u8>,
aux_data_size: u32,
) -> PreparedCall<'a, StackExt<'a, T>> {
module.prepare_call(Runtime::new(ext, input), ExportedFunction::Call).unwrap()
module
.prepare_call(Runtime::new(ext, input), ExportedFunction::Call, aux_data_size)
.unwrap()
}

/// Add transient_storage
Expand Down
25 changes: 22 additions & 3 deletions substrate/frame/revive/src/benchmarking/code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,28 @@ impl WasmModule {
Self::new(bench_fixtures::NOOP.to_vec())
}

/// A contract code that executes some ALU instructions in a loop.
pub fn instr() -> Self {
Self::new(bench_fixtures::INSTR.to_vec())
/// A contract code that runs a counting loop, optionally doing one memory load per iteration.
///
/// The `call` entry point increments `t0` until it equals `a1`, so the caller bounds
/// the number of iterations through `a1`. When `do_load` is `true`, every iteration
/// additionally executes `a0 = u64 [a0]`, i.e. it replaces `a0` with the 8 byte value
/// stored at the address `a0` currently holds. Whether those loads are unaligned depends
/// on the addresses the caller placed in guest memory, not on this code.
///
/// When `do_load` is `false` the load line is left empty, which yields the bare loop.
///
/// # Panics
///
/// Panics if the embedded assembly fails to assemble. The text is static, so this
/// would be a bug in this function.
pub fn instr(do_load: bool) -> Self {
    let load = if do_load { "a0 = u64 [a0]" } else { "" };
    let text = alloc::format!(
        "
pub @deploy:
ret
pub @call:
@loop:
jump @done if t0 == a1
{load}
t0 = t0 + 1
jump @loop
@done:
ret
"
    );
    let code = polkavm_common::assembler::assemble(&text)
        .expect("the benchmark assembly is static and well-formed; qed");
    Self::new(code)
}

fn new(code: Vec<u8>) -> Self {
Expand Down
85 changes: 75 additions & 10 deletions substrate/frame/revive/src/benchmarking/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,6 @@ use sp_runtime::{
/// but might make the results less precise.
const API_BENCHMARK_RUNS: u32 = 1600;

/// How many runs we do per instruction benchmark.
///
/// Same rationale as for [`API_BENCHMARK_RUNS`]. The number is bigger because instruction
/// benchmarks are faster.
const INSTR_BENCHMARK_RUNS: u32 = 5000;

/// Number of layers in a Radix16 unbalanced trie.
const UNBALANCED_TRIE_LAYERS: u32 = 20;

Expand Down Expand Up @@ -549,7 +543,7 @@ mod benchmarks {
fn noop_host_fn(r: Linear<0, API_BENCHMARK_RUNS>) {
let mut setup = CallSetup::<T>::new(WasmModule::noop());
let (mut ext, module) = setup.ext();
let prepared = CallSetup::<T>::prepare_call(&mut ext, module, r.encode());
let prepared = CallSetup::<T>::prepare_call(&mut ext, module, r.encode(), 0);
#[block]
{
prepared.call().unwrap();
Expand Down Expand Up @@ -2004,11 +1998,82 @@ mod benchmarks {
}

// Benchmark the execution of instructions.
//
// It benchmarks the absolute worst case by allocating a lot of memory
// and then accessing it so that each instruction generates two cache misses.
#[benchmark(pov_mode = Ignored)]
fn instr(r: Linear<0, 10_000>) {
use rand::{seq::SliceRandom, SeedableRng};
use rand_pcg::Pcg64;

// Ideally, this needs to be bigger than the cache.
const MEMORY_SIZE: u64 = sp_core::MAX_POSSIBLE_ALLOCATION as u64;

// This is benchmarked for x86-64.
const CACHE_LINE_SIZE: u64 = 64;

// An 8 byte load from this misalignment will reach into the subsequent line.
const MISALIGNMENT: u64 = 60;

// We only need one address per cache line.
// -1 because we skip the first address
const NUM_ADDRESSES: u64 = (MEMORY_SIZE - MISALIGNMENT) / CACHE_LINE_SIZE - 1;

assert!(
u64::from(r) <= NUM_ADDRESSES / 2,
"If we do too many iterations we run into the risk of loading from warm cache lines",
);

let mut setup = CallSetup::<T>::new(WasmModule::instr(true));
let (mut ext, module) = setup.ext();
let mut prepared =
CallSetup::<T>::prepare_call(&mut ext, module, Vec::new(), MEMORY_SIZE as u32);

assert!(
u64::from(prepared.aux_data_base()) & (CACHE_LINE_SIZE - 1) == 0,
"aux data base must be cache aligned"
);

// Addresses data will be located inside the aux data.
let misaligned_base = u64::from(prepared.aux_data_base()) + MISALIGNMENT;

// Create all possible addresses and shuffle them. This makes sure
// the accesses are random but no address is accessed more than once.
// we skip the first address since it is our entry point
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// we skip the first address since it is out entry point
// we skip the first address since it is our entry point

let mut addresses = Vec::with_capacity(NUM_ADDRESSES as usize);
for i in 1..NUM_ADDRESSES {
let addr = (misaligned_base + i * CACHE_LINE_SIZE).to_le_bytes();
addresses.push(addr);
}
let mut rng = Pcg64::seed_from_u64(1337);
addresses.shuffle(&mut rng);

// The addresses need to be padded to be one cache line apart.
let mut memory = Vec::with_capacity((NUM_ADDRESSES * CACHE_LINE_SIZE) as usize);
for address in addresses {
memory.extend_from_slice(&address);
memory.resize(memory.len() + CACHE_LINE_SIZE as usize - 8, 0);
}

// Copies `memory` to `aux_data_base + MISALIGNMENT`.
// Sets `a0 = aux_data_base + MISALIGNMENT` and `a1 = r`.
prepared
.setup_aux_data(memory.as_slice(), MISALIGNMENT as u32, r.into())
.unwrap();

#[block]
{
prepared.call().unwrap();
}
}

#[benchmark(pov_mode = Ignored)]
fn instr(r: Linear<0, INSTR_BENCHMARK_RUNS>) {
let mut setup = CallSetup::<T>::new(WasmModule::instr());
fn instr_empty_loop(r: Linear<0, 100_000>) {
let mut setup = CallSetup::<T>::new(WasmModule::instr(false));
let (mut ext, module) = setup.ext();
let prepared = CallSetup::<T>::prepare_call(&mut ext, module, r.encode());
let mut prepared = CallSetup::<T>::prepare_call(&mut ext, module, Vec::new(), 0);
prepared.setup_aux_data(&[], 0, r.into()).unwrap();

#[block]
{
prepared.call().unwrap();
Expand Down
11 changes: 6 additions & 5 deletions substrate/frame/revive/src/gas.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,12 @@ impl<T: Config> EngineMeter<T> {

/// How much ref time does each PolkaVM gas correspond to.
fn ref_time_per_fuel() -> u64 {
// We execute 6 different instructions therefore we have to divide the actual
// computed gas costs by 6 to have a rough estimate as to how expensive each
// single executed instruction is going to be.
let instr_cost = T::WeightInfo::instr(1).saturating_sub(T::WeightInfo::instr(0)).ref_time();
instr_cost / 6
let loop_iteration =
T::WeightInfo::instr(1).saturating_sub(T::WeightInfo::instr(0)).ref_time();
let empty_loop_iteration = T::WeightInfo::instr_empty_loop(1)
.saturating_sub(T::WeightInfo::instr_empty_loop(0))
.ref_time();
loop_iteration.saturating_sub(empty_loop_iteration)
}
}

Expand Down
33 changes: 32 additions & 1 deletion substrate/frame/revive/src/wasm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,13 +307,43 @@ where
let _ = self.runtime.ext().gas_meter_mut().sync_from_executor(self.instance.gas())?;
exec_result
}

/// Returns the guest memory address at which the aux data region starts.
///
/// The value is read from the memory map of the instantiated module.
#[cfg(feature = "runtime-benchmarks")]
pub fn aux_data_base(&self) -> u32 {
    let memory_map = self.instance.module().memory_map();
    memory_map.aux_data_address()
}

/// Writes `data` into the aux data region and primes the argument registers.
///
/// `data` is copied to the guest address `aux_data_base + offset` (saturating on
/// overflow). Afterwards `a0` holds that address, i.e. the start of `data` inside
/// the aux data, and `a1` holds the value passed in.
///
/// # Errors
///
/// Returns `Error::CodeRejected` if writing to the guest memory fails.
///
/// Only used in benchmarking so far.
#[cfg(feature = "runtime-benchmarks")]
pub fn setup_aux_data(&mut self, data: &[u8], offset: u32, a1: u64) -> DispatchResult {
    let data_addr = self.aux_data_base().saturating_add(offset);

    if let Err(err) = self.instance.write_memory(data_addr, data) {
        log::debug!(target: LOG_TARGET, "failed to write aux data: {err:?}");
        return Err(Error::<E::T>::CodeRejected.into());
    }

    // The guest finds its data through `a0`; `a1` is passed through unchanged.
    self.instance.set_reg(polkavm::Reg::A0, u64::from(data_addr));
    self.instance.set_reg(polkavm::Reg::A1, a1);
    Ok(())
}
}

impl<T: Config> WasmBlob<T> {
/// Compile and instantiate contract.
///
/// `aux_data_size` is only used for runtime benchmarks. Real contracts
/// don't make use of this buffer. Hence this should not be set to anything
/// other than `0` when not used for benchmarking.
pub fn prepare_call<E: Ext<T = T>>(
self,
mut runtime: Runtime<E, polkavm::RawInstance>,
entry_point: ExportedFunction,
aux_data_size: u32,
) -> Result<PreparedCall<E>, ExecError> {
let mut config = polkavm::Config::default();
config.set_backend(Some(polkavm::BackendKind::Interpreter));
Expand All @@ -332,6 +362,7 @@ impl<T: Config> WasmBlob<T> {
module_config.set_page_size(limits::PAGE_SIZE);
module_config.set_gas_metering(Some(polkavm::GasMeteringKind::Sync));
module_config.set_allow_sbrk(false);
module_config.set_aux_data_size(aux_data_size);
let module = polkavm::Module::new(&engine, &module_config, self.code.into_inner().into())
.map_err(|err| {
log::debug!(target: LOG_TARGET, "failed to create polkavm module: {err:?}");
Expand Down Expand Up @@ -375,7 +406,7 @@ where
function: ExportedFunction,
input_data: Vec<u8>,
) -> ExecResult {
let prepared_call = self.prepare_call(Runtime::new(ext, input_data), function)?;
let prepared_call = self.prepare_call(Runtime::new(ext, input_data), function, 0)?;
prepared_call.call()
}

Expand Down
Loading
Loading