Skip to content

Commit

Permalink
Address feedback from @CPerezz, part 3
Browse files Browse the repository at this point in the history
  • Loading branch information
ed255 committed Feb 2, 2024
1 parent bbf034a commit 9927c82
Show file tree
Hide file tree
Showing 8 changed files with 19 additions and 117 deletions.
1 change: 1 addition & 0 deletions halo2_backend/src/plonk/keygen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ where

// Compute l_0(X)
// TODO: this can be done more efficiently
// https://github.com/privacy-scaling-explorations/halo2/issues/269
let mut l0 = vk.domain.empty_lagrange();
l0[0] = C::Scalar::ONE;
let l0 = vk.domain.lagrange_to_coeff(l0);
Expand Down
2 changes: 1 addition & 1 deletion halo2_common/src/plonk/keygen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use halo2_middleware::plonk::Assigned;
pub struct Assembly<F: Field> {
pub k: u32,
pub fixed: Vec<Polynomial<Assigned<F>, LagrangeCoeff>>,
pub permutation: permutation::AssemblyFront,
pub permutation: permutation::Assembly,
pub selectors: Vec<Vec<bool>>,
// A range of available rows for assignment and copies.
pub usable_rows: Range<usize>,
Expand Down
9 changes: 4 additions & 5 deletions halo2_common/src/plonk/permutation.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
//! Implementation of permutation argument.

use crate::plonk::{Column, Error};
use halo2_middleware::circuit::Any;
use halo2_middleware::permutation::{ArgumentV2, Cell};
use halo2_middleware::circuit::{Any, Cell};
use halo2_middleware::permutation::ArgumentV2;

/// A permutation argument.
#[derive(Default, Debug, Clone)]
Expand Down Expand Up @@ -70,15 +70,14 @@ impl Argument {
}
}

// TODO: Move to frontend
#[derive(Clone, Debug)]
pub struct AssemblyFront {
pub struct Assembly {
pub n: usize,
pub columns: Vec<Column<Any>>,
pub copies: Vec<(Cell, Cell)>,
}

impl AssemblyFront {
impl Assembly {
pub fn new(n: usize, p: &Argument) -> Self {
Self {
n,
Expand Down
2 changes: 1 addition & 1 deletion halo2_frontend/src/circuit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ pub fn compile_circuit<F: Field, ConcreteCircuit: Circuit<F>>(
let mut assembly = halo2_common::plonk::keygen::Assembly {
k,
fixed: vec![Polynomial::new_empty(n, F::ZERO.into()); cs.num_fixed_columns],
permutation: permutation::AssemblyFront::new(n, &cs.permutation),
permutation: permutation::Assembly::new(n, &cs.permutation),
selectors: vec![vec![false; n]; cs.num_selectors],
usable_rows: 0..n - (cs.blinding_factors() + 1),
_marker: std::marker::PhantomData,
Expand Down
6 changes: 3 additions & 3 deletions halo2_frontend/src/dev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ pub struct MockProver<F: Field> {

challenges: Vec<F>,

permutation: permutation::AssemblyFront,
permutation: permutation::Assembly,

// A range of available rows for assignment and copies.
usable_rows: Range<usize>,
Expand Down Expand Up @@ -673,7 +673,7 @@ impl<F: FromUniformBytes<64> + Ord> MockProver<F> {
};
cs.num_advice_columns
];
let permutation = permutation::AssemblyFront::new(n, &cs.permutation);
let permutation = permutation::Assembly::new(n, &cs.permutation);
let constants = cs.constants.clone();

// Use hash chain to derive deterministic challenges for testing
Expand Down Expand Up @@ -1243,7 +1243,7 @@ impl<F: FromUniformBytes<64> + Ord> MockProver<F> {
}

/// Returns the permutation argument (`Assembly`) used within a MockProver instance.
pub fn permutation(&self) -> &permutation::AssemblyFront {
pub fn permutation(&self) -> &permutation::Assembly {
&self.permutation
}
}
Expand Down
8 changes: 8 additions & 0 deletions halo2_middleware/src/circuit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ pub struct CompiledCircuitV2<F: Field> {
// trait is implemented for Any which is used in the backend. It would be great to find a way to
// move all the `query_cell` implementations to the frontend and have them return `Expression`,
// while keeping `Any` in the middleware.
// https://github.com/privacy-scaling-explorations/halo2/issues/270
/// A column type
pub trait ColumnType:
'static + Sized + Copy + std::fmt::Debug + PartialEq + Eq + Into<Any>
Expand All @@ -184,6 +185,13 @@ pub struct ColumnMid {
pub column_type: Any,
}

/// A cell identifies a position in the plonkish matrix identified by a column and a row offset.
#[derive(Clone, Debug)]
pub struct Cell {
pub column: ColumnMid,
pub row: usize,
}

/// An advice column
#[derive(Default, Clone, Copy, Eq, PartialEq, Hash)]
pub struct Advice {
Expand Down
99 changes: 0 additions & 99 deletions halo2_middleware/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,102 +7,3 @@ pub mod poly;
pub mod shuffle;

pub use ff;

// TODO: Remove with permutation::Argument simplification
pub mod multicore {
pub use rayon::{
current_num_threads,
iter::{IndexedParallelIterator, IntoParallelRefIterator},
iter::{IntoParallelIterator, IntoParallelRefMutIterator, ParallelIterator},
join, scope,
slice::ParallelSliceMut,
Scope,
};

pub trait TryFoldAndReduce<T, E> {
/// Implements `iter.try_fold().try_reduce()` for `rayon::iter::ParallelIterator`,
/// falling back on `Iterator::try_fold` when the `multicore` feature flag is
/// disabled.
/// The `try_fold_and_reduce` function can only be called by a iter with
/// `Result<T, E>` item type because the `fold_op` must meet the trait
/// bounds of both `try_fold` and `try_reduce` from rayon.
fn try_fold_and_reduce(
self,
identity: impl Fn() -> T + Send + Sync,
fold_op: impl Fn(T, Result<T, E>) -> Result<T, E> + Send + Sync,
) -> Result<T, E>;
}

impl<T, E, I> TryFoldAndReduce<T, E> for I
where
T: Send + Sync,
E: Send + Sync,
I: rayon::iter::ParallelIterator<Item = Result<T, E>>,
{
fn try_fold_and_reduce(
self,
identity: impl Fn() -> T + Send + Sync,
fold_op: impl Fn(T, Result<T, E>) -> Result<T, E> + Send + Sync,
) -> Result<T, E> {
self.try_fold(&identity, &fold_op)
.try_reduce(&identity, |a, b| fold_op(a, Ok(b)))
}
}
}

// TODO: Remove with permutation::Argument simplification
pub mod arithmetic {
use super::multicore;

/// This utility function will parallelize an operation that is to be
/// performed over a mutable slice.
pub fn parallelize<T: Send, F: Fn(&mut [T], usize) + Send + Sync + Clone>(v: &mut [T], f: F) {
// Algorithm rationale:
//
// Using the stdlib `chunks_mut` will lead to severe load imbalance.
// From https://github.com/rust-lang/rust/blob/e94bda3/library/core/src/slice/iter.rs#L1607-L1637
// if the division is not exact, the last chunk will be the remainder.
//
// Dividing 40 items on 12 threads will lead to a chunk size of 40/12 = 3,
// There will be a 13 chunks of size 3 and 1 of size 1 distributed on 12 threads.
// This leads to 1 thread working on 6 iterations, 1 on 4 iterations and 10 on 3 iterations,
// a load imbalance of 2x.
//
// Instead we can divide work into chunks of size
// 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3 = 4*4 + 3*8 = 40
//
// This would lead to a 6/4 = 1.5x speedup compared to naive chunks_mut
//
// See also OpenMP spec (page 60)
// http://www.openmp.org/mp-documents/openmp-4.5.pdf
// "When no chunk_size is specified, the iteration space is divided into chunks
// that are approximately equal in size, and at most one chunk is distributed to
// each thread. The size of the chunks is unspecified in this case."
// This implies chunks are the same size ±1

let f = &f;
let total_iters = v.len();
let num_threads = multicore::current_num_threads();
let base_chunk_size = total_iters / num_threads;
let cutoff_chunk_id = total_iters % num_threads;
let split_pos = cutoff_chunk_id * (base_chunk_size + 1);
let (v_hi, v_lo) = v.split_at_mut(split_pos);

multicore::scope(|scope| {
// Skip special-case: number of iterations is cleanly divided by number of threads.
if cutoff_chunk_id != 0 {
for (chunk_id, chunk) in v_hi.chunks_exact_mut(base_chunk_size + 1).enumerate() {
let offset = chunk_id * (base_chunk_size + 1);
scope.spawn(move |_| f(chunk, offset));
}
}
// Skip special-case: less iterations than number of threads.
if base_chunk_size != 0 {
for (chunk_id, chunk) in v_lo.chunks_exact_mut(base_chunk_size).enumerate() {
let offset = split_pos + (chunk_id * base_chunk_size);
scope.spawn(move |_| f(chunk, offset));
}
}
});
}
}
9 changes: 1 addition & 8 deletions halo2_middleware/src/permutation.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,4 @@
use crate::circuit::ColumnMid;

// TODO: Dedup with other Cell definition, or move this to a higher level
#[derive(Clone, Debug)]
pub struct Cell {
pub column: ColumnMid,
pub row: usize,
}
use crate::circuit::{Cell, ColumnMid};

#[derive(Clone, Debug)]
pub struct AssemblyMid {
Expand Down

0 comments on commit 9927c82

Please sign in to comment.