Skip to content

Commit

Permalink
Merge pull request #32 from scroll-tech/parallel_syn
Browse files Browse the repository at this point in the history
Add new API `assign_regions` to enable parallel synthesis of regions
  • Loading branch information
kunxian-xia authored Mar 22, 2023
2 parents 5689a74 + 088a066 commit 1c21e5b
Show file tree
Hide file tree
Showing 24 changed files with 1,022 additions and 183 deletions.
3 changes: 3 additions & 0 deletions halo2_proofs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,15 @@ poseidon = { git = 'https://github.com/privacy-scaling-explorations/poseidon.git
num-integer = "0.1"
num-bigint = { version = "0.4", features = ["rand"] }

crossbeam = "0.8.0"
# Developer tooling dependencies
plotters = { version = "0.3.0", optional = true }
tabbycat = { version = "0.1", features = ["attributes"], optional = true }
log = "0.4.17"

# timer
ark-std = { version = "0.3.0" }
env_logger = "0.8.0"

[dev-dependencies]
assert_matches = "1.5"
Expand All @@ -85,6 +87,7 @@ sanity-checks = []
batch = ["rand_core/getrandom"]
shplonk = []
gwc = []
parallel_syn = []
phase-check = []
profile = ["ark-std/print-trace"]

Expand Down
18 changes: 9 additions & 9 deletions halo2_proofs/src/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ fn serial_fft<G: Group>(a: &mut [G], omega: G::Scalar, log_n: u32) {

let mut m = 1;
for _ in 0..log_n {
let w_m = omega.pow_vartime(&[u64::from(n / (2 * m)), 0, 0, 0]);
let w_m = omega.pow_vartime([u64::from(n / (2 * m)), 0, 0, 0]);

let mut k = 0;
while k < n {
Expand Down Expand Up @@ -316,7 +316,7 @@ pub fn generate_twiddle_lookup_table<F: Field>(
if is_lut_len_large {
let mut twiddle_lut = vec![F::zero(); (1 << log_n) as usize];
parallelize(&mut twiddle_lut, |twiddle_lut, start| {
let mut w_n = omega.pow_vartime(&[start as u64, 0, 0, 0]);
let mut w_n = omega.pow_vartime([start as u64, 0, 0, 0]);
for twiddle_lut in twiddle_lut.iter_mut() {
*twiddle_lut = w_n;
w_n = w_n * omega;
Expand All @@ -332,18 +332,18 @@ pub fn generate_twiddle_lookup_table<F: Field>(
parallelize(
&mut twiddle_lut[..low_degree_lut_len],
|twiddle_lut, start| {
let mut w_n = omega.pow_vartime(&[start as u64, 0, 0, 0]);
let mut w_n = omega.pow_vartime([start as u64, 0, 0, 0]);
for twiddle_lut in twiddle_lut.iter_mut() {
*twiddle_lut = w_n;
w_n = w_n * omega;
}
},
);
let high_degree_omega = omega.pow_vartime(&[(1 << sparse_degree) as u64, 0, 0, 0]);
let high_degree_omega = omega.pow_vartime([(1 << sparse_degree) as u64, 0, 0, 0]);
parallelize(
&mut twiddle_lut[low_degree_lut_len..],
|twiddle_lut, start| {
let mut w_n = high_degree_omega.pow_vartime(&[start as u64, 0, 0, 0]);
let mut w_n = high_degree_omega.pow_vartime([start as u64, 0, 0, 0]);
for twiddle_lut in twiddle_lut.iter_mut() {
*twiddle_lut = w_n;
w_n = w_n * high_degree_omega;
Expand Down Expand Up @@ -372,7 +372,7 @@ pub fn parallel_fft<G: Group>(a: &mut [G], omega: G::Scalar, log_n: u32) {
let twiddle_lut = &*twiddle_lut;
for (chunk_idx, tmp) in tmp.chunks_mut(sub_n).enumerate() {
scope.spawn(move |_| {
let split_fft_offset = chunk_idx * sub_n >> log_split;
let split_fft_offset = (chunk_idx * sub_n) >> log_split;
for (i, tmp) in tmp.chunks_mut(split_m).enumerate() {
let split_fft_offset = split_fft_offset + i;
split_radix_fft(tmp, a, twiddle_lut, n, split_fft_offset, log_split);
Expand All @@ -392,7 +392,7 @@ pub fn parallel_fft<G: Group>(a: &mut [G], omega: G::Scalar, log_n: u32) {
});

// sub fft
let new_omega = omega.pow_vartime(&[split_m as u64, 0, 0, 0]);
let new_omega = omega.pow_vartime([split_m as u64, 0, 0, 0]);
multicore::scope(|scope| {
for a in a.chunks_mut(sub_n) {
scope.spawn(move |_| {
Expand All @@ -419,7 +419,7 @@ pub fn parallel_fft<G: Group>(a: &mut [G], omega: G::Scalar, log_n: u32) {

/// Convert coefficient bases group elements to lagrange basis by inverse FFT.
pub fn g_to_lagrange<C: CurveAffine>(g_projective: Vec<C::Curve>, k: u32) -> Vec<C> {
let n_inv = C::Scalar::TWO_INV.pow_vartime(&[k as u64, 0, 0, 0]);
let n_inv = C::Scalar::TWO_INV.pow_vartime([k as u64, 0, 0, 0]);
let mut omega_inv = C::Scalar::ROOT_OF_UNITY_INV;
for _ in k..C::Scalar::S {
omega_inv = omega_inv.square();
Expand Down Expand Up @@ -464,7 +464,7 @@ pub fn eval_polynomial<F: Field>(poly: &[F], point: F) -> F {
{
scope.spawn(move |_| {
let start = chunk_idx * chunk_size;
out[0] = evaluate(poly, point) * point.pow_vartime(&[start as u64, 0, 0, 0]);
out[0] = evaluate(poly, point) * point.pow_vartime([start as u64, 0, 0, 0]);
});
}
});
Expand Down
27 changes: 27 additions & 0 deletions halo2_proofs/src/circuit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,18 @@ pub trait Layouter<F: Field> {
N: Fn() -> NR,
NR: Into<String>;

/// Assign a batch of regions in one call.
///
/// Each closure in `assignments` receives its own [`Region`] and yields one
/// `AR`; on success the collected values are returned as a `Vec<AR>`.
/// `name` is a lazily evaluated label for the batch.
///
/// The `Send` bounds on `A` and `AR` allow an implementation to run the
/// closures on separate threads (this method only exists when the
/// `parallel_syn` feature is enabled).
///
/// NOTE(review): whether the output order always matches the order of
/// `assignments` is up to each implementation -- confirm before relying on it.
#[cfg(feature = "parallel_syn")]
fn assign_regions<A, AR, N, NR>(
&mut self,
name: N,
assignments: Vec<A>,
) -> Result<Vec<AR>, Error>
where
A: FnMut(Region<'_, F>) -> Result<AR, Error> + Send,
AR: Send,
N: Fn() -> NR,
NR: Into<String>;

/// Assign a table region to an absolute row number.
///
/// ```ignore
Expand Down Expand Up @@ -510,6 +522,21 @@ impl<'a, F: Field, L: Layouter<F> + 'a> Layouter<F> for NamespacedLayouter<'a, F
self.0.assign_region(name, assignment)
}

// Batch region assignment for a namespaced layouter.
#[cfg(feature = "parallel_syn")]
fn assign_regions<A, AR, N, NR>(
&mut self,
name: N,
assignments: Vec<A>,
) -> Result<Vec<AR>, Error>
where
A: FnMut(Region<'_, F>) -> Result<AR, Error> + Send,
AR: Send,
N: Fn() -> NR,
NR: Into<String>,
{
// Nothing is done here besides forwarding: `name` and `assignments` are
// passed unchanged to the inner value `self.0`, and its result is returned.
self.0.assign_regions(name, assignments)
}

fn assign_table<A, N, NR>(&mut self, name: N, assignment: A) -> Result<(), Error>
where
A: FnMut(Table<'_, F>) -> Result<(), Error>,
Expand Down
167 changes: 165 additions & 2 deletions halo2_proofs/src/circuit/floor_planner/single_pass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ use std::cmp;
use std::collections::HashMap;
use std::fmt;
use std::marker::PhantomData;
use std::ops::Range;
use std::sync::{Arc, Mutex};
use std::time::Instant;

use ff::Field;

Expand All @@ -12,6 +15,7 @@ use crate::{
layouter::{RegionColumn, RegionLayouter, RegionShape, TableLayouter},
Cell, Layouter, Region, RegionIndex, RegionStart, Table, Value,
},
multicore,
plonk::{
Advice, Any, Assigned, Assignment, Challenge, Circuit, Column, Error, Fixed, FloorPlanner,
Instance, Selector, TableColumn,
Expand All @@ -33,7 +37,7 @@ impl FloorPlanner for SimpleFloorPlanner {
config: C::Config,
constants: Vec<Column<Fixed>>,
) -> Result<(), Error> {
let timer = start_timer!(|| format!("SimpleFloorPlanner synthesize"));
let timer = start_timer!(|| ("SimpleFloorPlanner synthesize").to_string());
let layouter = SingleChipLayouter::new(cs, constants)?;
let result = circuit.synthesize(config, layouter);
end_timer!(timer);
Expand Down Expand Up @@ -63,7 +67,7 @@ impl<'a, F: Field, CS: Assignment<F> + 'a> fmt::Debug for SingleChipLayouter<'a,
}
}

impl<'a, F: Field, CS: Assignment<F>> SingleChipLayouter<'a, F, CS> {
impl<'a, F: Field, CS: Assignment<F> + 'a> SingleChipLayouter<'a, F, CS> {
/// Creates a new single-chip layouter.
pub fn new(cs: &'a mut CS, constants: Vec<Column<Fixed>>) -> Result<Self, Error> {
let ret = SingleChipLayouter {
Expand All @@ -76,6 +80,20 @@ impl<'a, F: Field, CS: Assignment<F>> SingleChipLayouter<'a, F, CS> {
};
Ok(ret)
}

/// Builds one layouter per entry of `sub_cs`.
///
/// Every returned layouter takes ownership of one of the supplied
/// constraint-system references and starts from a clone of this layouter's
/// current `constants`, `regions`, `columns` and `table_columns`.
/// The output has the same length and order as `sub_cs`.
fn fork(&self, sub_cs: Vec<&'a mut CS>) -> Result<Vec<Self>, Error> {
    let mut forked = Vec::with_capacity(sub_cs.len());
    for cs in sub_cs {
        // Bookkeeping state is cloned per layouter, so the forks do not share it.
        let layouter = Self {
            cs,
            constants: self.constants.clone(),
            regions: self.regions.clone(),
            columns: self.columns.clone(),
            table_columns: self.table_columns.clone(),
            _marker: Default::default(),
        };
        forked.push(layouter);
    }
    Ok(forked)
}
}

impl<'a, F: Field, CS: Assignment<F> + 'a> Layouter<F> for SingleChipLayouter<'a, F, CS> {
Expand Down Expand Up @@ -185,6 +203,151 @@ impl<'a, F: Field, CS: Assignment<F> + 'a> Layouter<F> for SingleChipLayouter<'a
Ok(result)
}

/// Assigns a batch of sub-regions, running the real assignment pass of each
/// sub-region on its own scoped thread.
///
/// The work is done in three stages:
/// 1. Sequentially, every closure is run once against a `RegionShape` to
///    learn which columns it uses and how many rows it needs; the start row
///    of each sub-region is recorded in `self.regions` / `self.columns`.
/// 2. The constraint system is forked (`self.cs.fork(&ranges)`), this
///    layouter is forked once per sub constraint system, and every closure
///    is run a second time inside `crossbeam::scope`. The sub constraint
///    systems are merged back afterwards with `self.cs.merge`.
/// 3. Constants collected from all sub-regions are assigned, in order, to
///    the first constants column and copy-constrained to their cells.
///
/// Every closure in `assignments` is therefore invoked twice. Sub-region `i`
/// is named `"{name}_{i}"` and uses region index `self.regions.len() + i`
/// (length taken on entry). Results are returned in the order of
/// `assignments`.
///
/// # Errors
///
/// Propagates errors from either pass of any closure, from forking/merging
/// the constraint system, and from assigning constants. Returns
/// `Error::NotEnoughColumnsForConstants` if constants were requested but no
/// constants column is configured.
///
/// # Panics
///
/// Panics if one of the spawned threads panics.
#[cfg(feature = "parallel_syn")]
fn assign_regions<A, AR, N, NR>(
&mut self,
name: N,
mut assignments: Vec<A>,
) -> Result<Vec<AR>, Error>
where
A: FnMut(Region<'_, F>) -> Result<AR, Error> + Send,
AR: Send,
N: Fn() -> NR,
NR: Into<String>,
{
// Index that the first sub-region of this batch will receive.
let region_index = self.regions.len();
let region_name: String = name().into();
// Get region shapes sequentially
let mut ranges = vec![];
for (i, assignment) in assignments.iter_mut().enumerate() {
// Get shape of the ith sub-region.
let mut shape = RegionShape::new((region_index + i).into());
let region: &mut dyn RegionLayouter<F> = &mut shape;
assignment(region.into())?;

// The sub-region starts at the largest "next free row" among the
// columns it touches (0 for columns that have not been used yet).
let mut region_start = 0;
for column in &shape.columns {
let column_start = self.columns.get(column).cloned().unwrap_or(0);
region_start = cmp::max(region_start, column_start);
}
log::debug!(
"{}_{} start: {}, end: {}",
region_name,
i,
region_start,
region_start + shape.row_count()
);
self.regions.push(region_start.into());
ranges.push(region_start..(region_start + shape.row_count()));

// Update column usage information.
for column in shape.columns.iter() {
self.columns
.insert(*column, region_start + shape.row_count());
}
}

// Do actual synthesis of sub-regions in parallel
// NOTE(review): presumably `fork` yields one sub constraint system per row
// range in `ranges` -- confirm against the `Assignment::fork` implementation.
let cs_fork_time = Instant::now();
let mut sub_cs = self.cs.fork(&ranges)?;
log::info!(
"CS forked into {} subCS took {:?}",
sub_cs.len(),
cs_fork_time.elapsed()
);
// The sub-layouters are created after the first pass, so each one carries
// a clone of the start rows recorded above.
let ref_sub_cs = sub_cs.iter_mut().collect();
let sub_layouters = self.fork(ref_sub_cs)?;
let regions_2nd_pass = Instant::now();
let ret = crossbeam::scope(|scope| {
let mut handles = vec![];
for (i, (mut assignment, mut sub_layouter)) in assignments
.into_iter()
.zip(sub_layouters.into_iter())
.enumerate()
{
let region_name = format!("{}_{}", region_name, i);
handles.push(scope.spawn(move |_| {
let sub_region_2nd_pass = Instant::now();
sub_layouter.cs.enter_region(|| region_name.clone());
let mut region =
SingleChipLayouterRegion::new(&mut sub_layouter, (region_index + i).into());
let region_ref: &mut dyn RegionLayouter<F> = &mut region;
let result = assignment(region_ref.into());
// Constants requested by this sub-region are handed back to the
// caller, which assigns them after all threads have finished.
let constant = region.constants.clone();
sub_layouter.cs.exit_region();
log::info!(
"region {} 2nd pass synthesis took {:?}",
region_name,
sub_region_2nd_pass.elapsed()
);

(result, constant)
}));
}

// Join in spawn order so the outputs line up with `assignments`.
handles
.into_iter()
.map(|handle| handle.join().expect("handle.join should never fail"))
.collect::<Vec<_>>()
})
.expect("scope should not fail");
// The merge runs before the per-region results are inspected, so the sub
// constraint systems are merged back even if a sub-region returned an error.
let cs_merge_time = Instant::now();
let num_sub_cs = sub_cs.len();
self.cs.merge(sub_cs)?;
log::info!(
"Merge {} subCS back took {:?}",
num_sub_cs,
cs_merge_time.elapsed()
);
log::info!(
"{} sub_regions of {} 2nd pass synthesis took {:?}",
ranges.len(),
region_name,
regions_2nd_pass.elapsed()
);
let (results, constants): (Vec<_>, Vec<_>) = ret.into_iter().unzip();

// Check if there are errors in sub-region synthesis
let results = results.into_iter().collect::<Result<Vec<_>, Error>>()?;

// Merge all constants from sub-regions together
let constants_to_assign = constants
.into_iter()
.flat_map(|constant_to_assign| constant_to_assign.into_iter())
.collect::<Vec<_>>();

// Assign constants. For the simple floor planner, we assign constants in order in
// the first `constants` column.
if self.constants.is_empty() {
if !constants_to_assign.is_empty() {
return Err(Error::NotEnoughColumnsForConstants);
}
} else {
let constants_column = self.constants[0];
// Next free row of the constants column (0 if it has not been used yet).
let next_constant_row = self
.columns
.entry(Column::<Any>::from(constants_column).into())
.or_default();
for (constant, advice) in constants_to_assign {
self.cs.assign_fixed(
|| format!("Constant({:?})", constant.evaluate()),
constants_column,
*next_constant_row,
|| Value::known(constant),
)?;
// Tie the fixed cell to the cell that requested the constant; its
// absolute row is the owning region's start row plus the cell's offset.
self.cs.copy(
constants_column.into(),
*next_constant_row,
advice.column,
*self.regions[*advice.region_index] + advice.row_offset,
)?;
*next_constant_row += 1;
}
}

Ok(results)
}

fn assign_table<A, N, NR>(&mut self, name: N, mut assignment: A) -> Result<(), Error>
where
A: FnMut(Table<'_, F>) -> Result<(), Error>,
Expand Down
14 changes: 14 additions & 0 deletions halo2_proofs/src/circuit/floor_planner/v1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,20 @@ impl<'p, 'a, F: Field, CS: Assignment<F> + 'a> Layouter<F> for V1Pass<'p, 'a, F,
}
}

/// Batch region assignment is not implemented for the V1 floor planner.
///
/// The signature carries the same `Send` bounds as the trait declaration
/// and the other `Layouter` implementations, so all of them stay in step.
///
/// # Panics
///
/// Always panics: this method is a placeholder.
#[cfg(feature = "parallel_syn")]
fn assign_regions<A, AR, N, NR>(
    &mut self,
    _name: N,
    _assignments: Vec<A>,
) -> Result<Vec<AR>, Error>
where
    A: FnMut(Region<'_, F>) -> Result<AR, Error> + Send,
    AR: Send,
    N: Fn() -> NR,
    NR: Into<String>,
{
    // Fail with an explicit message so the panic names the missing feature.
    todo!("assign_regions is not yet supported by the V1 floor planner")
}

fn assign_table<A, N, NR>(&mut self, name: N, assignment: A) -> Result<(), Error>
where
A: FnMut(Table<'_, F>) -> Result<(), Error>,
Expand Down
Loading

0 comments on commit 1c21e5b

Please sign in to comment.