-
-
Notifications
You must be signed in to change notification settings - Fork 276
use rayon to speed up linfa-logistic #355
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; | ||
use linfa::prelude::*; | ||
use ndarray::{Array1, Ix1}; | ||
use rand::{Rng, SeedableRng}; | ||
|
||
// Iteration cap handed to `LogisticRegression::max_iterations` in `train_model`.
// NOTE(review): presumably kept this low so each benchmark sample stays short — confirm before changing.
const MAX_ITERATIONS: u64 = 2; | ||
|
||
fn train_model( | ||
dataset: &Dataset<f32, bool, Ix1>, | ||
) -> linfa_logistic::FittedLogisticRegression<f32, bool> { | ||
linfa_logistic::LogisticRegression::default() | ||
.max_iterations(MAX_ITERATIONS) | ||
.fit(dataset) | ||
.unwrap() | ||
} | ||
|
||
fn generate_categorical_data(nfeatures: usize, nsamples: usize) -> Dataset<f32, bool, Ix1> { | ||
let mut rng = rand::rngs::SmallRng::seed_from_u64(42); | ||
let mut feature_rows: Vec<Vec<f32>> = Vec::new(); | ||
let mut label_rows: Vec<bool> = Vec::new(); | ||
for _ in 0..nsamples { | ||
let mut features = Vec::new(); | ||
for _ in 0..nfeatures { | ||
let value = if rng.gen() { 1.0 } else { 0.0 }; | ||
features.push(value); | ||
} | ||
feature_rows.push(features); | ||
label_rows.push(rng.gen()); | ||
} | ||
linfa::Dataset::new( | ||
ndarray::Array2::from_shape_vec( | ||
(nsamples, nfeatures), | ||
feature_rows.into_iter().flatten().collect(), | ||
) | ||
.unwrap(), | ||
Array1::from_shape_vec(label_rows.len(), label_rows).unwrap(), | ||
) | ||
} | ||
|
||
fn bench(c: &mut Criterion) { | ||
let mut group = c.benchmark_group("Logistic regression"); | ||
group.measurement_time(std::time::Duration::from_secs(10)).sample_size(10); | ||
for nfeatures in [1_000] { | ||
for nsamples in [1_000, 10_000, 100_000, 200_000, 500_000, 1_000_000] { | ||
let input = generate_categorical_data(nfeatures, nsamples); | ||
group.bench_with_input( | ||
BenchmarkId::new("train_model", format!("{:e}x{:e}", nfeatures as f64, nsamples as f64)), | ||
&input, | ||
|b, dataset| { | ||
b.iter(|| train_model(dataset)); | ||
}, | ||
); | ||
} | ||
} | ||
group.finish(); | ||
} | ||
|
||
criterion_group!(benches, bench); | ||
criterion_main!(benches); |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -454,9 +454,9 @@ fn log_sum_exp<F: linfa::Float, A: Data<Elem = F>>( | |
/// Computes `exp(n - max) / sum(exp(n- max))`, which is a numerically stable version of softmax | ||
fn softmax_inplace<F: linfa::Float, A: DataMut<Elem = F>>(v: &mut ArrayBase<A, Ix1>) { | ||
let max = v.iter().copied().reduce(F::max).unwrap(); | ||
v.mapv_inplace(|n| (n - max).exp()); | ||
v.par_mapv_inplace(|n| (n - max).exp()); | ||
let sum = v.sum(); | ||
v.mapv_inplace(|n| n / sum); | ||
v.par_mapv_inplace(|n| n / sum); | ||
} | ||
|
||
/// Computes the logistic loss assuming the training labels $y \in {-1, 1}$ | ||
|
@@ -479,7 +479,7 @@ fn logistic_loss<F: Float, A: Data<Elem = F>>( | |
let yz = x.dot(&params.into_shape((params.len(), 1)).unwrap()) + intercept; | ||
let len = yz.len(); | ||
let mut yz = yz.into_shape(len).unwrap() * y; | ||
yz.mapv_inplace(log_logistic); | ||
yz.par_mapv_inplace(log_logistic); | ||
-yz.sum() + F::cast(0.5) * alpha * params.dot(&params) | ||
} | ||
|
||
|
@@ -495,8 +495,7 @@ fn logistic_grad<F: Float, A: Data<Elem = F>>( | |
let yz = x.dot(&params.into_shape((params.len(), 1)).unwrap()) + intercept; | ||
let len = yz.len(); | ||
let mut yz = yz.into_shape(len).unwrap() * y; | ||
yz.mapv_inplace(logistic); | ||
yz -= F::one(); | ||
yz.par_mapv_inplace(|v| logistic(v) - F::one()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you put There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe another comment, the expression we have right now, isn't it |
||
yz *= y; | ||
if w.len() == n_features + 1 { | ||
let mut grad = Array::zeros(w.len()); | ||
|
@@ -624,7 +623,7 @@ impl<F: Float, C: PartialOrd + Clone> FittedLogisticRegression<F, C> { | |
/// model was fitted. | ||
pub fn predict_probabilities<A: Data<Elem = F>>(&self, x: &ArrayBase<A, Ix2>) -> Array1<F> { | ||
let mut probs = x.dot(&self.params) + self.intercept; | ||
probs.mapv_inplace(logistic); | ||
probs.par_mapv_inplace(logistic); | ||
probs | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there `par_reduce` by any chance, for max and sum?