diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..ea7336948 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +bin/* +lib/* +*.json +*.ipynb diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 000000000..16fe87b06 --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, 
in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 000000000..ac0fda003 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2014 Jorge Aparicio + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..158e16099 --- /dev/null +++ b/Makefile @@ -0,0 +1,22 @@ +BINDIR = bin +LIB = src/lib.rs +LIBDIR = lib +RUSTC = rustc -O +SRCS = $(wildcard examples/*.rs) +BINS = $(patsubst examples/%.rs,bin/%,$(SRCS)) + +.PHONY: all clean test + +all: + mkdir -p $(LIBDIR) + $(RUSTC) $(LIB) --out-dir $(LIBDIR) + +clean: + rm -rf bin lib + +test: + rm -rf bin + mkdir bin + $(foreach src,$(SRCS),$(RUSTC) $(src) -L $(LIBDIR) --out-dir $(BINDIR);) + $(foreach bin,$(BINS),$(bin) &&) true + ./check-line-length.sh diff --git a/README.md b/README.md new file mode 100644 index 000000000..84ae3e4f6 --- /dev/null +++ b/README.md @@ -0,0 +1,89 @@ +# criterion.rs + +This is a port (with a few modifications) of +[Haskell "criterion" benchmarking library](http://www.serpentine.com/blog/2009/09/29/criterion-a-new-benchmarking-library-for-haskell) +to Rust. + +Addresses [mozilla/rust#6812](https://github.com/mozilla/rust/issues/6812) and +I hope it'll help with +[mozilla/rust#7532](https://github.com/mozilla/rust/issues/7532) + +I encourage you to look at this +[braindump](http://japaric.github.io/criterion-braindump), for an explanation +(with plots!) of how criterion works. 
+ +## Done so far + +* Estimation of the cost of reading the clock (`precise_time_ns()`) +* Outlier classification using the box plot method (IQR criteria) +* Removal of severe outliers (this is **not** done in the original criterion) +* Bootstrapping: point estimate, standard error and confidence interval +* Convert to library +* Bencher-like interface +* Bencher configuration +* Benchmark groups +* Some examples + +## Not (yet?) ported from the original + +* outlierVariance, this method computes the influence of the outliers on the + variance of the sample + * this still looks too magical to me, using only the sample size, and the + point estimates of the mean and the standard deviation, the author + classifies the effect of the outliers on the sample variance + * there are no references for the method used to do this + * some rough ideas that might accomplish this: + * the SEM (standard error of the mean) is the standard deviation of the + population over the square root of the sample size; I could compute the + variance of the population and compare it against the bootstrapped variance. + * Fit the bootstrapped distribution to a normal distribution, and look at + the R squared. + * Look at the skewness of the bootstrap distributions. + +## TODO + +* More testing +* Compare the results generated by criterion.rs with the results generated by + Rust Bencher algorithm +* Compare the current basic bootstrap against the BCa (bias corrected and + accelerated) bootstrap +* Save metrics to json file +* Hypothesis testing + * execution time improved or regressed? 
+* Check if the sample is garbage + * may be caused by CPU throttling or CPU usage peaks + * should translate into high variance in the sample + * background constant CPU usage should be hard to detect + * this affects the mean more than the variance +* Documentation + +## Wishlist + +* Plot the [PDF](http://en.wikipedia.org/wiki/Probability_density_function) of + the sample + * computing the PDF is expensive + * PDF from the sample is not too reliable, a PDF from the bootstrap would be + better, but that would be even more expensive + * need plotting library + * gnuplot? is the license compatible with Apache/MIT? +* Interface to benchmark external programs (written in other languages) + * Addresses the last point in + [mozilla/rust#7532](https://github.com/mozilla/rust/issues/7532) + * Something like [eulermark.rs](https://github.com/japaric/eulermark.rs) + * See eulermark results [here](http://japaric.github.io/eulermark.rs) + +## Unresolved questions + +* Is it sensible to remove the severe outliers in **all** cases? + * Removing outliers will always reduce the variance in the sample +* Can we continuously remove the severe outliers from the sample, until the box + plot analysis yields no more severe outliers? +* When performing several benchmarks, a heavy benchmark may affect the benchmarks + that follow (hot CPU?), how do we address this? + * Add a cooldown time between benchmarks? + +## License + +criterion.rs is dual licensed under the Apache 2.0 license and the MIT license. + +See LICENSE-APACHE and LICENSE-MIT for more details. diff --git a/check-line-length.sh b/check-line-length.sh new file mode 100755 index 000000000..904a2c2e7 --- /dev/null +++ b/check-line-length.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +echo "Checking if any rust file has a line longer than 79 characters" + +offenders=$(grep -Pl ".{80}" $(find . -name '*.rs')) +status=$? 
+ +if [[ $status == 0 ]]; then + for offender in $offenders; do + echo "> $offender exceeds 79 chars" + awk 'length($0) > 79' $offender + done + + exit 1 +fi + +echo "All is good!" diff --git a/examples/alloc.rs b/examples/alloc.rs new file mode 100644 index 000000000..424b69900 --- /dev/null +++ b/examples/alloc.rs @@ -0,0 +1,12 @@ +extern crate criterion; + +use criterion::Bencher; + +fn main() { + let mut b = Bencher::new(); + + b.bench("box", || box 0.0f64); + b.bench_group("from_elem", &[1u, 100, 10_000, 1_000_000], |n| { + Vec::from_elem(n, 0.0f64) + }); +} diff --git a/examples/fib.rs b/examples/fib.rs new file mode 100644 index 000000000..35d81ca76 --- /dev/null +++ b/examples/fib.rs @@ -0,0 +1,17 @@ +extern crate criterion; + +use criterion::Bencher; + +fn fib(n: uint) -> uint { + match n { + 0 => 0, + 1 => 1, + n => fib(n - 1) + fib(n - 2), + } +} + +fn main() { + let mut b = Bencher::new(); + + b.bench_group("fib", &[5u, 10, 15], |n| fib(n)); +} diff --git a/examples/math.rs b/examples/math.rs new file mode 100644 index 000000000..73213307c --- /dev/null +++ b/examples/math.rs @@ -0,0 +1,11 @@ +extern crate criterion; + +use criterion::Bencher; + +fn main() { + let mut b = Bencher::new(); + + b.bench("exp", || 2.0_f64.exp()); + b.bench("ln", || 2.0_f64.ln()); + b.bench("sqrt", || 2.0_f64.sqrt()); +} diff --git a/src/bencher.rs b/src/bencher.rs new file mode 100644 index 000000000..1d03a8b1a --- /dev/null +++ b/src/bencher.rs @@ -0,0 +1,87 @@ +use clock::Clock; +use common::run_for_at_least; +use sample::Sample; +use std::default::Default; +use std::fmt::Show; +use units::{AsTime,ToNanoSeconds}; + +pub struct Bencher { + clock: Option<Clock>, // built lazily by the first `bench` call + config: BencherConfig, +} + +impl Bencher { + pub fn new() -> Bencher { + Bencher { + clock: None, + config: Default::default(), + } + } + + pub fn set_config(&mut self, config: BencherConfig) { + self.config = config; + } + + pub fn bench<N: Show, T>(&mut self, name: N, action: || -> T) { + if self.clock.is_none() { + self.clock = 
Some(Clock::new(self.config.dump_clock)); + } + + let min_time = 10.ms(); + let size = self.config.sample_size; + let nresamples = self.config.nresamples; + let cl = self.config.confidence_level; + + println!("\nbenchmarking {}", name); + let (elapsed, iters, action) = run_for_at_least(min_time, 1, action); + + println!("> collecting {} measurements, {} iters each in estimated {}", + size, + iters, + (elapsed as f64 * size as f64).as_time()); + + let (sample, _) = Sample::new(size, + action, + iters, + self.clock); + + sample.outliers().report(); + + if self.config.dump_sample { + sample.dump(&Path::new(format!("{}.json", name))); + } + + sample.without_outliers().bootstrap(nresamples, cl).report(); + } + + pub fn bench_group<G: Show, I: Clone + Show, O>(&mut self, + group: G, + inputs: &[I], + action: |I| -> O) { + for input in inputs.iter() { + self.bench(format!("{}_{}", group, input), || { + action(input.clone()) + }); + } + } +} + +pub struct BencherConfig { + pub confidence_level: f64, + pub dump_clock: bool, + pub dump_sample: bool, + pub nresamples: uint, + pub sample_size: uint, +} + +impl Default for BencherConfig { + fn default() -> BencherConfig { + BencherConfig { + confidence_level: 0.95, + dump_clock: false, + dump_sample: false, + nresamples: 100_000, + sample_size: 100, + } + } +} diff --git a/src/bootstrap.rs b/src/bootstrap.rs new file mode 100644 index 000000000..95ecc1115 --- /dev/null +++ b/src/bootstrap.rs @@ -0,0 +1,144 @@ +use rand::distributions::IndependentSample; +use rand::distributions::range::Range; +use rand::{TaskRng,task_rng}; +use sample::Sample; +use test::stats::Stats; +use units::AsTime; + +// XXX for debugging purposes, remove later +//use std::io::{File,Truncate,Write}; +//use serialize::json::ToJson; + +pub struct Estimate { + confidence_level: f64, + lower_bound: f64, + point: f64, + standard_error: f64, + upper_bound: f64, +} + +impl Estimate { + fn new(point: f64, bootstrap: &[f64], cl: f64) -> Estimate { + let standard_error = bootstrap.std_dev(); + 
let lower_bound = bootstrap.percentile(50.0 * (1.0 - cl)); + let upper_bound = bootstrap.percentile(50.0 * (1.0 + cl)); + + Estimate { + confidence_level: cl, + lower_bound: lower_bound, + point: point, + standard_error: standard_error, + upper_bound: upper_bound, + } + } +} + +impl Estimate { + fn report(&self) -> String { + format!("{} ± {} [{} {}] {}% CI", + self.point.as_time(), + self.standard_error.as_time(), + self.lower_bound.as_time(), + self.upper_bound.as_time(), + self.confidence_level * 100.0) + } +} + +pub struct Bootstrap { + iters: uint, + mean: Estimate, + median: Estimate, + nresamples: uint, + sample_size: uint, + std_dev: Estimate, +} + +impl Bootstrap { + pub fn new(sample: &Sample, + nresamples: uint, + cl: f64) + -> Bootstrap + { + assert!(cl > 0.0 && cl < 1.0, + "confidence level must be between 0.0 and 1.0"); + + println!("> bootstrapping sample with {} resamples", nresamples); + + + let mut means = Vec::with_capacity(nresamples); + let mut medians = Vec::with_capacity(nresamples); + let mut std_devs = Vec::with_capacity(nresamples); + + let mut resamples = Resamples::new(sample.data()); + for _ in range(0, nresamples) { + let resample = resamples.next(); + + means.push(resample.mean()); + medians.push(resample.median()); + std_devs.push(resample.std_dev()); + } + + // XXX for debugging purposes, remove later + //match File::open_mode(&Path::new("b-mean.json"), Truncate, Write) { + //Err(_) => fail!("couldn't open b-mean.json"), + //Ok(mut file) => { + //match file.write_str(means.to_json().to_str().as_slice()) { + //Err(_) => fail!("couldn't write b-mean.json"), + //Ok(_) => {}, + //} + //} + //} + + let mean = Estimate::new(sample.mean(), means.as_slice(), cl); + let median = Estimate::new(sample.median(), medians.as_slice(), cl); + let std_dev = Estimate::new(sample.std_dev(), std_devs.as_slice(), cl); + + Bootstrap { + iters: sample.iters(), + mean: mean, + median: median, + nresamples: nresamples, + sample_size: sample.len(), + 
std_dev: std_dev, + } + } + + pub fn report(&self) { + println!(" > mean: {}", self.mean.report()); + println!(" > median: {}", self.median.report()); + println!(" > std_dev: {}", self.std_dev.report()); + } +} + +struct Resamples<'a> { + range: Range<uint>, // index distribution over `sample` + rng: TaskRng, + sample: &'a [f64], + stage: Vec<f64>, // scratch buffer reused by every `next` call +} + +impl<'a> Resamples<'a> { + pub fn new(sample: &'a [f64]) -> Resamples<'a> { + let size = sample.len(); + + Resamples { + range: Range::new(0, size), // upper bound is exclusive + rng: task_rng(), + sample: sample, + stage: Vec::from_elem(size, 0.0), + } + } + + pub fn next<'b>(&'b mut self) -> &'b [f64] { + let size = self.sample.len(); + + // resampling *with* replacement + for i in range(0, size) { + let j = self.range.ind_sample(&mut self.rng); + + self.stage.as_mut_slice()[i] = self.sample[j]; + } + + self.stage.as_slice() + } +} diff --git a/src/clock.rs b/src/clock.rs new file mode 100644 index 000000000..3fb6ae997 --- /dev/null +++ b/src/clock.rs @@ -0,0 +1,38 @@ +use common::run_for_at_least; +use sample::Sample; +use time::precise_time_ns; +use units::{AsTime,ToNanoSeconds}; + +pub struct Clock { + cost: f64, +} + +impl Clock { + pub fn cost(&self) -> f64 { + self.cost as f64 + } + + pub fn new(dump: bool) -> Clock { + let action = || precise_time_ns(); + let (_, iters, action) = run_for_at_least(10.ms(), 10_000, action); + + println!("estimating the cost of precise_time_ns()"); + let (sample, _) = Sample::new(100, action, iters, None); + + let mean = sample.mean(); + let median = sample.median(); + let std_dev = sample.std_dev(); + + println!("> mean: {}", mean.as_time()); + println!("> median: {}", median.as_time()); + println!("> std dev: {}", std_dev.as_time()); + + if dump { + sample.dump(&Path::new("clock.json")); + } + + Clock { + cost: median, + } + } +} diff --git a/src/common.rs b/src/common.rs new file mode 100644 index 000000000..a422caaa4 --- /dev/null +++ b/src/common.rs @@ -0,0 +1,31 @@ +use test::black_box; +use time::precise_time_ns; + +pub fn 
run_for_at_least<'a, T>(how_long: u64, + seed: uint, + action: ||:'a -> T) + -> (u64, uint, ||:'a -> T) +{ + let mut iters = seed; + let mut tries = 0; + + let init = precise_time_ns(); + loop { + let start = precise_time_ns(); + for _ in range(0, iters) { + black_box(action()); + } + let elapsed = precise_time_ns() - start; + + if elapsed > how_long { + return (elapsed, iters, action); + } + + iters *= 2; + tries += 1; + + if precise_time_ns() - init > 10 * how_long { + fail!("took too long to run: seed {}, tries {}", seed, tries); + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 000000000..bd83f69b3 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,17 @@ +#![crate_id="criterion#0.11-pre"] +#![crate_type="lib"] + +extern crate rand; +extern crate serialize; +extern crate test; +extern crate time; + +pub use bencher::{Bencher,BencherConfig}; + +mod bencher; +mod bootstrap; +mod clock; +mod common; +mod outlier; +mod sample; +mod units; diff --git a/src/outlier.rs b/src/outlier.rs new file mode 100644 index 000000000..8c5c439d2 --- /dev/null +++ b/src/outlier.rs @@ -0,0 +1,71 @@ +use sample::Sample; + +pub struct Outliers { + high_mild: uint, + high_severe: uint, + low_mild: uint, + low_severe: uint, + sample_size: uint, + total: uint, +} + +impl Outliers { + // classify outliers using the boxplot method + // see http://en.wikipedia.org/wiki/Boxplot for more information + pub fn new(sample: &Sample) -> Outliers { + let (q1, _, q3) = sample.quartiles(); + let iqr = q3 - q1; + + let lost = q1 - 3.0 * iqr; // Low severe outlier threshold + let lomt = q1 - 1.5 * iqr; // Low mild outlier threshold + let himt = q3 + 1.5 * iqr; // High mild outlier threshold + let hist = q3 + 3.0 * iqr; // High severe outlier threshold + + let (mut los, mut lom, mut him, mut his) = (0, 0, 0, 0); + + for &value in sample.data().iter() { + if value < lost { + los += 1; + } else if value < lomt { + lom += 1; + } else if value > hist { + his += 1; + } else if value > himt { + 
him += 1; + } + } + + Outliers { + high_mild: him, + high_severe: his, + low_mild: lom, + low_severe: los, + sample_size: sample.len(), + total: him + his + lom + los, + } + } + + pub fn report(&self) { + if self.total == 0 { + return + } + + let percent = |n: uint| { 100.0 * n as f64 / self.sample_size as f64 }; + + println!("> found {} outliers among {} measurements ({:.2}%)", + self.total, + self.sample_size, + percent(self.total)); + + let print = |n: uint, class| { + if n != 0 { + println!(" > {} ({:.2}%) {}", n, percent(n), class); + } + }; + + print(self.low_severe, "low severe"); + print(self.low_mild, "low mild"); + print(self.high_mild, "high mild"); + print(self.high_severe, "high severe"); + } +} diff --git a/src/sample.rs b/src/sample.rs new file mode 100644 index 000000000..cd58a8072 --- /dev/null +++ b/src/sample.rs @@ -0,0 +1,123 @@ +use bootstrap::Bootstrap; +use clock::Clock; +use outlier::Outliers; +use serialize::json; +use std::io::{File,Truncate,Write}; +use test::black_box; +use test::stats::Stats; +use time::precise_time_ns; + +#[deriving(Encodable)] +pub struct Sample { + data: Vec<f64>, // time per iteration, one entry per measurement + iters: uint, +} + +impl Sample { + pub fn new<'a, T>(size: uint, + action: ||:'a -> T, + iters: uint, + clock: Option<Clock>) + -> (Sample, ||:'a -> T) + { + let mut total_time = Vec::from_elem(size, 0u64); + + for t in total_time.mut_iter() { + let start = precise_time_ns(); + for _ in range(0, iters) { + black_box(action()); + } + *t = precise_time_ns() - start; + } + + let time_per_iter: Vec<f64> = total_time.move_iter().map(|t| { + match clock { + None => { + t as f64 / (iters + 1) as f64 + }, + // XXX this operation adds variance to our measurement, but + // we'll consider the increment to be negligible + Some(clock) => { + (t as f64 - clock.cost()) / iters as f64 + }, + } + }).collect(); + + let sample = Sample { + data: time_per_iter, + iters: iters, + }; + + (sample, action) + } + + pub fn bootstrap(&self, nresamples: uint, cl: f64) -> Bootstrap { + 
Bootstrap::new(self, nresamples, cl) + } + + pub fn data<'a>(&'a self) -> &'a [f64] { + self.data.as_slice() + } + + pub fn dump(&self, path: &Path) { + let json = json::Encoder::str_encode(self); + + match File::open_mode(&Path::new(path), Truncate, Write) { + Err(_) => fail!("couldn't open {}", path.display()), + Ok(mut file) => match file.write_str(json.as_slice()) { + Err(_) => fail!("couldn't write {}", path.display()), + Ok(_) => {}, + } + } + } + + pub fn iters(&self) -> uint { + self.iters + } + + pub fn mean(&self) -> f64 { + self.data.as_slice().mean() + } + + pub fn median(&self) -> f64 { + self.data.as_slice().median() + } + + pub fn outliers(&self) -> Outliers { + Outliers::new(self) + } + + pub fn std_dev(&self) -> f64 { + self.data.as_slice().std_dev() + } + + pub fn quartiles(&self) -> (f64, f64, f64) { + self.data.as_slice().quartiles() + } + + // remove severe outliers using the IQR criteria + pub fn without_outliers(&self) -> Sample { + let (q1, _, q3) = self.quartiles(); + let iqr = q3 - q1; + let (lb, ub) = (q1 - 3.0 * iqr, q3 + 3.0 * iqr); + + let data: Vec<f64> = self.data.iter().filter_map(|&x| { + if x >= lb && x <= ub { // inclusive, as in Outliers::new + Some(x) + } else { + None + } + }).collect(); + + Sample { + data: data, + iters: self.iters, + } + } +} + +impl Container for Sample { + fn len(&self) -> uint { + self.data.len() + } +} diff --git a/src/units.rs b/src/units.rs new file mode 100644 index 000000000..4581ed150 --- /dev/null +++ b/src/units.rs @@ -0,0 +1,53 @@ +pub trait ToNanoSeconds { + fn s(self) -> u64; + fn ms(self) -> u64; + fn us(self) -> u64; + fn ns(self) -> u64; +} + +impl ToNanoSeconds for int { + fn s(self) -> u64 { + (self as u64) * 1_000_000_000_u64 + } + + fn ms(self) -> u64 { + (self as u64) * 1_000_000_u64 + } + + fn us(self) -> u64 { + (self as u64) * 1_000_u64 + } + + fn ns(self) -> u64 { + self as u64 + } +} + +pub trait AsTime { + fn as_time(self) -> String; +} + +impl AsTime for f64 { + fn as_time(self) -> String { + fn short(n: f64) -> String 
{ + if n < 10.0 { format!("{:.4}", n) } + else if n < 100.0 { format!("{:.3}", n) } + else if n < 1000.0 { format!("{:.2}", n) } + else { format!("{}", n) } + } + + if self < 0.0 { + format!("-{}", (-self).as_time()) + } else if self < 1.0 { + format!("{} ps", short(self * 1e3)) + } else if self < 1_000.0 { + format!("{} ns", short(self)) + } else if self < 1_000_000.0 { + format!("{} us", short(self * 1e-3)) + } else if self < 1_000_000_000.0 { + format!("{} ms", short(self * 1e-6)) + } else { + format!("{} s", short(self * 1e-9)) + } + } +}