diff --git a/Makefile b/Makefile index e14fad4..6e5341b 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,10 @@ python-bench: results numpy/*.py echo $(benches_name) -pytest $(IGNORE_FLAGS) --benchmark-json=$(NUMPY_JSON) $(BENCHFLAGS) $(BENCHES) python numpy/converter.py --json_name $(NUMPY_JSON) + +# Separate target to run the python benchmarks with numpy-taco cross validation logic. +validate-python-bench: numpy/*.py validation-path + pytest $(IGNORE_FLAGS) $(BENCHFLAGS) $(BENCHES) .PHONY: convert-csv-all convert-csv-all: @@ -34,10 +38,24 @@ convert-csv-all: taco-bench: taco/build/taco-bench ifeq ($(BENCHES),"") - LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" + LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10 else - LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" + LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10 +endif + +# Separate target to run the TACO benchmarks with numpy-taco cross validation logic. 
+validate-taco-bench: taco/build/taco-bench validation-path +ifeq ($(BENCHES),"") + LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_repetitions=1 +else + LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_repetitions=1 +endif + +.PHONY: validation-path +validation-path: +ifeq ($(VALIDATION_OUTPUT_PATH),) + $(error VALIDATION_OUTPUT_PATH is undefined) endif taco/build/taco-bench: results check-and-reinit-submodules taco/benchmark/googletest diff --git a/numpy/ufuncs.py b/numpy/ufuncs.py index 1c59782..8cab1eb 100644 --- a/numpy/ufuncs.py +++ b/numpy/ufuncs.py @@ -2,7 +2,8 @@ from scipy.sparse import random, csr_matrix import sparse import pytest -from util import TensorCollectionFROSTT, PydataTensorShifter, TensorCollectionSuiteSparse, ScipyTensorShifter, PydataMatrixMarketTensorLoader, ScipyMatrixMarketTensorLoader +import os +from util import TensorCollectionFROSTT, PydataTensorShifter, TensorCollectionSuiteSparse, ScipyTensorShifter, PydataMatrixMarketTensorLoader, ScipyMatrixMarketTensorLoader, VALIDATION_OUTPUT_PATH, PydataSparseTensorDumper # TODO (rohany): Ask hameer about this. pydata/sparse isn't happy when # given this ufunc to evaluate. @@ -88,41 +89,53 @@ def bench(): tacoBench(bench) print("Result", bench()) +def ufunc_bench_key(tensorName, funcName): + return tensorName + "-" + funcName + "-numpy" + # Run benchmarks against the FROSTT collection. 
FROSTTTensors = TensorCollectionFROSTT() -@pytest.mark.parametrize("tensor", FROSTTTensors.getTensors(), ids=FROSTTTensors.getTensorNames()) +@pytest.mark.parametrize("tensor", FROSTTTensors.getTensors()) @pytest.mark.parametrize("ufunc", [numpy.logical_xor, numpy.ldexp, numpy.right_shift]) def bench_pydata_frostt_ufunc_sparse(tacoBench, tensor, ufunc): - frTensor = tensor.load().astype('int64') shifter = PydataTensorShifter() - other = shifter.shiftLastMode(frTensor).astype('int64') + other = shifter.shiftLastMode(frTensor) def bench(): c = ufunc(frTensor, other) return c extra_info = dict() extra_info['tensor_str'] = str(tensor) extra_info['ufunc_str'] = ufunc.__name__ - tacoBench(bench, extra_info) + if VALIDATION_OUTPUT_PATH is not None: + result = bench() + key = ufunc_bench_key(str(tensor), ufunc.__name__) + outpath = os.path.join(VALIDATION_OUTPUT_PATH, key + ".tns") + PydataSparseTensorDumper().dump(result, outpath) + else: + tacoBench(bench, extra_info) # Run benchmarks against the SuiteSparse collection. 
SuiteSparseTensors = TensorCollectionSuiteSparse() -@pytest.mark.parametrize("tensor", SuiteSparseTensors.getTensors(), ids=SuiteSparseTensors.getTensorNames()) +@pytest.mark.parametrize("tensor", SuiteSparseTensors.getTensors()) @pytest.mark.parametrize("ufunc", [numpy.logical_xor, numpy.ldexp, numpy.right_shift]) def bench_pydata_suitesparse_ufunc_sparse(tacoBench, tensor, ufunc): - ssTensor = tensor[1].load(PydataMatrixMarketTensorLoader()).astype('int64') + ssTensor = tensor.load(PydataMatrixMarketTensorLoader()).astype('int64') shifter = PydataTensorShifter() - other = shifter.shiftLastMode(ssTensor).astype('int64') + other = shifter.shiftLastMode(ssTensor) def bench(): c = ufunc(ssTensor, other) return c extra_info = dict() extra_info['tensor_str'] = str(tensor) extra_info['ufunc_str'] = ufunc.__name__ - tacoBench(bench, extra_info) + if VALIDATION_OUTPUT_PATH is not None: + result = bench() + key = ufunc_bench_key(str(tensor), ufunc.__name__) + outpath = os.path.join(VALIDATION_OUTPUT_PATH, key + ".tns") + PydataSparseTensorDumper().dump(result, outpath) + else: + tacoBench(bench, extra_info) -# TODO (rohany): scipy doesn't support these, I forgot. If that's the case, -# do we really need to compare against suitesparse? @pytest.mark.skip(reason="scipy doesn't support this actually") @pytest.mark.parametrize("tensor", SuiteSparseTensors.getTensors(), ids=SuiteSparseTensors.getTensorNames()) @pytest.mark.parametrize("ufunc", [numpy.logical_xor, numpy.ldexp, numpy.right_shift]) diff --git a/numpy/util.py b/numpy/util.py index 3a7d954..6c6be4e 100644 --- a/numpy/util.py +++ b/numpy/util.py @@ -7,6 +7,8 @@ # Get the path to the directory holding random tensors. Error out # if this isn't set. TENSOR_PATH = os.environ['TACO_TENSOR_PATH'] +# Get the validation path, if it exists. +VALIDATION_OUTPUT_PATH = os.getenv('VALIDATION_OUTPUT_PATH', None) # TnsFileLoader loads a tensor stored in .tns format. 
class TnsFileLoader: @@ -76,6 +78,14 @@ def load(self, path): dims, coords, values = self.loader.load(path) return sparse.COO(coords, values, tuple(dims)) +# PydataSparseTensorDumper dumps a sparse tensor to the desired file. +class PydataSparseTensorDumper: + def __init__(self): + self.dumper = TnsFileDumper() + + def dump(self, tensor, path): + self.dumper.dump_dict_to_file(tensor.shape, sparse.DOK(tensor).data, path) + # construct_random_tensor_key constructs a unique key that represents # a random tensor parameterized by the chosen shape and sparsity. # The key itself is formatted by the dimensions, followed by the @@ -132,6 +142,7 @@ def random(self, shape, sparsity): class FROSTTTensor: def __init__(self, path): self.path = path + self.__name__ = self.__str__() def __str__(self): f = os.path.split(self.path)[1] @@ -176,7 +187,7 @@ def shiftLastMode(self, tensor): # For order 2 tensors, always shift the last coordinate. Otherwise, shift only coordinates # that have even last coordinates. This ensures that there is at least some overlap # between the original tensor and its shifted counter part.
- if tensor.shape[-1] <= 0 or resultCoords[-1][i] % 2 == 0: + if len(tensor.shape) <= 2 or resultCoords[-1][i] % 2 == 0: resultCoords[-1][i] = (resultCoords[-1][i] + 1) % tensor.shape[-1] return sparse.COO(resultCoords, resultValues, tensor.shape) @@ -231,6 +242,7 @@ def load(self, path): class SuiteSparseTensor: def __init__(self, path): self.path = path + self.__name__ = self.__str__() def __str__(self): f = os.path.split(self.path)[1] diff --git a/taco/bench.cpp b/taco/bench.cpp index 92f4269..921ea88 100644 --- a/taco/bench.cpp +++ b/taco/bench.cpp @@ -6,15 +6,39 @@ #include "taco/tensor.h" #include "taco/util/strings.h" -std::string getTacoTensorPath() { - auto path = std::getenv("TACO_TENSOR_PATH"); +std::string getEnvVar(std::string varname) { + auto path = std::getenv(varname.c_str()); if (path == nullptr) { - std::cout << "TACO_TENSOR_PATH is unset" << std::endl; - assert(false); + return ""; } return std::string(path); } +std::string getTacoTensorPath() { + std::string result = getEnvVar("TACO_TENSOR_PATH"); + if (result == "") { + assert(false && "TACO_TENSOR_PATH is unset"); + } + return cleanPath(result); +} + +std::string getValidationOutputPath() { + auto result = getEnvVar("VALIDATION_OUTPUT_PATH"); + if (result != "") { + result = cleanPath(result); + } + return result; +} + +std::string cleanPath(std::string path) { + std::string result(path); + // An empty path has no last character to inspect; treat it like any other path lacking a trailing "/". + if (result.empty() || result.back() != '/') { + result += "/"; + } + return result; +} + std::string constructRandomTensorKey(std::vector dims, float sparsity) { auto path = getTacoTensorPath(); std::stringstream result; diff --git a/taco/bench.h b/taco/bench.h index 1badae7..b88c9d6 100644 --- a/taco/bench.h +++ b/taco/bench.h @@ -25,7 +25,6 @@ #define TACO_BENCH_ARG(bench, name, arg) \ BENCHMARK_CAPTURE(bench, name, arg) \ ->Unit(benchmark::kMicrosecond) \ - ->Repetitions(10) \ ->Iterations(1) \ ->ReportAggregatesOnly(true) \ ->UseRealTime() @@ -33,12 +32,14 @@ #define TACO_BENCH_ARGS(bench, name, ...)
\ BENCHMARK_CAPTURE(bench, name, __VA_ARGS__) \ ->Unit(benchmark::kMicrosecond) \ - ->Repetitions(10) \ ->Iterations(1) \ ->ReportAggregatesOnly(true) \ ->UseRealTime() std::string getTacoTensorPath(); +std::string getValidationOutputPath(); +// cleanPath ensures that the input path ends with "/". +std::string cleanPath(std::string path); taco::TensorBase loadRandomTensor(std::string name, std::vector dims, float sparsity, taco::Format format); template @@ -46,10 +47,12 @@ taco::Tensor castToType(std::string name, taco::Tensor tensor) { taco::Tensor result(name, tensor.getDimensions(), tensor.getFormat()); std::vector coords(tensor.getOrder()); for (auto& value : taco::iterate(tensor)) { - for (int i = 0; i < tensor.getOrder(); i++) { - coords[i] = value.first[i]; + if (static_cast(value.second) != T(0)) { + for (int i = 0; i < tensor.getOrder(); i++) { + coords[i] = value.first[i]; + } + result.insert(coords, static_cast(value.second)); } - result.insert(coords, T(value.second)); } result.pack(); return result; diff --git a/taco/ufuncs.cpp b/taco/ufuncs.cpp index 6980056..00ca7fb 100644 --- a/taco/ufuncs.cpp +++ b/taco/ufuncs.cpp @@ -184,15 +184,19 @@ struct UfuncInputCache { }; UfuncInputCache inputCache; +std::string ufuncBenchKey(std::string tensorName, std::string funcName) { + return tensorName + "-" + funcName + "-taco"; +} + static void bench_frostt_ufunc(benchmark::State& state, std::string tnsPath, Func op) { - auto path = getTacoTensorPath(); - auto frosttTensorPath = path; - if (frosttTensorPath[frosttTensorPath.size() - 1] != '/') { - frosttTensorPath += "/"; - } + auto frosttTensorPath = getTacoTensorPath(); frosttTensorPath += "FROSTT/"; frosttTensorPath += tnsPath; + auto pathSplit = taco::util::split(tnsPath, "/"); + auto filename = pathSplit[pathSplit.size() - 1]; + auto tensorName = taco::util::split(filename, ".")[0]; + // TODO (rohany): What format do we want to do here? 
Tensor frosttTensor, other; std::tie(frosttTensor, other) = inputCache.getUfuncInput(frosttTensorPath, Sparse); @@ -220,12 +224,19 @@ static void bench_frostt_ufunc(benchmark::State& state, std::string tnsPath, Fun state.ResumeTiming(); result.compute(); + + state.PauseTiming(); + if (auto validationPath = getValidationOutputPath(); validationPath != "") { + auto key = ufuncBenchKey(tensorName, op.getName()); + auto outpath = validationPath + key + ".tns"; + taco::write(outpath, result.removeExplicitZeros(result.getFormat())); + } } } Func ldExp("ldexp", Ldexp(), leftIncAlgebra()); -Func rightShift("rightShift", RightShift(), leftIncAlgebra()); -Func xorOp("xor", GeneralAdd(), xorAlgebra()); +Func rightShift("right_shift", RightShift(), leftIncAlgebra()); +Func xorOp("logical_xor", GeneralAdd(), xorAlgebra()); #define FOREACH_FROSTT_TENSOR(__func__) \ __func__(nips, "nips.tns") \ @@ -234,19 +245,15 @@ Func xorOp("xor", GeneralAdd(), xorAlgebra()); __func__(lbnl_network, "lbnl-network.tns") #define DECLARE_FROSTT_UFUNC_BENCH(name, path) \ - TACO_BENCH_ARGS(bench_frostt_ufunc, name/xor, path, xorOp); \ - TACO_BENCH_ARGS(bench_frostt_ufunc, name/ldExp, path, ldExp); \ - TACO_BENCH_ARGS(bench_frostt_ufunc, name/rightShift, path, rightShift); \ + TACO_BENCH_ARGS(bench_frostt_ufunc, name/xor, path, xorOp); \ + TACO_BENCH_ARGS(bench_frostt_ufunc, name/ldExp, path, ldExp); \ + TACO_BENCH_ARGS(bench_frostt_ufunc, name/rightShift, path, rightShift); \ FOREACH_FROSTT_TENSOR(DECLARE_FROSTT_UFUNC_BENCH) struct SuiteSparseTensors { SuiteSparseTensors() { - auto path = getTacoTensorPath(); - auto ssTensorPath = path; - if (ssTensorPath[ssTensorPath.size() - 1] != '/') { - ssTensorPath += "/"; - } + auto ssTensorPath = getTacoTensorPath(); ssTensorPath += "suitesparse/"; for (auto& entry : std::experimental::filesystem::directory_iterator(ssTensorPath)) { std::string f(entry.path()); @@ -285,6 +292,13 @@ static void bench_suitesparse_ufunc(benchmark::State& state, Func op) { 
state.ResumeTiming(); result.compute(); + + state.PauseTiming(); + if (auto validationPath = getValidationOutputPath(); validationPath != "") { + auto key = ufuncBenchKey(tensorName, op.getName()); + auto outpath = validationPath + key + ".tns"; + taco::write(outpath, result.removeExplicitZeros(result.getFormat())); + } } } diff --git a/validator.sh b/validator.sh new file mode 100755 index 0000000..b84cdeb --- /dev/null +++ b/validator.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# Usage: validator.sh DIR +# For every DIR/*-numpy.tns, report when the matching *-taco.tns file is +# missing or holds a different number of lines (entries). +for numpy in "$1"/*-numpy.tns; do + # An unmatched glob stays literal; skip it instead of reading a bogus path. + [ -e "$numpy" ] || continue + taco=${numpy%-numpy.tns}-taco.tns + if [ ! -f "$taco" ]; then + echo "File $taco is missing for $numpy." + elif [ ! "$(wc -l < "$numpy" | xargs)" -eq "$(wc -l < "$taco" | xargs)" ]; then + echo "Files $numpy and $taco have a differing number of entries." + fi +done