Skip to content

*: add scaffolding and basic cross validation between taco and numpy #17

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,35 @@ python-bench: results numpy/*.py
echo $(benches_name)
-pytest $(IGNORE_FLAGS) --benchmark-json=$(NUMPY_JSON) $(BENCHFLAGS) $(BENCHES)
python numpy/converter.py --json_name $(NUMPY_JSON)

# Separate target to run the python benchmarks with numpy-taco cross validation logic.
# This is a command, not a file, so it is declared phony. The validation-path
# guard is listed first so that a missing VALIDATION_OUTPUT_PATH is reported
# before anything else is considered.
.PHONY: validate-python-bench
validate-python-bench: validation-path numpy/*.py
	pytest $(IGNORE_FLAGS) $(BENCHFLAGS) $(BENCHES)

# Phony convenience target: invokes numpy/converter.py with the --all flag.
# NOTE(review): presumably this converts every recorded benchmark result to CSV;
# confirm against converter.py.
.PHONY: convert-csv-all
convert-csv-all:
python numpy/converter.py --all

taco-bench: taco/build/taco-bench
ifeq ($(BENCHES),"")
LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)"
LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10

else
LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)"
LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10
endif

# Separate target to run the TACO benchmarks with numpy-taco cross validation logic.
# This is a command, not a file, so it is declared phony. The validation-path
# guard is listed before the taco-bench binary so that a missing
# VALIDATION_OUTPUT_PATH fails fast instead of after a full build.
# A single repetition is enough here: the run is for producing outputs to
# compare, not for timing.
.PHONY: validate-taco-bench
validate-taco-bench: validation-path taco/build/taco-bench
ifeq ($(BENCHES),"")
	LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_repetitions=1
else
	LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_repetitions=1
endif

# Guard target used as a prerequisite by the validate-* targets. When it is
# run with VALIDATION_OUTPUT_PATH empty or unset, make stops with an error;
# otherwise the recipe expands to nothing and the target is a no-op.
.PHONY: validation-path
validation-path:
	$(if $(VALIDATION_OUTPUT_PATH),,$(error VALIDATION_OUTPUT_PATH is undefined))

taco/build/taco-bench: results check-and-reinit-submodules taco/benchmark/googletest
Expand Down
35 changes: 24 additions & 11 deletions numpy/ufuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from scipy.sparse import random, csr_matrix
import sparse
import pytest
from util import TensorCollectionFROSTT, PydataTensorShifter, TensorCollectionSuiteSparse, ScipyTensorShifter, PydataMatrixMarketTensorLoader, ScipyMatrixMarketTensorLoader
import os
from util import TensorCollectionFROSTT, PydataTensorShifter, TensorCollectionSuiteSparse, ScipyTensorShifter, PydataMatrixMarketTensorLoader, ScipyMatrixMarketTensorLoader, VALIDATION_OUTPUT_PATH, PydataSparseTensorDumper

# TODO (rohany): Ask hameer about this. pydata/sparse isn't happy when
# given this ufunc to evaluate.
Expand Down Expand Up @@ -88,41 +89,53 @@ def bench():
tacoBench(bench)
print("Result", bench())

def ufunc_bench_key(tensorName, funcName):
    """Return the key identifying a numpy-side ufunc benchmark result.

    The key has the form ``<tensorName>-<funcName>-numpy``. Both arguments
    must be strings.
    """
    return "-".join((tensorName, funcName, "numpy"))

# Run benchmarks against the FROSTT collection.
FROSTTTensors = TensorCollectionFROSTT()
@pytest.mark.parametrize("tensor", FROSTTTensors.getTensors(), ids=FROSTTTensors.getTensorNames())
@pytest.mark.parametrize("tensor", FROSTTTensors.getTensors())
@pytest.mark.parametrize("ufunc", [numpy.logical_xor, numpy.ldexp, numpy.right_shift])
def bench_pydata_frostt_ufunc_sparse(tacoBench, tensor, ufunc):

frTensor = tensor.load().astype('int64')
shifter = PydataTensorShifter()
other = shifter.shiftLastMode(frTensor).astype('int64')
other = shifter.shiftLastMode(frTensor)
def bench():
c = ufunc(frTensor, other)
return c
extra_info = dict()
extra_info['tensor_str'] = str(tensor)
extra_info['ufunc_str'] = ufunc.__name__
tacoBench(bench, extra_info)
if VALIDATION_OUTPUT_PATH is not None:
result = bench()
key = ufunc_bench_key(str(tensor), ufunc.__name__)
outpath = os.path.join(VALIDATION_OUTPUT_PATH, key + ".tns")
PydataSparseTensorDumper().dump(result, outpath)
else:
tacoBench(bench, extra_info)

# Run benchmarks against the SuiteSparse collection.
SuiteSparseTensors = TensorCollectionSuiteSparse()
@pytest.mark.parametrize("tensor", SuiteSparseTensors.getTensors(), ids=SuiteSparseTensors.getTensorNames())
@pytest.mark.parametrize("tensor", SuiteSparseTensors.getTensors())
@pytest.mark.parametrize("ufunc", [numpy.logical_xor, numpy.ldexp, numpy.right_shift])
def bench_pydata_suitesparse_ufunc_sparse(tacoBench, tensor, ufunc):
ssTensor = tensor[1].load(PydataMatrixMarketTensorLoader()).astype('int64')
ssTensor = tensor.load(PydataMatrixMarketTensorLoader()).astype('int64')
shifter = PydataTensorShifter()
other = shifter.shiftLastMode(ssTensor).astype('int64')
other = shifter.shiftLastMode(ssTensor)
def bench():
c = ufunc(ssTensor, other)
return c
extra_info = dict()
extra_info['tensor_str'] = str(tensor)
extra_info['ufunc_str'] = ufunc.__name__
tacoBench(bench, extra_info)
if VALIDATION_OUTPUT_PATH is not None:
result = bench()
key = ufunc_bench_key(str(tensor), ufunc.__name__)
outpath = os.path.join(VALIDATION_OUTPUT_PATH, key + ".tns")
PydataSparseTensorDumper().dump(result, outpath)
else:
tacoBench(bench, extra_info)

# TODO (rohany): scipy doesn't support these, I forgot. If that's the case,
# do we really need to compare against suitesparse?
@pytest.mark.skip(reason="scipy doesn't support this actually")
@pytest.mark.parametrize("tensor", SuiteSparseTensors.getTensors(), ids=SuiteSparseTensors.getTensorNames())
@pytest.mark.parametrize("ufunc", [numpy.logical_xor, numpy.ldexp, numpy.right_shift])
Expand Down
14 changes: 13 additions & 1 deletion numpy/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
# Get the path to the directory holding random tensors. Error out
# if this isn't set.
TENSOR_PATH = os.environ['TACO_TENSOR_PATH']
# Get the validation path, if it exists.
VALIDATION_OUTPUT_PATH = os.getenv('VALIDATION_OUTPUT_PATH', None)

# TnsFileLoader loads a tensor stored in .tns format.
class TnsFileLoader:
Expand Down Expand Up @@ -76,6 +78,14 @@ def load(self, path):
dims, coords, values = self.loader.load(path)
return sparse.COO(coords, values, tuple(dims))

# PydataSparseTensorDumper writes a sparse tensor out to the desired file.
class PydataSparseTensorDumper:
    def __init__(self):
        # The actual file writing is delegated to a TnsFileDumper.
        self.dumper = TnsFileDumper()

    def dump(self, tensor, path):
        # Hand the tensor's shape and the `data` attribute of its DOK
        # conversion to the underlying dumper, which writes them to `path`.
        shape = tensor.shape
        entries = sparse.DOK(tensor).data
        self.dumper.dump_dict_to_file(shape, entries, path)

# construct_random_tensor_key constructs a unique key that represents
# a random tensor parameterized by the chosen shape and sparsity.
# The key itself is formatted by the dimensions, followed by the
Expand Down Expand Up @@ -132,6 +142,7 @@ def random(self, shape, sparsity):
class FROSTTTensor:
def __init__(self, path):
self.path = path
self.__name__ = self.__str__()

def __str__(self):
f = os.path.split(self.path)[1]
Expand Down Expand Up @@ -176,7 +187,7 @@ def shiftLastMode(self, tensor):
# For order 2 tensors, always shift the last coordinate. Otherwise, shift only coordinates
# that have even last coordinates. This ensures that there is at least some overlap
# between the original tensor and its shifted counterpart.
if tensor.shape[-1] <= 0 or resultCoords[-1][i] % 2 == 0:
if len(tensor.shape) <= 2 or resultCoords[-1][i] % 2 == 0:
resultCoords[-1][i] = (resultCoords[-1][i] + 1) % tensor.shape[-1]
return sparse.COO(resultCoords, resultValues, tensor.shape)

Expand Down Expand Up @@ -231,6 +242,7 @@ def load(self, path):
class SuiteSparseTensor:
def __init__(self, path):
self.path = path
self.__name__ = self.__str__()

def __str__(self):
f = os.path.split(self.path)[1]
Expand Down
31 changes: 27 additions & 4 deletions taco/bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,38 @@
#include "taco/tensor.h"
#include "taco/util/strings.h"

#include <cstdlib>
#include <iostream>

std::string getTacoTensorPath() {
auto path = std::getenv("TACO_TENSOR_PATH");
std::string getEnvVar(std::string varname) {
auto path = std::getenv(varname.c_str());
if (path == nullptr) {
std::cout << "TACO_TENSOR_PATH is unset" << std::endl;
assert(false);
return "";
}
return std::string(path);
}

// getTacoTensorPath returns the value of the TACO_TENSOR_PATH environment
// variable, passed through cleanPath. The variable is required: when
// getEnvVar reports it as empty, a message is written to stderr and the
// process is aborted.
std::string getTacoTensorPath() {
  std::string result = getEnvVar("TACO_TENSOR_PATH");
  if (result.empty()) {
    // Do not rely on assert() for this check: it is compiled out when NDEBUG
    // is defined, which would let an empty path continue into cleanPath.
    std::cerr << "TACO_TENSOR_PATH is unset" << std::endl;
    std::abort();
  }
  return cleanPath(result);
}

// getValidationOutputPath returns the VALIDATION_OUTPUT_PATH environment
// variable passed through cleanPath, or the empty string when getEnvVar
// returns an empty value for it.
std::string getValidationOutputPath() {
  const std::string raw = getEnvVar("VALIDATION_OUTPUT_PATH");
  return raw.empty() ? raw : cleanPath(raw);
}

// cleanPath returns `path` with a trailing "/" appended when it does not
// already end with one. An empty path is returned unchanged: indexing its
// last character would be out of bounds, and callers treat the empty string
// as "no path".
std::string cleanPath(std::string path) {
  if (path.empty() || path.back() == '/') {
    return path;
  }
  return path + "/";
}

std::string constructRandomTensorKey(std::vector<int> dims, float sparsity) {
auto path = getTacoTensorPath();
std::stringstream result;
Expand Down
13 changes: 8 additions & 5 deletions taco/bench.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,31 +25,34 @@
#define TACO_BENCH_ARG(bench, name, arg) \
BENCHMARK_CAPTURE(bench, name, arg) \
->Unit(benchmark::kMicrosecond) \
->Repetitions(10) \
->Iterations(1) \
->ReportAggregatesOnly(true) \
->UseRealTime()

#define TACO_BENCH_ARGS(bench, name, ...) \
BENCHMARK_CAPTURE(bench, name, __VA_ARGS__) \
->Unit(benchmark::kMicrosecond) \
->Repetitions(10) \
->Iterations(1) \
->ReportAggregatesOnly(true) \
->UseRealTime()

std::string getTacoTensorPath();
std::string getValidationOutputPath();
// cleanPath ensures that the input path ends with "/".
std::string cleanPath(std::string path);
taco::TensorBase loadRandomTensor(std::string name, std::vector<int> dims, float sparsity, taco::Format format);

template<typename T>
taco::Tensor<T> castToType(std::string name, taco::Tensor<double> tensor) {
taco::Tensor<T> result(name, tensor.getDimensions(), tensor.getFormat());
std::vector<int> coords(tensor.getOrder());
for (auto& value : taco::iterate<double>(tensor)) {
for (int i = 0; i < tensor.getOrder(); i++) {
coords[i] = value.first[i];
if (static_cast<T>(value.second) != T(0)) {
for (int i = 0; i < tensor.getOrder(); i++) {
coords[i] = value.first[i];
}
result.insert(coords, static_cast<T>(value.second));
}
result.insert(coords, T(value.second));
}
result.pack();
return result;
Expand Down
44 changes: 29 additions & 15 deletions taco/ufuncs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,15 +184,19 @@ struct UfuncInputCache {
};
UfuncInputCache inputCache;

// ufuncBenchKey builds the key "<tensorName>-<funcName>-taco" that names the
// TACO-side result of a ufunc benchmark.
std::string ufuncBenchKey(std::string tensorName, std::string funcName) {
  std::string key(tensorName);
  key.append("-").append(funcName).append("-taco");
  return key;
}

static void bench_frostt_ufunc(benchmark::State& state, std::string tnsPath, Func op) {
auto path = getTacoTensorPath();
auto frosttTensorPath = path;
if (frosttTensorPath[frosttTensorPath.size() - 1] != '/') {
frosttTensorPath += "/";
}
auto frosttTensorPath = getTacoTensorPath();
frosttTensorPath += "FROSTT/";
frosttTensorPath += tnsPath;

auto pathSplit = taco::util::split(tnsPath, "/");
auto filename = pathSplit[pathSplit.size() - 1];
auto tensorName = taco::util::split(filename, ".")[0];

// TODO (rohany): What format do we want to do here?
Tensor<int64_t> frosttTensor, other;
std::tie(frosttTensor, other) = inputCache.getUfuncInput(frosttTensorPath, Sparse);
Expand Down Expand Up @@ -220,12 +224,19 @@ static void bench_frostt_ufunc(benchmark::State& state, std::string tnsPath, Fun
state.ResumeTiming();

result.compute();

state.PauseTiming();
if (auto validationPath = getValidationOutputPath(); validationPath != "") {
auto key = ufuncBenchKey(tensorName, op.getName());
auto outpath = validationPath + key + ".tns";
taco::write(outpath, result.removeExplicitZeros(result.getFormat()));
}
}
}

Func ldExp("ldexp", Ldexp(), leftIncAlgebra());
Func rightShift("rightShift", RightShift(), leftIncAlgebra());
Func xorOp("xor", GeneralAdd(), xorAlgebra());
Func rightShift("right_shift", RightShift(), leftIncAlgebra());
Func xorOp("logical_xor", GeneralAdd(), xorAlgebra());

#define FOREACH_FROSTT_TENSOR(__func__) \
__func__(nips, "nips.tns") \
Expand All @@ -234,19 +245,15 @@ Func xorOp("xor", GeneralAdd(), xorAlgebra());
__func__(lbnl_network, "lbnl-network.tns")

#define DECLARE_FROSTT_UFUNC_BENCH(name, path) \
TACO_BENCH_ARGS(bench_frostt_ufunc, name/xor, path, xorOp); \
TACO_BENCH_ARGS(bench_frostt_ufunc, name/ldExp, path, ldExp); \
TACO_BENCH_ARGS(bench_frostt_ufunc, name/rightShift, path, rightShift); \
TACO_BENCH_ARGS(bench_frostt_ufunc, name/xor, path, xorOp); \
TACO_BENCH_ARGS(bench_frostt_ufunc, name/ldExp, path, ldExp); \
TACO_BENCH_ARGS(bench_frostt_ufunc, name/rightShift, path, rightShift); \

FOREACH_FROSTT_TENSOR(DECLARE_FROSTT_UFUNC_BENCH)

struct SuiteSparseTensors {
SuiteSparseTensors() {
auto path = getTacoTensorPath();
auto ssTensorPath = path;
if (ssTensorPath[ssTensorPath.size() - 1] != '/') {
ssTensorPath += "/";
}
auto ssTensorPath = getTacoTensorPath();
ssTensorPath += "suitesparse/";
for (auto& entry : std::experimental::filesystem::directory_iterator(ssTensorPath)) {
std::string f(entry.path());
Expand Down Expand Up @@ -285,6 +292,13 @@ static void bench_suitesparse_ufunc(benchmark::State& state, Func op) {
state.ResumeTiming();

result.compute();

state.PauseTiming();
if (auto validationPath = getValidationOutputPath(); validationPath != "") {
auto key = ufuncBenchKey(tensorName, op.getName());
auto outpath = validationPath + key + ".tns";
taco::write(outpath, result.removeExplicitZeros(result.getFormat()));
}
}
}

Expand Down
8 changes: 8 additions & 0 deletions validator.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
#
# Compares the numpy and TACO validation outputs found in a directory.
#
# Usage: validator.sh <dir>
#
# For every <dir>/*-numpy.tns file, the file whose name ends in "-taco.tns"
# instead of "-numpy.tns" is looked up. A message is printed when that file
# is missing or when the two files have a different number of lines.

for numpy in "$1"/*-numpy.tns; do
    # When nothing matches, the glob is left as a literal pattern; skip it
    # rather than passing a nonexistent path to wc.
    [ -e "$numpy" ] || continue

    # Rewrite only the trailing suffix, so that a "-numpy" occurring earlier
    # in the path (for example in the directory name) is left untouched.
    taco="${numpy%-numpy.tns}-taco.tns"
    if [ ! -e "$taco" ]; then
        echo "File $numpy has no matching $taco."
        continue
    fi

    # xargs trims the padding some wc implementations put around the count.
    if [ "$(wc -l < "$numpy" | xargs)" -ne "$(wc -l < "$taco" | xargs)" ]; then
        echo "Files $numpy and $taco have a differing number of entries."
    fi
done