Skip to content

Commit 3c5d4ba

Browse files
authored
Merge pull request #17 from tensor-compiler/validation
*: add scaffolding and basic cross validation between taco and numpy
2 parents ef6bd1e + 46fceb1 commit 3c5d4ba

File tree

7 files changed

+129
-38
lines changed

7 files changed

+129
-38
lines changed

Makefile

+20-2
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,35 @@ python-bench: results numpy/*.py
2727
echo $(benches_name)
2828
-pytest $(IGNORE_FLAGS) --benchmark-json=$(NUMPY_JSON) $(BENCHFLAGS) $(BENCHES)
2929
python numpy/converter.py --json_name $(NUMPY_JSON)
30+
31+
# Separate target to run the python benchmarks with numpy-taco cross validation logic.
32+
validate-python-bench: numpy/*.py validation-path
33+
pytest $(IGNORE_FLAGS) $(BENCHFLAGS) $(BENCHES)
3034

3135
.PHONY: convert-csv-all
3236
convert-csv-all:
3337
python numpy/converter.py --all
3438

3539
taco-bench: taco/build/taco-bench
3640
ifeq ($(BENCHES),"")
37-
LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)"
41+
LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10
3842

3943
else
40-
LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)"
44+
LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10
45+
endif
46+
47+
# Separate target to run the TACO benchmarks with numpy-taco cross validation logic.
48+
validate-taco-bench: taco/build/taco-bench validation-path
49+
ifeq ($(BENCHES),"")
50+
LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_repetitions=1
51+
else
52+
LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_repetitions=1
53+
endif
54+
55+
.PHONY: validation-path
56+
validation-path:
57+
ifeq ($(VALIDATION_OUTPUT_PATH),)
58+
$(error VALIDATION_OUTPUT_PATH is undefined)
4159
endif
4260

4361
taco/build/taco-bench: results check-and-reinit-submodules taco/benchmark/googletest

numpy/ufuncs.py

+24-11
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
from scipy.sparse import random, csr_matrix
33
import sparse
44
import pytest
5-
from util import TensorCollectionFROSTT, PydataTensorShifter, TensorCollectionSuiteSparse, ScipyTensorShifter, PydataMatrixMarketTensorLoader, ScipyMatrixMarketTensorLoader
5+
import os
6+
from util import TensorCollectionFROSTT, PydataTensorShifter, TensorCollectionSuiteSparse, ScipyTensorShifter, PydataMatrixMarketTensorLoader, ScipyMatrixMarketTensorLoader, VALIDATION_OUTPUT_PATH, PydataSparseTensorDumper
67

78
# TODO (rohany): Ask hameer about this. pydata/sparse isn't happy when
89
# given this ufunc to evaluate.
@@ -88,41 +89,53 @@ def bench():
8889
tacoBench(bench)
8990
print("Result", bench())
9091

92+
# ufunc_bench_key builds the key naming a ufunc benchmark's validation
# output, in the form "<tensorName>-<funcName>-numpy".
def ufunc_bench_key(tensorName, funcName):
    return "-".join([tensorName, funcName, "numpy"])
94+
9195
# Run benchmarks against the FROSTT collection.
9296
FROSTTTensors = TensorCollectionFROSTT()
93-
@pytest.mark.parametrize("tensor", FROSTTTensors.getTensors(), ids=FROSTTTensors.getTensorNames())
97+
@pytest.mark.parametrize("tensor", FROSTTTensors.getTensors())
9498
@pytest.mark.parametrize("ufunc", [numpy.logical_xor, numpy.ldexp, numpy.right_shift])
9599
def bench_pydata_frostt_ufunc_sparse(tacoBench, tensor, ufunc):
96-
97100
frTensor = tensor.load().astype('int64')
98101
shifter = PydataTensorShifter()
99-
other = shifter.shiftLastMode(frTensor).astype('int64')
102+
other = shifter.shiftLastMode(frTensor)
100103
def bench():
101104
c = ufunc(frTensor, other)
102105
return c
103106
extra_info = dict()
104107
extra_info['tensor_str'] = str(tensor)
105108
extra_info['ufunc_str'] = ufunc.__name__
106-
tacoBench(bench, extra_info)
109+
if VALIDATION_OUTPUT_PATH is not None:
110+
result = bench()
111+
key = ufunc_bench_key(str(tensor), ufunc.__name__)
112+
outpath = os.path.join(VALIDATION_OUTPUT_PATH, key + ".tns")
113+
PydataSparseTensorDumper().dump(result, outpath)
114+
else:
115+
tacoBench(bench, extra_info)
107116

108117
# Run benchmarks against the SuiteSparse collection.
109118
SuiteSparseTensors = TensorCollectionSuiteSparse()
110-
@pytest.mark.parametrize("tensor", SuiteSparseTensors.getTensors(), ids=SuiteSparseTensors.getTensorNames())
119+
@pytest.mark.parametrize("tensor", SuiteSparseTensors.getTensors())
111120
@pytest.mark.parametrize("ufunc", [numpy.logical_xor, numpy.ldexp, numpy.right_shift])
112121
def bench_pydata_suitesparse_ufunc_sparse(tacoBench, tensor, ufunc):
113-
ssTensor = tensor[1].load(PydataMatrixMarketTensorLoader()).astype('int64')
122+
ssTensor = tensor.load(PydataMatrixMarketTensorLoader()).astype('int64')
114123
shifter = PydataTensorShifter()
115-
other = shifter.shiftLastMode(ssTensor).astype('int64')
124+
other = shifter.shiftLastMode(ssTensor)
116125
def bench():
117126
c = ufunc(ssTensor, other)
118127
return c
119128
extra_info = dict()
120129
extra_info['tensor_str'] = str(tensor)
121130
extra_info['ufunc_str'] = ufunc.__name__
122-
tacoBench(bench, extra_info)
131+
if VALIDATION_OUTPUT_PATH is not None:
132+
result = bench()
133+
key = ufunc_bench_key(str(tensor), ufunc.__name__)
134+
outpath = os.path.join(VALIDATION_OUTPUT_PATH, key + ".tns")
135+
PydataSparseTensorDumper().dump(result, outpath)
136+
else:
137+
tacoBench(bench, extra_info)
123138

124-
# TODO (rohany): scipy doesn't support these, I forgot. If that's the case,
125-
# do we really need to compare against suitesparse?
126139
@pytest.mark.skip(reason="scipy doesn't support this actually")
127140
@pytest.mark.parametrize("tensor", SuiteSparseTensors.getTensors(), ids=SuiteSparseTensors.getTensorNames())
128141
@pytest.mark.parametrize("ufunc", [numpy.logical_xor, numpy.ldexp, numpy.right_shift])

numpy/util.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
# Get the path to the directory holding random tensors. Error out
88
# if this isn't set.
99
TENSOR_PATH = os.environ['TACO_TENSOR_PATH']
10+
# Get the validation path, if it exists.
11+
VALIDATION_OUTPUT_PATH = os.getenv('VALIDATION_OUTPUT_PATH', None)
1012

1113
# TnsFileLoader loads a tensor stored in .tns format.
1214
class TnsFileLoader:
@@ -76,6 +78,14 @@ def load(self, path):
7678
dims, coords, values = self.loader.load(path)
7779
return sparse.COO(coords, values, tuple(dims))
7880

81+
# PydataSparseTensorDumper dumps a sparse tensor to the desired file.
82+
class PydataSparseTensorDumper:
    def __init__(self):
        # All file writing is delegated to a TnsFileDumper.
        self.dumper = TnsFileDumper()

    def dump(self, tensor, path):
        # Hand the tensor's shape and the data of its sparse.DOK
        # conversion to the underlying dumper, targeting `path`.
        shape = tensor.shape
        entries = sparse.DOK(tensor).data
        self.dumper.dump_dict_to_file(shape, entries, path)
88+
7989
# construct_random_tensor_key constructs a unique key that represents
8090
# a random tensor parameterized by the chosen shape and sparsity.
8191
# The key itself is formatted by the dimensions, followed by the
@@ -132,6 +142,7 @@ def random(self, shape, sparsity):
132142
class FROSTTTensor:
133143
def __init__(self, path):
134144
self.path = path
145+
self.__name__ = self.__str__()
135146

136147
def __str__(self):
137148
f = os.path.split(self.path)[1]
@@ -176,7 +187,7 @@ def shiftLastMode(self, tensor):
176187
# For order 2 tensors, always shift the last coordinate. Otherwise, shift only coordinates
177188
# that have even last coordinates. This ensures that there is at least some overlap
178189
# between the original tensor and its shifted counter part.
179-
if tensor.shape[-1] <= 0 or resultCoords[-1][i] % 2 == 0:
190+
if len(tensor.shape) <= 2 or resultCoords[-1][i] % 2 == 0:
180191
resultCoords[-1][i] = (resultCoords[-1][i] + 1) % tensor.shape[-1]
181192
return sparse.COO(resultCoords, resultValues, tensor.shape)
182193

@@ -231,6 +242,7 @@ def load(self, path):
231242
class SuiteSparseTensor:
232243
def __init__(self, path):
233244
self.path = path
245+
self.__name__ = self.__str__()
234246

235247
def __str__(self):
236248
f = os.path.split(self.path)[1]

taco/bench.cpp

+27-4
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,38 @@
66
#include "taco/tensor.h"
77
#include "taco/util/strings.h"
88

9-
std::string getTacoTensorPath() {
10-
auto path = std::getenv("TACO_TENSOR_PATH");
9+
std::string getEnvVar(std::string varname) {
10+
auto path = std::getenv(varname.c_str());
1111
if (path == nullptr) {
12-
std::cout << "TACO_TENSOR_PATH is unset" << std::endl;
13-
assert(false);
12+
return "";
1413
}
1514
return std::string(path);
1615
}
1716

17+
std::string getTacoTensorPath() {
18+
std::string result = getEnvVar("TACO_TENSOR_PATH");
19+
if (result == "") {
20+
assert(false && "TACO_TENSOR_PATH is unset");
21+
}
22+
return cleanPath(result);
23+
}
24+
25+
std::string getValidationOutputPath() {
26+
auto result = getEnvVar("VALIDATION_OUTPUT_PATH");
27+
if (result != "") {
28+
result = cleanPath(result);
29+
}
30+
return result;
31+
}
32+
33+
// cleanPath returns `path` with a trailing "/" appended when it does not
// already end with one. An empty path is returned unchanged: there is no
// last character to inspect (indexing size() - 1 would be out of bounds),
// and appending "/" would turn "no path" into the filesystem root.
std::string cleanPath(std::string path) {
  if (!path.empty() && path.back() != '/') {
    path += '/';
  }
  return path;
}
40+
1841
std::string constructRandomTensorKey(std::vector<int> dims, float sparsity) {
1942
auto path = getTacoTensorPath();
2043
std::stringstream result;

taco/bench.h

+8-5
Original file line numberDiff line numberDiff line change
@@ -25,31 +25,34 @@
2525
#define TACO_BENCH_ARG(bench, name, arg) \
2626
BENCHMARK_CAPTURE(bench, name, arg) \
2727
->Unit(benchmark::kMicrosecond) \
28-
->Repetitions(10) \
2928
->Iterations(1) \
3029
->ReportAggregatesOnly(true) \
3130
->UseRealTime()
3231

3332
#define TACO_BENCH_ARGS(bench, name, ...) \
3433
BENCHMARK_CAPTURE(bench, name, __VA_ARGS__) \
3534
->Unit(benchmark::kMicrosecond) \
36-
->Repetitions(10) \
3735
->Iterations(1) \
3836
->ReportAggregatesOnly(true) \
3937
->UseRealTime()
4038

4139
std::string getTacoTensorPath();
40+
std::string getValidationOutputPath();
41+
// cleanPath ensures that the input path ends with "/".
42+
std::string cleanPath(std::string path);
4243
taco::TensorBase loadRandomTensor(std::string name, std::vector<int> dims, float sparsity, taco::Format format);
4344

4445
template<typename T>
4546
taco::Tensor<T> castToType(std::string name, taco::Tensor<double> tensor) {
4647
taco::Tensor<T> result(name, tensor.getDimensions(), tensor.getFormat());
4748
std::vector<int> coords(tensor.getOrder());
4849
for (auto& value : taco::iterate<double>(tensor)) {
49-
for (int i = 0; i < tensor.getOrder(); i++) {
50-
coords[i] = value.first[i];
50+
if (static_cast<T>(value.second) != T(0)) {
51+
for (int i = 0; i < tensor.getOrder(); i++) {
52+
coords[i] = value.first[i];
53+
}
54+
result.insert(coords, static_cast<T>(value.second));
5155
}
52-
result.insert(coords, T(value.second));
5356
}
5457
result.pack();
5558
return result;

taco/ufuncs.cpp

+29-15
Original file line numberDiff line numberDiff line change
@@ -184,15 +184,19 @@ struct UfuncInputCache {
184184
};
185185
UfuncInputCache inputCache;
186186

187+
// ufuncBenchKey builds the key naming a ufunc benchmark's validation
// output, in the form "<tensorName>-<funcName>-taco".
std::string ufuncBenchKey(std::string tensorName, std::string funcName) {
  std::string key(tensorName);
  key += "-";
  key += funcName;
  key += "-taco";
  return key;
}
190+
187191
static void bench_frostt_ufunc(benchmark::State& state, std::string tnsPath, Func op) {
188-
auto path = getTacoTensorPath();
189-
auto frosttTensorPath = path;
190-
if (frosttTensorPath[frosttTensorPath.size() - 1] != '/') {
191-
frosttTensorPath += "/";
192-
}
192+
auto frosttTensorPath = getTacoTensorPath();
193193
frosttTensorPath += "FROSTT/";
194194
frosttTensorPath += tnsPath;
195195

196+
auto pathSplit = taco::util::split(tnsPath, "/");
197+
auto filename = pathSplit[pathSplit.size() - 1];
198+
auto tensorName = taco::util::split(filename, ".")[0];
199+
196200
// TODO (rohany): What format do we want to do here?
197201
Tensor<int64_t> frosttTensor, other;
198202
std::tie(frosttTensor, other) = inputCache.getUfuncInput(frosttTensorPath, Sparse);
@@ -220,12 +224,19 @@ static void bench_frostt_ufunc(benchmark::State& state, std::string tnsPath, Fun
220224
state.ResumeTiming();
221225

222226
result.compute();
227+
228+
state.PauseTiming();
229+
if (auto validationPath = getValidationOutputPath(); validationPath != "") {
230+
auto key = ufuncBenchKey(tensorName, op.getName());
231+
auto outpath = validationPath + key + ".tns";
232+
taco::write(outpath, result.removeExplicitZeros(result.getFormat()));
233+
}
223234
}
224235
}
225236

226237
Func ldExp("ldexp", Ldexp(), leftIncAlgebra());
227-
Func rightShift("rightShift", RightShift(), leftIncAlgebra());
228-
Func xorOp("xor", GeneralAdd(), xorAlgebra());
238+
Func rightShift("right_shift", RightShift(), leftIncAlgebra());
239+
Func xorOp("logical_xor", GeneralAdd(), xorAlgebra());
229240

230241
#define FOREACH_FROSTT_TENSOR(__func__) \
231242
__func__(nips, "nips.tns") \
@@ -234,19 +245,15 @@ Func xorOp("xor", GeneralAdd(), xorAlgebra());
234245
__func__(lbnl_network, "lbnl-network.tns")
235246

236247
#define DECLARE_FROSTT_UFUNC_BENCH(name, path) \
237-
TACO_BENCH_ARGS(bench_frostt_ufunc, name/xor, path, xorOp); \
238-
TACO_BENCH_ARGS(bench_frostt_ufunc, name/ldExp, path, ldExp); \
239-
TACO_BENCH_ARGS(bench_frostt_ufunc, name/rightShift, path, rightShift); \
248+
TACO_BENCH_ARGS(bench_frostt_ufunc, name/xor, path, xorOp); \
249+
TACO_BENCH_ARGS(bench_frostt_ufunc, name/ldExp, path, ldExp); \
250+
TACO_BENCH_ARGS(bench_frostt_ufunc, name/rightShift, path, rightShift); \
240251

241252
FOREACH_FROSTT_TENSOR(DECLARE_FROSTT_UFUNC_BENCH)
242253

243254
struct SuiteSparseTensors {
244255
SuiteSparseTensors() {
245-
auto path = getTacoTensorPath();
246-
auto ssTensorPath = path;
247-
if (ssTensorPath[ssTensorPath.size() - 1] != '/') {
248-
ssTensorPath += "/";
249-
}
256+
auto ssTensorPath = getTacoTensorPath();
250257
ssTensorPath += "suitesparse/";
251258
for (auto& entry : std::experimental::filesystem::directory_iterator(ssTensorPath)) {
252259
std::string f(entry.path());
@@ -285,6 +292,13 @@ static void bench_suitesparse_ufunc(benchmark::State& state, Func op) {
285292
state.ResumeTiming();
286293

287294
result.compute();
295+
296+
state.PauseTiming();
297+
if (auto validationPath = getValidationOutputPath(); validationPath != "") {
298+
auto key = ufuncBenchKey(tensorName, op.getName());
299+
auto outpath = validationPath + key + ".tns";
300+
taco::write(outpath, result.removeExplicitZeros(result.getFormat()));
301+
}
288302
}
289303
}
290304

validator.sh

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#!/bin/bash
# Compares the number of lines (entries) in every "*-numpy.tns" file found
# in the directory given as $1 against the matching "*-taco.tns" file, and
# reports each pair whose counts differ.

for numpy in "$1"/*-numpy.tns; do
  # When nothing matches, the glob is left as a literal pattern; skip it.
  [ -e "$numpy" ] || continue

  # Swap only the trailing "-numpy.tns" so that a "-numpy" occurring
  # earlier in the path is left untouched.
  taco="${numpy%-numpy.tns}-taco.tns"
  if [ ! -f "$taco" ]; then
    echo "File $taco is missing; cannot compare against $numpy."
    continue
  fi

  # xargs strips the padding some wc implementations put around the count.
  if [ "$(wc -l < "$numpy" | xargs)" -ne "$(wc -l < "$taco" | xargs)" ]; then
    echo "Files $numpy and $taco have a differing number of entries."
  fi
done

0 commit comments

Comments
 (0)