diff --git a/Makefile b/Makefile
index e14fad4..6e5341b 100644
--- a/Makefile
+++ b/Makefile
@@ -27,6 +27,10 @@ python-bench: results numpy/*.py
 	echo $(benches_name)
 	-pytest $(IGNORE_FLAGS) --benchmark-json=$(NUMPY_JSON) $(BENCHFLAGS) $(BENCHES) 
 	python numpy/converter.py --json_name $(NUMPY_JSON)
+
+# Separate target to run the python benchmarks with numpy-taco cross validation logic.
+validate-python-bench: numpy/*.py validation-path
+	pytest $(IGNORE_FLAGS) $(BENCHFLAGS) $(BENCHES) 
 	
 .PHONY: convert-csv-all
 convert-csv-all:
@@ -34,10 +38,24 @@ convert-csv-all:
 
 taco-bench: taco/build/taco-bench
 ifeq ($(BENCHES),"")
-	LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)"
+	LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10
 
 else
-	LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)"
+	LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10
+endif
+
+# Separate target to run the TACO benchmarks with numpy-taco cross validation logic.
+validate-taco-bench: taco/build/taco-bench validation-path
+ifeq ($(BENCHES),"")
+	LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_repetitions=1
+else
+	LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_repetitions=1
+endif
+
+.PHONY: validation-path validate-python-bench validate-taco-bench
+validation-path: # Guard prerequisite of the validate-* targets: errors out unless VALIDATION_OUTPUT_PATH is set.
+ifeq ($(strip $(VALIDATION_OUTPUT_PATH)),)
+	$(error VALIDATION_OUTPUT_PATH is undefined)
 endif
 
 taco/build/taco-bench: results check-and-reinit-submodules taco/benchmark/googletest
diff --git a/numpy/ufuncs.py b/numpy/ufuncs.py
index 1c59782..8cab1eb 100644
--- a/numpy/ufuncs.py
+++ b/numpy/ufuncs.py
@@ -2,7 +2,8 @@
 from scipy.sparse import random, csr_matrix
 import sparse
 import pytest
-from util import TensorCollectionFROSTT, PydataTensorShifter, TensorCollectionSuiteSparse, ScipyTensorShifter, PydataMatrixMarketTensorLoader, ScipyMatrixMarketTensorLoader
+import os
+from util import TensorCollectionFROSTT, PydataTensorShifter, TensorCollectionSuiteSparse, ScipyTensorShifter, PydataMatrixMarketTensorLoader, ScipyMatrixMarketTensorLoader, VALIDATION_OUTPUT_PATH, PydataSparseTensorDumper
 
 # TODO (rohany): Ask hameer about this. pydata/sparse isn't happy when
 #  given this ufunc to evaluate.
@@ -88,41 +89,53 @@ def bench():
     tacoBench(bench)
     print("Result", bench())
 
+def ufunc_bench_key(tensorName, funcName):
+    return tensorName + "-" + funcName + "-numpy"
+
 # Run benchmarks against the FROSTT collection.
 FROSTTTensors = TensorCollectionFROSTT()
-@pytest.mark.parametrize("tensor", FROSTTTensors.getTensors(), ids=FROSTTTensors.getTensorNames())
+@pytest.mark.parametrize("tensor", FROSTTTensors.getTensors())
 @pytest.mark.parametrize("ufunc", [numpy.logical_xor, numpy.ldexp, numpy.right_shift])
 def bench_pydata_frostt_ufunc_sparse(tacoBench, tensor, ufunc):
-
     frTensor = tensor.load().astype('int64')
     shifter = PydataTensorShifter()
-    other = shifter.shiftLastMode(frTensor).astype('int64')
+    other = shifter.shiftLastMode(frTensor)
     def bench():
         c = ufunc(frTensor, other)
         return c
     extra_info = dict()
     extra_info['tensor_str'] = str(tensor)
     extra_info['ufunc_str'] = ufunc.__name__
-    tacoBench(bench, extra_info)
+    if VALIDATION_OUTPUT_PATH is not None:
+        result = bench()
+        key = ufunc_bench_key(str(tensor), ufunc.__name__)
+        outpath = os.path.join(VALIDATION_OUTPUT_PATH, key + ".tns")
+        PydataSparseTensorDumper().dump(result, outpath)
+    else:
+        tacoBench(bench, extra_info)
 
 # Run benchmarks against the SuiteSparse collection.
 SuiteSparseTensors = TensorCollectionSuiteSparse()
-@pytest.mark.parametrize("tensor", SuiteSparseTensors.getTensors(), ids=SuiteSparseTensors.getTensorNames())
+@pytest.mark.parametrize("tensor", SuiteSparseTensors.getTensors())
 @pytest.mark.parametrize("ufunc", [numpy.logical_xor, numpy.ldexp, numpy.right_shift])
 def bench_pydata_suitesparse_ufunc_sparse(tacoBench, tensor, ufunc):
-    ssTensor = tensor[1].load(PydataMatrixMarketTensorLoader()).astype('int64')
+    ssTensor = tensor.load(PydataMatrixMarketTensorLoader()).astype('int64')
     shifter = PydataTensorShifter()
-    other = shifter.shiftLastMode(ssTensor).astype('int64')
+    other = shifter.shiftLastMode(ssTensor)
     def bench():
         c = ufunc(ssTensor, other)
         return c
     extra_info = dict()
     extra_info['tensor_str'] = str(tensor)
     extra_info['ufunc_str'] = ufunc.__name__
-    tacoBench(bench, extra_info)
+    if VALIDATION_OUTPUT_PATH is not None:
+        result = bench()
+        key = ufunc_bench_key(str(tensor), ufunc.__name__)
+        outpath = os.path.join(VALIDATION_OUTPUT_PATH, key + ".tns")
+        PydataSparseTensorDumper().dump(result, outpath)
+    else:
+        tacoBench(bench, extra_info)
 
-# TODO (rohany): scipy doesn't support these, I forgot. If that's the case,
-#  do we really need to compare against suitesparse?
 @pytest.mark.skip(reason="scipy doesn't support this actually")
 @pytest.mark.parametrize("tensor", SuiteSparseTensors.getTensors(), ids=SuiteSparseTensors.getTensorNames())
 @pytest.mark.parametrize("ufunc", [numpy.logical_xor, numpy.ldexp, numpy.right_shift])
diff --git a/numpy/util.py b/numpy/util.py
index 3a7d954..6c6be4e 100644
--- a/numpy/util.py
+++ b/numpy/util.py
@@ -7,6 +7,8 @@
 # Get the path to the directory holding random tensors. Error out
 # if this isn't set.
 TENSOR_PATH = os.environ['TACO_TENSOR_PATH']
+# Get the validation path, if it exists.
+VALIDATION_OUTPUT_PATH = os.getenv('VALIDATION_OUTPUT_PATH', None)
 
 # TnsFileLoader loads a tensor stored in .tns format.
 class TnsFileLoader:
@@ -76,6 +78,14 @@ def load(self, path):
         dims, coords, values = self.loader.load(path)
         return sparse.COO(coords, values, tuple(dims))
 
+# PydataSparseTensorDumper dumps a sparse tensor to the desired file.
+class PydataSparseTensorDumper:
+    def __init__(self):
+        self.dumper = TnsFileDumper()
+
+    def dump(self, tensor, path):
+        self.dumper.dump_dict_to_file(tensor.shape, sparse.DOK(tensor).data, path)
+
 # construct_random_tensor_key constructs a unique key that represents
 # a random tensor parameterized by the chosen shape and sparsity.
 # The key itself is formatted by the dimensions, followed by the
@@ -132,6 +142,7 @@ def random(self, shape, sparsity):
 class FROSTTTensor:
     def __init__(self, path):
         self.path = path
+        self.__name__ = self.__str__()
 
     def __str__(self):
         f = os.path.split(self.path)[1]
@@ -176,7 +187,7 @@ def shiftLastMode(self, tensor):
             # For order 2 tensors, always shift the last coordinate. Otherwise, shift only coordinates
             # that have even last coordinates. This ensures that there is at least some overlap
             # between the original tensor and its shifted counter part.
-            if tensor.shape[-1] <= 0 or resultCoords[-1][i] % 2 == 0:
+            if len(tensor.shape) <= 2 or resultCoords[-1][i] % 2 == 0:
                 resultCoords[-1][i] = (resultCoords[-1][i] + 1) % tensor.shape[-1]
         return sparse.COO(resultCoords, resultValues, tensor.shape)
 
@@ -231,6 +242,7 @@ def load(self, path):
 class SuiteSparseTensor:
     def __init__(self, path):
         self.path = path
+        self.__name__ = self.__str__()
 
     def __str__(self):
         f = os.path.split(self.path)[1]
diff --git a/taco/bench.cpp b/taco/bench.cpp
index 92f4269..921ea88 100644
--- a/taco/bench.cpp
+++ b/taco/bench.cpp
@@ -6,15 +6,38 @@
 #include "taco/tensor.h"
 #include "taco/util/strings.h"
 
-std::string getTacoTensorPath() {
-  auto path = std::getenv("TACO_TENSOR_PATH");
+std::string getEnvVar(std::string varname) {
+  auto path = std::getenv(varname.c_str());
   if (path == nullptr) {
-    std::cout << "TACO_TENSOR_PATH is unset" << std::endl;
-    assert(false);
+    return "";
   }
   return std::string(path);
 }
 
+// Returns TACO_TENSOR_PATH ending in "/"; aborts (even under NDEBUG) if it is unset or empty.
+std::string getTacoTensorPath() {
+  std::string result = getEnvVar("TACO_TENSOR_PATH");
+  if (result.empty()) {
+    std::cerr << "TACO_TENSOR_PATH is unset" << std::endl;
+    std::abort();
+  }
+  return cleanPath(result);
+}
+
+// Returns VALIDATION_OUTPUT_PATH ending in "/", or "" when it is unset or empty.
+std::string getValidationOutputPath() {
+  // cleanPath leaves "" untouched, so "" is still what callers see when unset.
+  return cleanPath(getEnvVar("VALIDATION_OUTPUT_PATH"));
+}
+
+std::string cleanPath(std::string path) {
+  // Guard the empty case: there is no last character to inspect.
+  if (!path.empty() && path.back() != '/') {
+    path += "/";
+  }
+  return path;
+}
+
 std::string constructRandomTensorKey(std::vector<int> dims, float sparsity) {
   auto path = getTacoTensorPath();
   std::stringstream result;
diff --git a/taco/bench.h b/taco/bench.h
index 1badae7..b88c9d6 100644
--- a/taco/bench.h
+++ b/taco/bench.h
@@ -25,7 +25,6 @@
 #define TACO_BENCH_ARG(bench, name, arg)  \
   BENCHMARK_CAPTURE(bench, name, arg)     \
   ->Unit(benchmark::kMicrosecond)         \
-  ->Repetitions(10)                       \
   ->Iterations(1)                         \
   ->ReportAggregatesOnly(true)            \
   ->UseRealTime()
@@ -33,12 +32,14 @@
 #define TACO_BENCH_ARGS(bench, name, ...)       \
   BENCHMARK_CAPTURE(bench, name, __VA_ARGS__)   \
   ->Unit(benchmark::kMicrosecond)               \
-  ->Repetitions(10)                             \
   ->Iterations(1)                               \
   ->ReportAggregatesOnly(true)                  \
   ->UseRealTime()
 
 std::string getTacoTensorPath();
+std::string getValidationOutputPath();
+// cleanPath ensures that a non-empty input path ends with "/".
+std::string cleanPath(std::string path);
 taco::TensorBase loadRandomTensor(std::string name, std::vector<int> dims, float sparsity, taco::Format format);
 
 template<typename T>
@@ -46,10 +47,12 @@ taco::Tensor<T> castToType(std::string name, taco::Tensor<double> tensor) {
   taco::Tensor<T> result(name, tensor.getDimensions(), tensor.getFormat());
   std::vector<int> coords(tensor.getOrder());
   for (auto& value : taco::iterate<double>(tensor)) {
-    for (int i = 0; i < tensor.getOrder(); i++) {
-      coords[i] = value.first[i];
+    if (static_cast<T>(value.second) != T(0)) {
+      for (int i = 0; i < tensor.getOrder(); i++) {
+        coords[i] = value.first[i];
+      }
+      result.insert(coords, static_cast<T>(value.second));
     }
-    result.insert(coords, T(value.second));
   }
   result.pack();
   return result;
diff --git a/taco/ufuncs.cpp b/taco/ufuncs.cpp
index 6980056..00ca7fb 100644
--- a/taco/ufuncs.cpp
+++ b/taco/ufuncs.cpp
@@ -184,15 +184,19 @@ struct UfuncInputCache {
 };
 UfuncInputCache inputCache;
 
+std::string ufuncBenchKey(std::string tensorName, std::string funcName) {
+  return tensorName + "-" + funcName + "-taco";
+}
+
 static void bench_frostt_ufunc(benchmark::State& state, std::string tnsPath, Func op) {
-  auto path = getTacoTensorPath();
-  auto frosttTensorPath = path;
-  if (frosttTensorPath[frosttTensorPath.size() - 1] != '/') {
-    frosttTensorPath += "/";
-  }
+  auto frosttTensorPath = getTacoTensorPath();
   frosttTensorPath += "FROSTT/";
   frosttTensorPath += tnsPath;
 
+  auto pathSplit = taco::util::split(tnsPath, "/");
+  auto filename = pathSplit[pathSplit.size() - 1];
+  auto tensorName = taco::util::split(filename, ".")[0];
+
   // TODO (rohany): What format do we want to do here?
   Tensor<int64_t> frosttTensor, other;
   std::tie(frosttTensor, other) = inputCache.getUfuncInput(frosttTensorPath, Sparse);
@@ -220,12 +224,19 @@ static void bench_frostt_ufunc(benchmark::State& state, std::string tnsPath, Fun
     state.ResumeTiming();
 
     result.compute();
+
+    state.PauseTiming();
+    if (auto validationPath = getValidationOutputPath(); validationPath != "") {
+      auto key = ufuncBenchKey(tensorName, op.getName());
+      auto outpath = validationPath + key + ".tns";
+      taco::write(outpath, result.removeExplicitZeros(result.getFormat()));
+    }
   }
 }
 
 Func ldExp("ldexp", Ldexp(), leftIncAlgebra());
-Func rightShift("rightShift", RightShift(), leftIncAlgebra());
-Func xorOp("xor", GeneralAdd(), xorAlgebra());
+Func rightShift("right_shift", RightShift(), leftIncAlgebra());
+Func xorOp("logical_xor", GeneralAdd(), xorAlgebra());
 
 #define FOREACH_FROSTT_TENSOR(__func__) \
   __func__(nips, "nips.tns") \
@@ -234,19 +245,15 @@ Func xorOp("xor", GeneralAdd(), xorAlgebra());
   __func__(lbnl_network, "lbnl-network.tns")
 
 #define DECLARE_FROSTT_UFUNC_BENCH(name, path) \
-   TACO_BENCH_ARGS(bench_frostt_ufunc, name/xor, path, xorOp); \
-   TACO_BENCH_ARGS(bench_frostt_ufunc, name/ldExp, path, ldExp); \
-   TACO_BENCH_ARGS(bench_frostt_ufunc, name/rightShift, path, rightShift); \
+  TACO_BENCH_ARGS(bench_frostt_ufunc, name/xor, path, xorOp); \
+  TACO_BENCH_ARGS(bench_frostt_ufunc, name/ldExp, path, ldExp); \
+  TACO_BENCH_ARGS(bench_frostt_ufunc, name/rightShift, path, rightShift); \
 
 FOREACH_FROSTT_TENSOR(DECLARE_FROSTT_UFUNC_BENCH)
 
 struct SuiteSparseTensors {
  SuiteSparseTensors() {
-   auto path = getTacoTensorPath();
-   auto ssTensorPath = path;
-   if (ssTensorPath[ssTensorPath.size() - 1] != '/') {
-     ssTensorPath += "/";
-   }
+   auto ssTensorPath = getTacoTensorPath();
    ssTensorPath += "suitesparse/";
    for (auto& entry : std::experimental::filesystem::directory_iterator(ssTensorPath)) {
      std::string f(entry.path());
@@ -285,6 +292,13 @@ static void bench_suitesparse_ufunc(benchmark::State& state, Func op) {
     state.ResumeTiming();
 
     result.compute();
+
+    state.PauseTiming();
+    if (auto validationPath = getValidationOutputPath(); validationPath != "") {
+      auto key = ufuncBenchKey(tensorName, op.getName());
+      auto outpath = validationPath + key + ".tns";
+      taco::write(outpath, result.removeExplicitZeros(result.getFormat()));
+    }
   }
 }
 
diff --git a/validator.sh b/validator.sh
new file mode 100755
index 0000000..b84cdeb
--- /dev/null
+++ b/validator.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Usage: validator.sh <validation-output-dir>
+#
+# For every <key>-numpy.tns file in the given directory, compare its line count
+# against the matching <key>-taco.tns file and report any pair that differs, or
+# any numpy output that has no taco counterpart.
+
+for numpy in "$1"/*-numpy.tns; do
+    # An unmatched glob is left as the literal pattern; skip it.
+    [ -e "$numpy" ] || continue
+    # Swap only the filename suffix so a "-numpy" in a directory name is untouched.
+    taco="${numpy%-numpy.tns}-taco.tns"
+    if [ ! -f "$taco" ]; then
+        echo "File $taco is missing; cannot compare against $numpy."
+        continue
+    fi
+    # xargs trims the padding some wc implementations put around the count.
+    if [ "$(wc -l < "$numpy" | xargs)" -ne "$(wc -l < "$taco" | xargs)" ]; then
+        echo "Files $numpy and $taco have a differing number of entries."
+    fi
+done