diff --git a/.clang-format b/.clang-format index 57aa8853..b18dc481 100644 --- a/.clang-format +++ b/.clang-format @@ -22,4 +22,5 @@ SpaceBeforeParens: Always TabWidth: '4' UseTab: Never AlwaysBreakAfterReturnType: None -AlwaysBreakAfterDefinitionReturnType: None \ No newline at end of file +AlwaysBreakAfterDefinitionReturnType: None +ContinuationIndentWidth: '4' \ No newline at end of file diff --git a/.gitignore b/.gitignore index 55f3c713..12050b4d 100644 --- a/.gitignore +++ b/.gitignore @@ -97,9 +97,10 @@ compile_flags.txt # local editor config dirs .vscode -.idea +.idea* .clangd - +.cache +cmake-build-debug # ignore installable version of dyadrun dyadrun flux_barrier @@ -112,7 +113,17 @@ flux_barrier # Python stuff **/__pycache__/ -**/build +**/build* **/*.egg-info /install/ /dyad-env/ +env +hydra_log +docs/demos/ecp_feb_2023/c_cons +docs/demos/ecp_feb_2023/cpp_cons +docs/demos/ecp_feb_2023/c_prod +docs/demos/ecp_feb_2023/cpp_prod +tests/integration/dlio_benchmark/logs +scripts/checkpoints +scripts/logs +tests/integration/dlio_benchmark/perf_analysis/.ipynb_checkpoints diff --git a/CMakeLists.txt b/CMakeLists.txt index dc03bc64..6e16bf8c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,12 +13,12 @@ set(DYAD_PACKAGE_VERSION_MAJOR "${DYAD_VERSION_MAJOR}.${DYAD_VERSION_MINOR}") set(DYAD_PACKAGE_VERSION_MINOR "${DYAD_VERSION_PATCH}") set(DYAD_PACKAGE_STRING "${DYAD_PACKAGE_NAME} ${DYAD_PACKAGE_VERSION}") set(DYAD_PACKAGE_TARNAME "${DYAD_PACKAGE}") - project(dyad LANGUAGES C CXX) # Convenience defines string(TOUPPER "${PROJECT_NAME}" UPPER_PROJECT_NAME) string(TOLOWER "${PROJECT_NAME}" LOWER_PROJECT_NAME) +set(DYAD_PROJECT_DIR ${CMAKE_CURRENT_SOURCE_DIR}) #------------------------------------------------------------------------------ # Internal Includes for header and libraries @@ -110,7 +110,7 @@ set(DYAD_LOGGER "NONE" CACHE STRING "Logger to use for DYAD") set_property(CACHE DYAD_LOGGER PROPERTY STRINGS FLUX CPP_LOGGER NONE) set(DYAD_LOGGER_LEVEL "NONE" CACHE STRING "Logging level to use for DYAD") set_property(CACHE DYAD_LOGGER_LEVEL PROPERTY STRINGS DEBUG INFO WARN ERROR NONE) - +option(DYAD_ENABLE_TESTS "Enable dyad tests" OFF) #------------------------------------------------------------------------------ # Compiler setup @@ -459,6 +459,8 @@ string(APPEND _str " DYAD_ENABLE_UCX_DATA: ${DYAD_ENABLE_UCX_DATA}\n") string(APPEND _str " DYAD_ENABLE_UCX_DATA_RMA: ${DYAD_ENABLE_UCX_DATA_RMA}\n") +string(APPEND _str + " DYAD_ENABLE_TESTS: ${DYAD_ENABLE_TESTS}\n") string(APPEND _str " DYAD_PROFILER: ${DYAD_PROFILER}\n") string(APPEND _str @@ -513,3 +515,9 @@ install(FILES "${CMAKE_BINARY_DIR}/dyad_module.lua.install" RENAME "${DYAD_MODULEFILE_NAME}" DESTINATION "${DYAD_INSTALL_SYSCONFDIR}") + + +if (DYAD_ENABLE_TESTS) + enable_testing() + add_subdirectory(tests) +endif () \ No newline at end of file diff --git a/pydyad/pydyad/bindings.py b/pydyad/pydyad/bindings.py index 49cc341b..085961a5 100644 --- a/pydyad/pydyad/bindings.py +++ b/pydyad/pydyad/bindings.py @@ -82,9 +82,12 @@ def __del__(self): self.dyad_bindings_obj = None -class DTLMode(enum.IntEnum): - DYAD_DTL_UCX = 0 - DYAD_DTL_FLUX_RPC = 1 +class DTLMode(enum.Enum): + DYAD_DTL_UCX = "UCX" + DYAD_DTL_FLUX_RPC = "FLUX_RPC" + + def __str__(self): + return self.value class DTLCommMode(enum.IntEnum): DYAD_COMM_NONE = 0 @@ -252,7 +255,7 @@ def init( prod_managed_path.encode() if prod_managed_path is not None else None, cons_managed_path.encode() if cons_managed_path is not None else None, ctypes.c_bool(relative_to_managed_path),
- dtl_mode.encode() if dtl_mode is not None else None, + str(dtl_mode).encode() if dtl_mode is not None else None, ctypes.c_int(dtl_comm_mode), ctypes.c_void_p(flux_handle) ) diff --git a/pydyad/pydyad/hdf.py b/pydyad/pydyad/hdf.py new file mode 100644 index 00000000..880d6c1e --- /dev/null +++ b/pydyad/pydyad/hdf.py @@ -0,0 +1,37 @@ +from pydyad.bindings import Dyad + +from pathlib import Path + +import h5py + + +class DyadFile(h5py.File): + + def __init__(self, fname, mode, file=None, dyad_ctx=None, metadata_wrapper=None): + # According to h5py, the first positional argument to File.__init__ is fname + self.fname = fname + if not isinstance(self.fname, Path): + self.fname = Path(fname) + self.fname = self.fname.expanduser().resolve() + self.m = mode + if dyad_ctx is None: + raise NameError("'dyad_ctx' argument not provided to pydyad.hdf.File constructor") + self.dyad_ctx = dyad_ctx + if self.m in ("r",): + if (self.dyad_ctx.cons_path is not None and + self.dyad_ctx.cons_path in self.fname.parents): + if metadata_wrapper: + self.dyad_ctx.consume_w_metadata(str(self.fname), metadata_wrapper) + else: + dyad_ctx.consume(str(self.fname)) + if file: + super().__init__(file, mode) + else: + super().__init__(fname, mode) + + def close(self): + super().close() + if self.m in ("w", "r+"): + if (self.dyad_ctx.prod_path is not None and + self.dyad_ctx.prod_path in self.fname.parents): + self.dyad_ctx.produce(str(self.fname)) \ No newline at end of file diff --git a/pydyad/setup.cfg b/pydyad/setup.cfg index 18fb00b6..8454ccce 100644 --- a/pydyad/setup.cfg +++ b/pydyad/setup.cfg @@ -8,5 +8,5 @@ classifier = python_requires = >=3.7 install_requires = numpy - # dlio_profiler_py @ git+https://github.com/hariharan-devarajan/dlio-profiler.git - pydftracer==1.0.2 \ No newline at end of file + h5py + pydftracer==1.0.2 diff --git a/scripts/Testing/Temporary/CTestCostData.txt b/scripts/Testing/Temporary/CTestCostData.txt new file mode 100644 index 00000000..ed97d539 --- /dev/null +++ b/scripts/Testing/Temporary/CTestCostData.txt @@ -0,0 +1 @@ +--- diff --git a/scripts/corona.sh b/scripts/corona.sh new file mode 100755 index 00000000..2fd8eec2 --- /dev/null +++ b/scripts/corona.sh @@ -0,0 +1,8 @@ +#!/bin/bash +test_case=$1 +NUM_NODES=$2 +PPN=$3 +source ./setup-env.sh ${test_case} $NUM_NODES $PPN +rm *.core flux.log +rm -rf logs/* profiler/* +flux alloc -q $QUEUE -t $TIME -N $NUM_NODES -o per-resource.count=${BROKERS_PER_NODE} --exclusive --broker-opts=--setattr=log-filename=./logs/flux.log ./run.sh $test_case $NUM_NODES $PPN diff --git a/scripts/dspaces/aggregate.py b/scripts/dspaces/aggregate.py new file mode 100644 index 00000000..ad65f8ab --- /dev/null +++ b/scripts/dspaces/aggregate.py @@ -0,0 +1,63 @@ +from pathlib import Path +import re +import argparse +import pandas as pd + + +def process_single_run_csvs(dir_path): + dirname = dir_path.name + match_obj = re.match(r"(?P<test_name>[a-zA-Z]+)_(?P<num_nodes>[0-9]+)_(?P<ppn>[0-9]+)", dirname) + if match_obj is None: + raise RuntimeError("Cannot parse directory name") + num_nodes = int(match_obj.group("num_nodes")) + ppn = int(match_obj.group("ppn")) + csv_files = list(dir_path.glob("*.csv")) + df = pd.concat(map(pd.read_csv, csv_files), ignore_index=True) + num_ops = len(df) + df = df.drop(columns=["var_name", "version"]) + df = df.groupby("rank").agg("sum") + return { + "test_name": match_obj.group("test_name"), + "num_nodes": num_nodes, + "ppn": ppn, + "num_mdata_ops": num_ops, + "data_size": df["data_size"].sum(), + "mdata_time_ns": df["mdata_time_ns"].max(), + "data_time_ns":
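
For reference, a short usage sketch of the new `pydyad.hdf.DyadFile` wrapper above. The paths and context setup are hypothetical (real initialization goes through `Dyad.init(...)` in `bindings.py`); the point is that `consume()` fires on read-open under `cons_path`, and `produce()` fires on `close()` for write modes:

```python
# Minimal sketch, assuming an initialized Dyad context and hypothetical
# managed paths /tmp/dyad/prod and /tmp/dyad/cons.
import numpy as np
from pydyad.bindings import Dyad
from pydyad.hdf import DyadFile

dyad_ctx = Dyad()
# ... dyad_ctx.init(...) with managed paths and DTL mode elided ...

# Producer: produce() is triggered inside close() for "w"/"r+" modes,
# so the h5py context manager is enough to publish the file.
with DyadFile("/tmp/dyad/prod/sample.h5", "w", dyad_ctx=dyad_ctx) as f:
    f.create_dataset("x", data=np.arange(16))

# Consumer: consume() is triggered in __init__ for mode "r" once the
# resolved path is found under dyad_ctx.cons_path.
with DyadFile("/tmp/dyad/cons/sample.h5", "r", dyad_ctx=dyad_ctx) as f:
    x = f["x"][:]
```
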
df["data_time_ns"].max(), } + + +def build_result_dataframe(testdir): + top_level_rundir_name = testdir.parent.name + test_dir_name = testdir.name + output_df_name = "{}_{}.csv".format(top_level_rundir_name, test_dir_name) + print("Building", output_df_name) + df_rows = [] + for subdir in testdir.iterdir(): + if subdir.is_dir(): + print("Getting data for", str(subdir)) + df_row = process_single_run_csvs(subdir) + df_rows.append(df_row) + output_df = pd.DataFrame(data=df_rows) + return output_df_name, output_df + + +def main(): + parser = argparse.ArgumentParser("Aggregate data for test") + parser.add_argument("testdir", type=Path, + help="Path to the test directory to collect data for") + parser.add_argument("--dump_dir", "-d", type=Path, + help="Directory to dump the resulting CSV into") + args = parser.parse_args() + csv_name, df = build_result_dataframe(args.testdir.expanduser().resolve()) + dump_dir = args.dump_dir.expanduser().resolve() + if not dump_dir.is_dir(): + print("Creating non-existent dump directory {}".format(str(dump_dir))) + dump_dir.mkdir(parents=True) + full_csv_name = dump_dir / csv_name + df.to_csv(str(full_csv_name)) + print("Wrote data to {}".format(str(full_csv_name))) + + +if __name__ == "__main__": + main() diff --git a/scripts/dspaces/collect.py b/scripts/dspaces/collect.py new file mode 100644 index 00000000..b42c5f3d --- /dev/null +++ b/scripts/dspaces/collect.py @@ -0,0 +1,75 @@ +from pathlib import Path +import re +import argparse +import json + + +def validate_log(out_file): + with open(str(out_file), "r") as f: + for line in f: + if line.startswith("[DSPACES_TEST]"): + return line + return None + + +def validate_dir(path): + dirname = path.name + match_obj = re.match(r"(?P<test_name>[a-zA-Z]+)_(?P<num_nodes>[0-9]+)_(?P<ppn>[0-9]+)", dirname) + if match_obj is None: + raise RuntimeError("Cannot parse directory name") + test_name = match_obj.group("test_name") + num_nodes = int(match_obj.group("num_nodes")) + ppn = int(match_obj.group("ppn")) + # num_tasks = num_nodes * ppn + out_file = path / "run.out" + if not out_file.is_file(): + raise RuntimeError("Could not find run.out for {}".format(path)) + perf_line = validate_log(out_file) + if perf_line is None: + raise RuntimeError("Run for {} failed because we don't have perf numbers".format(path)) + return { + "test_name": test_name, + "num_nodes": num_nodes, + "ppn": ppn, + "perf": perf_line, + } + + +def validate_rundir(td): + print("Validating tests in {}:".format(td.name)) + subdirs = [sd for sd in td.iterdir() if sd.is_dir()] + perf_entries = [] + for sd in subdirs: + print(" * Validating {}:".format(sd.name), end=" ") + try: + new_perf = validate_dir(sd) + perf_entries.append(new_perf) + print("GOOD") + except RuntimeError as e: + print("BAD") + raise e + return perf_entries + + +def main(): + parser = argparse.ArgumentParser("Validate runs") + parser.add_argument("testdir", type=Path, + help="Top-level directory representing the results of a single iteration of the testing") + parser.add_argument("--dump_file", "-d", type=Path, default=None, + help="Path to JSON file where we want to dump performance results") + args = parser.parse_args() + perf_entries = validate_rundir(args.testdir.expanduser().resolve()) + if args.dump_file is not None: + dump_file = args.dump_file.expanduser().resolve() + if not dump_file.name.endswith(".json"): + raise ValueError("Invalid file suffix for JSON file") + if not dump_file.parent.is_dir(): + dump_file.parent.mkdir(parents=True) + with open(str(dump_file), "w") as f: + json.dump(perf_entries, f,
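
Both `aggregate.py` and `collect.py` rely on run directories named `<test_name>_<num_nodes>_<ppn>`; a quick check of the named-group regex they share:

```python
import re

RUN_DIR_RE = re.compile(
    r"(?P<test_name>[a-zA-Z]+)_(?P<num_nodes>[0-9]+)_(?P<ppn>[0-9]+)")

m = RUN_DIR_RE.match("RemoteDataBandwidth_2_16")
print(m.group("test_name"), int(m.group("num_nodes")), int(m.group("ppn")))
# -> RemoteDataBandwidth 2 16
```
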
indent=4, sort_keys=True) + else: + print(json.dumps(perf_entries, sort_keys=True, indent=4)) + + +if __name__ == "__main__": + main() diff --git a/scripts/dspaces/corona.sh b/scripts/dspaces/corona.sh new file mode 100755 index 00000000..6c5158ec --- /dev/null +++ b/scripts/dspaces/corona.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +test_case=$1 +num_nodes=$2 +ppn=$3 +timing_root_dir=$4 +use_alloc=$5 + +num_iters=16 +num_files=16 +request_size=65536 +hg_conn_str="ofi+verbs" + +extra_flux_flags="--setattr=system.bank=ice4hpc" + +source ./setup-env.sh + +timing_dir=$timing_root_dir/${test_case}_${num_nodes}_${ppn} + +if [ -d $timing_dir ]; then + echo "Dump directory $timing_dir already exists" + exit 1 +fi + +mkdir -p $timing_dir + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +if $use_alloc; then + flux alloc -q $QUEUE -t $TIME -N $num_nodes --exclusive $extra_flux_flags ./run.sh $test_case $num_nodes $ppn $num_iters $num_files $request_size $hg_conn_str $timing_dir $SCRIPT_DIR +else + flux batch -q $QUEUE -t $TIME -N $num_nodes --output=$timing_dir/run.out --error=$timing_dir/run.err --exclusive $extra_flux_flags ./run.sh $test_case $num_nodes $ppn $num_iters $num_files $request_size $hg_conn_str $timing_dir $SCRIPT_DIR +fi diff --git a/scripts/dspaces/dspaces_start.sh b/scripts/dspaces/dspaces_start.sh new file mode 100755 index 00000000..ce69e73a --- /dev/null +++ b/scripts/dspaces/dspaces_start.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +echo "## Config file for DataSpaces server +ndim = 1 +dims = $1 +max_versions = $2 +num_apps = 1" > dataspaces.conf + +# Use provided number of nodes instead of auto-obtained number +# dspaces_num_nodes=$(flux resource info | grep -oP "\d+ Nodes" | grep -oP "^\d+") + +flux submit -N $3 --cores=$(( $3*1 )) --tasks-per-node=$4 dspaces_server $5 + +# Wait for DataSpaces configuration file to be created. +# If we don't do this, the DataSpaces clients will either crash or hang +sleep 1s +while [ ! 
-f conf.ds ]; do + sleep 1s +done +sleep 3s diff --git a/scripts/dspaces/dspaces_stop.sh b/scripts/dspaces/dspaces_stop.sh new file mode 100755 index 00000000..eaa18642 --- /dev/null +++ b/scripts/dspaces/dspaces_stop.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +flux run --ntasks=1 terminator + +# rm conf.ds dataspaces.conf diff --git a/scripts/dspaces/print_all.sh b/scripts/dspaces/print_all.sh new file mode 100755 index 00000000..a5ff350a --- /dev/null +++ b/scripts/dspaces/print_all.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +timing_root_dir=$1 + +echo "#!/bin/bash" + +ppns=( 1 2 4 8 16 32 64 ) + +echo "# Remote ppn scaling test" +for ppn in ${ppns[@]}; do + # Run 2 node test for RemoteDataBandwidth + echo "./corona.sh RemoteDataBandwidth 2 $ppn $timing_root_dir/2_node_remote_data false" + # Run 2 node test for RemoteDataAggBandwidth + echo "./corona.sh RemoteDataAggBandwidth 2 $ppn $timing_root_dir/2_node_remote_data false" +done +echo "" + +num_nodes=( 4 8 16 32 64 ) +ppns=( 16 ) + +echo "# Num nodes remote scaling" +for nn in ${num_nodes[@]}; do + for ppn in ${ppns[@]}; do + # Run 1 node test for LocalProcessDataBandwidth + echo "./corona.sh RemoteDataBandwidth $nn $ppn $timing_root_dir/multi_node_remote_data false" + # Run 1 node test for LocalNodeDataBandwidth + echo "./corona.sh RemoteDataAggBandwidth $nn $ppn $timing_root_dir/multi_node_remote_data false" + done +done +echo "" + +num_nodes=( 1 2 4 8 16 32 64 ) +echo "# Metadata Perf" +for nn in ${num_nodes[@]}; do + # Process local mdata + echo "./corona.sh LocalProcessDataBandwidth $nn 16 $timing_root_dir/proc_local_mdata false" + # Service mdata + echo "./corona.sh LocalNodeDataBandwidth $nn 16 $timing_root_dir/service_mdata false" + # Global mdata + echo "./corona.sh RemoteDataBandwidth $nn 16 $timing_root_dir/global_mdata false" +done +echo "" + +ppns=( 1 2 4 8 16 32 64 ) + +echo "# Local process ppn scaling" +for ppn in ${ppns[@]}; do + # Run 2 node test for RemoteDataBandwidth + echo "./corona.sh LocalProcessDataBandwidth 1 $ppn $timing_root_dir/1_node_local_data false" + # Run 2 node test for RemoteDataAggBandwidth + echo "./corona.sh LocalNodeDataBandwidth 1 $ppn $timing_root_dir/1_node_local_data false" +done +echo "" + +num_nodes=( 2 4 8 16 32 64 ) +ppns=( 16 ) + +echo "# Local node scaling" +for nn in ${num_nodes[@]}; do + for ppn in ${ppns[@]}; do + # Run 2 node test for RemoteDataBandwidth + echo "./corona.sh LocalProcessDataBandwidth $nn $ppn $timing_root_dir/multi_node_local_data false" + # Run 2 node test for RemoteDataAggBandwidth + echo "./corona.sh LocalNodeDataBandwidth $nn $ppn $timing_root_dir/multi_node_local_data false" + done +done diff --git a/scripts/dspaces/run.sh b/scripts/dspaces/run.sh new file mode 100755 index 00000000..088070cb --- /dev/null +++ b/scripts/dspaces/run.sh @@ -0,0 +1,39 @@ +#!/bin/bash +test_case=$1 +NUM_NODES=$2 +PPN=$3 +NUM_ITERS=$4 +NUM_FILES=$5 +REQUEST_SIZE=$6 +HG_CONNECTION_STR=$7 +TIMING_DIR=$8 +SCRIPT_DIR=$9 + +ulimit -c unlimited + +test_cores=$(( NUM_NODES*32 )) + +curr_dir=$(pwd) + +cd $TIMING_DIR + +source $SCRIPT_DIR/setup-env.sh + +EXEC_DIR="${GITHUB_WORKSPACE}/build/bin" + +# export DSPACES_DEBUG=1 + +echo Starting DataSpaces +# Setup DYAD +$SCRIPT_DIR/dspaces_start.sh $(( REQUEST_SIZE * NUM_ITERS )) ${NUM_FILES} ${NUM_NODES} ${SERVER_PPN} ${HG_CONNECTION_STR} + +echo "Running Test $WORKLOAD" +flux run -N ${NUM_NODES} --cores=$test_cores --tasks-per-node=${PPN} \ + ${EXEC_DIR}/unit_test --filename dp_${NUM_NODES}_${PPN} \ + --ppn ${PPN} --iteration ${NUM_ITERS} --number_of_files 
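
`dspaces_start.sh` above waits for the server to write `conf.ds` with an unbounded sleep loop; a bounded-poll variant would fail fast if the server never comes up. This is a sketch only, assuming the server writes `conf.ds` into the working directory:

```python
import os
import time

def wait_for_conf(path="conf.ds", timeout_s=120.0, poll_s=1.0):
    # Poll until the DataSpaces server writes its handshake file,
    # but give up after timeout_s instead of hanging forever.
    deadline = time.monotonic() + timeout_s
    while not os.path.exists(path):
        if time.monotonic() > deadline:
            raise TimeoutError(f"{path} not created within {timeout_s:.0f}s")
        time.sleep(poll_s)
```
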
${NUM_FILES} \ + --server_ppn ${SERVER_PPN} --request_size ${REQUEST_SIZE} --timing_dir ${TIMING_DIR} ${test_case} + +echo Stopping DataSpaces +$SCRIPT_DIR/dspaces_stop.sh + +cd $curr_dir diff --git a/scripts/dspaces/run_all.sh b/scripts/dspaces/run_all.sh new file mode 100755 index 00000000..b58885b5 --- /dev/null +++ b/scripts/dspaces/run_all.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +timing_root_dir=$1 + +ppns=( 1 2 4 8 16 32 64 ) + +for ppn in ${ppns[@]}; do + # Run 2 node test for RemoteDataBandwidth + ./corona.sh RemoteDataBandwidth 2 $ppn $timing_root_dir/2_node_remote_data false + # Run 2 node test for RemoteDataAggBandwidth + ./corona.sh RemoteDataAggBandwidth 2 $ppn $timing_root_dir/2_node_remote_data false +done + +num_nodes=( 4 8 16 32 64 ) +ppns=( 16 ) + +for nn in ${num_nodes[@]}; do + for ppn in ${ppns[@]}; do + # Run 1 node test for LocalProcessDataBandwidth + ./corona.sh RemoteDataBandwidth $nn $ppn $timing_root_dir/multi_node_remote_data false + # Run 1 node test for LocalNodeDataBandwidth + ./corona.sh RemoteDataAggBandwidth $nn $ppn $timing_root_dir/multi_node_remote_data false + done +done + +num_nodes=( 1 2 4 8 16 32 64 ) +for nn in ${num_nodes[@]}; do + # Process local mdata + ./corona.sh LocalProcessDataBandwidth $nn 64 $timing_root_dir/proc_local_mdata false + # Service mdata + ./corona.sh LocalNodeDataBandwidth $nn 64 $timing_root_dir/service_mdata false + # Global mdata + ./corona.sh RemoteDataBandwidth $nn 64 $timing_root_dir/global_mdata false +done + +ppns=( 1 2 4 8 16 32 64 ) + +for ppn in ${ppns[@]}; do + # Run 2 node test for RemoteDataBandwidth + ./corona.sh LocalProcessDataBandwidth 1 $ppn $timing_root_dir/1_node_local_data false + # Run 2 node test for RemoteDataAggBandwidth + ./corona.sh LocalNodeDataBandwidth 1 $ppn $timing_root_dir/1_node_local_data false +done + +num_nodes=( 2 4 8 16 32 64 ) +ppns=( 64 ) + +for nn in ${num_nodes[@]}; do + for ppn in ${ppns[@]}; do + # Run 2 node test for RemoteDataBandwidth + ./corona.sh LocalProcessDataBandwidth $nn $ppn $timing_root_dir/multi_node_local_data false + # Run 2 node test for RemoteDataAggBandwidth + ./corona.sh LocalNodeDataBandwidth $nn $ppn $timing_root_dir/multi_node_local_data false + done +done diff --git a/scripts/dspaces/setup-env.sh b/scripts/dspaces/setup-env.sh new file mode 100755 index 00000000..6bcf276b --- /dev/null +++ b/scripts/dspaces/setup-env.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +export VSC_DEBUG_CONF_DIR=/g/g90/lumsden1/ws/dyad_sc24_paper_dspaces/dyad/.vscode + +module load gcc/10.3.1 +module load python/3.9.12 +module load openmpi/4.1.2 +# Configurations +export QUEUE=pbatch +export TIME="15m" +export SERVER_PPN=1 + +export GITHUB_WORKSPACE=/g/g90/lumsden1/ws/dyad_sc24_paper_dspaces/dyad +export SPACK_DIR=/g/g90/lumsden1/ws/spack +export SPACK_ENV=/g/g90/lumsden1/ws/dyad_sc24_paper_dspaces/baseline_env +export SPACK_VIEW=/g/g90/lumsden1/ws/dyad_sc24_paper_dspaces/baseline_env/.spack-env/view + +# Activate Environments +source ${SPACK_DIR}/share/spack/setup-env.sh +spack env activate -p ${SPACK_ENV} + +# Derived PATHS +export PATH=${PATH}:${SPACK_VIEW}/bin:${SPACK_VIEW}/sbin +export LD_LIBRARY_PATH=/usr/lib64:${SPACK_VIEW}/lib:${SPACK_VIEW}/lib64:${LD_LIBRARY_PATH} + +unset LUA_PATH +unset LUA_CPATH diff --git a/scripts/dspaces/validate.py b/scripts/dspaces/validate.py new file mode 100644 index 00000000..dca5ccf1 --- /dev/null +++ b/scripts/dspaces/validate.py @@ -0,0 +1,53 @@ +from pathlib import Path +import re +import argparse + + +NUM_FILES = 16 + + +def validate_csv(csv_file): + with 
open(str(csv_file), "r") as f: + num_lines = len(f.readlines()) + if num_lines != NUM_FILES + 1: + raise RuntimeError("CSV file {} only contains {} lines, expected {}".format(csv_file, num_lines, NUM_FILES+1)) + + +def validate_dir(path): + dirname = path.name + match_obj = re.match(r"(?P<test_name>[a-zA-Z]+)_(?P<num_nodes>[0-9]+)_(?P<ppn>[0-9]+)", dirname) + if match_obj is None: + raise RuntimeError("Cannot parse directory name") + num_nodes = int(match_obj.group("num_nodes")) + ppn = int(match_obj.group("ppn")) + num_tasks = num_nodes * ppn + csv_files = list(path.glob("*.csv")) + if len(csv_files) != num_tasks: + raise RuntimeError("Only found {} CSV files, but expected {} for {}".format(len(csv_files), num_tasks, str(path))) + for f in csv_files: + validate_csv(f) + + +def validate_rundir(td): + print("Validating tests in {}:".format(td.name)) + subdirs = [sd for sd in td.iterdir() if sd.is_dir()] + for sd in subdirs: + print(" * Validating {}:".format(sd.name), end=" ") + try: + validate_dir(sd) + print("GOOD") + except RuntimeError as e: + print("BAD") + raise e + + +def main(): + parser = argparse.ArgumentParser("Validate runs") + parser.add_argument("testdir", type=Path, + help="Top-level directory representing the results of a single iteration of the testing") + args = parser.parse_args() + validate_rundir(args.testdir.expanduser().resolve()) + + +if __name__ == "__main__": + main() diff --git a/scripts/mpi_attach.py b/scripts/mpi_attach.py new file mode 100644 index 00000000..d6dfd473 --- /dev/null +++ b/scripts/mpi_attach.py @@ -0,0 +1,119 @@ +import argparse +import os +import paramiko +import logging +import shutil +import json + +def parse_args(): + parser = argparse.ArgumentParser( + prog='mpi_attach', + description='Attach to a mpi program') + parser.add_argument('-c', '--conf_dir', help="pass conf_dir else it is inferred using VSC_DEBUG_CONF_DIR") + parser.add_argument('-p', '--project_dir', help="pass project_dir") + parser.add_argument('-v', '--verbose', action='store_true') + parser.add_argument('-d', '--debug', action='store_true') + args = parser.parse_args() + if args.conf_dir is None: + args.conf_dir = os.getenv("VSC_DEBUG_CONF_DIR", ".") + loglevel = logging.WARNING + if args.verbose: + loglevel = logging.INFO + elif args.debug: + loglevel = logging.DEBUG + logging.basicConfig(level=loglevel, + handlers=[ + logging.StreamHandler() + ], + format='[%(levelname)s] [%(asctime)s] %(message)s [%(pathname)s:%(lineno)d]', + datefmt='%H:%M:%S' + ) + logging.info(f"args: {args}") + return args + +def main(): + args = parse_args() + conf_file = f"{args.conf_dir}/debug.conf" + file = open(conf_file, 'r') + lines = file.readlines() + file.close() + + vals = [{}]*len(lines) + logging.info(f"vals has {len(vals)} values") + for line in lines: + exec, rank, hostname, port, pid = line.split(":") + exec = exec.strip() + rank = int(rank.strip()) + hostname = hostname.strip() + port = int(port.strip()) + pid = int(pid.strip()) + vals[rank] = {"hostname":hostname, "port":port, "pid":pid, "exec":exec} + + # Create a launch.json file + launch_file = f"{args.project_dir}/.vscode/launch.json" + with open(launch_file, "r") as jsonFile: + launch_data = json.load(jsonFile) + + # clean previous configurations + confs = launch_data["configurations"] + final_confs = [] + for conf in confs: + if "mpi_gdb" not in conf["name"]: + final_confs.append(conf) + + compound_names = [] + for rank, val in enumerate(vals): + exec = val["exec"] + port = val["port"] + hostname = val["hostname"] + test_name = f"mpi_gdb for rank {rank}" +
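
The `debug.conf` records consumed by `mpi_attach.py` are colon-separated `exec:rank:hostname:port:pid` lines; an equivalent, slightly more defensive parse (a sketch, same format assumed):

```python
def parse_debug_conf(path):
    # One record per rank: "exec:rank:hostname:port:pid".
    by_rank = {}
    with open(path) as f:
        for line in f:
            exe, rank, host, port, pid = (p.strip() for p in line.split(":"))
            by_rank[int(rank)] = {"exec": exe, "hostname": host,
                                  "port": int(port), "pid": int(pid)}
    # Return a rank-ordered list, like the vals[] table built above.
    return [by_rank[r] for r in sorted(by_rank)]
```
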
final_confs.append({ + "type": "gdb", + "request": "attach", + "name": test_name, + "executable": f"{exec}", + "target": f"{hostname}:{port}", + "remote": True, + "cwd": "${workspaceRoot}", + "gdbpath": "gdb" + }) + compound_names.append(test_name) + final_compounds = [] + compounds = [] + if "compounds" in launch_data: + compounds = launch_data["compounds"] + final_compounds = [] + for compound in compounds: + if "mpi_gdb" not in compound["name"]: + final_compounds.append(compound) + + final_compounds.append({ + "name": "mpi_gdb compound", + "configurations": compound_names, + "preLaunchTask": "${defaultBuildTask}", + "stopAll": True + }) + launch_data["compounds"] = final_compounds + + + launch_data["configurations"]=final_confs + with open(launch_file, "w") as jsonFile: + json.dump(launch_data, jsonFile, indent=2) + + gdbserver_exe = shutil.which("gdbserver") + ssh = paramiko.SSHClient() + ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + for rank, val in enumerate(vals): + hostname, port, pid = val["hostname"], val["port"], val["pid"] + logging.info(f"rank:{rank} hostname:{hostname} port:{port} pid:{pid}") + ssh.connect(hostname) + cmd = f"{gdbserver_exe} {hostname}:{port} --attach {pid} > {os.getcwd()}/gdbserver-{hostname}-{pid}.log 2>&1 &" + logging.info(f"cmd:{cmd}") + transport = ssh.get_transport() + channel = transport.open_session() + channel.exec_command(cmd) + ssh.close() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/run.sh b/scripts/run.sh new file mode 100755 index 00000000..1b17f4e2 --- /dev/null +++ b/scripts/run.sh @@ -0,0 +1,22 @@ +#!/bin/bash +test_case=$1 +NUM_NODES=$2 +PPN=$3 +source ./setup-env.sh $test_case $NUM_NODES $PPN + + +pushd ${GITHUB_WORKSPACE}/build +echo Starting DYAD +# Setup DYAD +ctest -R dyad_start -VV + +# Setup Local Directories +echo Setting up local directories +flux run -N $((NUM_NODES*BROKERS_PER_NODE)) --tasks-per-node=1 mkdir -p $DYAD_PATH +flux run -N $((NUM_NODES*BROKERS_PER_NODE)) --tasks-per-node=1 rm -rf $DYAD_PATH/* + +echo "Running Test $WORKLOAD" +ctest -R ${WORKLOAD}$ -VV + +echo Stopping DYAD +ctest -R dyad_stop -VV diff --git a/scripts/setup-env.sh b/scripts/setup-env.sh new file mode 100644 index 00000000..ebd30850 --- /dev/null +++ b/scripts/setup-env.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +module load python/3.9.12 +module load openmpi/4.1.2 +module load gcc/10.3.1 +export VSC_DEBUG_PROJECT_DIR=/usr/workspace/haridev/dyad +export VSC_DEBUG_CONF_DIR=${VSC_DEBUG_PROJECT_DIR}/.vscode/ +test_case=$1 +NUM_NODES=$2 +PPN=$3 +# Configurations +export WORKLOAD=${test_case}_${NUM_NODES}_${PPN} +export QUEUE=pdebug +export TIME=$((60)) +export BROKERS_PER_NODE=1 + +export DYAD_INSTALL_PREFIX=/usr/workspace/haridev/dyad/env/spack/.spack-env/view +export DYAD_KVS_NAMESPACE=dyad +export DYAD_DTL_MODE=UCX +export DYAD_PATH=/l/ssd/haridev/dyad/internal +#export DYAD_PATH=/dev/shm/haridev/dyad +export GITHUB_WORKSPACE=/usr/workspace/haridev/dyad +export SPACK_DIR=/usr/workspace/haridev/spack +export SPACK_ENV=/usr/workspace/haridev/dyad/env/spack +export PYTHON_ENV=/usr/workspace/haridev/dyad/env/python + +# Activate Environments +. 
${SPACK_DIR}/share/spack/setup-env.sh +spack env activate -p ${SPACK_ENV} +source ${PYTHON_ENV}/bin/activate + +# Derived PATHS +export PATH=${PATH}:${DYAD_INSTALL_PREFIX}/bin:${DYAD_INSTALL_PREFIX}/sbin +export LD_LIBRARY_PATH=/usr/lib64:${DYAD_INSTALL_PREFIX}/lib:${LD_LIBRARY_PATH} +export PYTHONPATH=${GITHUB_WORKSPACE}/tests/integration/dlio_benchmark:${GITHUB_WORKSPACE}/pydyad:$PYTHONPATH + +unset LUA_PATH +unset LUA_CPATH diff --git a/src/dyad/core/dyad_core.c b/src/dyad/core/dyad_core.c index a3e97395..44d2f953 100644 --- a/src/dyad/core/dyad_core.c +++ b/src/dyad/core/dyad_core.c @@ -27,14 +27,10 @@ #include #endif -#if DYAD_PERFFLOW -#define DYAD_CORE_FUNC_MODS __attribute__ ((annotate ("@critical_path()"))) static -#else -#define DYAD_CORE_FUNC_MODS static inline -#endif -static int gen_path_key (const char* restrict str, + +DYAD_DLL_EXPORTED int gen_path_key (const char* restrict str, char* restrict path_key, const size_t len, const uint32_t depth, @@ -48,13 +44,17 @@ static int gen_path_key (const char* restrict str, uint32_t hash[4] = {0u}; // Output for the hash size_t cx = 0ul; int n = 0; + if (str == NULL || path_key == NULL || len == 0ul) { + DYAD_C_FUNCTION_END(); + return -1; + } size_t str_len = strlen (str); - const char* str_long = str; - - if (str == NULL || path_key == NULL || len == 0ul || str_len == 0ul) { + if (str_len == 0ul) { DYAD_C_FUNCTION_END(); return -1; } + const char* str_long = str; + path_key[0] = '\0'; // Just append the string so that it can be as large as 128 bytes. @@ -79,7 +79,8 @@ static int gen_path_key (const char* restrict str, } } n = snprintf (path_key + cx, len - cx, "%s", str); - if (cx + n >= len || n < 0) { + // FIXME: cx + n >= len fails for str_len > 256 + if (n < 0) { DYAD_C_FUNCTION_END(); return -1; } @@ -176,7 +177,7 @@ publish_done:; return rc; } -DYAD_CORE_FUNC_MODS dyad_rc_t dyad_commit (dyad_ctx_t* restrict ctx, const char* restrict fname) +DYAD_DLL_EXPORTED dyad_rc_t dyad_commit (dyad_ctx_t* restrict ctx, const char* restrict fname) { DYAD_C_FUNCTION_START(); DYAD_C_FUNCTION_UPDATE_STR ("fname", ctx->fname); @@ -236,7 +237,7 @@ static void print_mdata (const dyad_ctx_t* restrict ctx, } } -DYAD_CORE_FUNC_MODS dyad_rc_t dyad_kvs_read (const dyad_ctx_t* restrict ctx, +DYAD_DLL_EXPORTED dyad_rc_t dyad_kvs_read (const dyad_ctx_t* restrict ctx, const char* restrict topic, const char* restrict upath, bool should_wait, @@ -374,7 +375,7 @@ fetch_done:; return rc; } -DYAD_CORE_FUNC_MODS dyad_rc_t dyad_get_data (const dyad_ctx_t* restrict ctx, +DYAD_DLL_EXPORTED dyad_rc_t dyad_get_data (const dyad_ctx_t* restrict ctx, const dyad_metadata_t* restrict mdata, char** restrict file_data, size_t* restrict file_len) @@ -818,7 +819,7 @@ dyad_rc_t dyad_consume_w_metadata (dyad_ctx_t* restrict ctx, const char* fname, DYAD_C_FUNCTION_START(); DYAD_C_FUNCTION_UPDATE_STR ("fname", fname); dyad_rc_t rc = DYAD_RC_OK; - int fd = -1; + int lock_fd = -1, io_fd = -1; ssize_t file_size = -1; char* file_data = NULL; size_t data_len = 0ul; @@ -844,48 +845,59 @@ dyad_rc_t dyad_consume_w_metadata (dyad_ctx_t* restrict ctx, const char* fname, } // Set reenter to false to avoid recursively performing DYAD operations ctx->reenter = false; - fd = open (fname, O_RDWR | O_CREAT, 0666); - DYAD_C_FUNCTION_UPDATE_INT ("fd", fd); - if (fd == -1) { + lock_fd = open (fname, O_RDWR | O_CREAT, 0666); + DYAD_C_FUNCTION_UPDATE_INT ("lock_fd", lock_fd); + if (lock_fd == -1) { DYAD_LOG_ERROR (ctx, "Cannot create file (%s) for dyad_consume_w_metadata!\n", fname); rc = 
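
`gen_path_key()` above derives a KVS topic by prepending `depth` hash-derived components (each reduced modulo `width`) to the producer path. A rough Python analogue of the shape of those keys; the actual C hash is a 128-bit digest split into four 32-bit words, approximated here with `blake2b`:

```python
import hashlib

def gen_path_key(path, depth=2, width=256):
    # Hash the path, split the 128-bit digest into 32-bit words, and use
    # one word per level as a bucket index in [0, width).
    digest = hashlib.blake2b(path.encode(), digest_size=16).digest()
    words = [int.from_bytes(digest[i:i + 4], "little") for i in range(0, 16, 4)]
    buckets = [str(words[i % 4] % width) for i in range(depth)]
    return ".".join(buckets + [path])

print(gen_path_key("run/output_0.bat"))  # e.g. "137.42.run/output_0.bat"
```
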
DYAD_RC_BADFIO; goto consume_close; } - rc = dyad_excl_flock (ctx, fd, &exclusive_lock); + rc = dyad_excl_flock (ctx, lock_fd, &exclusive_lock); if (DYAD_IS_ERROR (rc)) { - dyad_release_flock (ctx, fd, &exclusive_lock); + dyad_release_flock (ctx, lock_fd, &exclusive_lock); goto consume_close; } - if ((file_size = get_file_size (fd)) <= 0) { + if ((file_size = get_file_size (lock_fd)) <= 0) { DYAD_LOG_INFO (ctx, "[node %u rank %u pid %d] File (%s with fd %d) is not fetched yet", \ - ctx->node_idx, ctx->rank, ctx->pid, fname, fd); + ctx->node_idx, ctx->rank, ctx->pid, fname, lock_fd); // Call dyad_get_data to dispatch a RPC to the producer's Flux broker // and retrieve the data associated with the file rc = dyad_get_data (ctx, mdata, &file_data, &data_len); if (DYAD_IS_ERROR (rc)) { DYAD_LOG_ERROR (ctx, "dyad_get_data failed!\n"); - dyad_release_flock (ctx, fd, &exclusive_lock); + dyad_release_flock (ctx, lock_fd, &exclusive_lock); goto consume_done; } DYAD_C_FUNCTION_UPDATE_INT ("data_len", data_len); - + io_fd = open (fname, O_WRONLY); + DYAD_C_FUNCTION_UPDATE_INT ("io_fd", io_fd); + if (io_fd == -1) { + DYAD_LOG_ERROR (ctx, "Cannot open file (%s) in write mode for dyad_consume!\n", fname); + rc = DYAD_RC_BADFIO; + goto consume_close; + } // Call dyad_pull to fetch the data from the producer's // Flux broker - rc = dyad_cons_store (ctx, mdata, fd, data_len, file_data); + rc = dyad_cons_store (ctx, mdata, io_fd, data_len, file_data); + + if (close (io_fd) != 0) { + rc = DYAD_RC_BADFIO; + dyad_release_flock (ctx, lock_fd, &exclusive_lock); + goto consume_done; + } // If an error occurred in dyad_pull, log it // and return the corresponding DYAD return code if (DYAD_IS_ERROR (rc)) { DYAD_LOG_ERROR (ctx, "dyad_cons_store failed!\n"); - dyad_release_flock (ctx, fd, &exclusive_lock); + dyad_release_flock (ctx, lock_fd, &exclusive_lock); goto consume_done; }; - fsync (fd); } - dyad_release_flock (ctx, fd, &exclusive_lock); + dyad_release_flock (ctx, lock_fd, &exclusive_lock); DYAD_C_FUNCTION_UPDATE_INT ("file_size", file_size); - if (close (fd) != 0) { + if (close (lock_fd) != 0) { rc = DYAD_RC_BADFIO; goto consume_done; } diff --git a/src/dyad/core/dyad_core.h b/src/dyad/core/dyad_core.h index 4608b517..b58577ca 100644 --- a/src/dyad/core/dyad_core.h +++ b/src/dyad/core/dyad_core.h @@ -25,7 +25,11 @@ #ifdef __cplusplus extern "C" { #endif - +#if DYAD_PERFFLOW +#define DYAD_CORE_FUNC_MODS __attribute__ ((annotate ("@critical_path()"))) static +#else +#define DYAD_CORE_FUNC_MODS static inline +#endif DYAD_DLL_EXPORTED extern const struct dyad_ctx dyad_ctx_default; struct dyad_metadata { @@ -113,6 +117,26 @@ DYAD_PFA_ANNOTATE DYAD_DLL_EXPORTED dyad_rc_t dyad_consume (dyad_ctx_t* ctx, con DYAD_PFA_ANNOTATE DYAD_DLL_EXPORTED dyad_rc_t dyad_consume_w_metadata (dyad_ctx_t* ctx, const char* fname, const dyad_metadata_t* mdata); + +/** + * Private function declarations + */ +DYAD_DLL_EXPORTED dyad_rc_t dyad_get_data (const dyad_ctx_t* ctx, const dyad_metadata_t* mdata, + char** file_data, + size_t* file_len); +DYAD_DLL_EXPORTED dyad_rc_t dyad_commit (dyad_ctx_t* ctx, const char* fname); + +DYAD_DLL_EXPORTED int gen_path_key (const char* str, char* path_key, + const size_t len, + const uint32_t depth, + const uint32_t width); + +DYAD_DLL_EXPORTED dyad_rc_t dyad_kvs_read (const dyad_ctx_t* ctx, + const char* topic, + const char* upath, + bool should_wait, + dyad_metadata_t** mdata); + #if DYAD_SYNC_DIR DYAD_PFA_ANNOTATE DYAD_DLL_EXPORTED int dyad_sync_directory (dyad_ctx_t* ctx, const char* path); #endif diff
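
The `lock_fd`/`io_fd` split introduced in `dyad_consume_w_metadata` holds the advisory lock on one descriptor for the whole fetch while writing the fetched bytes through a second, short-lived descriptor. A Python (`fcntl`) sketch of the same pattern, with `fetch` standing in for the `dyad_get_data`/`dyad_cons_store` pair:

```python
import fcntl
import os

def consume(path, fetch):
    lock_fd = os.open(path, os.O_RDWR | os.O_CREAT, 0o666)
    try:
        fcntl.flock(lock_fd, fcntl.LOCK_EX)       # dyad_excl_flock()
        if os.fstat(lock_fd).st_size == 0:        # not fetched yet
            data = fetch(path)                    # dyad_get_data()
            io_fd = os.open(path, os.O_WRONLY)    # separate descriptor for I/O
            try:
                os.write(io_fd, data)             # dyad_cons_store()
            finally:
                os.close(io_fd)
    finally:
        fcntl.flock(lock_fd, fcntl.LOCK_UN)       # release on lock_fd only,
        os.close(lock_fd)                         # never on io_fd
```
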
--git a/src/dyad/core/dyad_ctx.c b/src/dyad/core/dyad_ctx.c index 6dc03898..376d22aa 100644 --- a/src/dyad/core/dyad_ctx.c +++ b/src/dyad/core/dyad_ctx.c @@ -803,7 +803,7 @@ DYAD_DLL_EXPORTED dyad_rc_t dyad_finalize () { DYAD_C_FUNCTION_START (); dyad_rc_t rc = DYAD_RC_OK; - DYAD_LOG_STDERR ("DYAD context is being destroyed!\n"); + //DYAD_LOG_STDERR ("DYAD context is being destroyed!\n"); if (ctx == NULL) { rc = DYAD_RC_OK; goto finalize_region_finish; diff --git a/src/dyad/modules/dyad.c b/src/dyad/modules/dyad.c index b4365692..44ba7a17 100644 --- a/src/dyad/modules/dyad.c +++ b/src/dyad/modules/dyad.c @@ -352,6 +352,8 @@ static int opt_parse (opt_parse_out_t *restrict opt, // mode as the option, then skip reinitializing DYAD_LOG_STDERR ("DYAD_MOD: DTL 'mode' option -m with value `%s'\n", optarg); opt->dtl_mode = optarg; + if (strcmp("UCX", optarg) == 0) *dtl_mode = DYAD_DTL_UCX; + else if (strcmp("FLUX_RPC", optarg) == 0) *dtl_mode = DYAD_DTL_FLUX_RPC; break; case 'i': #ifndef DYAD_LOGGER_NO_LOG @@ -422,7 +424,12 @@ dyad_rc_t dyad_module_ctx_init (const opt_parse_out_t *opt, flux_t *h) DYAD_DTL_MODE_ENV, getenv (DYAD_DTL_MODE_ENV)); } - + char* kvs = getenv("DYAD_KVS_NAMESPACE"); + if (kvs != NULL) { + DYAD_LOG_STDERR ("DYAD_MOD: DYAD_KVS_NAMESPACE is set to `%s'\n", kvs); + } else { + DYAD_LOG_STDERR ("DYAD_MOD: DYAD_KVS_NAMESPACE is not set\n"); + } dyad_ctx_init (DYAD_COMM_SEND, h); mod_ctx->ctx = dyad_ctx_get (); dyad_ctx_t *ctx = mod_ctx->ctx; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 00000000..2661b5df --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,10 @@ +option(ENABLE_DSPACES_TEST "Enable DataSpaces perf test" OFF) +option(ENABLE_UNIT_TEST "Enable DYAD unit tests" ON) + +if (ENABLE_DSPACES_TEST) + add_subdirectory(dspaces_perf) +endif () + +if (ENABLE_UNIT_TEST) + add_subdirectory(unit) +endif () diff --git a/tests/dspaces_perf/CMakeLists.txt b/tests/dspaces_perf/CMakeLists.txt new file mode 100644 index 00000000..75d37d7c --- /dev/null +++ b/tests/dspaces_perf/CMakeLists.txt @@ -0,0 +1,29 @@ +# TODO uncomment and update when adding the "add_test" directives +# if(NOT DEFINED ENV{DYAD_TEST_MACHINE}) +# message(FATAL_ERROR "-- [dyad] DYAD_TEST_MACHINE in env should be set for ${PROJECT_NAME} test build") +# else() +# message(STATUS "[dyad] found setting machine to $ENV{DYAD_TEST_MACHINE}") +# endif() +# if (NOT DEFINED ENV{DSPACES_MAX_VERSIONS}) +# message(FATAL_ERROR "-- [dyad] DSPACES_MAX_VERSIONS in env should be set for ${PROJECT_NAME} paper test build") +# else () +# message(STATUS "[dyad] found setting DataSpaces max versions to $ENV{DSPACES_MAX_VERSIONS}") +# endif () + +find_package(Catch2 REQUIRED) +find_package(MPI REQUIRED COMPONENTS CXX) +find_package(dspaces REQUIRED CONFIG) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(Catch2::Catch2) +include_directories(${MPI_CXX_INCLUDE_DIRS}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) +include_directories(${CMAKE_BINARY_DIR}/include) +set(TEST_LIBS Catch2::Catch2 ${MPI_CXX_LIBRARIES} -rdynamic dspaces::dspaces) +set(TEST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/catch_config.h ${CMAKE_CURRENT_SOURCE_DIR}/test_utils.h) +add_executable(unit_test unit_test.cpp ${TEST_SRC} ) +target_link_libraries(unit_test ${TEST_LIBS}) + +# TODO add the "add_test" directives by going into the subdirectories +# add_subdirectory(script) +# add_subdirectory(data_plane) +# add_subdirectory(mdm) diff --git a/tests/dspaces_perf/catch_config.h b/tests/dspaces_perf/catch_config.h 
new file mode 100644 index 00000000..9eb9ac24 --- /dev/null +++ b/tests/dspaces_perf/catch_config.h @@ -0,0 +1,24 @@ +#ifndef DYAD_CATCH_CONFIG_H +#define DYAD_CATCH_CONFIG_H +#include +namespace cl = Catch::Clara; + +cl::Parser define_options(); + +int init(int* argc, char*** argv); +int finalize(); + +int main(int argc, char* argv[]) { + Catch::Session session; + auto cli = session.cli() | define_options(); + session.cli(cli); + int returnCode = session.applyCommandLine(argc, argv); + if (returnCode != 0) return returnCode; + returnCode = init(&argc, &argv); + if (returnCode != 0) return returnCode; + int test_return_code = session.run(); + returnCode = finalize(); + if (returnCode != 0) return returnCode; + exit(test_return_code); +} +#endif // DYAD_CATCH_CONFIG_H diff --git a/tests/dspaces_perf/data_plane/CMakeLists.txt b/tests/dspaces_perf/data_plane/CMakeLists.txt new file mode 100644 index 00000000..3819b23d --- /dev/null +++ b/tests/dspaces_perf/data_plane/CMakeLists.txt @@ -0,0 +1,11 @@ +set(node 1) +set(ppn 1) +set(files 1) +set(test_name unit_remote_data_${node}_${ppn}) +add_test(${test_name} flux run -N $node --tasks-per-node $ppn ${CMAKE_BINARY_DIR}/bin/unit_test --filename dp_${node}_${ppn} --pfs $ENV{DYAD_PFS_DIR} --dmd $ENV{DYAD_DMD_DIR} --iteration 10 --number_of_files ${files} --reporter compact RemoteDataBandwidth) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_KVS_NAMESPACE=${DYAD_KEYSPACE}) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_MODULE_SO=${CMAKE_BINARY_DIR}/${DYAD_LIBDIR}/dyad.so) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_LOG_DIR=${DYAD_LOG_DIR}) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_DTL_MODE=UCX) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_CONSUMER=$ENV{DYAD_DMD_DIR}) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_PRODUCER=$ENV{DYAD_DMD_DIR}) \ No newline at end of file diff --git a/tests/dspaces_perf/data_plane/data_plane.cpp b/tests/dspaces_perf/data_plane/data_plane.cpp new file mode 100644 index 00000000..35fb43ca --- /dev/null +++ b/tests/dspaces_perf/data_plane/data_plane.cpp @@ -0,0 +1,389 @@ +#include +#include +#include +#include + +#include + +#define NS_TO_SECS(ns_var) ((double)ns_var / 1000000000.0) +#define AGG_TIME(time_var, agg_time_var, dtype) \ + MPI_Reduce(&time_var, &agg_time_var, 1, dtype, MPI_SUM, 0, MPI_COMM_WORLD) + +FILE *redirect_stdout(const char *filename) { + size_t dir_len = strlen(args.dspaces_timing_dir.c_str()); + bool ends_with_separator = + (args.dspaces_timing_dir.c_str()[dir_len - 1] == '/'); + size_t filename_len = dir_len + strlen(filename) + 1; + if (!ends_with_separator) { + filename_len += 1; + } + char *full_filename = (char *)malloc(filename_len * sizeof(char)); + memset(full_filename, 0, filename_len * sizeof(char)); + strcpy(full_filename, args.dspaces_timing_dir.c_str()); + if (!ends_with_separator) { + strcat(full_filename, "/"); + } + strcat(full_filename, filename); + FILE *fp = freopen(full_filename, "a", stdout); + free(full_filename); + return fp; +} + +int restore_stdout(FILE *freopen_fp) { return fclose(freopen_fp); } + +void gen_var_name(char *filename, bool is_local, bool add_rank_if_remote, + bool next_local_rank, bool next_node) { + size_t node_idx = info.rank / args.process_per_node; + size_t local_rank = info.rank % args.process_per_node; + if (next_local_rank) { + local_rank = (local_rank + 1) % args.process_per_node; + } + if (next_node) { + node_idx = (node_idx + 
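
`redirect_stdout()` above hand-assembles `<dspaces_timing_dir>/<filename>`, special-casing a trailing separator; for comparison, the same append in Python is a one-liner (paths hypothetical):

```python
from pathlib import Path

def timing_path(timing_dir, filename):
    # pathlib normalizes the separator whether or not timing_dir ends in "/".
    return Path(timing_dir) / filename

print(timing_path("/tmp/timings/", "remote_data_bandwidth_0.csv"))
```
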
1) % info.num_nodes; + } + size_t server_proc_local_idx = info.rank % args.server_ppn; + size_t global_server_proc_idx = + node_idx * args.server_ppn + server_proc_local_idx; + if (is_local) { + size_t global_rank_from_local = + node_idx * args.process_per_node + local_rank; + sprintf(filename, "%s_%zu.bat", args.filename.c_str(), + global_rank_from_local); + } else { + if (add_rank_if_remote) { + sprintf(filename, "%s_%zu_%zu.bat", args.filename.c_str(), + global_server_proc_idx, local_rank); + } else { + sprintf(filename, "%s_%zu.bat", args.filename.c_str(), + global_server_proc_idx); + } + } +} + +int create_files_per_server_process(dspaces_client_t *client, bool is_local, + bool add_rank) { + int rc = 0; + char filename[4096]; + size_t file_size = args.request_size * args.iteration; + gen_var_name(filename, is_local, add_rank, false, false); + // Clients are connected round-robin to server processes on the same node. + // To work out which processes should write "files", we first get a node-local + // rank for each client process by modulo dividing the global rank (info.rank) + // with the processes per node (args.process_per_node). Then, we floor divide + // that node-local rank by args.server_ppn. This floor division should only + // equal 0 for the first "args.server_ppn" ranks to connect to local server + // processes. Since DataSpaces connects to local processes round-robin, these + // ranks for which the floor division equals 0 are the "first" processes to + // connect to each local server processes. + bool first_rank_per_server_proc = + (info.rank % args.process_per_node) / args.server_ppn == 0; + int color = (int)(is_local || add_rank || first_rank_per_server_proc); + MPI_Comm split_comm; + int split_rank; + MPI_Comm_split(MPI_COMM_WORLD, color, info.rank, &split_comm); + MPI_Comm_rank(split_comm, &split_rank); + if (is_local || add_rank || first_rank_per_server_proc) { + std::string rand_data = GenRandom(file_size); + uint64_t lb = 0; + uint64_t ub = rand_data.size() - 1; + uint64_t buf_size = ub + 1; + dspaces_define_gdim(*client, filename, 1, &buf_size); + for (size_t file_idx = 0; file_idx < args.number_of_files; ++file_idx) { + if (is_local) { + rc = dspaces_put_local(*client, filename, file_idx, sizeof(char), 1, + &lb, &ub, rand_data.data()); + } else { + rc = dspaces_put(*client, filename, file_idx, sizeof(char), 1, &lb, &ub, + rand_data.data()); + } + if (rc != 0) { + // TODO log error? 
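
A sketch of the rank arithmetic described in the long comment above (round-robin mapping of client ranks onto node-local server processes); the `ppn`/`server_ppn` values are made up for illustration:

```python
def placement(rank, ppn, server_ppn):
    node_idx = rank // ppn                 # node this rank lives on
    local_rank = rank % ppn                # node-local rank
    server_local_idx = rank % server_ppn   # round-robin server on the node
    global_server_idx = node_idx * server_ppn + server_local_idx
    # Floor division is 0 only for the first server_ppn ranks on each node,
    # i.e. the "first" client connected to each local server process.
    first_per_server = (local_rank // server_ppn) == 0
    return node_idx, local_rank, global_server_idx, first_per_server

for r in range(8):
    print(r, placement(r, ppn=4, server_ppn=2))
```
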
+ } + MPI_Barrier(split_comm); + if (split_rank == 0) { + printf("Finished putting file %lu\n", file_idx); + fflush(stdout); + } + } + } + MPI_Barrier(MPI_COMM_WORLD); + if (info.rank == 0) { + printf("Finished putting data into server\n"); + fflush(stdout); + } + MPI_Barrier(MPI_COMM_WORLD); + return rc; +} + +void gen_perf_print(size_t data_len, double total_mdata_time, + double total_data_time) { + double agg_mdata_time = 0; + double agg_data_time = 0; + AGG_TIME(total_mdata_time, agg_mdata_time, MPI_DOUBLE); + AGG_TIME(total_data_time, agg_data_time, MPI_DOUBLE); + if (info.rank == 0) { + double final_mdata_time = agg_mdata_time / info.comm_size; + double final_data_time = agg_data_time / info.comm_size; + printf("[DSPACES_TEST],%10d,%10lu,%10lu,%10.6f,%10.6f,%10.6f,%20.6f\n", + info.comm_size, // Comm Size + data_len * args.number_of_files, // Total I/O per process + args.number_of_files, // Number of mdata ops per process + final_data_time, // Data Time + final_mdata_time, // Metadata Time + data_len * args.number_of_files * info.comm_size / final_data_time / + 1024.0 / 1024.0, // Data Bandwidth + // TODO change division to be by 1000 instead of 1024 + args.number_of_files * info.comm_size / + final_mdata_time // Metadata Bandwidth + ); + fflush(stdout); + } +} + +TEST_CASE("RemoteDataBandwidth", + "[files= " + std::to_string(args.number_of_files) + + "]" + "[file_size= " + + std::to_string(args.request_size * args.iteration) + + "]" + "[parallel_req= " + + std::to_string(info.comm_size) + + "]" + "[num_nodes= " + + std::to_string(info.comm_size / args.process_per_node) + "]") { + SECTION("Test Max Bandwidth") { + Timer data_time; + REQUIRE(pretest() == 0); + dspaces_client_t client = dspaces_CLIENT_NULL; + int rc = dspaces_init_mpi(MPI_COMM_WORLD, &client); + REQUIRE(rc == dspaces_SUCCESS); + REQUIRE(create_files_per_server_process(&client, false, true) == 0); + char filename[4096]; + gen_var_name(filename, false, true, false, true); + size_t data_len = args.request_size * args.iteration; + char *file_data = NULL; + int ndim = 1; + uint64_t lb = 0; + uint64_t ub = data_len - 1; + char csv_filename[4096]; + // sprintf(csv_filename, "remote_data_bandwidth_%d.csv", info.rank); + // FILE* fp = fopen(csv_filename, "w+"); + // FILE* fp = redirect_stdout(csv_filename); + // REQUIRE(fp != NULL); + // printf("rank,var_name,version,data_size,mdata_time_ns,data_time_ns\n"); + double total_mdata_time = 0; + double total_data_time = 0; + for (size_t file_idx = 0; file_idx < args.number_of_files; ++file_idx) { + long long int mdata_time_ns = 0; + long long int data_time_ns = 0; + data_time.resumeTime(); + // Using aget instead of get because dyad_get_data also allocates the + // buffer Also, setting timeout to 0 to prevent blocking for data + // availability since the data should always be available. 
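
The `[DSPACES_TEST]` row printed by `gen_perf_print()` reduces per-process times to a mean and converts them into aggregate rates. The same arithmetic in Python, with made-up inputs:

```python
def perf_row(comm_size, data_len, n_files, sum_data_t, sum_mdata_t):
    # MPI_Reduce sums the per-process timers; dividing by comm_size
    # gives the mean time per process.
    data_t = sum_data_t / comm_size
    mdata_t = sum_mdata_t / comm_size
    data_bw_mib = data_len * n_files * comm_size / data_t / 1024 / 1024
    mdata_ops = n_files * comm_size / mdata_t
    return data_bw_mib, mdata_ops

print(perf_row(64, 65536 * 8, 16, sum_data_t=96.0, sum_mdata_t=3.2))
```
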
+ rc = dspaces_aget(client, filename, file_idx, ndim, &lb, &ub, + (void **)&file_data, -1, &mdata_time_ns, &data_time_ns); + data_time.pauseTime(); + REQUIRE(rc == dspaces_SUCCESS); + free(file_data); + total_mdata_time += NS_TO_SECS(mdata_time_ns); + total_data_time += NS_TO_SECS(data_time_ns); + } + // restore_stdout(fp); + // AGGREGATE_TIME(data); + gen_perf_print(data_len, total_mdata_time, total_data_time); + MPI_Barrier(MPI_COMM_WORLD); + MPI_Abort(MPI_COMM_WORLD, 0); + rc = dspaces_fini(client); + REQUIRE(rc == dspaces_SUCCESS); + // fclose(fp); + REQUIRE(posttest() == 0); + } +} + +TEST_CASE("RemoteDataAggBandwidth", + "[files= " + std::to_string(args.number_of_files) + + "]" + "[file_size= " + + std::to_string(args.request_size * args.iteration) + + "]" + "[parallel_req= " + + std::to_string(info.comm_size) + + "]" + "[num_nodes= " + + std::to_string(info.comm_size / args.process_per_node) + "]") { + SECTION("Test Max Bandwidth") { + Timer data_time; + REQUIRE(pretest() == 0); + dspaces_client_t client = dspaces_CLIENT_NULL; + int rc = dspaces_init_mpi(MPI_COMM_WORLD, &client); + REQUIRE(rc == dspaces_SUCCESS); + REQUIRE(create_files_per_server_process(&client, false, false) == 0); + char filename[4096]; + gen_var_name(filename, false, false, false, true); + char *file_data = NULL; + size_t data_len = args.request_size * args.iteration; + int ndim = 1; + uint64_t lb = 0; + uint64_t ub = data_len - 1; + char csv_filename[4096]; + // sprintf(csv_filename, "remote_data_agg_bandwidth_%d.csv", info.rank); + // FILE* fp = redirect_stdout(csv_filename); + // FILE* fp = fopen(csv_filename, "w+"); + // REQUIRE(fp != NULL); + // printf("rank,var_name,version,data_size,mdata_time_ns,data_time_ns\n"); + double total_mdata_time = 0; + double total_data_time = 0; + if (info.rank % args.process_per_node != 0) + usleep(10000); + for (size_t file_idx = 0; file_idx < args.number_of_files; ++file_idx) { + long long int mdata_time_ns = 0; + long long int data_time_ns = 0; + data_time.resumeTime(); + // Using aget instead of get because dyad_get_data also allocates the + // buffer Unlike the previous test, we set the timeout to -1 so it will do + // any blocking that it might want to do + // TODO: confirm that the timeout is actually needed to guarantee this + // type of behavior + rc = dspaces_aget(client, filename, file_idx, ndim, &lb, &ub, + (void **)&file_data, -1, &mdata_time_ns, &data_time_ns); + data_time.pauseTime(); + REQUIRE(rc == dspaces_SUCCESS); + free(file_data); + total_mdata_time += NS_TO_SECS(mdata_time_ns); + total_data_time += NS_TO_SECS(data_time_ns); + } + // restore_stdout(fp); + // AGGREGATE_TIME(data); + // if (info.rank == 0) { + // printf("[MANUAL_TIMING] total_data = %10.6f\n", total_data); + // fflush(stdout); + // } + gen_perf_print(data_len, total_mdata_time, total_data_time); + MPI_Barrier(MPI_COMM_WORLD); + MPI_Abort(MPI_COMM_WORLD, 0); + rc = dspaces_fini(client); + REQUIRE(rc == dspaces_SUCCESS); + // fclose(fp); + REQUIRE(posttest() == 0); + } +} + +TEST_CASE("LocalProcessDataBandwidth", + "[files= " + std::to_string(args.number_of_files) + + "]" + "[file_size= " + + std::to_string(args.request_size * args.iteration) + + "]" + "[parallel_req= " + + std::to_string(info.comm_size) + + "]" + "[num_nodes= " + + std::to_string(info.comm_size / args.process_per_node) + "]") { + SECTION("Test Max Bandwidth") { + Timer data_time; + REQUIRE(pretest() == 0); + dspaces_client_t client = dspaces_CLIENT_NULL; + int rc = dspaces_init_mpi(MPI_COMM_WORLD, &client); + REQUIRE(rc == 
dspaces_SUCCESS); + REQUIRE(create_files_per_server_process(&client, true, true) == 0); + char filename[4096]; + gen_var_name(filename, true, false, false, false); + size_t data_len = args.request_size * args.iteration; + int ndim = 1; + uint64_t lb = 0; + uint64_t ub = data_len - 1; + char *file_data = NULL; + char csv_filename[4096]; + // sprintf(csv_filename, "local_process_data_bandwidth_%d.csv", info.rank); + // FILE* fp = redirect_stdout(csv_filename); + // FILE* fp = fopen(csv_filename, "w+"); + // REQUIRE(fp != NULL); + // printf("rank,var_name,version,data_size,mdata_time_ns,data_time_ns\n"); + double total_mdata_time = 0; + double total_data_time = 0; + for (size_t file_idx = 0; file_idx < args.number_of_files; ++file_idx) { + long long int mdata_time_ns = 0; + long long int data_time_ns = 0; + data_time.resumeTime(); + // Using aget instead of get because dyad_get_data also allocates the + // buffer Also, setting timeout to 0 to prevent blocking for data + // availability since the data should always be available. + rc = dspaces_aget(client, filename, file_idx, ndim, &lb, &ub, + (void **)&file_data, -1, &mdata_time_ns, &data_time_ns); + data_time.pauseTime(); + REQUIRE(rc == dspaces_SUCCESS); + free(file_data); + total_mdata_time += NS_TO_SECS(mdata_time_ns); + total_data_time += NS_TO_SECS(data_time_ns); + } + // restore_stdout(fp); + // AGGREGATE_TIME(data); + gen_perf_print(data_len, total_mdata_time, total_data_time); + MPI_Barrier(MPI_COMM_WORLD); + MPI_Abort(MPI_COMM_WORLD, 0); + rc = dspaces_fini(client); + REQUIRE(rc == dspaces_SUCCESS); + // fclose(fp); + REQUIRE(posttest() == 0); + } +} + +TEST_CASE("LocalNodeDataBandwidth", + "[files= " + std::to_string(args.number_of_files) + + "]" + "[file_size= " + + std::to_string(args.request_size * args.iteration) + + "]" + "[parallel_req= " + + std::to_string(info.comm_size) + + "]" + "[num_nodes= " + + std::to_string(info.comm_size / args.process_per_node) + "]") { + SECTION("Test Max Bandwidth") { + Timer data_time; + REQUIRE(pretest() == 0); + dspaces_client_t client = dspaces_CLIENT_NULL; + int rc = dspaces_init_mpi(MPI_COMM_WORLD, &client); + REQUIRE(rc == dspaces_SUCCESS); + REQUIRE(create_files_per_server_process(&client, true, true) == 0); + char filename[4096]; + gen_var_name(filename, true, false, true, false); + size_t data_len = args.request_size * args.iteration; + int ndim = 1; + uint64_t lb = 0; + uint64_t ub = data_len - 1; + char *file_data = NULL; + char csv_filename[4096]; + // sprintf(csv_filename, "local_node_data_bandwidth_%d.csv", info.rank); + // FILE* fp = redirect_stdout(csv_filename); + // FILE* fp = fopen(csv_filename, "w+"); + // REQUIRE(fp != NULL); + // printf("rank,var_name,version,data_size,mdata_time_ns,data_time_ns\n"); + double total_mdata_time = 0; + double total_data_time = 0; + for (size_t file_idx = 0; file_idx < args.number_of_files; ++file_idx) { + long long int mdata_time_ns = 0; + long long int data_time_ns = 0; + data_time.resumeTime(); + // Using aget instead of get because dyad_get_data also allocates the + // buffer Also, setting timeout to 0 to prevent blocking for data + // availability since the data should always be available. 
+ rc = dspaces_aget(client, filename, file_idx, ndim, &lb, &ub, + (void **)&file_data, -1, &mdata_time_ns, &data_time_ns); + data_time.pauseTime(); + REQUIRE(rc == dspaces_SUCCESS); + free(file_data); + total_mdata_time += NS_TO_SECS(mdata_time_ns); + total_data_time += NS_TO_SECS(data_time_ns); + } + // restore_stdout(fp); + // AGGREGATE_TIME(data); + gen_perf_print(data_len, total_mdata_time, total_data_time); + MPI_Barrier(MPI_COMM_WORLD); + MPI_Abort(MPI_COMM_WORLD, 0); + rc = dspaces_fini(client); + REQUIRE(rc == dspaces_SUCCESS); + // fclose(fp); + REQUIRE(posttest() == 0); + } +} diff --git a/tests/dspaces_perf/mdm/CMakeLists.txt b/tests/dspaces_perf/mdm/CMakeLists.txt new file mode 100644 index 00000000..3717411e --- /dev/null +++ b/tests/dspaces_perf/mdm/CMakeLists.txt @@ -0,0 +1,29 @@ +set(node 1) +set(ppn 1) +set(files 1) +set(test_name unit_localfs_${node}_${ppn}) +set(mpiexec flux run -N ${node} --tasks-per-node ${ppn}) +set(mpiexec ) +add_test(${test_name} ${mpiexec} ${CMAKE_BINARY_DIR}/bin/unit_test --filename mdm_${node}_${ppn} --pfs $ENV{DYAD_PFS_DIR} --dmd $ENV{DYAD_DMD_DIR} --iteration 10 --number_of_files ${files} --reporter compact LocalFSLookup) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_KVS_NAMESPACE=${DYAD_KEYSPACE}) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_MODULE_SO=${CMAKE_BINARY_DIR}/${DYAD_LIBDIR}/dyad.so) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_LOG_DIR=${DYAD_LOG_DIR}) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_DTL_MODE=UCX) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_CONSUMER=$ENV{DYAD_DMD_DIR}) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_PRODUCER=$ENV{DYAD_DMD_DIR}) +set(test_name unit_localkvs_${node}_${ppn}) +add_test(${test_name} ${mpiexec} ${CMAKE_BINARY_DIR}/bin/unit_test --filename mdm_${node}_${ppn} --pfs $ENV{DYAD_PFS_DIR} --dmd $ENV{DYAD_DMD_DIR} --iteration 10 --number_of_files ${files} --reporter compact LocalKVSLookup) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_KVS_NAMESPACE=${DYAD_KEYSPACE}) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_MODULE_SO=${CMAKE_BINARY_DIR}/${DYAD_LIBDIR}/dyad.so) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_LOG_DIR=${DYAD_LOG_DIR}) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_DTL_MODE=UCX) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_CONSUMER=$ENV{DYAD_DMD_DIR}) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_PRODUCER=$ENV{DYAD_DMD_DIR}) +set(test_name unit_remotekvs_${node}_${ppn}) +add_test(${test_name} ${mpiexec} ${CMAKE_BINARY_DIR}/bin/unit_test --filename mdm_${node}_${ppn} --pfs $ENV{DYAD_PFS_DIR} --dmd $ENV{DYAD_DMD_DIR} --iteration 10 --number_of_files ${files} --reporter compact RemoteKVSLookup) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_KVS_NAMESPACE=${DYAD_KEYSPACE}) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_MODULE_SO=${CMAKE_BINARY_DIR}/${DYAD_LIBDIR}/dyad.so) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_LOG_DIR=${DYAD_LOG_DIR}) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_DTL_MODE=UCX) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_CONSUMER=$ENV{DYAD_DMD_DIR}) +set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_PRODUCER=$ENV{DYAD_DMD_DIR}) \ No newline at end of file diff --git a/tests/dspaces_perf/mdm/mdm.cpp 
b/tests/dspaces_perf/mdm/mdm.cpp new file mode 100644 index 00000000..d4cc2563 --- /dev/null +++ b/tests/dspaces_perf/mdm/mdm.cpp @@ -0,0 +1,151 @@ +#include +#include + +#include +TEST_CASE("LocalFSLookup", "[number_of_lookups= " + std::to_string(args.number_of_files) +"]" + "[parallel_req= " + std::to_string(info.comm_size) +"]" + "[num_nodes= " + std::to_string(info.comm_size / args.process_per_node) +"]") { + REQUIRE (pretest() == 0); + REQUIRE (clean_directories() == 0); + dyad_rc_t rc = dyad_init_env (DYAD_COMM_RECV, info.flux_handle); + REQUIRE (rc >= 0); + auto ctx = dyad_ctx_get(); + struct flock exclusive_lock; + SECTION("Throughput") { + char filename[4096]; + Timer kvs_time; + for (size_t file_idx=0; file_idx < args.number_of_files; ++file_idx) { + sprintf (filename, + "%s/%s_%u_%zu.bat", + args.dyad_managed_dir.c_str (), + args.filename.c_str (), + info.broker_idx, + file_idx); + kvs_time.resumeTime(); + int lock_fd = open (filename, O_RDWR | O_CREAT, 0666); + kvs_time.pauseTime(); + REQUIRE (lock_fd != -1); + + kvs_time.resumeTime(); + rc = dyad_excl_flock (ctx, lock_fd, &exclusive_lock); + kvs_time.pauseTime(); + REQUIRE (rc >= 0); + + kvs_time.resumeTime(); + auto file_size = get_file_size (lock_fd); + kvs_time.pauseTime(); + (void)file_size; + kvs_time.resumeTime(); + dyad_release_flock (ctx, lock_fd, &exclusive_lock); + int status = close (lock_fd); + kvs_time.pauseTime(); + REQUIRE (status == 0); + } + AGGREGATE_TIME(kvs); + if (info.rank == 0) { + printf("[DYAD_TEST],%10d,%10lu,%10.6f,%10.6f\n", + info.comm_size, args.number_of_files, + total_kvs/info.comm_size, args.number_of_files*info.comm_size*info.comm_size/total_kvs/1000/1000); + } + } + rc = dyad_finalize(); + REQUIRE (rc >= 0); + REQUIRE (clean_directories() == 0); + REQUIRE (posttest() == 0); +} + +TEST_CASE("LocalKVSLookup", "[number_of_lookups= " + std::to_string(args.number_of_files) +"]" + "[parallel_req= " + std::to_string(info.comm_size) +"]" + "[num_nodes= " + std::to_string(info.comm_size / args.process_per_node) +"]") { + REQUIRE (pretest() == 0); + dyad_rc_t rc = dyad_init_env (DYAD_COMM_RECV, info.flux_handle); + REQUIRE (rc >= 0); + auto ctx = dyad_ctx_get(); + SECTION("Throughput") { + Timer kvs_time; + char my_filename[4096], lookup_filename[4096]; + for (size_t file_idx=0; file_idx < args.number_of_files; ++file_idx) { + sprintf (my_filename, + "%s/%s_%u_%d_%zu.bat", + args.dyad_managed_dir.c_str (), + args.filename.c_str (), + info.broker_idx, + info.rank, + file_idx); + sprintf (lookup_filename, + "%s_%u_%d_%zu.bat", + args.filename.c_str (), + info.broker_idx, + info.rank, + file_idx); + rc = dyad_commit (ctx, my_filename); + REQUIRE (rc >= 0); + dyad_metadata_t* mdata; + const size_t topic_len = PATH_MAX; + char topic[PATH_MAX+1] = {'\0'}; + gen_path_key (lookup_filename, topic, topic_len, ctx->key_depth, ctx->key_bins); + kvs_time.resumeTime(); + rc = dyad_kvs_read (ctx, topic, lookup_filename, false, &mdata); + kvs_time.pauseTime(); + REQUIRE (rc >= 0); + } + AGGREGATE_TIME(kvs); + if (info.rank == 0) { + printf("[DYAD_TEST],%10d,%10lu,%10.6f,%10.6f\n", + info.comm_size, args.number_of_files, + total_kvs/info.comm_size, args.number_of_files*info.comm_size*info.comm_size/total_kvs/1000/1000); + } + } + rc = dyad_finalize(); + REQUIRE (rc >= 0); + REQUIRE (posttest() == 0); +} + +TEST_CASE("RemoteKVSLookup", "[number_of_lookups= " + std::to_string(args.number_of_files) +"]" + "[parallel_req= " + std::to_string(info.comm_size) +"]" + "[num_nodes= " + std::to_string(info.comm_size / 
args.process_per_node) +"]") {
+    REQUIRE (pretest() == 0);
+    dyad_rc_t rc = dyad_init_env (DYAD_COMM_RECV, info.flux_handle);
+    REQUIRE (rc >= 0);
+    auto ctx = dyad_ctx_get();
+    SECTION("Throughput") {
+        Timer kvs_time;
+        char my_filename[4096], lookup_filename[4096];
+        for (size_t file_idx=0; file_idx < args.number_of_files; ++file_idx) {
+            sprintf (my_filename,
+                     "%s/%s_%u_%d_%d_%zu.bat",
+                     args.dyad_managed_dir.c_str (),
+                     args.filename.c_str (),
+                     info.broker_idx,
+                     info.rank,
+                     info.comm_size,
+                     file_idx);
+            sprintf (lookup_filename,
+                     "%s_%u_%d_%d_%zu.bat",
+                     args.filename.c_str (),
+                     info.broker_idx,
+                     info.rank,
+                     info.comm_size,
+                     file_idx);
+            rc = dyad_commit (ctx, my_filename);
+            REQUIRE (rc >= 0);
+            dyad_metadata_t* mdata;
+            const size_t topic_len = PATH_MAX;
+            char topic[PATH_MAX+1] = {'\0'};
+            gen_path_key (lookup_filename, topic, topic_len, ctx->key_depth, ctx->key_bins);
+            kvs_time.resumeTime();
+            rc = dyad_kvs_read (ctx, topic, lookup_filename, false, &mdata);
+            kvs_time.pauseTime();
+            REQUIRE (rc >= 0);
+        }
+        AGGREGATE_TIME(kvs);
+        if (info.rank == 0) {
+            printf("[DYAD_TEST],%10d,%10lu,%10.6f,%10.6f\n",
+                   info.comm_size, args.number_of_files,
+                   total_kvs/info.comm_size, args.number_of_files*info.comm_size*info.comm_size/total_kvs/1000/1000);
+        }
+    }
+    rc = dyad_finalize();
+    REQUIRE (rc >= 0);
+    REQUIRE (posttest() == 0);
+}
\ No newline at end of file
diff --git a/tests/dspaces_perf/script/CMakeLists.txt b/tests/dspaces_perf/script/CMakeLists.txt
new file mode 100644
index 00000000..355bb72c
--- /dev/null
+++ b/tests/dspaces_perf/script/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_test(dspaces_start ${CMAKE_CURRENT_SOURCE_DIR}/dspaces_start.sh)
+set_property(TEST dspaces_start APPEND PROPERTY ENVIRONMENT DSPACES_DEFAULT_VAR_SIZE=${DSPACES_DEFAULT_VAR_SIZE})
+set_property(TEST dspaces_start APPEND PROPERTY ENVIRONMENT DSPACES_MAX_VERSIONS=$ENV{DSPACES_MAX_VERSIONS})
+set_property(TEST dspaces_start APPEND PROPERTY ENVIRONMENT DSPACES_PPN=$ENV{DSPACES_PPN})
+set_property(TEST dspaces_start APPEND PROPERTY ENVIRONMENT DSPACES_ROOT=${DSPACES_ROOT})
+set_property(TEST dspaces_start APPEND PROPERTY ENVIRONMENT DSPACES_HG_CONNECTION_STR=$ENV{DSPACES_HG_CONNECTION_STR})
+
+add_test(dspaces_stop ${CMAKE_CURRENT_SOURCE_DIR}/dspaces_stop.sh)
+set_property(TEST dspaces_stop APPEND PROPERTY ENVIRONMENT DSPACES_ROOT=${DSPACES_ROOT})
diff --git a/tests/dspaces_perf/script/dspaces_start.sh b/tests/dspaces_perf/script/dspaces_start.sh
new file mode 100755
index 00000000..e3f3a96e
--- /dev/null
+++ b/tests/dspaces_perf/script/dspaces_start.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# DSPACES_MAX_VERSIONS is exported for this test by script/CMakeLists.txt.
+echo "## Config file for DataSpaces server
+ndim = 1
+dims = ${DSPACES_DEFAULT_VAR_SIZE}
+max_versions = ${DSPACES_MAX_VERSIONS}
+num_apps = 1" > dataspaces.conf
+
+dspaces_num_nodes=$(flux resource info | grep -oP "\d+ Nodes" | grep -oP "^\d+")
+
+flux submit -N ${dspaces_num_nodes} --tasks-per-node ${DSPACES_PPN} ${DSPACES_ROOT}/bin/dspaces_server ${DSPACES_HG_CONNECTION_STR}
\ No newline at end of file
diff --git a/tests/dspaces_perf/script/dspaces_stop.sh b/tests/dspaces_perf/script/dspaces_stop.sh
new file mode 100755
index 00000000..1a6b4ccf
--- /dev/null
+++ b/tests/dspaces_perf/script/dspaces_stop.sh
@@ -0,0 +1 @@
+flux run --ntasks=1 ${DSPACES_ROOT}/bin/terminator
\ No newline at end of file
diff --git a/tests/dspaces_perf/test_utils.h b/tests/dspaces_perf/test_utils.h
new file mode 100644
index 00000000..11a62970
--- /dev/null
+++ b/tests/dspaces_perf/test_utils.h
@@ -0,0 +1,66 @@
+#ifndef DYAD_TEST_UTILS_H
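+// Shared helpers for the dspaces_perf tests. AGGREGATE_TIME(name) MPI-reduces
+// the elapsed seconds of a Timer named name##_time into total_<name> on rank 0;
+// GenRandom() builds deterministic payloads; get_filename() resolves an open
+// fd back to its path via /proc/self/fd.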
+#define DYAD_TEST_UTILS_H
+
+// NOTE: the original include targets were not preserved; the headers below
+// are a reconstruction based on what this file actually uses (MPI_Reduce,
+// readlink, snprintf, rand_r/abs, std::string, std::chrono).
+#include <mpi.h>
+#include <unistd.h>
+#include <chrono>
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+const uint32_t KB = 1024;
+const uint32_t MB = 1024 * 1024;
+#define AGGREGATE_TIME(name)                                  \
+    double total_##name = 0.0;                                \
+    auto name##_a = name##_time.getElapsedTime();             \
+    MPI_Reduce(&name##_a, &total_##name, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+
+size_t GetRandomOffset(size_t i, unsigned int offset_seed, size_t stride,
+                       size_t total_size) {
+  return abs((int)(((i * rand_r(&offset_seed)) % stride) % total_size));
+}
+inline std::string get_filename(int fd) {
+  const int kMaxSize = 256;
+  char proclnk[kMaxSize];
+  char filename[kMaxSize];
+  snprintf(proclnk, kMaxSize, "/proc/self/fd/%d", fd);
+  // readlink() returns -1 on failure, so guard before writing the terminator.
+  ssize_t r = readlink(proclnk, filename, kMaxSize - 1);
+  if (r < 0) r = 0;
+  filename[r] = '\0';
+  return filename;
+}
+
+std::string GenRandom(const int len) {
+  std::string tmp_s;
+  static const char alphanum[] =
+      "0123456789"
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+      "abcdefghijklmnopqrstuvwxyz";
+
+  // Fixed seed: every call produces the same payload, keeping runs comparable.
+  srand(100);
+
+  tmp_s.reserve(len);
+
+  for (int i = 0; i < len; ++i) {
+    tmp_s += alphanum[rand() % (sizeof(alphanum) - 1)];
+  }
+
+  tmp_s[len - 1] = '\n';
+
+  return tmp_s;
+}
+
+class Timer {
+ public:
+  Timer() : elapsed_time(0) {}
+  void resumeTime() { t1 = std::chrono::high_resolution_clock::now(); }
+  double pauseTime() {
+    auto t2 = std::chrono::high_resolution_clock::now();
+    // duration<double> counts seconds as a double.
+    elapsed_time += std::chrono::duration<double>(t2 - t1).count();
+    return elapsed_time;
+  }
+  double getElapsedTime() { return elapsed_time; }
+
+ private:
+  std::chrono::high_resolution_clock::time_point t1;
+  double elapsed_time;
+};
+
+#endif  // DYAD_TEST_UTILS_H
diff --git a/tests/dspaces_perf/unit_test.cpp b/tests/dspaces_perf/unit_test.cpp
new file mode 100644
index 00000000..f67b4f87
--- /dev/null
+++ b/tests/dspaces_perf/unit_test.cpp
@@ -0,0 +1,131 @@
+// Assumed include set (the original targets were not preserved): the Catch2
+// session/Clara glue, the local test utilities, MPI, and POSIX.
+#include "catch_config.h"
+#include "test_utils.h"
+#include <mpi.h>
+#include <unistd.h>
+
+/**
+ * Test data structures
+ */
+namespace dyad::test {
+struct Info {
+  int rank;
+  int comm_size;
+  int num_nodes;
+  size_t num_server_procs;
+  bool debug_init;
+};
+struct Arguments {
+  std::string dspaces_timing_dir;
+  // MPI Configurations
+  size_t process_per_node = 1;
+  // DataSpaces Configuration
+  size_t server_ppn = 1;
+  // Test configuration
+  std::string filename = "test.dat";
+  size_t number_of_files = 1;
+  size_t request_size = 65536;
+  size_t iteration = 8;
+  bool debug = false;
+};
+}  // namespace dyad::test
+
+dyad::test::Arguments args;
+dyad::test::Info info;
+/**
+ * Overridden methods for catch
+ */
+
+int init(int* argc, char*** argv) {
+  // fprintf(stdout, "Initializing MPI\n");
+  MPI_Init(argc, argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &info.rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &info.comm_size);
+  info.debug_init = false;
+  MPI_Barrier(MPI_COMM_WORLD);
+  return 0;
+}
+int finalize() {
+  MPI_Finalize();
+  return 0;
+}
+cl::Parser define_options() {
+  return cl::Opt(args.filename, "filename")["-f"]["--filename"](
+             "Filename to be used for I/O.") |
+         cl::Opt(args.process_per_node,
+                 "process_per_node")["-p"]["--ppn"]("Processes per node") |
+         cl::Opt(args.request_size, "request_size")["-r"]["--request_size"](
+             "Transfer size used for performing I/O") |
+         cl::Opt(args.iteration,
+                 "iteration")["-i"]["--iteration"]("Number of Iterations") |
+         cl::Opt(args.number_of_files,
+                 "number_of_files")["-n"]["--number_of_files"]("Number of Files") |
+         cl::Opt(args.server_ppn,
+                 "server_ppn")["-s"]["--server_ppn"]("Number of DataSpaces server processes per node") |
+         cl::Opt(args.dspaces_timing_dir,
+                 "dspaces_timing_dir")["-t"]["--timing_dir"]("Directory to write DataSpaces internal timings") |
+         cl::Opt(args.debug)["-d"]["--debug"]("debug");
+}
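+// A typical (hypothetical) invocation of this harness under Flux, combining
+// Catch2's built-in options with the custom ones defined above:
+//
+//   flux run -N 2 --tasks-per-node 8 ./bin/unit_test \
+//       --ppn 8 --request_size 65536 --iteration 8 --number_of_files 1 \
+//       --timing_dir /tmp/dspaces_timings --reporter compact "<TestName>"
+//
+// Catch2 treats the trailing argument as a test-name filter, so each CTest
+// entry can select a single TEST_CASE from this binary.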
+
+int pretest() {
+  if (!info.debug_init && args.debug) {
+    const int HOSTNAME_SIZE = 256;
+    char hostname[HOSTNAME_SIZE];
+    gethostname(hostname, HOSTNAME_SIZE);
+    int pid = getpid();
+    char* start_port_str = getenv("VSC_DEBUG_START_PORT");
+    int start_port = 10000;
+    if (start_port_str != nullptr) {
+      start_port = atoi(start_port_str);
+    }
+    const char* conf_dir = getenv("VSC_DEBUG_CONF_DIR");
+    if (conf_dir == nullptr) {
+      conf_dir = ".";
+    }
+    char conf_file[4096];
+    sprintf(conf_file, "%s/debug.conf", conf_dir);
+
+    char exe[1024];
+    int ret = readlink("/proc/self/exe", exe, sizeof(exe) - 1);
+    REQUIRE(ret != -1);
+    exe[ret] = 0;
+    if (info.rank == 0) {
+      remove(conf_file);
+    }
+    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_File mpi_fh;
+    int status_orig = MPI_File_open(MPI_COMM_WORLD, conf_file, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &mpi_fh);
+    REQUIRE(status_orig == MPI_SUCCESS);
+    const int buf_len = 16 * 1024;
+    char buffer[buf_len];
+    int size;
+    if (info.rank == 0) {
+      size = sprintf(buffer, "%d\n%s:%d:%s:%d:%d\n", info.comm_size, exe, info.rank, hostname, start_port + info.rank, pid);
+    } else {
+      size = sprintf(buffer, "%s:%d:%s:%d:%d\n", exe, info.rank, hostname, start_port + info.rank, pid);
+    }
+    MPI_Status status;
+    MPI_File_write_ordered(mpi_fh, buffer, size, MPI_CHAR, &status);
+    int written_bytes;
+    MPI_Get_count(&status, MPI_CHAR, &written_bytes);
+    REQUIRE(written_bytes == size);
+    MPI_File_close(&mpi_fh);
+    MPI_Barrier(MPI_COMM_WORLD);
+    if (info.rank == 0) {
+      printf("%d ready for attach\n", info.comm_size);
+      fflush(stdout);
+      sleep(60);
+    }
+    info.debug_init = true;
+  }
+  info.num_nodes = info.comm_size / args.process_per_node;
+  info.num_server_procs = info.num_nodes * args.server_ppn;
+  MPI_Barrier(MPI_COMM_WORLD);
+  return 0;
+}
+
+int posttest() {
+  return 0;
+}
+#include "data_plane/data_plane.cpp"
+// Temporarily disable mdm tests
+// #include "mdm/mdm.cpp"
diff --git a/tests/integration/dlio_benchmark/add_dspaces_to_env.py b/tests/integration/dlio_benchmark/add_dspaces_to_env.py
new file mode 100644
index 00000000..980927d3
--- /dev/null
+++ b/tests/integration/dlio_benchmark/add_dspaces_to_env.py
@@ -0,0 +1,39 @@
+import sysconfig
+import site
+from pathlib import Path
+import argparse
+import re
+
+
+def get_sitepackages_for_dspaces():
+    env_sitepackages = sysconfig.get_path("platlib")
+    match_obj = re.match(r"^.*(?P<site>lib/.*)$", env_sitepackages)
+    if match_obj is None:
+        raise FileNotFoundError("Could not locate site-packages for venv")
+    return match_obj.group("site")
+
+
+def main():
+    parser = argparse.ArgumentParser("Adds DataSpaces's Python bindings to venv")
+    parser.add_argument("dspaces_install_prefix", type=Path,
+                        help="Path to the DataSpaces install")
+    parser.add_argument("--dspaces_sitepackages_dir", "-d", type=str,
+                        default=get_sitepackages_for_dspaces(),
+                        help="Override default path from DataSpaces install prefix to Python bindings")
+    parser.add_argument("--venv_sitepackages_dir", "-v", type=Path,
+                        default=Path(site.getsitepackages()[0]).expanduser().resolve(),
+                        help="Override path to venv's site-packages directory")
+    parser.add_argument("--pth_filename", "-p", type=str,
+                        default="dspaces.pth",
+                        help="Override the default name of the pth file that will be created")
+    args = parser.parse_args()
+    dspaces_install_prefix = args.dspaces_install_prefix.expanduser().resolve()
+    dspaces_sitepackages_dir = dspaces_install_prefix / args.dspaces_sitepackages_dir
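+    # A .pth file in site-packages is read at interpreter startup, and every
+    # directory it lists is appended to sys.path, so this one file is enough to
+    # expose the DataSpaces bindings inside the venv. Hypothetical effect:
+    #   echo "/opt/dspaces/lib/python3.9/site-packages" > venv/.../dspaces.pth
+    #   python -c "import dspaces"   # now resolves
+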
pth_file_contents = str(dspaces_sitepackages_dir) + "\n" + full_pth_filename = args.venv_sitepackages_dir / args.pth_filename + with open(str(full_pth_filename), "w") as f: + f.write(pth_file_contents) + + +if __name__ == "__main__": + main() diff --git a/tests/integration/dlio_benchmark/configs/workload/dspaces_mummi.yaml b/tests/integration/dlio_benchmark/configs/workload/dspaces_mummi.yaml new file mode 100644 index 00000000..86b07f8f --- /dev/null +++ b/tests/integration/dlio_benchmark/configs/workload/dspaces_mummi.yaml @@ -0,0 +1,30 @@ +model: mummi + +framework: pytorch + +workflow: + generate_data: False + train: True + +dataset: + data_folder: data/mummi/ + format: hdf5 + num_files_train: 600 + num_samples_per_file: 8000 + record_length: 69528 + enable_chunking: True + chunk_size: 17799168 + +reader: + data_loader: pytorch + batch_size: 256 + read_threads: 2 + file_shuffle: seed + sample_shuffle: seed + multiprocessing_context: spawn + data_loader_classname: dspaces_h5_torch_data_loader.DspacesH5TorchDataLoader + data_loader_sampler: index + +train: + epochs: 10 + computation_time: .133 diff --git a/tests/integration/dlio_benchmark/configs/workload/dspaces_mummi_small.yaml b/tests/integration/dlio_benchmark/configs/workload/dspaces_mummi_small.yaml new file mode 100644 index 00000000..5fdf2289 --- /dev/null +++ b/tests/integration/dlio_benchmark/configs/workload/dspaces_mummi_small.yaml @@ -0,0 +1,30 @@ +model: mummi + +framework: pytorch + +workflow: + generate_data: False + train: True + +dataset: + data_folder: data/mummi/ + format: hdf5 + num_files_train: 1 + num_samples_per_file: 100 + record_length: 69528 + enable_chunking: True + chunk_size: 17799168 + +reader: + data_loader: pytorch + batch_size: 1 + read_threads: 1 # 2 + file_shuffle: seed + sample_shuffle: seed + multiprocessing_context: spawn + data_loader_classname: dspaces_h5_torch_data_loader.DspacesH5TorchDataLoader + data_loader_sampler: index + +train: + epochs: 10 + computation_time: .133 diff --git a/tests/integration/dlio_benchmark/configs/workload/dyad_mummi.yaml b/tests/integration/dlio_benchmark/configs/workload/dyad_mummi.yaml new file mode 100644 index 00000000..55e8d027 --- /dev/null +++ b/tests/integration/dlio_benchmark/configs/workload/dyad_mummi.yaml @@ -0,0 +1,30 @@ +model: mummi + +framework: pytorch + +workflow: + generate_data: False + train: True + +dataset: + data_folder: data/mummi/ + format: hdf5 + num_files_train: 600 + num_samples_per_file: 8000 + record_length: 69528 + enable_chunking: True + chunk_size: 17799168 + +reader: + data_loader: pytorch + batch_size: 256 + read_threads: 6 + file_shuffle: seed + sample_shuffle: seed + multiprocessing_context: spawn + data_loader_classname: dyad_h5_torch_data_loader.DyadH5TorchDataLoader + data_loader_sampler: index + +train: + epochs: 10 + computation_time: .133 diff --git a/tests/integration/dlio_benchmark/configs/workload/dyad_mummi_small.yaml b/tests/integration/dlio_benchmark/configs/workload/dyad_mummi_small.yaml new file mode 100644 index 00000000..ec40905b --- /dev/null +++ b/tests/integration/dlio_benchmark/configs/workload/dyad_mummi_small.yaml @@ -0,0 +1,30 @@ +model: mummi + +framework: pytorch + +workflow: + generate_data: False + train: True + +dataset: + data_folder: data/mummi/ + format: hdf5 + num_files_train: 1 + num_samples_per_file: 100 + record_length: 69528 + enable_chunking: True + chunk_size: 17799168 + +reader: + data_loader: pytorch + batch_size: 1 + read_threads: 2 + file_shuffle: seed + sample_shuffle: seed + 
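+  # The *_small workloads shrink MuMMI to one file of 100 samples with batch
+  # size 1, so the loader path can be smoke-tested on a couple of nodes before
+  # scaling to the full 600-file configuration.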
multiprocessing_context: spawn
+  data_loader_classname: dyad_h5_torch_data_loader.DyadH5TorchDataLoader
+  data_loader_sampler: index
+
+train:
+  epochs: 10
+  computation_time: .133
diff --git a/tests/integration/dlio_benchmark/configs/workload/dyad_resnet50.yaml b/tests/integration/dlio_benchmark/configs/workload/dyad_resnet50.yaml
new file mode 100644
index 00000000..a767cbf1
--- /dev/null
+++ b/tests/integration/dlio_benchmark/configs/workload/dyad_resnet50.yaml
@@ -0,0 +1,28 @@
+model: resnet50
+
+framework: pytorch
+
+workflow:
+  generate_data: False
+  train: True
+
+dataset:
+  num_subfolders_train: 21843
+  num_files_train: 218368
+  num_samples_per_file: 1
+  record_length: 150528
+  data_folder: data/resnet50
+  format: png
+
+train:
+  computation_time: 0.317 # this is for A100
+  epochs: 20
+
+reader:
+  data_loader: pytorch
+  read_threads: 1
+  computation_threads: 1
+  batch_size: 1
+  multiprocessing_context: spawn
+  data_loader_classname: dyad_torch_data_loader.DyadTorchDataLoader
+  data_loader_sampler: index
\ No newline at end of file
diff --git a/tests/integration/dlio_benchmark/configs/workload/dyad_resnet50_small.yaml b/tests/integration/dlio_benchmark/configs/workload/dyad_resnet50_small.yaml
new file mode 100644
index 00000000..b9297585
--- /dev/null
+++ b/tests/integration/dlio_benchmark/configs/workload/dyad_resnet50_small.yaml
@@ -0,0 +1,27 @@
+model: resnet50
+
+framework: pytorch
+
+workflow:
+  generate_data: False
+  train: True
+
+dataset:
+  num_files_train: 1024
+  num_samples_per_file: 1
+  record_length: 150528
+  data_folder: data/resnet50
+  format: png
+
+train:
+  computation_time: 0.317 # this is for A100
+  epochs: 100
+
+reader:
+  data_loader: pytorch
+  read_threads: 6
+  computation_threads: 8
+  batch_size: 1
+  multiprocessing_context: spawn
+  data_loader_classname: dyad_torch_data_loader.DyadTorchDataLoader
+  data_loader_sampler: index
\ No newline at end of file
diff --git a/tests/integration/dlio_benchmark/configs/workload/dyad_unet3d.yaml b/tests/integration/dlio_benchmark/configs/workload/dyad_unet3d.yaml
index 1edf7819..ae23db4b 100644
--- a/tests/integration/dlio_benchmark/configs/workload/dyad_unet3d.yaml
+++ b/tests/integration/dlio_benchmark/configs/workload/dyad_unet3d.yaml
@@ -5,12 +5,12 @@ framework: pytorch
 workflow:
   generate_data: False
   train: True
-  checkpoint: True
+  checkpoint: False
 
 dataset:
   data_folder: data/unet3d/
   format: npz
-  num_files_train: 168
+  num_files_train: 320
   num_samples_per_file: 1
   record_length: 146600628
   record_length_stdev: 68341808
@@ -19,7 +19,7 @@ dataset:
 reader:
   data_loader: pytorch
   batch_size: 4
-  read_threads: 3
+  read_threads: 6
   file_shuffle: seed
   sample_shuffle: seed
   multiprocessing_context: spawn
@@ -28,7 +28,7 @@ reader:
 
 train:
   epochs: 10
-  computation_time: 1.3604
+  computation_time: 0
 
 checkpoint:
   checkpoint_folder: checkpoints/unet3d
diff --git a/tests/integration/dlio_benchmark/configs/workload/dyad_unet3d_large.yaml b/tests/integration/dlio_benchmark/configs/workload/dyad_unet3d_large.yaml
new file mode 100644
index 00000000..21cca045
--- /dev/null
+++ b/tests/integration/dlio_benchmark/configs/workload/dyad_unet3d_large.yaml
@@ -0,0 +1,37 @@
+model: unet3d
+
+framework: pytorch
+
+workflow:
+  generate_data: False
+  train: True
+  checkpoint: False
+
+dataset:
+  data_folder: data/unet3d/
+  format: npz
+  num_files_train: 10240
+  num_samples_per_file: 1
+  record_length: 146600628
+  record_length_stdev: 68341808
+  record_length_resize: 2097152
+
+reader:
+  data_loader: pytorch
+  batch_size: 4
+  read_threads: 1
+  file_shuffle:
seed + sample_shuffle: seed + multiprocessing_context: spawn + data_loader_classname: dyad_torch_data_loader.DyadTorchDataLoader + data_loader_sampler: index + +train: + epochs: 20 + computation_time: 0.188 + +checkpoint: + checkpoint_folder: checkpoints/unet3d + checkpoint_after_epoch: 5 + epochs_between_checkpoints: 2 + model_size: 499153191 diff --git a/tests/integration/dlio_benchmark/configs/workload/dyad_unet3d_small.yaml b/tests/integration/dlio_benchmark/configs/workload/dyad_unet3d_small.yaml index 3818c2eb..752af3c1 100644 --- a/tests/integration/dlio_benchmark/configs/workload/dyad_unet3d_small.yaml +++ b/tests/integration/dlio_benchmark/configs/workload/dyad_unet3d_small.yaml @@ -5,35 +5,33 @@ framework: pytorch workflow: generate_data: False train: True - evaluation: True + checkpoint: False dataset: - data_folder: data/dyad_unet3d + data_folder: data/unet3d/ format: npz - num_files_train: 16 - num_files_eval: 1 + num_files_train: 168 num_samples_per_file: 1 - record_length: 4096 - file_shuffle: SEED + record_length: 146600628 + record_length_stdev: 68341808 + record_length_resize: 2097152 reader: data_loader: pytorch batch_size: 1 - batch_size_eval: 1 + read_threads: 1 + file_shuffle: seed + sample_shuffle: seed multiprocessing_context: spawn data_loader_classname: dyad_torch_data_loader.DyadTorchDataLoader data_loader_sampler: index train: - epochs: 2 - computation_time: 1.00 - seed: 100 - seed_change_epoch: True - - -evaluation: - eval_time: 0.5 - epochs_between_evals: 1 - -profiling: - profiler: iostat \ No newline at end of file + epochs: 10 + computation_time: 1 + +checkpoint: + checkpoint_folder: checkpoints/unet3d + checkpoint_after_epoch: 5 + epochs_between_checkpoints: 2 + model_size: 499153191 diff --git a/tests/integration/dlio_benchmark/configs/workload/mummi_base.yaml b/tests/integration/dlio_benchmark/configs/workload/mummi_base.yaml new file mode 100644 index 00000000..da8022d1 --- /dev/null +++ b/tests/integration/dlio_benchmark/configs/workload/mummi_base.yaml @@ -0,0 +1,30 @@ +model: mummi + +framework: pytorch + +workflow: + generate_data: False + train: True + +dataset: + data_folder: data/mummi/ + format: hdf5 + num_files_train: 600 + num_samples_per_file: 8000 + record_length: 69528 + enable_chunking: True + chunk_size: 17799168 + +reader: + data_loader: pytorch + batch_size: 256 + read_threads: 6 + file_shuffle: seed + sample_shuffle: seed + multiprocessing_context: spawn + data_loader_classname: torch_data_loader.BaseTorchDataLoader + data_loader_sampler: index + +train: + epochs: 10 + computation_time: .133 diff --git a/tests/integration/dlio_benchmark/configs/workload/resnet50_base.yaml b/tests/integration/dlio_benchmark/configs/workload/resnet50_base.yaml new file mode 100644 index 00000000..09b832b2 --- /dev/null +++ b/tests/integration/dlio_benchmark/configs/workload/resnet50_base.yaml @@ -0,0 +1,28 @@ +model: resnet50 + +framework: pytorch + +workflow: + generate_data: False + train: True + +dataset: + num_subfolders_train: 21843 + num_files_train: 218430 + num_samples_per_file: 1 + record_length: 150528 + data_folder: data/resnet50 + format: png + +train: + computation_time: 0.317 # this is for A100 + epochs: 20 + + +reader: + data_loader: pytorch + read_threads: 6 + batch_size: 512 + multiprocessing_context: spawn + data_loader_classname: torch_data_loader.BaseTorchDataLoader + data_loader_sampler: index \ No newline at end of file diff --git a/tests/integration/dlio_benchmark/configs/workload/resnet50_base_small.yaml 
b/tests/integration/dlio_benchmark/configs/workload/resnet50_base_small.yaml new file mode 100644 index 00000000..f0c1db1e --- /dev/null +++ b/tests/integration/dlio_benchmark/configs/workload/resnet50_base_small.yaml @@ -0,0 +1,28 @@ +model: resnet50 + +framework: pytorch + +workflow: + generate_data: False + train: True + +dataset: + num_files_train: 1024 + num_samples_per_file: 1 + record_length: 150528 + data_folder: data/resnet50 + format: png + +train: + computation_time: 0.317 # this is for A100 + epochs: 20 + + +reader: + data_loader: pytorch + read_threads: 6 + computation_threads: 8 + batch_size: 1 + multiprocessing_context: spawn + data_loader_classname: torch_data_loader.BaseTorchDataLoader + data_loader_sampler: index \ No newline at end of file diff --git a/tests/integration/dlio_benchmark/configs/workload/unet3d_base.yaml b/tests/integration/dlio_benchmark/configs/workload/unet3d_base.yaml index 89875d91..95941848 100644 --- a/tests/integration/dlio_benchmark/configs/workload/unet3d_base.yaml +++ b/tests/integration/dlio_benchmark/configs/workload/unet3d_base.yaml @@ -5,7 +5,7 @@ framework: pytorch workflow: generate_data: False train: True - checkpoint: True + checkpoint: False dataset: data_folder: data/unet3d/ @@ -28,7 +28,7 @@ reader: train: epochs: 10 - computation_time: 0 #1.3604 + computation_time: 0 checkpoint: checkpoint_folder: checkpoints/unet3d diff --git a/tests/integration/dlio_benchmark/configs/workload/unet3d_base_large.yaml b/tests/integration/dlio_benchmark/configs/workload/unet3d_base_large.yaml new file mode 100644 index 00000000..0f092c2d --- /dev/null +++ b/tests/integration/dlio_benchmark/configs/workload/unet3d_base_large.yaml @@ -0,0 +1,37 @@ +model: unet3d + +framework: pytorch + +workflow: + generate_data: False + train: True + checkpoint: False + +dataset: + data_folder: data/unet3d/ + format: npz + num_files_train: 10240 + num_samples_per_file: 1 + record_length: 146600628 + record_length_stdev: 68341808 + record_length_resize: 2097152 + +reader: + data_loader: pytorch + batch_size: 4 + read_threads: 6 + file_shuffle: seed + sample_shuffle: seed + multiprocessing_context: spawn + data_loader_classname: torch_data_loader.BaseTorchDataLoader + data_loader_sampler: index + +train: + epochs: 20 + computation_time: 0.188 + +checkpoint: + checkpoint_folder: checkpoints/unet3d + checkpoint_after_epoch: 5 + epochs_between_checkpoints: 2 + model_size: 499153191 diff --git a/tests/integration/dlio_benchmark/corona.sh b/tests/integration/dlio_benchmark/corona.sh index f9dbf708..a561ccbe 100755 --- a/tests/integration/dlio_benchmark/corona.sh +++ b/tests/integration/dlio_benchmark/corona.sh @@ -2,4 +2,4 @@ source ./setup-env.sh rm *.core flux.log rm -rf logs/* profiler/* -flux alloc -q pdebug -N $NUM_NODES -o per-resource.count=${BROKERS_PER_NODE} --exclusive --broker-opts=--setattr=log-filename=./logs/flux.log ./run_dlio.sh +flux alloc -q $QUEUE -t $TIME -N $NUM_NODES -o per-resource.count=${BROKERS_PER_NODE} --exclusive --broker-opts=--setattr=log-filename=./logs/flux.log ./run_dlio.sh diff --git a/tests/integration/dlio_benchmark/dspaces-setup-env.sh b/tests/integration/dlio_benchmark/dspaces-setup-env.sh new file mode 100644 index 00000000..49e61230 --- /dev/null +++ b/tests/integration/dlio_benchmark/dspaces-setup-env.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# TODO make sure python mod matches Spack environment if there is python in there +module load gcc/10.3.1 +module load python/3.9.12 +module load openmpi/4.1.2 + +# Configurations +export 
DLIO_WORKLOAD=dspaces_mummi #dyad_mummi #dyad_unet3d_large # unet3d_base dyad_unet3d dyad_unet3d_small resnet50_base dyad_resnet50 unet3d_base_large mummi_base dyad_mummi +export NUM_NODES=32 # 2 for small config, for full config, scale 8, 16, 32, 64 +export PPN=8 # 1 for small config, 8 for full config +export QUEUE=pbatch +export TIME=$((180)) +export BROKERS_PER_NODE=1 +export GENERATE_DATA="0" +export DSPACES_HG_STRING="ofi+verbs" + +export NUM_SAMPLES_PER_FILE=8000 +export NUM_ROWS_PER_SAMPLE=263 +export NUM_COLS_PER_SAMPLE=263 + +export GITHUB_WORKSPACE=/g/g90/lumsden1/ws/dyad_sc24_paper_dspaces/dyad +export SPACK_DIR=/g/g90/lumsden1/ws/spack +export SPACK_ENV=/g/g90/lumsden1/ws/dyad_sc24_paper_dspaces/baseline_env +export SPACK_VIEW=$SPACK_ENV/.spack-env/view +export PYTHON_ENV=/g/g90/lumsden1/ws/dyad_sc24_paper_dspaces/mummi_venv +export DLIO_DATA_DIR=/p/lustre1/lumsden1/dyad_mummi # dyad_resnet50 dyad_unet3d_basic +#export DLIO_DATA_DIR=/p/lustre2/haridev/dyad/dlio_benchmark/dyad_unet3d_basic # dyad_resnet50 + +# DLIO Profiler Configurations +export DLIO_PROFILER_ENABLE=0 +export DLIO_PROFILER_INC_METADATA=1 +export DLIO_PROFILER_DATA_DIR=${DLIO_DATA_DIR}:${DYAD_PATH} +export DLIO_PROFILER_LOG_FILE=/g/g90/lumsden1/ws/dyad_sc24_paper_dspaces/dyad/tests/integration/dlio_benchmark/profiler/dspaces +export DLIO_PROFILER_LOG_LEVEL=ERROR +#export GOTCHA_DEBUG=3 + +export DLIO_PROFILER_BIND_SIGNALS=0 +export MV2_BCAST_HWLOC_TOPOLOGY=0 +export HDF5_USE_FILE_LOCKING=0 + +#mkdir -p ${DYAD_PATH} +mkdir -p ${DLIO_PROFILER_LOG_FILE} +# Activate Environments +. ${SPACK_DIR}/share/spack/setup-env.sh +spack env activate -p ${SPACK_ENV} +source ${PYTHON_ENV}/bin/activate + +# Derived Configurations +export DSPACES_DLIO_RUN_LOG=dyad_${DLIO_WORKLOAD}_${NUM_NODES}_${PPN}_${BROKERS_PER_NODE}.log +export CONFIG_ARG="--config-dir=${GITHUB_WORKSPACE}/tests/integration/dlio_benchmark/configs" +#export CONFIG_ARG="" + +# Derived PATHS +export PATH=${PATH}:${SPACK_VIEW}/bin:${SPACK_VIEW}/sbin +export LD_LIBRARY_PATH=/usr/lib64:${SPACK_VIEW}/lib:${SPACK_VIEW}/lib64:${LD_LIBRARY_PATH} +export PYTHONPATH=${GITHUB_WORKSPACE}/tests/integration/dlio_benchmark:$PYTHONPATH + +unset LUA_PATH +unset LUA_CPATH diff --git a/tests/integration/dlio_benchmark/dspaces_alloc.sh b/tests/integration/dlio_benchmark/dspaces_alloc.sh new file mode 100755 index 00000000..08aaf3f4 --- /dev/null +++ b/tests/integration/dlio_benchmark/dspaces_alloc.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +source ./dspaces-setup-env.sh + +# export DSPACES_DEBUG=1 + +if [ $# -eq 0 ]; then + flux alloc -N $NUM_NODES -t $TIME -q $QUEUE --exclusive \ + --broker-opts=--setattr=log-filename=./logs/flux.log \ + ./dspaces_run_dlio.sh +else + flux alloc -N $NUM_NODES -t $TIME -q $QUEUE --exclusive \ + --broker-opts=--setattr=log-filename=./logs/flux.log \ + ./dspaces_run_dlio.sh $1 +fi diff --git a/tests/integration/dlio_benchmark/dspaces_batch.sh b/tests/integration/dlio_benchmark/dspaces_batch.sh new file mode 100755 index 00000000..e48bba14 --- /dev/null +++ b/tests/integration/dlio_benchmark/dspaces_batch.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +source ./dspaces-setup-env.sh + +# export DSPACES_DEBUG=1 + +if [ $# -eq 0 ]; then + flux batch -N $NUM_NODES -t $TIME -q $QUEUE --exclusive \ + --broker-opts=--setattr=log-filename=./logs/flux.log \ + ./dspaces_run_dlio.sh +else + dump_dir=$1 + if [ ! 
-d $1 ]; then + mkdir -p $1 + fi + if [ $# -eq 1 ]; then + flux batch -N $NUM_NODES -t $TIME -q $QUEUE --exclusive \ + --broker-opts=--setattr=log-filename=./logs/flux.log \ + --output=$1/run.out --error=$1/run.err \ + ./dspaces_run_dlio.sh $1 + else + export NUM_NODES=$2 + flux batch -N $NUM_NODES -t $TIME -q $QUEUE --exclusive \ + --broker-opts=--setattr=log-filename=./logs/flux.log \ + --output=$1/run.out --error=$1/run.err \ + ./dspaces_run_dlio.sh $1 $2 + fi +fi \ No newline at end of file diff --git a/tests/integration/dlio_benchmark/dspaces_h5_torch_data_loader.py b/tests/integration/dlio_benchmark/dspaces_h5_torch_data_loader.py new file mode 100644 index 00000000..950eaa8b --- /dev/null +++ b/tests/integration/dlio_benchmark/dspaces_h5_torch_data_loader.py @@ -0,0 +1,187 @@ +""" + Copyright (c) 2022, UChicago Argonne, LLC + All Rights Reserved + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" +from time import time +import logging +import math +import pickle +import torch +from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler + +from dlio_benchmark.common.constants import MODULE_DATA_LOADER +from dlio_benchmark.common.enumerations import Shuffle, DatasetType, DataLoaderType +from dlio_benchmark.data_loader.base_data_loader import BaseDataLoader +from dlio_benchmark.reader.reader_factory import ReaderFactory +from dlio_benchmark.utils.utility import utcnow, DLIOMPI +from dlio_benchmark.utils.config import ConfigArguments +from dlio_profiler.logger import fn_interceptor as Profile + +import dspaces +import numpy as np +import os +import h5py +import fcntl +dlp = Profile(MODULE_DATA_LOADER) + + +class DspacesH5TorchDataset(Dataset): + """ + Currently, we only support loading one sample per file + TODO: support multiple samples per file + """ + @dlp.log_init + def __init__(self, format_type, dataset_type, epoch, num_samples, num_workers, batch_size): + self.format_type = format_type + self.dataset_type = dataset_type + self.epoch_number = epoch + self.num_samples = num_samples + self.reader = None + self.num_images_read = 0 + self.batch_size = batch_size + args = ConfigArguments.get_instance() + self.img_dim = args.dimension + self.serial_args = pickle.dumps(args) + self.dlp_logger = None + if num_workers == 0: + self.worker_init(-1) + + @dlp.log + def worker_init(self, worker_id): + pickle.loads(self.serial_args) + self._args = ConfigArguments.get_instance() + self._args.configure_dlio_logging(is_child=True) + self.dlp_logger = self._args.configure_dlio_profiler(is_child=True, use_pid=True) + logging.debug(f"{utcnow()} worker initialized {worker_id} with format {self.format_type}") + self.reader = ReaderFactory.get_reader(type=self.format_type, + dataset_type=self.dataset_type, + thread_index=worker_id, + epoch_number=self.epoch_number) + proc_rank = os.getpid() + logging.debug("Intializing dspaces") + self.ds_client = dspaces.dspaces(rank=proc_rank) + + def __del__(self): + if self.dlp_logger: + self.dlp_logger.finalize() + # Manually invoke finalizer for DataSpaces 
to ensure it is shutdown properly + # if self.ds_client: + # del self.ds_client + # self.ds_client = None + + @dlp.log + def __len__(self): + return self.num_samples + + @dlp.log + def __getitem__(self, image_idx): + logging.debug(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} reading {image_idx} image") + self.num_images_read += 1 + step = int(math.ceil(self.num_images_read / self.batch_size)) + filename, sample_index = self._args.global_index_map[image_idx] + lb = tuple([sample_index, 0, 0]) + ub = tuple([sample_index, self.img_dim-1, self.img_dim-1]) + dlp.update(args={"fname":filename}) + logging.debug(f"Filename is {filename}") + dlp.update(args={"image_idx":image_idx}) + dlp.update(args={"version":0}) + dlp.update(args={"lb":lb}) + logging.debug(f"lb is {lb}") + dlp.update(args={"ub":ub}) + logging.debug(f"ub is {ub}") + logging.debug("Starting dspaces aget") + data = self.ds_client.get( + filename, # variable name + 0, # variable version + lb, # lower bound in global dims + ub, # upper bound in global dims + np.uint8, # NumPy datatype of elements + -1 # timeout + ) + logging.debug("Finished dspaces aget") + dlp.update(step=step) + logging.debug(f"data shape is {data.shape}") + dlp.update(image_size=data.nbytes) + return data + +class DspacesH5TorchDataLoader(BaseDataLoader): + @dlp.log_init + def __init__(self, format_type, dataset_type, epoch_number): + super().__init__(format_type, dataset_type, epoch_number, DataLoaderType.PYTORCH) + + @dlp.log + def read(self): + do_shuffle = True if self._args.sample_shuffle != Shuffle.OFF else False + dataset = DspacesH5TorchDataset(self.format_type, self.dataset_type, self.epoch_number, self.num_samples, self._args.read_threads, self.batch_size) + if do_shuffle: + sampler = RandomSampler(dataset) + else: + sampler = SequentialSampler(dataset) + if self._args.read_threads >= 1: + prefetch_factor = math.ceil(self._args.prefetch_size / self._args.read_threads) + else: + prefetch_factor = self._args.prefetch_size + if prefetch_factor > 0: + if self._args.my_rank == 0: + logging.debug( + f"{utcnow()} Prefetch size is {self._args.prefetch_size}; prefetch factor of {prefetch_factor} will be set to Torch DataLoader.") + else: + prefetch_factor = 2 + if self._args.my_rank == 0: + logging.debug( + f"{utcnow()} Prefetch size is 0; a default prefetch factor of 2 will be set to Torch DataLoader.") + logging.debug(f"{utcnow()} Setup dataloader with {self._args.read_threads} workers {torch.__version__}") + if self._args.read_threads==0: + kwargs={} + else: + kwargs={'multiprocessing_context':self._args.multiprocessing_context, + 'prefetch_factor': prefetch_factor} + if torch.__version__ != '1.3.1': + kwargs['persistent_workers'] = True + if torch.__version__ == '1.3.1': + if 'prefetch_factor' in kwargs: + del kwargs['prefetch_factor'] + self._dataset = DataLoader(dataset, + batch_size=self.batch_size, + sampler=sampler, + num_workers=self._args.read_threads, + pin_memory=True, + drop_last=True, + worker_init_fn=dataset.worker_init, + **kwargs) + else: + self._dataset = DataLoader(dataset, + batch_size=self.batch_size, + sampler=sampler, + num_workers=self._args.read_threads, + pin_memory=True, + drop_last=True, + worker_init_fn=dataset.worker_init, + **kwargs) # 2 is the default value + logging.debug(f"{utcnow()} Rank {self._args.my_rank} will read {len(self._dataset) * self.batch_size} files") + + # self._dataset.sampler.set_epoch(epoch_number) + + @dlp.log + def next(self): + super().next() + total = self._args.training_steps if self.dataset_type is 
DatasetType.TRAIN else self._args.eval_steps + logging.debug(f"{utcnow()} Rank {self._args.my_rank} should read {total} batches") + for batch in self._dataset: + yield batch + + @dlp.log + def finalize(self): + pass diff --git a/tests/integration/dlio_benchmark/dspaces_preloader.py b/tests/integration/dlio_benchmark/dspaces_preloader.py new file mode 100644 index 00000000..d8ec6785 --- /dev/null +++ b/tests/integration/dlio_benchmark/dspaces_preloader.py @@ -0,0 +1,79 @@ +from mpi4py import MPI +from pathlib import Path +import h5py +import dspaces + +import argparse +import logging +import time + +VALID_HDF5_EXTS = [".hdf5", ".h5"] + + +def collect_file_names(dirname): + return list(sorted([f for f in dirname.iterdir() if f.is_file() and f.suffix in VALID_HDF5_EXTS])) + + +def get_local_files(all_files, rank, comm_size): + return [all_files[i] for i in range(rank, len(all_files), comm_size)] + + +def store_samples_for_file(ds_client, local_file): + with h5py.File(str(local_file), "r") as f: + for starting_sample in range(0, f["records"].shape[0], 256): + end = starting_sample+256 + if end > f["records"].shape[0]: + end = f["records"].shape[0] + data = f["records"][starting_sample:end] + offset = (starting_sample, 0, 0) + ds_client.put(data, str(local_file), 0, offset) + + +def main(): + parser = argparse.ArgumentParser("Preload data for MuMMI training") + parser.add_argument("data_dir", type=Path, + help="Path to the directory containing HDF5 files") + parser.add_argument('-v', '--verbose', action='store_true') + parser.add_argument('-d', '--debug', action='store_true') + args = parser.parse_args() + loglevel = logging.WARNING + if args.verbose: + loglevel = logging.INFO + elif args.debug: + loglevel = logging.DEBUG + logging.basicConfig(level=loglevel, + handlers=[ + logging.StreamHandler() + ], + format='[%(levelname)s] [%(asctime)s] %(message)s [%(pathname)s:%(lineno)d]', + datefmt='%H:%M:%S' + ) + data_dir = args.data_dir.expanduser().resolve() + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + comm_size = comm.Get_size() + start = time.time() + if rank == 0: + logging.info("Preloading with {} processes".format(comm_size)) + logging.debug("RANK {}: initializing DataSpaces".format(rank)) + ds_client = dspaces.dspaces(comm=comm) + logging.debug("RANK {}: collecting filenames".format(rank)) + all_files = collect_file_names(data_dir) + logging.debug("RANK {}: obtaining local files".format(rank)) + local_files = get_local_files(all_files, rank, comm_size) + logging.debug("RANK {}: putting the following files: {}".format(rank, local_files)) + for lf in local_files: + store_samples_for_file(ds_client, lf) + end = time.time() + local_time = end - start + total_time = 0.0 + total_time = comm.reduce(local_time,MPI.SUM, root=0) + if rank == 0: + total_time /= comm_size + print("DataSpaces preload time is {} s".format(total_time), flush=True) + comm.Barrier() + comm.Abort(0) # Perform an abort because finalization sometimes hangs on Corona + + +if __name__ == "__main__": + main() diff --git a/tests/integration/dlio_benchmark/dspaces_run_dlio.sh b/tests/integration/dlio_benchmark/dspaces_run_dlio.sh new file mode 100755 index 00000000..a36d5929 --- /dev/null +++ b/tests/integration/dlio_benchmark/dspaces_run_dlio.sh @@ -0,0 +1,73 @@ +#!/bin/bash +source ./dspaces-setup-env.sh + +curr_dir=$(pwd) +if [ $# -ge 1 ]; then + cd $1 +fi + +if [ $# -gt 1 ]; then + export NUM_NODES=$2 +fi +echo "Number of nodes is $NUM_NODES" + +ulimit -c unlimited + +# Startup dspaces_server +echo "Generating DataSpaces 
config file" +echo "## Config file for DataSpaces server +ndim = 3 +dims = $NUM_SAMPLES_PER_FILE,$NUM_ROWS_PER_SAMPLE,$NUM_COLS_PER_SAMPLE +max_versions = 1 +num_apps = 1" > dataspaces.conf + +redirect_flag="" +if [ ! -z ${DSPACES_DEBUG+x} ]; then + redirect_flag="--output=server.out --error=server.err" +fi + +echo "Launching DataSpaces server" +flux submit -N $NUM_NODES --cores=$(( NUM_NODES*16 )) \ + --tasks-per-node=$BROKERS_PER_NODE $redirect_flag dspaces_server $DSPACES_HG_STRING + +# Wait for DataSpaces's server to create conf.ds +echo "Waiting on conf.ds to be created" +sleep 1s +while [ ! -f conf.ds ]; do + sleep 1s +done +# Give the server enough time to write the contents +sleep 3s +echo "Server running!" + +if [[ "${GENERATE_DATA}" == "1" ]]; then +# Generate Data for Workload +echo Generating DLIO Dataset +flux submit -o cpu-affinity=off -N $((NUM_NODES*BROKERS_PER_NODE)) --tasks-per-node=$((PPN/BROKERS_PER_NODE)) dlio_benchmark ${CONFIG_ARG} workload=${DLIO_WORKLOAD} ++workload.dataset.data_folder=${DLIO_DATA_DIR} ++workload.workflow.generate_data=True ++workload.workflow.train=False +GEN_PID=$(flux job last) +flux job attach ${GEN_PID} +echo "Run without Gen data to do training" +exit +fi + +# Preload data into DataSpaces +echo "Preloading samples into DataSpaces" +flux run -N $NUM_NODES --cores=$(( NUM_NODES*32 )) --tasks-per-node=32 python3 $GITHUB_WORKSPACE/tests/integration/dlio_benchmark/dspaces_preloader.py $DLIO_DATA_DIR/train + +# Run Training +echo Running DLIO Training for ${DLIO_WORKLOAD} +flux submit -N $NUM_NODES --cores=$(( NUM_NODES*32 )) -o cpu-affinity=on --tasks-per-node=$PPN \ + dlio_benchmark ${CONFIG_ARG} workload=${DLIO_WORKLOAD} \ + ++workload.dataset.data_folder=${DLIO_DATA_DIR} \ + ++workload.workflow.generate_data=False ++workload.workflow.train=True +RUN_PID=$(flux job last) +flux job attach ${RUN_PID} > ${DSPACES_DLIO_RUN_LOG} 2>&1 +#cat ${DYAD_DLIO_RUN_LOG} +echo "Finished Executing check ${DSPACES_DLIO_RUN_LOG} for output" + +flux run --ntasks=1 terminator + +rm dataspaces.conf conf.ds +if [ $# -ge 1 ]; then + cd $curr_dir +fi diff --git a/tests/integration/dlio_benchmark/dyad_h5_torch_data_loader.py b/tests/integration/dlio_benchmark/dyad_h5_torch_data_loader.py new file mode 100644 index 00000000..34e35e2d --- /dev/null +++ b/tests/integration/dlio_benchmark/dyad_h5_torch_data_loader.py @@ -0,0 +1,234 @@ +""" + Copyright (c) 2022, UChicago Argonne, LLC + All Rights Reserved + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" +from time import time +import logging +import math +import pickle +import torch +from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler + +from dlio_benchmark.common.constants import MODULE_DATA_LOADER +from dlio_benchmark.common.enumerations import Shuffle, DatasetType, DataLoaderType +from dlio_benchmark.data_loader.base_data_loader import BaseDataLoader +from dlio_benchmark.reader.reader_factory import ReaderFactory +from dlio_benchmark.utils.utility import utcnow, DLIOMPI +from dlio_benchmark.utils.config import ConfigArguments +from dlio_profiler.logger import fn_interceptor as Profile + +from pydyad import Dyad +from pydyad.hdf import DyadFile +from pydyad.bindings import DTLMode, DTLCommMode +import numpy as np +import flux +import os +import h5py +import fcntl +dlp = Profile(MODULE_DATA_LOADER) + + +class DYADH5TorchDataset(Dataset): + """ + Currently, we only support loading one sample per file + TODO: support multiple samples per file + """ + @dlp.log_init + def __init__(self, format_type, dataset_type, epoch, num_samples, num_workers, batch_size): + self.format_type = format_type + self.dataset_type = dataset_type + self.epoch_number = epoch + self.num_samples = num_samples + self.reader = None + self.num_images_read = 0 + self.batch_size = batch_size + args = ConfigArguments.get_instance() + self.serial_args = pickle.dumps(args) + self.dlp_logger = None + if num_workers == 0: + self.worker_init(-1) + + @dlp.log + def worker_init(self, worker_id): + pickle.loads(self.serial_args) + self._args = ConfigArguments.get_instance() + self._args.configure_dlio_logging(is_child=True) + self.dlp_logger = self._args.configure_dlio_profiler(is_child=True, use_pid=True) + logging.debug(f"{utcnow()} worker initialized {worker_id} with format {self.format_type}") + self.reader = ReaderFactory.get_reader(type=self.format_type, + dataset_type=self.dataset_type, + thread_index=worker_id, + epoch_number=self.epoch_number) + self.dyad_io = Dyad() + is_local = os.getenv("DYAD_LOCAL_TEST", "0") == "1" + self.broker_per_node = int(os.getenv("BROKERS_PER_NODE", "1")) + + self.f = flux.Flux() + self.broker_rank = self.f.get_rank() + if is_local: + self.dyad_managed_directory = os.path.join(os.getenv("DYAD_PATH", ""), str(self.f.get_rank())) + else: + self.dyad_managed_directory = os.getenv("DYAD_PATH", "") + self.my_node_index = int(self.broker_rank*1.0 / self.broker_per_node) + dtl_str = os.getenv("DYAD_DTL_MODE", "FLUX_RPC") + mode = DTLMode.DYAD_DTL_FLUX_RPC + namespace = os.getenv("DYAD_KVS_NAMESPACE") + logging.debug(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} init dyad {self.dyad_managed_directory} {dtl_str} {namespace}") + if dtl_str == "UCX": + mode = DTLMode.DYAD_DTL_UCX + self.dyad_io.init(debug=self._args.debug, check=False, shared_storage=False, reinit=False, + async_publish=True, fsync_write=False, key_depth=3, + service_mux=self.broker_per_node, + key_bins=1024, kvs_namespace=os.getenv("DYAD_KVS_NAMESPACE"), + prod_managed_path=self.dyad_managed_directory, cons_managed_path=self.dyad_managed_directory, + dtl_mode=mode, dtl_comm_mode=DTLCommMode.DYAD_COMM_RECV) + + def __del__(self): + if self.dlp_logger: + self.dlp_logger.finalize() + @dlp.log + def __len__(self): + return self.num_samples + + @dlp.log + def __getitem__(self, image_idx): + logging.debug(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} reading {image_idx} image") + self.num_images_read += 1 + step = int(math.ceil(self.num_images_read / self.batch_size)) + filename, sample_index = 
self._args.global_index_map[image_idx]
+        is_present = False
+        file_obj = None
+        base_fname = filename
+        dlp.update(args={"fname":filename})
+        dlp.update(args={"image_idx":image_idx})
+        if self.dyad_managed_directory != "":
+            logging.debug(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} reading metadata")
+            base_fname = os.path.join(self.dyad_managed_directory, os.path.basename(filename))
+            file_obj = self.dyad_io.get_metadata(fname=base_fname, should_wait=False, raw=True)
+            logging.debug(f"Using managed directory {self.dyad_managed_directory} {base_fname} {file_obj}")
+            is_present = True
+        if file_obj:
+            access_mode = "remote"
+            file_node_index = int(file_obj.contents.owner_rank*1.0 / self.broker_per_node)
+            if self.my_node_index == file_node_index:
+                access_mode = "local"
+            dlp.update(args={"owner_rank":str(file_obj.contents.owner_rank)})
+            dlp.update(args={"my_broker":str(self.broker_rank)})
+            dlp.update(args={"mode":"dyad"})
+            dlp.update(args={"access":access_mode})
+            logging.debug(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} reading {image_idx} sample from {access_mode} dyad {file_obj.contents.owner_rank}")
+            logging.debug(f"Reading from managed directory {base_fname}")
+            hf = DyadFile(base_fname, "r", dyad_ctx=self.dyad_io, metadata_wrapper=file_obj)
+            try:
+                data = hf["records"][sample_index]
+            except Exception:
+                logging.debug(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} got weird {image_idx} sample from {access_mode} dyad {file_obj.contents.owner_rank}")
+                data = self._args.resized_image
+            hf.close()
+            logging.debug(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} read {image_idx} sample from {access_mode} dyad {file_obj.contents.owner_rank}")
+            self.dyad_io.free_metadata(file_obj)
+        else:
+            dlp.update(args={"mode":"pfs"})
+            dlp.update(args={"access":"remote"})
+            logging.debug(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} reading {image_idx} sample from pfs {base_fname}")
+            logging.debug(f"Reading from pfs {base_fname}")
+            # On a miss, copy the file from the PFS into the DYAD-managed path
+            # under an exclusive lock; note that open(..., "wb") truncates, so
+            # size below is always 0 and the copy runs on every miss.
+            dyad_f = open(base_fname, "wb")
+            fcntl.lockf(dyad_f, fcntl.LOCK_EX)
+            dyad_f.seek(0, 2)
+            size = dyad_f.tell()
+            if size == 0:
+                pfs_f = open(filename, "rb")
+                data = pfs_f.read()
+                dyad_f.write(data)
+                pfs_f.close()
+            fcntl.lockf(dyad_f, fcntl.LOCK_UN)
+            dyad_f.close()
+            hf = DyadFile(base_fname, "r+", dyad_ctx=self.dyad_io)
+            data = hf["records"][sample_index]
+            hf.close()
+            logging.debug(f"Read from pfs {base_fname}")
+        dlp.update(step=step)
+        dlp.update(image_size=data.nbytes)
+        return data
+
+class DyadH5TorchDataLoader(BaseDataLoader):
+    @dlp.log_init
+    def __init__(self, format_type, dataset_type, epoch_number):
+        super().__init__(format_type, dataset_type, epoch_number, DataLoaderType.PYTORCH)
+
+    @dlp.log
+    def read(self):
+        do_shuffle = True if self._args.sample_shuffle != Shuffle.OFF else False
+        dataset = DYADH5TorchDataset(self.format_type, self.dataset_type, self.epoch_number, self.num_samples, self._args.read_threads, self.batch_size)
+        if do_shuffle:
+            sampler = RandomSampler(dataset)
+        else:
+            sampler = SequentialSampler(dataset)
+        if self._args.read_threads >= 1:
+            prefetch_factor = math.ceil(self._args.prefetch_size / self._args.read_threads)
+        else:
+            prefetch_factor = self._args.prefetch_size
+        if prefetch_factor > 0:
+            if self._args.my_rank == 0:
+                logging.debug(
+                    f"{utcnow()} Prefetch size is {self._args.prefetch_size}; prefetch factor of {prefetch_factor} will be set to Torch DataLoader.")
+        else:
+            prefetch_factor = 2
+            if self._args.my_rank == 0:
+                logging.debug(
+                    f"{utcnow()} Prefetch size is 0; a default prefetch factor of 2
will be set to Torch DataLoader.") + logging.debug(f"{utcnow()} Setup dataloader with {self._args.read_threads} workers {torch.__version__}") + if self._args.read_threads==0: + kwargs={} + else: + kwargs={'multiprocessing_context':self._args.multiprocessing_context, + 'prefetch_factor': prefetch_factor} + if torch.__version__ != '1.3.1': + kwargs['persistent_workers'] = True + if torch.__version__ == '1.3.1': + if 'prefetch_factor' in kwargs: + del kwargs['prefetch_factor'] + self._dataset = DataLoader(dataset, + batch_size=self.batch_size, + sampler=sampler, + num_workers=self._args.read_threads, + pin_memory=True, + drop_last=True, + worker_init_fn=dataset.worker_init, + **kwargs) + else: + self._dataset = DataLoader(dataset, + batch_size=self.batch_size, + sampler=sampler, + num_workers=self._args.read_threads, + pin_memory=True, + drop_last=True, + worker_init_fn=dataset.worker_init, + **kwargs) # 2 is the default value + logging.debug(f"{utcnow()} Rank {self._args.my_rank} will read {len(self._dataset) * self.batch_size} files") + + # self._dataset.sampler.set_epoch(epoch_number) + + @dlp.log + def next(self): + super().next() + total = self._args.training_steps if self.dataset_type is DatasetType.TRAIN else self._args.eval_steps + logging.debug(f"{utcnow()} Rank {self._args.my_rank} should read {total} batches") + for batch in self._dataset: + yield batch + + @dlp.log + def finalize(self): + pass diff --git a/tests/integration/dlio_benchmark/dyad_torch_data_loader.py b/tests/integration/dlio_benchmark/dyad_torch_data_loader.py index 72938e62..1f7c6e03 100644 --- a/tests/integration/dlio_benchmark/dyad_torch_data_loader.py +++ b/tests/integration/dlio_benchmark/dyad_torch_data_loader.py @@ -30,7 +30,7 @@ from dftracer.logger import dft_fn as Profile from pydyad import Dyad, dyad_open -from pydyad.bindings import DTLMode +from pydyad.bindings import DTLMode, DTLCommMode import numpy as np import flux import os @@ -83,15 +83,15 @@ def worker_init(self, worker_id): dtl_str = os.getenv("DYAD_DTL_MODE", "FLUX_RPC") mode = DTLMode.DYAD_DTL_FLUX_RPC namespace = os.getenv("DYAD_KVS_NAMESPACE") - logging.info(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} init dyad {self.dyad_managed_directory} {dtl_str} {namespace}") + logging.debug(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} init dyad {self.dyad_managed_directory} {dtl_str} {namespace}") if dtl_str == "UCX": mode = DTLMode.DYAD_DTL_UCX - self.dyad_io.init(debug=self._args.debug, check=False, shared_storage=False, reinit=True, - async_publish=False, fsync_write=False, key_depth=3, + self.dyad_io.init(debug=self._args.debug, check=False, shared_storage=False, reinit=False, + async_publish=True, fsync_write=False, key_depth=3, service_mux=self.broker_per_node, key_bins=1024, kvs_namespace=os.getenv("DYAD_KVS_NAMESPACE"), prod_managed_path=self.dyad_managed_directory, cons_managed_path=self.dyad_managed_directory, - dtl_mode=mode, dtl_comm_mode=DYAD_COMM_RECV) + dtl_mode=mode, dtl_comm_mode=DTLCommMode.DYAD_COMM_RECV) def __del__(self): if self.dlp_logger: @@ -102,6 +102,7 @@ def __len__(self): @dlp.log def __getitem__(self, image_idx): + logging.debug(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} reading {image_idx} image") self.num_images_read += 1 step = int(math.ceil(self.num_images_read / self.batch_size)) filename, sample_index = self._args.global_index_map[image_idx] @@ -111,7 +112,7 @@ def __getitem__(self, image_idx): dlp.update(args={"fname":filename}) dlp.update(args={"image_idx":image_idx}) if self.dyad_managed_directory 
!= "":
-            logging.info(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} reading metadata")
+            logging.debug(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} reading metadata")
             base_fname = os.path.join(self.dyad_managed_directory, os.path.basename(filename))
             file_obj = self.dyad_io.get_metadata(fname=base_fname, should_wait=False, raw=True)
             logging.debug(f"Using managed directory {self.dyad_managed_directory} {base_fname} {file_obj}")
@@ -128,7 +129,12 @@
             logging.info(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} reading {image_idx} sample from {access_mode} dyad {file_obj.contents.owner_rank}")
             logging.debug(f"Reading from managed directory {base_fname}")
             with dyad_open(base_fname, "rb", dyad_ctx=self.dyad_io, metadata_wrapper=file_obj) as f:
-                data = np.load(f, allow_pickle=True)["x"]
+                try:
+                    data = np.load(f, allow_pickle=True)["x"]
+                except Exception:
+                    logging.info(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} got weird {image_idx} sample from {access_mode} dyad {file_obj.contents.owner_rank}")
+                    data = self._args.resized_image
+            logging.info(f"{utcnow()} Rank {DLIOMPI.get_instance().rank()} read {image_idx} sample from {access_mode} dyad {file_obj.contents.owner_rank}")
             self.dyad_io.free_metadata(file_obj)
         else:
             dlp.update(args={"mode":"pfs"})
@@ -140,6 +146,7 @@
             logging.debug(f"Writing to managed_directory {base_fname}")
             with dyad_open(base_fname, "wb", dyad_ctx=self.dyad_io) as f:
                 np.savez(f, x=data)
+            logging.debug(f"Read from pfs {base_fname}")
         dlp.update(step=step)
         dlp.update(image_size=data.nbytes)
diff --git a/tests/integration/dlio_benchmark/run_dlio.sh b/tests/integration/dlio_benchmark/run_dlio.sh
index 021e6a73..207bcbf2 100755
--- a/tests/integration/dlio_benchmark/run_dlio.sh
+++ b/tests/integration/dlio_benchmark/run_dlio.sh
@@ -4,10 +4,9 @@ source ./setup-env.sh
 # Setup DYAD
 echo Setting up Dyad
 flux kvs namespace create ${DYAD_KVS_NAMESPACE}
-for (( c=0; c= 1: prefetch_factor = math.ceil(self._args.prefetch_size / self._args.read_threads) else:
@@ -147,7 +166,6 @@
                                pin_memory=True,
                                drop_last=True,
                                worker_init_fn=dataset.worker_init,
-                               generator=g,
                                **kwargs)  # 2 is the default value
         logging.debug(f"{utcnow()} Rank {self._args.my_rank} will read {len(self._dataset) * self.batch_size} files")
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
new file mode 100644
index 00000000..fc2654e1
--- /dev/null
+++ b/tests/unit/CMakeLists.txt
@@ -0,0 +1,36 @@
+if(NOT DEFINED ENV{DYAD_TEST_MACHINE})
+  message(FATAL_ERROR "-- [dyad] DYAD_TEST_MACHINE in env should be set for ${PROJECT_NAME} test build")
+else()
+  message(STATUS "[dyad] found setting machine to $ENV{DYAD_TEST_MACHINE}")
+endif()
+if(NOT DEFINED ENV{DYAD_PFS_DIR})
+  message(FATAL_ERROR "-- [dyad] DYAD_PFS_DIR in env should be set for ${PROJECT_NAME} test build")
+else()
+  message(STATUS "[dyad] found setting pfs dir to $ENV{DYAD_PFS_DIR}")
+endif()
+if(NOT DEFINED ENV{DYAD_DMD_DIR})
+  message(FATAL_ERROR "-- [dyad] DYAD_DMD_DIR in env should be set for ${PROJECT_NAME} test build")
+else()
+  message(STATUS "[dyad] found setting DMD dir to $ENV{DYAD_DMD_DIR}")
+endif()
+set(DYAD_KEYSPACE test_dyad)
+set(DYAD_LOG_DIR ${CMAKE_BINARY_DIR}/logs)
+file(MAKE_DIRECTORY ${DYAD_LOG_DIR})
+find_package(Catch2 REQUIRED)
+find_package(MPI REQUIRED COMPONENTS CXX)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+include_directories(Catch2::Catch2)
+include_directories(${MPI_CXX_INCLUDE_DIRS})
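+# Note: include_directories() expects paths, so the Catch2::Catch2 line above
+# is effectively a no-op; the Catch2 headers actually come from linking the
+# Catch2::Catch2 target via TEST_LIBS below, which propagates its include
+# directories.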
+include_directories(${DYAD_PROJECT_DIR}/src) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) +include_directories(${CMAKE_BINARY_DIR}/include) +set(TEST_LIBS Catch2::Catch2 -lstdc++fs ${MPI_CXX_LIBRARIES} -rdynamic dyad_core dyad_ctx dyad_utils flux-core ${CPP_LOGGER_LIBRARIES}) +set(TEST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/catch_config.h ${CMAKE_CURRENT_SOURCE_DIR}/mpi_console_reporter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mpi_console_reporter.hpp ${CMAKE_CURRENT_SOURCE_DIR}/test_utils.h) +add_executable(unit_test unit_test.cpp ${TEST_SRC} ) +target_link_libraries(unit_test ${TEST_LIBS}) +add_dependencies(unit_test dyad) + +add_subdirectory(script) +add_subdirectory(data_plane) +add_subdirectory(mdm) +add_subdirectory(dyad_core) \ No newline at end of file diff --git a/tests/unit/catch_config.h b/tests/unit/catch_config.h new file mode 100644 index 00000000..987b6d28 --- /dev/null +++ b/tests/unit/catch_config.h @@ -0,0 +1,87 @@ +// +// Created by haridev on 2/28/24. +// + +#ifndef DYAD_CATCH_CONFIG_H +#define DYAD_CATCH_CONFIG_H +#include +#include "mpi.h" +#include +namespace cl = Catch::Clara; + +cl::Parser define_options(); + +int init(int* argc, char*** argv); +int finalize(); + +int main(int argc, char* argv[]) { + Catch::Session session; + auto cli = session.cli() | define_options(); + session.cli(cli); + int returnCode = session.applyCommandLine(argc, argv); + if (returnCode != 0) return returnCode; + returnCode = init(&argc, &argv); + if (returnCode != 0) return returnCode; + int test_return_code = session.run(); + returnCode = finalize(); + if (returnCode != 0) return returnCode; + exit(test_return_code); +} + +#include +#include + +namespace Catch { + // Fwd decls + class TablePrinter; + + class MPIConsoleReporter final : public StreamingReporterBase { + Detail::unique_ptr m_tablePrinter; + + public: + MPIConsoleReporter(ReporterConfig&& config); + ~MPIConsoleReporter() override; + static std::string getDescription(); + + void noMatchingTestCases( StringRef unmatchedSpec ) override; + void reportInvalidTestSpec( StringRef arg ) override; + + void assertionStarting(AssertionInfo const&) override; + + void assertionEnded(AssertionStats const& _assertionStats) override; + + void sectionStarting(SectionInfo const& _sectionInfo) override; + void sectionEnded(SectionStats const& _sectionStats) override; + + void benchmarkPreparing( StringRef name ) override; + void benchmarkStarting(BenchmarkInfo const& info) override; + void benchmarkEnded(BenchmarkStats<> const& stats) override; + void benchmarkFailed( StringRef error ) override; + + void testCaseEnded(TestCaseStats const& _testCaseStats) override; + void testRunEnded(TestRunStats const& _testRunStats) override; + void testRunStarting(TestRunInfo const& _testRunInfo) override; + + private: + void lazyPrint(); + + void lazyPrintWithoutClosingBenchmarkTable(); + void lazyPrintRunInfo(); + void printTestCaseAndSectionHeader(); + + void printClosedHeader(std::string const& _name); + void printOpenHeader(std::string const& _name); + + // if string has a : in first line will set indent to follow it on + // subsequent lines + void printHeaderString(std::string const& _string, std::size_t indent = 0); + + void printTotalsDivider(Totals const& totals); + + bool m_headerPrinted = false; + bool m_testRunInfoPrinted = false; + }; + +} // end namespace Catch + +#endif // DYAD_CATCH_CONFIG_H diff --git a/tests/unit/data_plane/CMakeLists.txt b/tests/unit/data_plane/CMakeLists.txt new file mode 100644 index 00000000..3450d497 --- /dev/null +++ 
b/tests/unit/data_plane/CMakeLists.txt @@ -0,0 +1,66 @@ + +set(files 16) +set(ts 65536) +set(ops 16) +set(ppns 1 2 4 8 16 32 64) + +function(add_dp_remote_test node ppn files ts ops) + # Remote Streaming RPC over RDMA + set(test_name unit_remote_data_${node}_${ppn}) + add_test(${test_name} flux run -N ${node} --tasks-per-node ${ppn} ${CMAKE_BINARY_DIR}/bin/unit_test --filename dp_${node}_${ppn} --ppn ${ppn} --pfs $ENV{DYAD_PFS_DIR} --dmd $ENV{DYAD_DMD_DIR} --iteration ${ops} --number_of_files ${files} --request_size ${ts} --reporter mpi_console RemoteDataBandwidth) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_KVS_NAMESPACE=${DYAD_KEYSPACE}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_MODULE_SO=${CMAKE_BINARY_DIR}/${DYAD_LIBDIR}/dyad.so) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_LOG_DIR=${DYAD_LOG_DIR}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_DTL_MODE=UCX) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_CONSUMER=$ENV{DYAD_DMD_DIR}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_PRODUCER=$ENV{DYAD_DMD_DIR}) + # Remote Agg + set(test_name unit_remote_agg_data_${node}_${ppn}) + add_test(${test_name} flux run -N ${node} --tasks-per-node ${ppn} ${CMAKE_BINARY_DIR}/bin/unit_test --filename dp_${node}_${ppn} --ppn ${ppn} --pfs $ENV{DYAD_PFS_DIR} --dmd $ENV{DYAD_DMD_DIR} --iteration ${ops} --number_of_files ${files} --request_size ${ts} --reporter mpi_console RemoteDataAggBandwidth) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_KVS_NAMESPACE=${DYAD_KEYSPACE}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_MODULE_SO=${CMAKE_BINARY_DIR}/${DYAD_LIBDIR}/dyad.so) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_LOG_DIR=${DYAD_LOG_DIR}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_DTL_MODE=UCX) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_CONSUMER=$ENV{DYAD_DMD_DIR}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_PRODUCER=$ENV{DYAD_DMD_DIR}) +endfunction() + +function(add_dp_local_test node ppn files ts ops) + # Process Local + set(test_name unit_process_local_data_${node}_${ppn}) + add_test(${test_name} flux run -N ${node} --tasks-per-node ${ppn} ${CMAKE_BINARY_DIR}/bin/unit_test --filename dp_${node}_${ppn} --ppn ${ppn} --pfs $ENV{DYAD_PFS_DIR} --dmd $ENV{DYAD_DMD_DIR} --iteration ${ops} --number_of_files ${files} --request_size ${ts} --reporter compact LocalProcessDataBandwidth) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_KVS_NAMESPACE=${DYAD_KEYSPACE}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_MODULE_SO=${CMAKE_BINARY_DIR}/${DYAD_LIBDIR}/dyad.so) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_LOG_DIR=${DYAD_LOG_DIR}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_DTL_MODE=UCX) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_CONSUMER=$ENV{DYAD_DMD_DIR}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_PRODUCER=$ENV{DYAD_DMD_DIR}) + # Node Local + set(test_name unit_node_local_data_${node}_${ppn}) + add_test(${test_name} flux run -N ${node} --tasks-per-node ${ppn} ${CMAKE_BINARY_DIR}/bin/unit_test --filename dp_${node}_${ppn} --ppn ${ppn} --pfs $ENV{DYAD_PFS_DIR} --dmd $ENV{DYAD_DMD_DIR} --iteration ${ops} --number_of_files ${files} --request_size ${ts} --reporter compact LocalNodeDataBandwidth) + 
set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_KVS_NAMESPACE=${DYAD_KEYSPACE}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_MODULE_SO=${CMAKE_BINARY_DIR}/${DYAD_LIBDIR}/dyad.so) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_LOG_DIR=${DYAD_LOG_DIR}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_DTL_MODE=UCX) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_CONSUMER=$ENV{DYAD_DMD_DIR}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_PRODUCER=$ENV{DYAD_DMD_DIR}) +endfunction() + + +foreach (ppn ${ppns}) + add_dp_remote_test(2 ${ppn} ${files} ${ts} ${ops}) + add_dp_local_test(1 ${ppn} ${files} ${ts} ${ops}) +endforeach () +set(nodes 2 4 8 16 32 64) +set(ppns 16 32 64) +foreach (node ${nodes}) + foreach (ppn ${ppns}) + add_dp_local_test(${node} ${ppn} ${files} ${ts} ${ops}) + endforeach () +endforeach () +set(nodes 4 8 16 32 64) +foreach (node ${nodes}) + foreach (ppn ${ppns}) + add_dp_remote_test(${node} ${ppn} ${files} ${ts} ${ops}) + endforeach () +endforeach () diff --git a/tests/unit/data_plane/data_plane.cpp b/tests/unit/data_plane/data_plane.cpp new file mode 100644 index 00000000..b69b6173 --- /dev/null +++ b/tests/unit/data_plane/data_plane.cpp @@ -0,0 +1,216 @@ +#include +#include +#include +#include + +#include + +int create_files_per_broker() { + char filename[4096], first_file[4096]; + bool is_first = true; + size_t file_size = args.request_size * args.iteration; + size_t node_idx = info.rank / args.process_per_node; + bool first_rank_per_node = info.rank % args.process_per_node == 0; + if (first_rank_per_node) { + fs::create_directories(args.dyad_managed_dir); + for (size_t broker_idx = 0; broker_idx < args.brokers_per_node; + ++broker_idx) { + for (size_t file_idx = 0; file_idx < args.number_of_files; ++file_idx) { + size_t global_broker_idx = + node_idx * args.brokers_per_node + broker_idx; + if (is_first) { + sprintf(first_file, "%s/%s_%zu_%zu.bat", + args.dyad_managed_dir.c_str(), args.filename.c_str(), + global_broker_idx, file_idx); + std::string cmd = "{ tr -dc '[:alnum:]' < /dev/urandom | head -c " + + std::to_string(file_size) + "; } > " + first_file + + " "; + int status = system(cmd.c_str()); + (void)status; + is_first = false; + } else { + sprintf(filename, "%s/%s_%zu_%zu.bat", args.dyad_managed_dir.c_str(), + args.filename.c_str(), global_broker_idx, file_idx); + std::string cmd = "cp " + std::string(first_file) + " " + filename; + int status = system(cmd.c_str()); + (void)status; + } + } + } + } + MPI_Barrier(MPI_COMM_WORLD); + return 0; +} +// clang-format off +TEST_CASE("RemoteDataBandwidth", "[files= " + std::to_string(args.number_of_files) +"]" + "[file_size= " + std::to_string(args.request_size*args.iteration) +"]" + "[parallel_req= " + std::to_string(info.comm_size) +"]" + "[num_nodes= " + std::to_string(info.comm_size / args.process_per_node) +"]") { + // clang-format on + REQUIRE(pretest() == 0); + REQUIRE(clean_directories() == 0); + REQUIRE(create_files_per_broker() == 0); + dyad_init_env(DYAD_COMM_RECV, info.flux_handle); + auto ctx = dyad_ctx_get(); + SECTION("Test Max Bandwidth") { + Timer data_time; + char filename[4096]; + uint32_t neighour_broker_idx = (info.broker_idx + 1) % info.broker_size; + dyad_metadata_t mdata; + mdata.owner_rank = neighour_broker_idx; + size_t data_len = args.request_size * args.iteration; + char* file_data = NULL; + for (size_t file_idx = 0; file_idx < args.number_of_files; ++file_idx) { + 
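+      // Each iteration requests one whole file from the neighbouring
+      // broker; only the dyad_get_data() transfer itself is timed.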
sprintf(filename, "%s_%u_%zu.bat", args.filename.c_str(), + neighour_broker_idx, file_idx); + mdata.fpath = filename; + data_time.resumeTime(); + auto rc = dyad_get_data(ctx, &mdata, &file_data, &data_len); + data_time.pauseTime(); + REQUIRE(rc >= 0); + } + AGGREGATE_TIME(data); + if (info.rank == 0) { + printf("[DYAD_TEST],%10d,%10lu,%10.6f,%10.6f\n", info.comm_size, + data_len * args.number_of_files, total_data / info.comm_size, + data_len * args.number_of_files * info.comm_size * info.comm_size / + total_data / 1024 / 1024.0); + } + } + auto rc = dyad_finalize(); + REQUIRE(rc >= 0); + REQUIRE(clean_directories() == 0); + REQUIRE(posttest() == 0); +} +// clang-format off +TEST_CASE("RemoteDataAggBandwidth", "[files= " + std::to_string(args.number_of_files) +"]" + "[file_size= " + std::to_string(args.request_size*args.iteration) +"]" + "[parallel_req= " + std::to_string(info.comm_size) +"]" + "[num_nodes= " + std::to_string(info.comm_size / args.process_per_node) +"]") { + // clang-format on + REQUIRE(pretest() == 0); + REQUIRE(clean_directories() == 0); + REQUIRE(create_files_per_broker() == 0); + dyad_init_env(DYAD_COMM_RECV, info.flux_handle); + auto ctx = dyad_ctx_get(); + SECTION("Test Max Bandwidth") { + Timer data_time; + char filename[4096], upath[4096]; + uint32_t neighour_broker_idx = (info.broker_idx + 1) % info.broker_size; + dyad_metadata_t mdata; + mdata.owner_rank = neighour_broker_idx; + size_t data_len = args.request_size * args.iteration; + if (info.rank % args.process_per_node != 0) usleep(10000); + for (size_t file_idx = 0; file_idx < args.number_of_files; ++file_idx) { + sprintf(upath, "%s_%u_%zu.bat", args.filename.c_str(), + neighour_broker_idx, file_idx); + sprintf(filename, "%s/%s_%u_%zu.bat", args.dyad_managed_dir.c_str(), + args.filename.c_str(), neighour_broker_idx, file_idx); + mdata.fpath = upath; + data_time.resumeTime(); + auto rc = dyad_consume_w_metadata(ctx, filename, &mdata); + data_time.pauseTime(); + REQUIRE(rc >= 0); + } + AGGREGATE_TIME(data); + if (info.rank == 0) { + printf("[DYAD_TEST],%10d,%10lu,%10.6f,%10.6f\n", info.comm_size, + data_len * args.number_of_files, total_data / info.comm_size, + data_len * args.number_of_files * info.comm_size * info.comm_size / + total_data / 1024 / 1024.0); + } + } + auto rc = dyad_finalize(); + REQUIRE(rc >= 0); + REQUIRE(clean_directories() == 0); + REQUIRE(posttest() == 0); +} + +// clang-format off +TEST_CASE("LocalProcessDataBandwidth", "[files= " + std::to_string(args.number_of_files) +"]" + "[file_size= " + std::to_string(args.request_size*args.iteration) +"]" + "[parallel_req= " + std::to_string(info.comm_size) +"]" + "[num_nodes= " + std::to_string(info.comm_size / args.process_per_node) +"]") { + // clang-format on + REQUIRE(pretest() == 0); + REQUIRE(clean_directories() == 0); + REQUIRE(create_files_per_broker() == 0); + dyad_init_env(DYAD_COMM_RECV, info.flux_handle); + SECTION("Test Max Bandwidth") { + Timer data_time; + char filename[4096]; + size_t data_len = args.request_size * args.iteration; + char* file_data = (char*)malloc(data_len); + for (size_t file_idx = 0; file_idx < args.number_of_files; ++file_idx) { + sprintf(filename, "%s/%s_%u_%zu.bat", args.dyad_managed_dir.c_str(), + args.filename.c_str(), info.broker_idx, file_idx); + data_time.resumeTime(); + int fd = open(filename, O_RDONLY); + data_time.pauseTime(); + REQUIRE(fd != -1); + data_time.resumeTime(); + int bytes = read(fd, file_data, data_len); + data_time.pauseTime(); + REQUIRE((size_t)bytes == data_len); + data_time.resumeTime(); 
+ int status = close(fd); + data_time.pauseTime(); + REQUIRE(status == 0); + } + AGGREGATE_TIME(data); + if (info.rank == 0) { + printf("[DYAD_TEST],%10d,%10lu,%10.6f,%10.6f\n", info.comm_size, + data_len * args.number_of_files, total_data / info.comm_size, + data_len * args.number_of_files * info.comm_size * info.comm_size / + total_data / 1024 / 1024.0); + } + } + auto rc = dyad_finalize(); + REQUIRE(rc >= 0); + REQUIRE(clean_directories() == 0); + REQUIRE(posttest() == 0); +} +// clang-format off +TEST_CASE("LocalNodeDataBandwidth", "[files= " + std::to_string(args.number_of_files) +"]" + "[file_size= " + std::to_string(args.request_size*args.iteration) +"]" + "[parallel_req= " + std::to_string(info.comm_size) +"]" + "[num_nodes= " + std::to_string(info.comm_size / args.process_per_node) +"]") { + // clang-format on + REQUIRE(pretest() == 0); + REQUIRE(clean_directories() == 0); + REQUIRE(create_files_per_broker() == 0); + dyad_init_env(DYAD_COMM_RECV, info.flux_handle); + SECTION("Test Max Bandwidth") { + Timer data_time; + char filename[4096]; + size_t data_len = args.request_size * args.iteration; + char* file_data = (char*)malloc(data_len); + for (size_t file_idx = 0; file_idx < args.number_of_files; ++file_idx) { + sprintf(filename, "%s/%s_%u_%zu.bat", args.dyad_managed_dir.c_str(), + args.filename.c_str(), info.broker_idx, file_idx); + data_time.resumeTime(); + int fd = open(filename, O_RDONLY); + data_time.pauseTime(); + REQUIRE(fd != -1); + data_time.resumeTime(); + int bytes = read(fd, file_data, data_len); + data_time.pauseTime(); + REQUIRE((size_t)bytes == data_len); + data_time.resumeTime(); + int status = close(fd); + data_time.pauseTime(); + REQUIRE(status == 0); + } + AGGREGATE_TIME(data); + if (info.rank == 0) { + printf("[DYAD_TEST],%10d,%10lu,%10.6f,%10.6f\n", info.comm_size, + data_len * args.number_of_files, total_data / info.comm_size, + data_len * args.number_of_files * info.comm_size * info.comm_size / + total_data / 1024 / 1024.0); + } + } + auto rc = dyad_finalize(); + REQUIRE(rc >= 0); + REQUIRE(clean_directories() == 0); + REQUIRE(posttest() == 0); +} \ No newline at end of file diff --git a/tests/unit/dyad_core/CMakeLists.txt b/tests/unit/dyad_core/CMakeLists.txt new file mode 100644 index 00000000..1be584bf --- /dev/null +++ b/tests/unit/dyad_core/CMakeLists.txt @@ -0,0 +1 @@ +add_test(unit_dyad_core ${CMAKE_BINARY_DIR}/bin/unit_test --reporter mpi_console gen_path_key) \ No newline at end of file diff --git a/tests/unit/dyad_core/core_functions.cpp b/tests/unit/dyad_core/core_functions.cpp new file mode 100644 index 00000000..11125435 --- /dev/null +++ b/tests/unit/dyad_core/core_functions.cpp @@ -0,0 +1,71 @@ + +#include +/** + * Test cases + */ +TEST_CASE("gen_path_key", + "[module=dyad_core]" + "[method=gen_path_key]") { + SECTION("should generate path key") { + const char* str = "test_string"; + char path_key[256] = {'\0'}; + int result = gen_path_key(str, path_key, sizeof(path_key), 3, 5); + REQUIRE(result == 0); + REQUIRE(strcmp(path_key, "") != 0); + } + SECTION("should_return_minus_one_when_input_string_is_null") { + const char* str = nullptr; + char path_key[256] = {'\0'}; + int result = gen_path_key(str, path_key, sizeof(path_key), 3, 5); + REQUIRE(result == -1); + } + SECTION("should_handle_input_string_of_length_less_than_128_bytes") { + const char* str = "short_string"; + char path_key[256] = {'\0'}; + int result = gen_path_key(str, path_key, sizeof(path_key), 3, 5); + REQUIRE(result == 0); + REQUIRE(strcmp(path_key, "") != 0); + } + 
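+  // The remaining sections cover the edge cases: inputs longer than 128
+  // bytes, explicit and zero depth/width values, and NULL arguments
+  // (which must fail with -1).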
SECTION("should_handle_input_string_of_length_more_than_128_bytes") { + const char* str = + "this_is_a_very_long_string_that_is_more_than_128_bytes_long_this_is_a_" + "very_long_string_that_is_more_than_128_bytes_long_this_is_a_very_long_" + "string_that_is_more_than_128_bytes_long_this_is_a_very_long_string_" + "that_is_more_than_128_bytes_long_this_is_a_very_long_string_that_is_" + "more_than_128_bytes_long_this_is_a_very_long_string_that_is_more_than_" + "128_bytes_long_this_is_a_very_long_string_that_is_more_than_128_bytes_" + "long_this_is_a_very_long_string_that_is_more_than_128_bytes_long_this_" + "is_a_very_long_string_that_is_more_than_128_bytes_long_this_is_a_very_" + "long_string_that_is_more_than_128_bytes_long"; + char path_key[256] = {'\0'}; + int result = gen_path_key(str, path_key, sizeof(path_key), 3, 5); + REQUIRE(result == 0); + REQUIRE(strcmp(path_key, "") != 0); + } + SECTION("should_generate_path_key_with_depth_and_width_specified") { + const char* str = "test_string"; + char path_key[256] = {'\0'}; + int result = gen_path_key(str, path_key, sizeof(path_key), 3, 5); + REQUIRE(result == 0); + REQUIRE(strcmp(path_key, "") != 0); + } + SECTION("should_generate_path_key_with_depth_and_width_set_to_0") { + const char* str = "test_string"; + char path_key[256] = {'\0'}; + int result = gen_path_key(str, path_key, sizeof(path_key), 0, 0); + REQUIRE(result == 0); + REQUIRE(strcmp(path_key, "") != 0); + } + SECTION("should_return_minus_1_when_input_string_is_NULL") { + const char* str = NULL; + char path_key[256] = {'\0'}; + int result = gen_path_key(str, path_key, sizeof(path_key), 3, 5); + REQUIRE(result == -1); + } + SECTION("should_return_minus_1_when_path_key_is_NULL") { + const char* str = "test_string"; + char* path_key = NULL; + int result = gen_path_key(str, path_key, sizeof(path_key), 3, 5); + REQUIRE(result == -1); + } +} \ No newline at end of file diff --git a/tests/unit/mdm/CMakeLists.txt b/tests/unit/mdm/CMakeLists.txt new file mode 100644 index 00000000..1d450667 --- /dev/null +++ b/tests/unit/mdm/CMakeLists.txt @@ -0,0 +1,38 @@ +set(files 16) +set(ts 65536) +set(ops 16) + +function(add_mdm_test node ppn files ts ops) + set(test_name unit_localfs_${node}_${ppn}) + add_test(${test_name} flux run -N ${node} --tasks-per-node ${ppn} ${CMAKE_BINARY_DIR}/bin/unit_test --filename mdm_${node}_${ppn} --pfs $ENV{DYAD_PFS_DIR} --dmd $ENV{DYAD_DMD_DIR} --ppn ${ppn} --iteration ${ops} --number_of_files ${files} --request_size ${ts} --reporter mpi_console LocalFSLookup) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_KVS_NAMESPACE=${DYAD_KEYSPACE}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_MODULE_SO=${CMAKE_BINARY_DIR}/${DYAD_LIBDIR}/dyad.so) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_LOG_DIR=${DYAD_LOG_DIR}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_DTL_MODE=UCX) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_CONSUMER=$ENV{DYAD_DMD_DIR}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_PRODUCER=$ENV{DYAD_DMD_DIR}) + set(test_name unit_localkvs_${node}_${ppn}) + add_test(${test_name} flux run -N ${node} --tasks-per-node ${ppn} ${CMAKE_BINARY_DIR}/bin/unit_test --filename mdm_${node}_${ppn} --pfs $ENV{DYAD_PFS_DIR} --dmd $ENV{DYAD_DMD_DIR} --ppn ${ppn} --iteration ${ops} --number_of_files ${files} --request_size ${ts} --reporter mpi_console LocalKVSLookup) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT 
DYAD_KVS_NAMESPACE=${DYAD_KEYSPACE}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_MODULE_SO=${CMAKE_BINARY_DIR}/${DYAD_LIBDIR}/dyad.so) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_LOG_DIR=${DYAD_LOG_DIR}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_DTL_MODE=UCX) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_CONSUMER=$ENV{DYAD_DMD_DIR}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_PRODUCER=$ENV{DYAD_DMD_DIR}) + set(test_name unit_remotekvs_${node}_${ppn}) + add_test(${test_name} flux run -N ${node} --tasks-per-node ${ppn} ${CMAKE_BINARY_DIR}/bin/unit_test --filename mdm_${node}_${ppn} --pfs $ENV{DYAD_PFS_DIR} --dmd $ENV{DYAD_DMD_DIR} --ppn ${ppn} --iteration ${ops} --number_of_files ${files} --request_size ${ts} --reporter mpi_console RemoteKVSLookup) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_KVS_NAMESPACE=${DYAD_KEYSPACE}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_MODULE_SO=${CMAKE_BINARY_DIR}/${DYAD_LIBDIR}/dyad.so) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_LOG_DIR=${DYAD_LOG_DIR}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_DTL_MODE=UCX) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_CONSUMER=$ENV{DYAD_DMD_DIR}) + set_property(TEST ${test_name} APPEND PROPERTY ENVIRONMENT DYAD_PATH_PRODUCER=$ENV{DYAD_DMD_DIR}) +endfunction() + +set(ppns 2) +set(nodes 1 2 4 8 16 32 64) +foreach (node ${nodes}) + foreach (ppn ${ppns}) + add_mdm_test(${node} ${ppn} ${files} ${ts} ${ops}) + endforeach () +endforeach () diff --git a/tests/unit/mdm/mdm.cpp b/tests/unit/mdm/mdm.cpp new file mode 100644 index 00000000..584c0c3a --- /dev/null +++ b/tests/unit/mdm/mdm.cpp @@ -0,0 +1,141 @@ + +#include +#include + +#include +#include + +// clang-format off +TEST_CASE("LocalFSLookup", "[number_of_lookups= " + std::to_string(args.number_of_files) +"]" + "[parallel_req= " + std::to_string(info.comm_size) +"]" + "[num_nodes= " + std::to_string(info.comm_size / args.process_per_node) +"]") { + // clang-format on + REQUIRE(pretest() == 0); + REQUIRE(clean_directories() == 0); + dyad_rc_t rc = dyad_init_env(DYAD_COMM_RECV, info.flux_handle); + REQUIRE(rc >= 0); + auto ctx = dyad_ctx_get(); + struct flock exclusive_lock; + SECTION("Throughput") { + char filename[4096]; + Timer kvs_time; + for (size_t file_idx = 0; file_idx < args.number_of_files; ++file_idx) { + sprintf(filename, "%s/%s_%u_%zu.bat", args.dyad_managed_dir.c_str(), + args.filename.c_str(), info.broker_idx, file_idx); + INFO("The file " << filename << " rank " << info.rank); + kvs_time.resumeTime(); + int lock_fd = open(filename, O_RDWR | O_CREAT, 0666); + kvs_time.pauseTime(); + REQUIRE(lock_fd != -1); + + kvs_time.resumeTime(); + rc = dyad_excl_flock(ctx, lock_fd, &exclusive_lock); + kvs_time.pauseTime(); + REQUIRE(rc >= 0); + + kvs_time.resumeTime(); + auto file_size = get_file_size(lock_fd); + kvs_time.pauseTime(); + (void)file_size; + kvs_time.resumeTime(); + dyad_release_flock(ctx, lock_fd, &exclusive_lock); + int status = close(lock_fd); + kvs_time.pauseTime(); + REQUIRE(status == 0); + } + AGGREGATE_TIME(kvs); + if (info.rank == 0) { + printf("[DYAD_TEST],%10d,%10lu,%10.6f,%10.6f\n", info.comm_size, + args.number_of_files, total_kvs / info.comm_size, + args.number_of_files * info.comm_size * info.comm_size / + total_kvs / 1000 / 1000); + } + } + rc = dyad_finalize(); + REQUIRE(rc >= 0); + REQUIRE(clean_directories() == 
0); + REQUIRE(posttest() == 0); +} +// clang-format off +TEST_CASE("LocalKVSLookup", "[number_of_lookups= " + std::to_string(args.number_of_files) +"]" + "[parallel_req= " + std::to_string(info.comm_size) +"]" + "[num_nodes= " + std::to_string(info.comm_size / args.process_per_node) +"]") { + // clang-format on + REQUIRE(pretest() == 0); + dyad_rc_t rc = dyad_init_env(DYAD_COMM_RECV, info.flux_handle); + REQUIRE(rc >= 0); + auto ctx = dyad_ctx_get(); + SECTION("Throughput") { + Timer kvs_time; + char my_filename[4096], lookup_filename[4096]; + for (size_t file_idx = 0; file_idx < args.number_of_files; ++file_idx) { + sprintf(my_filename, "%s/%s_%u_%d_%zu.bat", args.dyad_managed_dir.c_str(), + args.filename.c_str(), info.broker_idx, info.rank, file_idx); + sprintf(lookup_filename, "%s_%u_%d_%zu.bat", args.filename.c_str(), + info.broker_idx, info.rank, file_idx); + rc = dyad_commit(ctx, my_filename); + REQUIRE(rc >= 0); + dyad_metadata_t* mdata; + const size_t topic_len = PATH_MAX; + char topic[PATH_MAX + 1] = {'\0'}; + gen_path_key(lookup_filename, topic, topic_len, ctx->key_depth, + ctx->key_bins); + kvs_time.resumeTime(); + rc = dyad_kvs_read(ctx, topic, lookup_filename, false, &mdata); + kvs_time.pauseTime(); + REQUIRE(rc >= 0); + } + AGGREGATE_TIME(kvs); + if (info.rank == 0) { + printf("[DYAD_TEST],%10d,%10lu,%10.6f,%10.6f\n", info.comm_size, + args.number_of_files, total_kvs / info.comm_size, + args.number_of_files * info.comm_size * info.comm_size / + total_kvs / 1000 / 1000); + } + } + rc = dyad_finalize(); + REQUIRE(rc >= 0); + REQUIRE(posttest() == 0); +} +// clang-format off +TEST_CASE("RemoteKVSLookup", "[number_of_lookups= " + std::to_string(args.number_of_files) +"]" + "[parallel_req= " + std::to_string(info.comm_size) +"]" + "[num_nodes= " + std::to_string(info.comm_size / args.process_per_node) +"]") { + // clang-format on + REQUIRE(pretest() == 0); + dyad_rc_t rc = dyad_init_env(DYAD_COMM_RECV, info.flux_handle); + REQUIRE(rc >= 0); + auto ctx = dyad_ctx_get(); + SECTION("Throughput") { + Timer kvs_time; + char my_filename[4096], lookup_filename[4096]; + for (size_t file_idx = 0; file_idx < args.number_of_files; ++file_idx) { + sprintf(my_filename, "%s/%s_%u_%d_%d_%zu.bat", + args.dyad_managed_dir.c_str(), args.filename.c_str(), + info.broker_idx, info.rank, info.comm_size, file_idx); + sprintf(lookup_filename, "%s_%u_%d_%d_%zu.bat", args.filename.c_str(), + info.broker_idx, info.rank, info.comm_size, file_idx); + rc = dyad_commit(ctx, my_filename); + REQUIRE(rc >= 0); + dyad_metadata_t* mdata; + const size_t topic_len = PATH_MAX; + char topic[PATH_MAX + 1] = {'\0'}; + gen_path_key(lookup_filename, topic, topic_len, ctx->key_depth, + ctx->key_bins); + kvs_time.resumeTime(); + rc = dyad_kvs_read(ctx, topic, lookup_filename, false, &mdata); + kvs_time.pauseTime(); + REQUIRE(rc >= 0); + } + AGGREGATE_TIME(kvs); + if (info.rank == 0) { + printf("[DYAD_TEST],%10d,%10lu,%10.6f,%10.6f\n", info.comm_size, + args.number_of_files, total_kvs / info.comm_size, + args.number_of_files * info.comm_size * info.comm_size / + total_kvs / 1000 / 1000); + } + } + rc = dyad_finalize(); + REQUIRE(rc >= 0); + REQUIRE(posttest() == 0); +} diff --git a/tests/unit/mpi_console_reporter.cpp b/tests/unit/mpi_console_reporter.cpp new file mode 100644 index 00000000..1cbbd186 --- /dev/null +++ b/tests/unit/mpi_console_reporter.cpp @@ -0,0 +1,720 @@ +// Copyright Catch2 Authors +// Distributed under the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at +// https://www.boost.org/LICENSE_1_0.txt) + +// SPDX-License-Identifier: BSL-1.0 +#include "mpi_console_reporter.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable:4061) // Not all labels are EXPLICITLY handled in switch + // Note that 4062 (not all labels are handled and default is missing) is enabled +#endif + +#if defined(__clang__) +# pragma clang diagnostic push +// For simplicity, benchmarking-only helpers are always enabled +# pragma clang diagnostic ignored "-Wunused-function" +#endif + + + +namespace Catch { + +namespace { + +// Formatter impl for ConsoleMPIReporter +class ConsoleAssertionPrinter { +public: + ConsoleAssertionPrinter& operator= (ConsoleAssertionPrinter const&) = delete; + ConsoleAssertionPrinter(ConsoleAssertionPrinter const&) = delete; + ConsoleAssertionPrinter(std::ostream& _stream, AssertionStats const& _stats, ColourImpl* colourImpl_, bool _printInfoMessages) + : stream(_stream), + stats(_stats), + result(_stats.assertionResult), + colour(Colour::None), + message(result.getMessage()), + messages(_stats.infoMessages), + colourImpl(colourImpl_), + printInfoMessages(_printInfoMessages) { + switch (result.getResultType()) { + case ResultWas::Ok: + colour = Colour::Success; + passOrFail = "PASSED"_sr; + //if( result.hasMessage() ) + if (_stats.infoMessages.size() == 1) + messageLabel = "with message"; + if (_stats.infoMessages.size() > 1) + messageLabel = "with messages"; + break; + case ResultWas::ExpressionFailed: + if (result.isOk()) { + colour = Colour::Success; + passOrFail = "FAILED - but was ok"_sr; + } else { + colour = Colour::Error; + passOrFail = "FAILED"_sr; + } + if (_stats.infoMessages.size() == 1) + messageLabel = "with message"; + if (_stats.infoMessages.size() > 1) + messageLabel = "with messages"; + break; + case ResultWas::ThrewException: + colour = Colour::Error; + passOrFail = "FAILED"_sr; + messageLabel = "due to unexpected exception with "; + if (_stats.infoMessages.size() == 1) + messageLabel += "message"; + if (_stats.infoMessages.size() > 1) + messageLabel += "messages"; + break; + case ResultWas::FatalErrorCondition: + colour = Colour::Error; + passOrFail = "FAILED"_sr; + messageLabel = "due to a fatal error condition"; + break; + case ResultWas::DidntThrowException: + colour = Colour::Error; + passOrFail = "FAILED"_sr; + messageLabel = "because no exception was thrown where one was expected"; + break; + case ResultWas::Info: + messageLabel = "info"; + break; + case ResultWas::Warning: + messageLabel = "warning"; + break; + case ResultWas::ExplicitFailure: + passOrFail = "FAILED"_sr; + colour = Colour::Error; + if (_stats.infoMessages.size() == 1) + messageLabel = "explicitly with message"; + if (_stats.infoMessages.size() > 1) + messageLabel = "explicitly with messages"; + break; + // These cases are here to prevent compiler warnings + case ResultWas::Unknown: + case ResultWas::FailureBit: + case ResultWas::Exception: + passOrFail = "** internal error **"_sr; + colour = Colour::Error; + break; + } + } + + void print() const { + printSourceInfo(); + if (stats.totals.assertions.total() > 0) { + printResultType(); + printOriginalExpression(); + printReconstructedExpression(); + } else { + stream << '\n'; + } + printMessage(); + } + +private: + void printResultType() const { + if (!passOrFail.empty()) { + stream << 
colourImpl->guardColour(colour) << passOrFail << ":\n"; + } + } + void printOriginalExpression() const { + if (result.hasExpression()) { + stream << colourImpl->guardColour( Colour::OriginalExpression ) + << " " << result.getExpressionInMacro() << '\n'; + } + } + void printReconstructedExpression() const { + if (result.hasExpandedExpression()) { + stream << "with expansion:\n"; + stream << colourImpl->guardColour( Colour::ReconstructedExpression ) + << TextFlow::Column( result.getExpandedExpression() ) + .indent( 2 ) + << '\n'; + } + } + void printMessage() const { + if (!messageLabel.empty()) + stream << messageLabel << ':' << '\n'; + for (auto const& msg : messages) { + // If this assertion is a warning ignore any INFO messages + if (printInfoMessages || msg.type != ResultWas::Info) + stream << TextFlow::Column(msg.message).indent(2) << '\n'; + } + } + void printSourceInfo() const { + stream << colourImpl->guardColour( Colour::FileName ) + << result.getSourceInfo() << ": "; + } + + std::ostream& stream; + AssertionStats const& stats; + AssertionResult const& result; + Colour::Code colour; + StringRef passOrFail; + std::string messageLabel; + std::string message; + std::vector messages; + ColourImpl* colourImpl; + bool printInfoMessages; +}; + +std::size_t makeRatio( std::uint64_t number, std::uint64_t total ) { + const auto ratio = total > 0 ? CATCH_CONFIG_CONSOLE_WIDTH * number / total : 0; + return (ratio == 0 && number > 0) ? 1 : static_cast(ratio); +} + +std::size_t& findMax( std::size_t& i, std::size_t& j, std::size_t& k ) { + if (i > j && i > k) + return i; + else if (j > k) + return j; + else + return k; +} + +enum class Justification { Left, Right }; + +struct ColumnInfo { + std::string name; + std::size_t width; + Justification justification; +}; +struct ColumnBreak {}; +struct RowBreak {}; + +class Duration { + enum class Unit { + Auto, + Nanoseconds, + Microseconds, + Milliseconds, + Seconds, + Minutes + }; + static const uint64_t s_nanosecondsInAMicrosecond = 1000; + static const uint64_t s_nanosecondsInAMillisecond = 1000 * s_nanosecondsInAMicrosecond; + static const uint64_t s_nanosecondsInASecond = 1000 * s_nanosecondsInAMillisecond; + static const uint64_t s_nanosecondsInAMinute = 60 * s_nanosecondsInASecond; + + double m_inNanoseconds; + Unit m_units; + +public: + explicit Duration(double inNanoseconds, Unit units = Unit::Auto) + : m_inNanoseconds(inNanoseconds), + m_units(units) { + if (m_units == Unit::Auto) { + if (m_inNanoseconds < s_nanosecondsInAMicrosecond) + m_units = Unit::Nanoseconds; + else if (m_inNanoseconds < s_nanosecondsInAMillisecond) + m_units = Unit::Microseconds; + else if (m_inNanoseconds < s_nanosecondsInASecond) + m_units = Unit::Milliseconds; + else if (m_inNanoseconds < s_nanosecondsInAMinute) + m_units = Unit::Seconds; + else + m_units = Unit::Minutes; + } + + } + + auto value() const -> double { + switch (m_units) { + case Unit::Microseconds: + return m_inNanoseconds / static_cast(s_nanosecondsInAMicrosecond); + case Unit::Milliseconds: + return m_inNanoseconds / static_cast(s_nanosecondsInAMillisecond); + case Unit::Seconds: + return m_inNanoseconds / static_cast(s_nanosecondsInASecond); + case Unit::Minutes: + return m_inNanoseconds / static_cast(s_nanosecondsInAMinute); + default: + return m_inNanoseconds; + } + } + StringRef unitsAsString() const { + switch (m_units) { + case Unit::Nanoseconds: + return "ns"_sr; + case Unit::Microseconds: + return "us"_sr; + case Unit::Milliseconds: + return "ms"_sr; + case Unit::Seconds: + return "s"_sr; + 
case Unit::Minutes: + return "m"_sr; + default: + return "** internal error **"_sr; + } + + } + friend auto operator << (std::ostream& os, Duration const& duration) -> std::ostream& { + return os << duration.value() << ' ' << duration.unitsAsString(); + } +}; +} // end anon namespace + +class TablePrinter { + std::ostream& m_os; + std::vector m_columnInfos; + ReusableStringStream m_oss; + int m_currentColumn = -1; + bool m_isOpen = false; + +public: + TablePrinter( std::ostream& os, std::vector columnInfos ) + : m_os( os ), + m_columnInfos( CATCH_MOVE( columnInfos ) ) {} + + auto columnInfos() const -> std::vector const& { + return m_columnInfos; + } + + void open() { + if (!m_isOpen) { + m_isOpen = true; + *this << RowBreak(); + + TextFlow::Columns headerCols; + auto spacer = TextFlow::Spacer(2); + for (auto const& info : m_columnInfos) { + assert(info.width > 2); + headerCols += TextFlow::Column(info.name).width(info.width - 2); + headerCols += spacer; + } + m_os << headerCols << '\n'; + + m_os << lineOfChars('-') << '\n'; + } + } + void close() { + if (m_isOpen) { + *this << RowBreak(); + m_os << '\n' << std::flush; + m_isOpen = false; + } + } + + template + friend TablePrinter& operator << (TablePrinter& tp, T const& value) { + tp.m_oss << value; + return tp; + } + + friend TablePrinter& operator << (TablePrinter& tp, ColumnBreak) { + auto colStr = tp.m_oss.str(); + const auto strSize = colStr.size(); + tp.m_oss.str(""); + tp.open(); + if (tp.m_currentColumn == static_cast(tp.m_columnInfos.size() - 1)) { + tp.m_currentColumn = -1; + tp.m_os << '\n'; + } + tp.m_currentColumn++; + + auto colInfo = tp.m_columnInfos[tp.m_currentColumn]; + auto padding = (strSize + 1 < colInfo.width) + ? std::string(colInfo.width - (strSize + 1), ' ') + : std::string(); + if (colInfo.justification == Justification::Left) + tp.m_os << colStr << padding << ' '; + else + tp.m_os << padding << colStr << ' '; + return tp; + } + + friend TablePrinter& operator << (TablePrinter& tp, RowBreak) { + if (tp.m_currentColumn > 0) { + tp.m_os << '\n'; + tp.m_currentColumn = -1; + } + return tp; + } +}; + +ConsoleMPIReporter::ConsoleMPIReporter(ReporterConfig&& config): + StreamingReporterBase( CATCH_MOVE( config ) ), + m_tablePrinter(Detail::make_unique(m_stream, + [&config]() -> std::vector { + if (config.fullConfig()->benchmarkNoAnalysis()) + { + return{ + { "benchmark name", CATCH_CONFIG_CONSOLE_WIDTH - 43, Justification::Left }, + { " samples", 14, Justification::Right }, + { " iterations", 14, Justification::Right }, + { " mean", 14, Justification::Right } + }; + } + else + { + return{ + { "benchmark name", CATCH_CONFIG_CONSOLE_WIDTH - 43, Justification::Left }, + { "samples mean std dev", 14, Justification::Right }, + { "iterations low mean low std dev", 14, Justification::Right }, + { "estimated high mean high std dev", 14, Justification::Right } + }; + } + }())) {} +ConsoleMPIReporter::~ConsoleMPIReporter() = default; + +std::string ConsoleMPIReporter::getDescription() { + return "Reports test results as plain lines of text"; +} + +void ConsoleMPIReporter::noMatchingTestCases( StringRef unmatchedSpec ) { + m_stream << "No test cases matched '" << unmatchedSpec << "'\n"; +} + +void ConsoleMPIReporter::reportInvalidTestSpec( StringRef arg ) { + m_stream << "Invalid Filter: " << arg << '\n'; +} + +void ConsoleMPIReporter::assertionStarting(AssertionInfo const&) {} + +void ConsoleMPIReporter::assertionEnded(AssertionStats const& _assertionStats) { + AssertionResult const& result = _assertionStats.assertionResult; + + 
bool includeResults = m_config->includeSuccessfulResults() || !result.isOk(); + + // Drop out if result was successful but we're not printing them. + if (!includeResults && result.getResultType() != ResultWas::Warning) + return; + + lazyPrint(); + + ConsoleAssertionPrinter printer(m_stream, _assertionStats, m_colour.get(), includeResults); + printer.print(); + m_stream << '\n' << std::flush; +} + +void ConsoleMPIReporter::sectionStarting(SectionInfo const& _sectionInfo) { + m_tablePrinter->close(); + m_headerPrinted = false; + StreamingReporterBase::sectionStarting(_sectionInfo); +} +void ConsoleMPIReporter::sectionEnded(SectionStats const& _sectionStats) { + m_tablePrinter->close(); + if (_sectionStats.missingAssertions) { + lazyPrint(); + auto guard = + m_colour->guardColour( Colour::ResultError ).engage( m_stream ); + if (m_sectionStack.size() > 1) + m_stream << "\nNo assertions in section"; + else + m_stream << "\nNo assertions in test case"; + m_stream << " '" << _sectionStats.sectionInfo.name << "'\n\n" << std::flush; + } + double dur = _sectionStats.durationInSeconds; + if (shouldShowDuration(*m_config, dur)) { + m_stream << getFormattedDuration(dur) << " s: " << _sectionStats.sectionInfo.name << '\n' << std::flush; + } + if (m_headerPrinted) { + m_headerPrinted = false; + } + StreamingReporterBase::sectionEnded(_sectionStats); +} + +void ConsoleMPIReporter::benchmarkPreparing( StringRef name ) { + lazyPrintWithoutClosingBenchmarkTable(); + + auto nameCol = TextFlow::Column( static_cast( name ) ) + .width( m_tablePrinter->columnInfos()[0].width - 2 ); + + bool firstLine = true; + for (auto line : nameCol) { + if (!firstLine) + (*m_tablePrinter) << ColumnBreak() << ColumnBreak() << ColumnBreak(); + else + firstLine = false; + + (*m_tablePrinter) << line << ColumnBreak(); + } +} + +void ConsoleMPIReporter::benchmarkStarting(BenchmarkInfo const& info) { + (*m_tablePrinter) << info.samples << ColumnBreak() + << info.iterations << ColumnBreak(); + if (!m_config->benchmarkNoAnalysis()) + (*m_tablePrinter) << Duration(info.estimatedDuration) << ColumnBreak(); +} +void ConsoleMPIReporter::benchmarkEnded(BenchmarkStats<> const& stats) { + if (m_config->benchmarkNoAnalysis()) + { + (*m_tablePrinter) << Duration(stats.mean.point.count()) << ColumnBreak(); + } + else + { + (*m_tablePrinter) << ColumnBreak() + << Duration(stats.mean.point.count()) << ColumnBreak() + << Duration(stats.mean.lower_bound.count()) << ColumnBreak() + << Duration(stats.mean.upper_bound.count()) << ColumnBreak() << ColumnBreak() + << Duration(stats.standardDeviation.point.count()) << ColumnBreak() + << Duration(stats.standardDeviation.lower_bound.count()) << ColumnBreak() + << Duration(stats.standardDeviation.upper_bound.count()) << ColumnBreak() << ColumnBreak() << ColumnBreak() << ColumnBreak() << ColumnBreak(); + } +} + +void ConsoleMPIReporter::benchmarkFailed( StringRef error ) { + auto guard = m_colour->guardColour( Colour::Red ).engage( m_stream ); + (*m_tablePrinter) + << "Benchmark failed (" << error << ')' + << ColumnBreak() << RowBreak(); +} + +void ConsoleMPIReporter::testCaseEnded(TestCaseStats const& _testCaseStats) { + m_tablePrinter->close(); + StreamingReporterBase::testCaseEnded(_testCaseStats); + m_headerPrinted = false; +} +void ConsoleMPIReporter::testRunEnded(TestRunStats const& _testRunStats) { + int rank = -1; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if(rank != 0 && _testRunStats.totals.testCases.allPassed()) + return; + printTotalsDivider(_testRunStats.totals); + 
printTotals(_testRunStats.totals); + m_stream << '\n' << std::flush; + StreamingReporterBase::testRunEnded(_testRunStats); +} +void ConsoleMPIReporter::testRunStarting(TestRunInfo const& _testInfo) { + StreamingReporterBase::testRunStarting(_testInfo); + int rank = -1; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if(rank != 0) return; + if ( m_config->testSpec().hasFilters() ) { + m_stream << m_colour->guardColour( Colour::BrightYellow ) << "Filters: " + << serializeFilters( m_config->getTestsOrTags() ) << '\n'; + } + m_stream << "Randomness seeded to: " << m_config->rngSeed() << '\n'; +} + +void ConsoleMPIReporter::lazyPrint() { + + m_tablePrinter->close(); + lazyPrintWithoutClosingBenchmarkTable(); +} + +void ConsoleMPIReporter::lazyPrintWithoutClosingBenchmarkTable() { + + if ( !m_testRunInfoPrinted ) { + lazyPrintRunInfo(); + } + if (!m_headerPrinted) { + printTestCaseAndSectionHeader(); + m_headerPrinted = true; + } +} +void ConsoleMPIReporter::lazyPrintRunInfo() { + m_stream << '\n' + << lineOfChars( '~' ) << '\n' + << m_colour->guardColour( Colour::SecondaryText ) + << currentTestRunInfo.name << " is a Catch2 v" << libraryVersion() + << " host application.\n" + << "Run with -? for options\n\n"; + + m_testRunInfoPrinted = true; +} +void ConsoleMPIReporter::printTestCaseAndSectionHeader() { + assert(!m_sectionStack.empty()); + printOpenHeader(currentTestCaseInfo->name); + + if (m_sectionStack.size() > 1) { + auto guard = m_colour->guardColour( Colour::Headers ).engage( m_stream ); + + auto + it = m_sectionStack.begin() + 1, // Skip first section (test case) + itEnd = m_sectionStack.end(); + for (; it != itEnd; ++it) + printHeaderString(it->name, 2); + } + + SourceLineInfo lineInfo = m_sectionStack.back().lineInfo; + + + m_stream << lineOfChars( '-' ) << '\n' + << m_colour->guardColour( Colour::FileName ) << lineInfo << '\n' + << lineOfChars( '.' ) << "\n\n" + << std::flush; +} + +void ConsoleMPIReporter::printClosedHeader(std::string const& _name) { + printOpenHeader(_name); + m_stream << lineOfChars('.') << '\n'; +} +void ConsoleMPIReporter::printOpenHeader(std::string const& _name) { + m_stream << lineOfChars('-') << '\n'; + { + auto guard = m_colour->guardColour( Colour::Headers ).engage( m_stream ); + printHeaderString(_name); + } +} + +void ConsoleMPIReporter::printHeaderString(std::string const& _string, std::size_t indent) { + // We want to get a bit fancy with line breaking here, so that subsequent + // lines start after ":" if one is present, e.g. + // ``` + // blablabla: Fancy + // linebreaking + // ``` + // but we also want to avoid problems with overly long indentation causing + // the text to take up too many lines, e.g. + // ``` + // blablabla: F + // a + // n + // c + // y + // . + // . + // . 
+ // ``` + // So we limit the prefix indentation check to first quarter of the possible + // width + std::size_t idx = _string.find( ": " ); + if ( idx != std::string::npos && idx < CATCH_CONFIG_CONSOLE_WIDTH / 4 ) { + idx += 2; + } else { + idx = 0; + } + m_stream << TextFlow::Column( _string ) + .indent( indent + idx ) + .initialIndent( indent ) + << '\n'; +} + +struct SummaryColumn { + + SummaryColumn( std::string _label, Colour::Code _colour ) + : label( CATCH_MOVE( _label ) ), + colour( _colour ) {} + SummaryColumn addRow( std::uint64_t count ) { + ReusableStringStream rss; + rss << count; + std::string row = rss.str(); + for (auto& oldRow : rows) { + while (oldRow.size() < row.size()) + oldRow = ' ' + oldRow; + while (oldRow.size() > row.size()) + row = ' ' + row; + } + rows.push_back(row); + return *this; + } + + std::string label; + Colour::Code colour; + std::vector rows; + +}; + +void ConsoleMPIReporter::printTotals( Totals const& totals ) { + if (totals.testCases.total() == 0) { + m_stream << m_colour->guardColour( Colour::Warning ) + << "No tests ran\n"; + } else if (totals.assertions.total() > 0 && totals.testCases.allPassed()) { + m_stream << m_colour->guardColour( Colour::ResultSuccess ) + << "All tests passed"; + m_stream << " (" + << pluralise(totals.assertions.passed, "assertion"_sr) << " in " + << pluralise(totals.testCases.passed, "test case"_sr) << ')' + << '\n'; + } else { + + std::vector columns; + columns.push_back(SummaryColumn("", Colour::None) + .addRow(totals.testCases.total()) + .addRow(totals.assertions.total())); + columns.push_back(SummaryColumn("passed", Colour::Success) + .addRow(totals.testCases.passed) + .addRow(totals.assertions.passed)); + columns.push_back(SummaryColumn("failed", Colour::ResultError) + .addRow(totals.testCases.failed) + .addRow(totals.assertions.failed)); + columns.push_back(SummaryColumn("failed as expected", Colour::ResultExpectedFailure) + .addRow(totals.testCases.failedButOk) + .addRow(totals.assertions.failedButOk)); + + printSummaryRow("test cases"_sr, columns, 0); + printSummaryRow("assertions"_sr, columns, 1); + } +} +void ConsoleMPIReporter::printSummaryRow(StringRef label, std::vector const& cols, std::size_t row) { + for (auto col : cols) { + std::string const& value = col.rows[row]; + if (col.label.empty()) { + m_stream << label << ": "; + if ( value != "0" ) { + m_stream << value; + } else { + m_stream << m_colour->guardColour( Colour::Warning ) + << "- none -"; + } + } else if (value != "0") { + m_stream << m_colour->guardColour( Colour::LightGrey ) << " | " + << m_colour->guardColour( col.colour ) << value << ' ' + << col.label; + } + } + m_stream << '\n'; +} + +void ConsoleMPIReporter::printTotalsDivider(Totals const& totals) { + if (totals.testCases.total() > 0) { + std::size_t failedRatio = makeRatio(totals.testCases.failed, totals.testCases.total()); + std::size_t failedButOkRatio = makeRatio(totals.testCases.failedButOk, totals.testCases.total()); + std::size_t passedRatio = makeRatio(totals.testCases.passed, totals.testCases.total()); + while (failedRatio + failedButOkRatio + passedRatio < CATCH_CONFIG_CONSOLE_WIDTH - 1) + findMax(failedRatio, failedButOkRatio, passedRatio)++; + while (failedRatio + failedButOkRatio + passedRatio > CATCH_CONFIG_CONSOLE_WIDTH - 1) + findMax(failedRatio, failedButOkRatio, passedRatio)--; + + m_stream << m_colour->guardColour( Colour::Error ) + << std::string( failedRatio, '=' ) + << m_colour->guardColour( Colour::ResultExpectedFailure ) + << std::string( failedButOkRatio, '=' ); + 
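+        // The remaining width is the passed share of the bar; the bright
+        // "all passed" colour is reserved for a fully green run.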
if ( totals.testCases.allPassed() ) { + m_stream << m_colour->guardColour( Colour::ResultSuccess ) + << std::string( passedRatio, '=' ); + } else { + m_stream << m_colour->guardColour( Colour::Success ) + << std::string( passedRatio, '=' ); + } + } else { + m_stream << m_colour->guardColour( Colour::Warning ) + << std::string( CATCH_CONFIG_CONSOLE_WIDTH - 1, '=' ); + } + m_stream << '\n'; +} +void ConsoleMPIReporter::printSummaryDivider() { + m_stream << lineOfChars('-') << '\n'; +} + +} // end namespace Catch + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +#if defined(__clang__) +# pragma clang diagnostic pop +#endif \ No newline at end of file diff --git a/tests/unit/mpi_console_reporter.hpp b/tests/unit/mpi_console_reporter.hpp new file mode 100644 index 00000000..e9b55ac9 --- /dev/null +++ b/tests/unit/mpi_console_reporter.hpp @@ -0,0 +1,73 @@ +// Copyright Catch2 Authors +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at +// https://www.boost.org/LICENSE_1_0.txt) + +// SPDX-License-Identifier: BSL-1.0 +#ifndef DYAD_CATCH_REPORTER_MPI_CONSOLE_HPP_INCLUDED +#define DYAD_CATCH_REPORTER_MPI_CONSOLE_HPP_INCLUDED + +#include +#include +#include + +namespace Catch { + // Fwd decls + struct SummaryColumn; + class TablePrinter; + + class ConsoleMPIReporter final : public StreamingReporterBase { + Detail::unique_ptr m_tablePrinter; + + public: + ConsoleMPIReporter(ReporterConfig&& config); + ~ConsoleMPIReporter() override; + static std::string getDescription(); + + void noMatchingTestCases( StringRef unmatchedSpec ) override; + void reportInvalidTestSpec( StringRef arg ) override; + + void assertionStarting(AssertionInfo const&) override; + + void assertionEnded(AssertionStats const& _assertionStats) override; + + void sectionStarting(SectionInfo const& _sectionInfo) override; + void sectionEnded(SectionStats const& _sectionStats) override; + + void benchmarkPreparing( StringRef name ) override; + void benchmarkStarting(BenchmarkInfo const& info) override; + void benchmarkEnded(BenchmarkStats<> const& stats) override; + void benchmarkFailed( StringRef error ) override; + + void testCaseEnded(TestCaseStats const& _testCaseStats) override; + void testRunEnded(TestRunStats const& _testRunStats) override; + void testRunStarting(TestRunInfo const& _testRunInfo) override; + + private: + void lazyPrint(); + + void lazyPrintWithoutClosingBenchmarkTable(); + void lazyPrintRunInfo(); + void printTestCaseAndSectionHeader(); + + void printClosedHeader(std::string const& _name); + void printOpenHeader(std::string const& _name); + + // if string has a : in first line will set indent to follow it on + // subsequent lines + void printHeaderString(std::string const& _string, std::size_t indent = 0); + + + void printTotals(Totals const& totals); + void printSummaryRow(StringRef label, std::vector const& cols, std::size_t row); + + void printTotalsDivider(Totals const& totals); + void printSummaryDivider(); + + bool m_headerPrinted = false; + bool m_testRunInfoPrinted = false; + }; + +} // end namespace Catch +CATCH_REGISTER_REPORTER("mpi_console", Catch::ConsoleMPIReporter) +#endif // DYAD_CATCH_REPORTER_MPI_CONSOLE_HPP_INCLUDED \ No newline at end of file diff --git a/tests/unit/script/CMakeLists.txt b/tests/unit/script/CMakeLists.txt new file mode 100644 index 00000000..2c7585a7 --- /dev/null +++ b/tests/unit/script/CMakeLists.txt @@ -0,0 +1,9 @@ +add_test(dyad_start ${CMAKE_CURRENT_SOURCE_DIR}/dyad_start.sh) +set_property(TEST dyad_start 
APPEND PROPERTY ENVIRONMENT DYAD_KVS_NAMESPACE=${DYAD_KEYSPACE})
+set_property(TEST dyad_start APPEND PROPERTY ENVIRONMENT DYAD_MODULE_SO=${CMAKE_BINARY_DIR}/${DYAD_LIBDIR}/dyad.so)
+set_property(TEST dyad_start APPEND PROPERTY ENVIRONMENT DYAD_LOG_DIR=${DYAD_LOG_DIR})
+set_property(TEST dyad_start APPEND PROPERTY ENVIRONMENT DYAD_DTL_MODE=UCX)
+set_property(TEST dyad_start APPEND PROPERTY ENVIRONMENT DYAD_PATH=$ENV{DYAD_DMD_DIR})
+
+add_test(dyad_stop ${CMAKE_CURRENT_SOURCE_DIR}/dyad_stop.sh)
+set_property(TEST dyad_stop APPEND PROPERTY ENVIRONMENT DYAD_KVS_NAMESPACE=${DYAD_KEYSPACE})
\ No newline at end of file
diff --git a/tests/unit/script/dyad_start.sh b/tests/unit/script/dyad_start.sh
new file mode 100755
index 00000000..31d82267
--- /dev/null
+++ b/tests/unit/script/dyad_start.sh
@@ -0,0 +1,2 @@
+flux kvs namespace create ${DYAD_KVS_NAMESPACE}
+flux exec -r all flux module load ${DYAD_MODULE_SO} --info_log=${DYAD_LOG_DIR}/dyad-broker --error_log=${DYAD_LOG_DIR}/dyad-broker --mode=${DYAD_DTL_MODE} $DYAD_PATH
\ No newline at end of file
diff --git a/tests/unit/script/dyad_stop.sh b/tests/unit/script/dyad_stop.sh
new file mode 100755
index 00000000..d564b1e2
--- /dev/null
+++ b/tests/unit/script/dyad_stop.sh
@@ -0,0 +1,2 @@
+flux kvs namespace remove ${DYAD_KVS_NAMESPACE}
+flux exec -r all flux module unload dyad
\ No newline at end of file
diff --git a/tests/unit/test_utils.h b/tests/unit/test_utils.h
new file mode 100644
index 00000000..11a62970
--- /dev/null
+++ b/tests/unit/test_utils.h
@@ -0,0 +1,66 @@
+#ifndef DYAD_TEST_UTILS_H
+#define DYAD_TEST_UTILS_H
+
+#include <mpi.h>
+#include <unistd.h>
+
+#include <chrono>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+const uint32_t KB = 1024;
+const uint32_t MB = 1024 * 1024;
+#define AGGREGATE_TIME(name) \
+  double total_##name = 0.0; \
+  auto name##_a = name##_time.getElapsedTime(); \
+  MPI_Reduce(&name##_a, &total_##name, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+
+size_t GetRandomOffset(size_t i, unsigned int offset_seed, size_t stride,
+                       size_t total_size) {
+  return abs((int)(((i * rand_r(&offset_seed)) % stride) % total_size));
+}
+inline std::string get_filename(int fd) {
+  const int kMaxSize = 256;
+  char proclnk[kMaxSize];
+  char filename[kMaxSize];
+  snprintf(proclnk, kMaxSize, "/proc/self/fd/%d", fd);
+  // readlink() returns ssize_t and does not NUL-terminate the buffer.
+  ssize_t r = readlink(proclnk, filename, kMaxSize - 1);
+  filename[r > 0 ? r : 0] = '\0';
+  return filename;
+}
+
+std::string GenRandom(const int len) {
+  std::string tmp_s;
+  static const char alphanum[] =
+      "0123456789"
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+      "abcdefghijklmnopqrstuvwxyz";
+
+  srand(100);
+
+  tmp_s.reserve(len);
+
+  for (int i = 0; i < len; ++i) {
+    tmp_s += alphanum[rand() % (sizeof(alphanum) - 1)];
+  }
+
+  tmp_s[len - 1] = '\n';
+
+  return tmp_s;
+}
+
+class Timer {
+ public:
+  Timer() : elapsed_time(0) {}
+  void resumeTime() { t1 = std::chrono::high_resolution_clock::now(); }
+  double pauseTime() {
+    auto t2 = std::chrono::high_resolution_clock::now();
+    elapsed_time += std::chrono::duration<double>(t2 - t1).count();
+    return elapsed_time;
+  }
+  double getElapsedTime() { return elapsed_time; }
+
+ private:
+  std::chrono::high_resolution_clock::time_point t1;
+  double elapsed_time;
+};
+
+#endif  // DYAD_TEST_UTILS_H
diff --git a/tests/unit/unit_test.cpp b/tests/unit/unit_test.cpp
new file mode 100644
index 00000000..cb617c46
--- /dev/null
+++ b/tests/unit/unit_test.cpp
@@ -0,0 +1,167 @@
+#include <flux/core.h>
+#include <mpi.h>
+#include <unistd.h>
+#include <cstdio>
+#include <cstdlib>
+
+#include <experimental/filesystem>
+#include <string>
+#include "catch_config.h"
+#include "test_utils.h"
+namespace fs = std::experimental::filesystem;
+
+/**
+ * Test data structures
+ */
+namespace dyad::test {
+struct Info {
+  int rank;
+  int comm_size;
+  int num_nodes;
+  int num_brokers;
+  flux_t* flux_handle;
+  uint32_t broker_idx;
+  uint32_t broker_size;
+  bool debug_init;
+};
+struct Arguments {
+  // MPI Configurations
+  size_t process_per_node = 1;
+  size_t brokers_per_node = 1;
+  // DYAD Configuration
+  fs::path dyad_managed_dir = "~/dyad/dmd";
+  // Test configuration
+  fs::path pfs = "~/dyad/pfs";
+  std::string filename = "test.dat";
+  size_t number_of_files = 1;
+  size_t request_size = 65536;
+  size_t iteration = 8;
+  bool debug = false;
+};
+}  // namespace dyad::test
+
+dyad::test::Arguments args;
+dyad::test::Info info;
+/**
+ * Overridden methods for catch
+ */
+
+int init(int* argc, char*** argv) {
+  // fprintf(stdout, "Initializing MPI\n");
+  MPI_Init(argc, argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &info.rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &info.comm_size);
+  info.flux_handle = flux_open(NULL, 0);
+  info.debug_init = false;
+  MPI_Barrier(MPI_COMM_WORLD);
+  return 0;
+}
+int finalize() {
+  MPI_Finalize();
+  return 0;
+}
+cl::Parser define_options() {
+  return cl::Opt(args.filename, "filename")["-f"]["--filename"](
+             "Filename to be used for I/O.") |
+         cl::Opt(args.pfs, "pfs")["--pfs"](
+             "Directory used for performing I/O (default pfs)") |
+         cl::Opt(args.dyad_managed_dir, "dmd")["--dmd"](
+             "Directory used for DYAD Managed Directory") |
+         cl::Opt(args.process_per_node,
+                 "process_per_node")["-p"]["--ppn"]("Processes per node") |
+         cl::Opt(args.request_size, "request_size")["-r"]["--request_size"](
+             "Transfer size used for performing I/O") |
+         cl::Opt(args.iteration,
+                 "iteration")["-i"]["--iteration"]("Number of Iterations") |
+         cl::Opt(
+             args.number_of_files,
+             "number_of_files")["-n"]["--number_of_files"]("Number of Files") |
+         cl::Opt(args.brokers_per_node,
+                 "brokers_per_node")["-b"]["--brokers_per_node"](
+             "Number of Brokers per node") |
+         cl::Opt(args.debug)["-d"]["--debug"]("Enable debugger attach support");
+}
+
+int pretest() {
+  if (!info.debug_init && args.debug) {
+    const int HOSTNAME_SIZE = 256;
+    char hostname[HOSTNAME_SIZE];
+    gethostname(hostname, HOSTNAME_SIZE);
+    int pid = getpid();
+    char* start_port_str = getenv("VSC_DEBUG_START_PORT");
+    int start_port = 10000;
+    if (start_port_str != nullptr) {
+      start_port = atoi(start_port_str);
+    }
+    const char* conf_dir = getenv("VSC_DEBUG_CONF_DIR");
+    if (conf_dir == nullptr) {
+      conf_dir = ".";
+    }
+    char conf_file[4096];
+    sprintf(conf_file, "%s/debug.conf", conf_dir);
+
+    char exe[1024];
+    ssize_t ret = readlink("/proc/self/exe", exe, sizeof(exe) - 1);
+    REQUIRE(ret != -1);
+    exe[ret] = 0;
+    if (info.rank == 0) {
+      remove(conf_file);
+    }
+    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_File mpi_fh;
+    int status_orig = MPI_File_open(MPI_COMM_WORLD, conf_file,
+                                    MPI_MODE_WRONLY | MPI_MODE_CREATE,
+                                    MPI_INFO_NULL, &mpi_fh);
+    REQUIRE(status_orig == MPI_SUCCESS);
+    const int buf_len = 16 * 1024;
+    char buffer[buf_len];
+    int size;
+    if (info.rank == 0) {
+      size = sprintf(buffer, "%d\n%s:%d:%s:%d:%d\n", info.comm_size, exe,
+                     info.rank, hostname, start_port + info.rank, pid);
+    } else {
+      size = sprintf(buffer, "%s:%d:%s:%d:%d\n", exe, info.rank, hostname,
+                     start_port + info.rank, pid);
+    }
+    MPI_Status status;
+    MPI_File_write_ordered(mpi_fh, buffer, size, MPI_CHAR, &status);
+    int written_bytes;
+    MPI_Get_count(&status, MPI_CHAR, &written_bytes);
+    REQUIRE(written_bytes == size);
+    MPI_File_close(&mpi_fh);
+    MPI_Barrier(MPI_COMM_WORLD);
+    if (info.rank == 0) {
+      printf("%d ready for attach\n", info.comm_size);
+      fflush(stdout);
+      sleep(120);
+    }
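+    // The rendezvous file only needs to be written once per run; the
+    // debug_init flag below makes repeat pretest() calls skip this block.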
info.debug_init = true; + } + info.num_nodes = info.comm_size / args.process_per_node; + info.num_brokers = info.num_nodes * args.brokers_per_node; + flux_get_rank(info.flux_handle, &info.broker_idx); + flux_get_size(info.flux_handle, &info.broker_size); + MPI_Barrier(MPI_COMM_WORLD); + return 0; +} +int posttest() { + MPI_Barrier(MPI_COMM_WORLD); + return 0; +} +int clean_directories() { + if (info.rank % args.process_per_node == 0) { + auto file_pt = args.pfs.string() + "/" + args.filename; + std::string cmd = "rm -rf " + file_pt + "*"; + int status = system(cmd.c_str()); + (void)status; + file_pt = args.dyad_managed_dir.string() + "/" + args.filename; + cmd = "rm -rf " + file_pt + "*"; + status = system(cmd.c_str()); + (void)status; + } + MPI_Barrier(MPI_COMM_WORLD); + return 0; +} +#include "data_plane/data_plane.cpp" +#include "dyad_core/core_functions.cpp" +#include "mdm/mdm.cpp"
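Two notes on reading the tests above. The throughput figures work like this: `AGGREGATE_TIME(name)` sums every rank's timer into `total_name` on rank 0 via `MPI_Reduce`, so `total_name / comm_size` is the mean per-rank time, and the printed rate is `(bytes_per_rank * comm_size) / (total_name / comm_size) = bytes_per_rank * comm_size^2 / total_name`, scaled to MB/s in the data_plane tests and to millions of lookups per second in the mdm tests.

Second, for orientation, here is the consumer-side sequence that `LocalKVSLookup` and `RemoteDataBandwidth` exercise, collapsed into one function. This is a minimal sketch, not part of the patch: the dyad header path is an assumption, and treating `data_len` as an in/out parameter of `dyad_get_data()` is inferred from how the tests preset it.

```cpp
// Sketch only: mirrors the call sequence used by the unit tests above.
#include <dyad/core/dyad_core.h>  // assumed header for the dyad_* C API
#include <flux/core.h>
#include <limits.h>
#include <stdio.h>

int fetch_one(const char* upath, size_t expected_len) {
  flux_t* h = flux_open(NULL, 0);
  if (dyad_init_env(DYAD_COMM_RECV, h) < 0) return -1;
  auto ctx = dyad_ctx_get();

  // 1. Hash the user path into a KVS topic, as LocalKVSLookup does.
  char topic[PATH_MAX + 1] = {'\0'};
  gen_path_key(upath, topic, PATH_MAX, ctx->key_depth, ctx->key_bins);

  // 2. Resolve the owning broker through the Flux KVS.
  dyad_metadata_t* mdata = NULL;
  if (dyad_kvs_read(ctx, topic, upath, false, &mdata) < 0) return -1;

  // 3. Pull the bytes from the owner over the DTL (UCX in these tests).
  //    The tests preset data_len to the expected size before the call.
  char* data = NULL;
  size_t data_len = expected_len;
  if (dyad_get_data(ctx, mdata, &data, &data_len) < 0) return -1;
  printf("fetched %zu bytes of %s from broker %u\n", data_len, upath,
         mdata->owner_rank);

  dyad_finalize();
  return 0;
}
```

A producer is the mirror image: write the file under the managed directory, then publish it with `dyad_commit(ctx, path)`, which is exactly what the KVS lookup tests do before timing `dyad_kvs_read()`.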