From 9ebdb6988dd2157823a41c805cfbbe394860230a Mon Sep 17 00:00:00 2001 From: Evan West Date: Fri, 10 Nov 2023 17:43:24 -0500 Subject: [PATCH 01/37] progress on better query algorithm. Not working yet --- CMakeLists.txt | 14 +- include/cc_alg_configuration.h | 3 - include/cc_sketch_alg.h | 44 +++-- src/cc_sketch_alg.cpp | 314 ++++++++++++++++++++++++--------- src/sketch.cpp | 3 + 5 files changed, 271 insertions(+), 107 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a66c474d..cdbd929d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,10 +28,10 @@ else() message(STATUS "${CMAKE_CXX_COMPILER_ID} not recognized, no flags added") endif() -#add_compile_options(-fsanitize=address) -#add_link_options(-fsanitize=address) -#add_compile_options(-fsanitize=undefined) -#add_link_options(-fsanitize=undefined) +add_compile_options(-fsanitize=address) +add_link_options(-fsanitize=address) +add_compile_options(-fsanitize=undefined) +add_link_options(-fsanitize=undefined) # Check if this project is the top directory or build type is Debug # If so, build executables, otherwise, only build libraries @@ -184,3 +184,9 @@ if (BUILD_BENCH) add_dependencies(bench_cc GraphZeppelin benchmark) target_link_libraries(bench_cc GraphZeppelin benchmark::benchmark xxhash) endif() + + +add_executable(omp_test + test.cpp +) +target_link_libraries(omp_test PRIVATE GraphZeppelin) \ No newline at end of file diff --git a/include/cc_alg_configuration.h b/include/cc_alg_configuration.h index e27182ca..52da61c6 100644 --- a/include/cc_alg_configuration.h +++ b/include/cc_alg_configuration.h @@ -31,9 +31,6 @@ class CCAlgConfiguration { friend std::ostream& operator<< (std::ostream &out, const CCAlgConfiguration &conf); - // no use of equal operator - CCAlgConfiguration& operator=(const CCAlgConfiguration &) = delete; - // moving and copying allowed CCAlgConfiguration(const CCAlgConfiguration &oth) = default; CCAlgConfiguration (CCAlgConfiguration &&) = default; diff --git a/include/cc_sketch_alg.h b/include/cc_sketch_alg.h index 40bf6269..1183c48c 100644 --- a/include/cc_sketch_alg.h +++ b/include/cc_sketch_alg.h @@ -1,6 +1,6 @@ #pragma once -#include // REMOVE LATER +#include #include #include #include @@ -27,12 +27,39 @@ class UpdateLockedException : public std::exception { } }; +struct MergeInstr { + node_id_t root; + node_id_t child; + + inline bool operator< (const MergeInstr &oth) const { + if (root == oth.root) + return child < oth.child; + return root < oth.root; + } +}; + +struct alignas(64) GlobalMergeData { + Sketch sketch; + std::mutex mtx; + size_t num_merge_needed = -1; + size_t num_merge_done = 0; + + GlobalMergeData(node_id_t num_nodes, size_t seed) + : sketch(Sketch::calc_vector_length(num_nodes), seed, Sketch::calc_cc_samples(num_nodes)) {} + + GlobalMergeData(const GlobalMergeData&& other) + : sketch(other.sketch) { + num_merge_needed = other.num_merge_needed; + num_merge_done = other.num_merge_done; + } +}; + /** * Algorithm for computing connected components on undirected graph streams * (no self-edges or multi-edges) */ class CCSketchAlg { - protected: + private: node_id_t num_nodes; size_t seed; bool update_locked = false; @@ -61,21 +88,14 @@ class CCSketchAlg { * @param query an array of sketch sample results * @param reps an array containing node indices for the representative of each supernode */ - bool sample_supernodes(std::vector &merge_instr); + bool sample_supernode(Sketch &skt); /** * @param reps set containing the roots of each supernode * @param merge_instr a list of lists of 
supernodes to be merged */ - void merge_supernodes(const size_t next_round, - const std::vector &merge_instr); - - /** - * @param reps set containing the roots of each supernode - * @param merge_instr an array where each vertex indicates its supernode root - */ - void undo_merge_supernodes(const size_t cur_round, - const std::vector &merge_instr); + bool perform_boruvka_round(const size_t cur_round, const std::vector &merge_instr, + std::vector &global_merges); /** * Main parallel algorithm utilizing Boruvka and L_0 sampling. diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index 3a434697..a236b609 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -5,6 +5,7 @@ #include #include #include +#include CCSketchAlg::CCSketchAlg(node_id_t num_nodes, CCAlgConfiguration config) : num_nodes(num_nodes), dsu(num_nodes), config(config) { @@ -131,138 +132,275 @@ void CCSketchAlg::update(GraphUpdate upd) { sketches[edge.dst]->update(static_cast(concat_pairing_fn(edge.src, edge.dst))); } -bool CCSketchAlg::sample_supernodes(std::vector &merge_instr) { - bool except = false; +// sample from a sketch that represents a supernode of vertices +// that is, 1 or more vertices merged together during Boruvka +inline bool CCSketchAlg::sample_supernode(Sketch &skt) { bool modified = false; - std::exception_ptr err; -#pragma omp parallel for default(shared) - for (node_id_t root = 0; root < num_nodes; root++) { - if (merge_instr[root] != root) { - // don't query non-roots - continue; - } + SketchSample sample = skt.sample(); - SketchSample sample_result; - - // wrap in a try/catch because exiting through exception is undefined behavior in OMP - try { - sample_result = sketches[root]->sample(); - } catch (...) { - except = true; - err = std::current_exception(); - } + Edge e = inv_concat_pairing_fn(sample.idx); + SampleResult result_type = sample.result; - Edge e = inv_concat_pairing_fn(sample_result.idx); - SampleResult result_type = sample_result.result; + // std::cout << "Sample: " << result_type << " e:" << e.src << " " << e.dst << std::endl; - if (result_type == FAIL) { - modified = true; - } else if (result_type == GOOD) { - DSUMergeRet m_ret = dsu.merge(e.src, e.dst); - if (m_ret.merged) { + if (result_type == FAIL) { + modified = true; + } else if (result_type == GOOD) { + DSUMergeRet m_ret = dsu.merge(e.src, e.dst); + if (m_ret.merged) { #ifdef VERIFY_SAMPLES_F - verifier->verify_edge(e); + verifier->verify_edge(e); #endif - modified = true; - // Update spanning forest - auto src = std::min(e.src, e.dst); - auto dst = std::max(e.src, e.dst); - { - std::unique_lock lk(spanning_forest_mtx[src]); - spanning_forest[src].insert(dst); - } - } + modified = true; + // Update spanning forest + auto src = std::min(e.src, e.dst); + auto dst = std::max(e.src, e.dst); + { + std::unique_lock lk(spanning_forest_mtx[src]); + spanning_forest[src].insert(dst); } } - - // Did one of our threads produce an exception? - if (except) std::rethrow_exception(err); + } + return modified; } -void CCSketchAlg::merge_supernodes(const size_t next_round, - const std::vector &merge_instr) { -#pragma omp parallel default(shared) +/* + * Returns the ith half-open range in the division of [0, length] into divisions segments. 
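+ * For example, with the ceil-based formula below: length = 10 and divisions = 4 give
+ * the ranges [0,3), [3,5), [5,8), [8,10), which cover [0,10) with no gaps or overlaps.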
+ */ +inline std::pair get_ith_partition(node_id_t length, size_t i, + size_t divisions) { + double div_factor = (double)length / divisions; + return {ceil(div_factor * i), ceil(div_factor * (i + 1))}; +} + +/* + * Returns the half-open range idx that contains idx + * Inverse of get_ith_partition + */ +inline size_t get_partition_idx(node_id_t length, node_id_t idx, size_t divisions) { + double div_factor = (double)length / divisions; + return idx / div_factor; +} + +inline node_id_t find_last_partition_of_root(const std::vector &merge_instr, + const node_id_t root, node_id_t min_hint, + size_t num_threads) { + node_id_t max = merge_instr.size() - 1; + node_id_t min = min_hint; + MergeInstr target = {root, (node_id_t) -1}; + + while (min < max) { + node_id_t mid = min + (max - min) / 2; + + if (merge_instr[mid] < target) { + min = mid + 1; + } else { + max = mid; + } + } + + if (merge_instr[min].root != root) + min = min - 1; + + assert(merge_instr[min].root == root); + assert(min == merge_instr.size() - 1 || merge_instr[min + 1].root > root); + return get_partition_idx(merge_instr.size(), min, num_threads); +} + +// merge the global and return if it is safe to query now +inline bool merge_global(const size_t cur_round, const Sketch &local_sketch, + GlobalMergeData &global) { + std::unique_lock lk(global.mtx); + global.sketch.range_merge(local_sketch, cur_round, 1); + ++global.num_merge_done; + assert(global.num_merge_done <= global.num_merge_needed); + + return global.num_merge_done >= global.num_merge_needed; +} + +bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, + const std::vector &merge_instr, + std::vector &global_merges) { + bool modified = false; + bool except = false; + std::exception_ptr err; + for (size_t i = 0; i < global_merges.size(); i++) { + global_merges[i].sketch.zero_contents(); + global_merges[i].num_merge_needed = -1; + global_merges[i].num_merge_done = 0; + } + + std::atomic num_query; + num_query = 0; + +#pragma omp parallel default(shared) num_threads(8) { // some thread local variables Sketch local_sketch(Sketch::calc_vector_length(num_nodes), seed, Sketch::calc_cc_samples(num_nodes)); - node_id_t cur_root = 0; - bool first_root = true; -#pragma omp for - for (node_id_t i = 0; i < num_nodes; i++) { - if (merge_instr[i] == i) continue; - - node_id_t root = merge_instr[i]; - if (root != cur_root || first_root) { - if (!first_root) { - std::unique_lock lk(sketches[cur_root]->mutex); - sketches[cur_root]->range_merge(local_sketch, next_round, 1); + + size_t thr_id = omp_get_thread_num(); + size_t num_threads = omp_get_num_threads(); + std::pair partition = get_ith_partition(num_nodes, thr_id, num_threads); + node_id_t start = partition.first; + node_id_t end = partition.second; + assert(start < end); + +#pragma omp critical + std::cout << thr_id << ": " << start << " " << end << std::endl; + + // node_id_t left_root = merge_instr[start].root; + // node_id_t right_root = merge_instr[end - 1].root; + + bool root_from_left = false; + if (start > 0) { + root_from_left = merge_instr[start - 1].root == merge_instr[start].root; + } + bool root_exits_right = false; + if (end < num_nodes) { + root_exits_right = merge_instr[end - 1].root == merge_instr[end].root; + } + + node_id_t cur_root = merge_instr[start].root; +#pragma omp critical + { + for (node_id_t i = start; i < end; i++) { + node_id_t root = merge_instr[i].root; + node_id_t child = merge_instr[i].child; + std::cout << thr_id << ": " << child << " into " << root << std::endl; + std::cout << 
"root_from_left " << root_from_left << " root_exits_right " << root_exits_right << std::endl; + + if (root != cur_root) { + if (root_from_left) { + // we hold the global for this merge + bool query_ready = merge_global(cur_round, local_sketch, global_merges[thr_id]); + if (query_ready) { + try { + num_query += 1; + if (sample_supernode(global_merges[thr_id].sketch) && !modified) modified = true; + } catch (...) { + except = true; + err = std::current_exception(); + } + } + + // set root_from_left to false + root_from_left = false; + } else { + // This is an entirely local computation + // std::cout << std::endl; + try { + num_query += 1; + if (sample_supernode(local_sketch) && !modified) modified = true; + } catch (...) { + except = true; + err = std::current_exception(); + } } + cur_root = root; local_sketch.zero_contents(); - first_root = false; } - local_sketch.range_merge(*sketches[i], next_round, 1); + // std::cout << " " << child; + local_sketch.range_merge(*sketches[child], cur_round, 1); } - if (!first_root) { - std::unique_lock lk(sketches[cur_root]->mutex); - sketches[cur_root]->range_merge(local_sketch, next_round, 1); + if (root_exits_right || root_from_left) { + // global merge where we may or may not own it + size_t global_id = find_last_partition_of_root(merge_instr, cur_root, start, num_threads); + if (!root_from_left) { + // Resolved root_from_left, so we are the first thread to encounter this root + // set the number of threads that will merge into this component + std::unique_lock lk(global_merges[global_id].mtx); + global_merges[global_id].num_merge_needed = global_id - thr_id + 1; + } + bool query_ready = merge_global(cur_round, local_sketch, global_merges[global_id]); + if (query_ready) { + try { + num_query += 1; + if (sample_supernode(global_merges[thr_id].sketch) && !modified) modified = true; + } catch (...) { + except = true; + err = std::current_exception(); + } + } + } else { + // This is an entirely local computation + // std::cout << std::endl; + try { + num_query += 1; + if (sample_supernode(local_sketch) && !modified) modified = true; + } catch (...) { + except = true; + err = std::current_exception(); + } } } -} + } + + std::cout << "Number of roots queried = " << num_query << std::endl; -void CCSketchAlg::undo_merge_supernodes(const size_t cur_round, - const std::vector &merge_instr) { - if (cur_round > 0) merge_supernodes(cur_round, merge_instr); + if (except) { + // if one of our threads produced an exception throw it here + std::rethrow_exception(err); + } + + return modified; } std::vector> CCSketchAlg::boruvka_emulation() { update_locked = true; cc_alg_start = std::chrono::steady_clock::now(); - std::vector merge_instr(num_nodes); + std::vector merge_instr(num_nodes); + + size_t num_threads = omp_get_max_threads(); + std::vector global_merges; + global_merges.reserve(num_threads); + for (size_t i = 0; i < num_threads; i++) { + global_merges.emplace_back(num_nodes, seed); + } dsu.reset(); for (node_id_t i = 0; i < num_nodes; ++i) { - merge_instr[i] = i; + merge_instr[i] = {i, i}; spanning_forest[i].clear(); } size_t round_num = 0; bool modified = true; while (true) { - // auto start = std::chrono::steady_clock::now(); - try { - modified = sample_supernodes(merge_instr); - } catch (...) 
{ - undo_merge_supernodes(round_num, merge_instr); - std::rethrow_exception(std::current_exception()); - } - // std::cout << "sample: " - // << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - // << std::endl; - - // start = std::chrono::steady_clock::now(); - undo_merge_supernodes(round_num, merge_instr); - // std::cout << "undo merge: " - // << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - // << std::endl; + auto start = std::chrono::steady_clock::now(); + modified = perform_boruvka_round(round_num, merge_instr, global_merges); + std::cout << "round: " << round_num << " = " + << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + << std::endl; if (!modified) break; - // calculate updated merge instructions + // calculate updated merge instructions for next round + start = std::chrono::steady_clock::now(); #pragma omp parallel for for (node_id_t i = 0; i < num_nodes; i++) - merge_instr[i] = dsu.find_root(i); - - // prepare for the next round by merging - // start = std::chrono::steady_clock::now(); - merge_supernodes(round_num + 1, merge_instr); - // std::cout << "merge: " - // << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - // << std::endl; + merge_instr[i] = {dsu.find_root(i), i}; + + std::sort(merge_instr.begin(), merge_instr.end()); + + size_t num_roots = 1; + size_t cur_root = merge_instr[0].root; + for (size_t i = 1; i < num_nodes; i++) { + if (merge_instr[i].root != cur_root) { + num_roots += 1; + cur_root = merge_instr[i].root; + } + } + std::cout << "Number of roots = " << num_roots << std::endl; + + + std::cout << "post round processing = " + << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + << std::endl; ++round_num; } last_query_rounds = round_num; diff --git a/src/sketch.cpp b/src/sketch.cpp index 6edbea29..9a0306b2 100644 --- a/src/sketch.cpp +++ b/src/sketch.cpp @@ -159,6 +159,9 @@ void Sketch::range_merge(const Sketch &other, size_t start_sample, size_t n_samp return; } + // update sample idx to point at beginning of this range if before it + sample_idx = std::max(sample_idx, start_sample); + // merge deterministic buffer buckets[num_buckets - 1].alpha ^= other.buckets[num_buckets - 1].alpha; buckets[num_buckets - 1].gamma ^= other.buckets[num_buckets - 1].gamma; From 23c94ff358f4df382b5db524f49120d8631c1d79 Mon Sep 17 00:00:00 2001 From: Evan West Date: Sat, 11 Nov 2023 14:18:21 -0500 Subject: [PATCH 02/37] working but somewhat slow --- CMakeLists.txt | 14 +++----- src/cc_sketch_alg.cpp | 56 +++++++++++++------------------ test/sketch_test.cpp | 2 ++ test/util/file_graph_verifier.cpp | 19 ++++++++++- 4 files changed, 47 insertions(+), 44 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cdbd929d..a66c474d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,10 +28,10 @@ else() message(STATUS "${CMAKE_CXX_COMPILER_ID} not recognized, no flags added") endif() -add_compile_options(-fsanitize=address) -add_link_options(-fsanitize=address) -add_compile_options(-fsanitize=undefined) -add_link_options(-fsanitize=undefined) +#add_compile_options(-fsanitize=address) +#add_link_options(-fsanitize=address) +#add_compile_options(-fsanitize=undefined) +#add_link_options(-fsanitize=undefined) # Check if this project is the top directory or build type is Debug # If so, build executables, otherwise, only build libraries @@ -184,9 +184,3 @@ if (BUILD_BENCH) add_dependencies(bench_cc GraphZeppelin benchmark) target_link_libraries(bench_cc 
GraphZeppelin benchmark::benchmark xxhash) endif() - - -add_executable(omp_test - test.cpp -) -target_link_libraries(omp_test PRIVATE GraphZeppelin) \ No newline at end of file diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index a236b609..439b838e 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -141,7 +141,7 @@ inline bool CCSketchAlg::sample_supernode(Sketch &skt) { Edge e = inv_concat_pairing_fn(sample.idx); SampleResult result_type = sample.result; - // std::cout << "Sample: " << result_type << " e:" << e.src << " " << e.dst << std::endl; + // std::cout << " " << result_type << " e:" << e.src << " " << e.dst << std::endl; if (result_type == FAIL) { modified = true; @@ -231,9 +231,6 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, global_merges[i].num_merge_done = 0; } - std::atomic num_query; - num_query = 0; - #pragma omp parallel default(shared) num_threads(8) { // some thread local variables @@ -247,9 +244,6 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, node_id_t end = partition.second; assert(start < end); -#pragma omp critical - std::cout << thr_id << ": " << start << " " << end << std::endl; - // node_id_t left_root = merge_instr[start].root; // node_id_t right_root = merge_instr[end - 1].root; @@ -263,21 +257,22 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, } node_id_t cur_root = merge_instr[start].root; -#pragma omp critical - { + + // std::cout << thr_id << std::endl; + // std::cout << " Component " << cur_root << ":"; for (node_id_t i = start; i < end; i++) { node_id_t root = merge_instr[i].root; node_id_t child = merge_instr[i].child; - std::cout << thr_id << ": " << child << " into " << root << std::endl; - std::cout << "root_from_left " << root_from_left << " root_exits_right " << root_exits_right << std::endl; if (root != cur_root) { if (root_from_left) { // we hold the global for this merge + // std::cout << " merge global (we own)" << std::endl; bool query_ready = merge_global(cur_round, local_sketch, global_merges[thr_id]); if (query_ready) { + // std::cout << "Performing query!"; try { - num_query += 1; + // num_query += 1; if (sample_supernode(global_merges[thr_id].sketch) && !modified) modified = true; } catch (...) { except = true; @@ -289,9 +284,9 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, root_from_left = false; } else { // This is an entirely local computation - // std::cout << std::endl; + // std::cout << " query local"; try { - num_query += 1; + // num_query += 1; if (sample_supernode(local_sketch) && !modified) modified = true; } catch (...) 
{ except = true; @@ -300,6 +295,7 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, } cur_root = root; + // std::cout << " Component " << cur_root << ":"; local_sketch.zero_contents(); } @@ -310,6 +306,7 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, if (root_exits_right || root_from_left) { // global merge where we may or may not own it size_t global_id = find_last_partition_of_root(merge_instr, cur_root, start, num_threads); + // std::cout << " merge global (" << global_id << ")" << std::endl; if (!root_from_left) { // Resolved root_from_left, so we are the first thread to encounter this root // set the number of threads that will merge into this component @@ -318,9 +315,10 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, } bool query_ready = merge_global(cur_round, local_sketch, global_merges[global_id]); if (query_ready) { + // std::cout << "Performing query!"; try { - num_query += 1; - if (sample_supernode(global_merges[thr_id].sketch) && !modified) modified = true; + // num_query += 1; + if (sample_supernode(global_merges[global_id].sketch) && !modified) modified = true; } catch (...) { except = true; err = std::current_exception(); @@ -328,9 +326,9 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, } } else { // This is an entirely local computation - // std::cout << std::endl; + // std::cout << " query local"; try { - num_query += 1; + // num_query += 1; if (sample_supernode(local_sketch) && !modified) modified = true; } catch (...) { except = true; @@ -338,9 +336,8 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, } } } - } - std::cout << "Number of roots queried = " << num_query << std::endl; + // std::cout << "Number of roots queried = " << num_query << std::endl; if (except) { // if one of our threads produced an exception throw it here @@ -385,20 +382,13 @@ std::vector> CCSketchAlg::boruvka_emulation() { for (node_id_t i = 0; i < num_nodes; i++) merge_instr[i] = {dsu.find_root(i), i}; - std::sort(merge_instr.begin(), merge_instr.end()); - - size_t num_roots = 1; - size_t cur_root = merge_instr[0].root; - for (size_t i = 1; i < num_nodes; i++) { - if (merge_instr[i].root != cur_root) { - num_roots += 1; - cur_root = merge_instr[i].root; - } - } - std::cout << "Number of roots = " << num_roots << std::endl; - + std::cout << " finding roots = " + << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + << std::endl; - std::cout << "post round processing = " + start = std::chrono::steady_clock::now(); + std::sort(merge_instr.begin(), merge_instr.end()); + std::cout << " sorting = " << std::chrono::duration(std::chrono::steady_clock::now() - start).count() << std::endl; ++round_num; diff --git a/test/sketch_test.cpp b/test/sketch_test.cpp index cdc57c13..f9bfccb8 100644 --- a/test/sketch_test.cpp +++ b/test/sketch_test.cpp @@ -444,6 +444,8 @@ TEST(SketchTestSuite, TestRawBucketUpdate) { sk2.reset_sample_state(); sample = sk2.sample(); ASSERT_EQ(sample.result, ZERO); + + delete[] copy_data; } ASSERT_GT(successes, 0); } diff --git a/test/util/file_graph_verifier.cpp b/test/util/file_graph_verifier.cpp index eb5e4eed..a212be3c 100644 --- a/test/util/file_graph_verifier.cpp +++ b/test/util/file_graph_verifier.cpp @@ -69,8 +69,25 @@ void FileGraphVerifier::verify_soln(std::vector> &retval) { auto temp {retval}; std::sort(temp.begin(),temp.end()); std::sort(kruskal_ref.begin(),kruskal_ref.end()); - if (kruskal_ref != temp) + if (kruskal_ref != temp) { + std::cout << "Provided CC:" 
<< std::endl; + for (auto cc : temp) { + for (auto v : cc) { + std::cout << " " << v; + } + std::cout << std::endl; + } + + std::cout << "Expected CC:" << std::endl; + for (auto cc : kruskal_ref) { + for (auto v : cc) { + std::cout << " " << v; + } + std::cout << std::endl; + } + throw IncorrectCCException(); + } std::cout << "Solution ok: " << retval.size() << " CCs found." << std::endl; } From 3aaaa64fad66f046b82de677a644f6d04b99f20c Mon Sep 17 00:00:00 2001 From: Evan West Date: Mon, 13 Nov 2023 19:01:53 -0500 Subject: [PATCH 03/37] improved query performance --- CMakeLists.txt | 1 + include/cc_sketch_alg.h | 6 ++++++ src/cc_sketch_alg.cpp | 36 ++++++++++++++++++++++++++++++++---- 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a66c474d..975f1a9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -184,3 +184,4 @@ if (BUILD_BENCH) add_dependencies(bench_cc GraphZeppelin benchmark) target_link_libraries(bench_cc GraphZeppelin benchmark::benchmark xxhash) endif() + diff --git a/include/cc_sketch_alg.h b/include/cc_sketch_alg.h index 1183c48c..d466dab8 100644 --- a/include/cc_sketch_alg.h +++ b/include/cc_sketch_alg.h @@ -83,6 +83,12 @@ class CCSketchAlg { Sketch **delta_sketches = nullptr; size_t num_delta_sketches; + /** + * Run the first round of Boruvka. We can do things faster here because we know there will + * be no merging we have to do. + */ + bool run_round_zero(); + /** * Update the query array with new samples * @param query an array of sketch sample results diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index 439b838e..a0bc318a 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -219,9 +219,36 @@ inline bool merge_global(const size_t cur_round, const Sketch &local_sketch, return global.num_merge_done >= global.num_merge_needed; } +// faster query procedure optimized for when we know there is no merging to do (i.e. round 0) +inline bool CCSketchAlg::run_round_zero() { + bool modified = false; + bool except = false; + std::exception_ptr err; +#pragma omp parallel for + for (node_id_t i = 0; i < num_nodes; i++) { + try { + // num_query += 1; + if (sample_supernode(*sketches[i]) && !modified) modified = true; + } catch (...) 
{ + except = true; + err = std::current_exception(); + } + } + if (except) { + // if one of our threads produced an exception throw it here + std::rethrow_exception(err); + } + + return modified; +} + bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, const std::vector &merge_instr, std::vector &global_merges) { + if (cur_round == 0) { + return run_round_zero(); + } + bool modified = false; bool except = false; std::exception_ptr err; @@ -231,7 +258,7 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, global_merges[i].num_merge_done = 0; } -#pragma omp parallel default(shared) num_threads(8) +#pragma omp parallel default(shared) { // some thread local variables Sketch local_sketch(Sketch::calc_vector_length(num_nodes), seed, @@ -379,9 +406,10 @@ std::vector> CCSketchAlg::boruvka_emulation() { // calculate updated merge instructions for next round start = std::chrono::steady_clock::now(); #pragma omp parallel for - for (node_id_t i = 0; i < num_nodes; i++) - merge_instr[i] = {dsu.find_root(i), i}; - + for (node_id_t i = 0; i < num_nodes; i++) { + node_id_t child = merge_instr[i].child; + merge_instr[i].root = dsu.find_root(child); + } std::cout << " finding roots = " << std::chrono::duration(std::chrono::steady_clock::now() - start).count() << std::endl; From ab2c6eca2e0c9cdfbbc13f6592110d0cd8751255 Mon Sep 17 00:00:00 2001 From: Evan West Date: Fri, 17 Nov 2023 13:48:49 -0500 Subject: [PATCH 04/37] remove sort bottleneck. Begin investigating post_processing as bottleneck --- include/cc_sketch_alg.h | 6 ++ src/cc_sketch_alg.cpp | 153 +++++++++++++++++++++++++++++++++------ tools/process_stream.cpp | 19 ++++- 3 files changed, 153 insertions(+), 25 deletions(-) diff --git a/include/cc_sketch_alg.h b/include/cc_sketch_alg.h index d466dab8..038ff751 100644 --- a/include/cc_sketch_alg.h +++ b/include/cc_sketch_alg.h @@ -96,6 +96,12 @@ class CCSketchAlg { */ bool sample_supernode(Sketch &skt); + + /** + * Calculate the instructions for what vertices to merge to form each component + */ + void create_merge_instructions(std::vector &merge_instr); + /** * @param reps set containing the roots of each supernode * @param merge_instr a list of lists of supernodes to be merged diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index a0bc318a..fcbe562c 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -6,6 +6,7 @@ #include #include #include +#include CCSketchAlg::CCSketchAlg(node_id_t num_nodes, CCAlgConfiguration config) : num_nodes(num_nodes), dsu(num_nodes), config(config) { @@ -269,7 +270,7 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, std::pair partition = get_ith_partition(num_nodes, thr_id, num_threads); node_id_t start = partition.first; node_id_t end = partition.second; - assert(start < end); + assert(start <= end); // node_id_t left_root = merge_instr[start].root; // node_id_t right_root = merge_instr[end - 1].root; @@ -374,7 +375,90 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, return modified; } +inline void CCSketchAlg::create_merge_instructions(std::vector &merge_instr) { + std::vector cc_prefix(num_nodes, 0); + node_id_t range_sums[omp_get_max_threads()]; + +#pragma omp parallel default(shared) + { + // thread local variables + std::unordered_map> local_ccs; + std::vector local_cc_idx; + + size_t thr_id = omp_get_thread_num(); + size_t num_threads = omp_get_num_threads(); + std::pair partition = get_ith_partition(num_nodes, thr_id, num_threads); + node_id_t start = partition.first; + node_id_t end 
= partition.second; + + for (node_id_t i = start; i < end; i++) { + node_id_t child = merge_instr[i].child; + node_id_t root = dsu.find_root(child); + if (local_ccs.count(root) == 0) { + local_ccs[root] = {child}; + } else { + local_ccs[root].push_back(child); + } + } + + // each thread loops over its local_ccs and updates cc_prefix + for (auto const &cc : local_ccs) { + node_id_t root = cc.first; + const std::vector &vertices = cc.second; + + node_id_t idx; +#pragma omp atomic capture + {idx = cc_prefix[root]; cc_prefix[root] += vertices.size(); } + + local_cc_idx.push_back(idx); + } +#pragma omp barrier + + // perform a prefix sum over cc_prefix + for (node_id_t i = start + 1; i < end; i++) { + cc_prefix[i] += cc_prefix[i-1]; + } +#pragma omp barrier + + // perform single threaded prefix sum of the resulting sums from each thread +#pragma omp single + { + range_sums[0] = 0; + for (int t = 1; t < omp_get_num_threads(); t++) { + node_id_t cur = get_ith_partition(num_nodes, t - 1, num_threads).second - 1; + range_sums[t] = cc_prefix[cur] + range_sums[t - 1]; + } + } + + // in parallel finish the prefix sums + if (thr_id > 0) { + for (node_id_t i = start; i < end; i++) { + cc_prefix[i] += range_sums[thr_id]; + } + } +#pragma omp barrier + + // Finally, write the local_ccs to the correct portion of the merge_instr array + node_id_t i = 0; + for (auto const &cc : local_ccs) { + node_id_t root = cc.first; + const std::vector &vertices = cc.second; + node_id_t thr_idx = local_cc_idx[i]; + + node_id_t placement = thr_idx; + if (root > 0) + placement += cc_prefix[root - 1]; + + for (size_t j = 0; j < vertices.size(); j++) { + merge_instr[placement + j] = {root, vertices[j]}; + } + i++; + } + } +} + std::vector> CCSketchAlg::boruvka_emulation() { + auto start = std::chrono::steady_clock::now(); update_locked = true; cc_alg_start = std::chrono::steady_clock::now(); @@ -394,10 +478,16 @@ std::vector> CCSketchAlg::boruvka_emulation() { } size_t round_num = 0; bool modified = true; + std::cout << std::endl; + std::cout << " pre boruvka processing = " + << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + << std::endl; + while (true) { - auto start = std::chrono::steady_clock::now(); + std::cout << " Round: " << round_num << std::endl; + start = std::chrono::steady_clock::now(); modified = perform_boruvka_round(round_num, merge_instr, global_merges); - std::cout << "round: " << round_num << " = " + std::cout << " perform_boruvka_round = " << std::chrono::duration(std::chrono::steady_clock::now() - start).count() << std::endl; @@ -405,37 +495,32 @@ std::vector> CCSketchAlg::boruvka_emulation() { // calculate updated merge instructions for next round start = std::chrono::steady_clock::now(); -#pragma omp parallel for - for (node_id_t i = 0; i < num_nodes; i++) { - node_id_t child = merge_instr[i].child; - merge_instr[i].root = dsu.find_root(child); - } - std::cout << " finding roots = " - << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - << std::endl; - - start = std::chrono::steady_clock::now(); - std::sort(merge_instr.begin(), merge_instr.end()); - std::cout << " sorting = " + create_merge_instructions(merge_instr); + std::cout << " create_merge_instructions = " << std::chrono::duration(std::chrono::steady_clock::now() - start).count() << std::endl; ++round_num; } + start = std::chrono::steady_clock::now(); last_query_rounds = round_num; dsu_valid = true; shared_dsu_valid = true; auto retval = cc_from_dsu(); - cc_alg_end = 
std::chrono::steady_clock::now(); update_locked = false; + std::cout << " post boruvka processing = " + << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + << std::endl; + return retval; } std::vector> CCSketchAlg::connected_components() { + cc_alg_start = std::chrono::steady_clock::now(); + // if the DSU holds the answer, use that if (shared_dsu_valid) { - cc_alg_start = std::chrono::steady_clock::now(); #ifdef VERIFY_SAMPLES_F for (node_id_t src = 0; src < num_nodes; ++src) { for (const auto &dst : spanning_forest[src]) { @@ -455,6 +540,7 @@ std::vector> CCSketchAlg::connected_components() { bool except = false; std::exception_ptr err; + auto start = std::chrono::steady_clock::now(); try { ret = boruvka_emulation(); #ifdef VERIFY_SAMPLES_F @@ -464,9 +550,11 @@ std::vector> CCSketchAlg::connected_components() { except = true; err = std::current_exception(); } + std::cout << "boruvka_emulation = " + << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + << std::endl; - // get ready for ingesting more from the stream - // reset dsu and resume graph workers + // get ready for ingesting more from the stream by resetting the sketches sample state for (node_id_t i = 0; i < num_nodes; i++) { sketches[i]->reset_sample_state(); } @@ -474,6 +562,7 @@ std::vector> CCSketchAlg::connected_components() { // check if boruvka error'd if (except) std::rethrow_exception(err); + cc_alg_end = std::chrono::steady_clock::now(); return ret; } @@ -538,13 +627,29 @@ bool CCSketchAlg::point_query(node_id_t a, node_id_t b) { return ret; } -std::vector> CCSketchAlg::cc_from_dsu() { +inline std::vector> CCSketchAlg::cc_from_dsu() { // calculate connected components using DSU structure - std::map> temp; - for (node_id_t i = 0; i < num_nodes; ++i) temp[dsu.find_root(i)].insert(i); + std::vector merge_instr(num_nodes); + for (node_id_t i = 0; i < num_nodes; ++i) { + merge_instr[i] = {i, i}; + } + + create_merge_instructions(merge_instr); + std::vector> retval; - retval.reserve(temp.size()); - for (const auto &it : temp) retval.push_back(it.second); + std::set cc; + cc.insert(merge_instr[0].child); + node_id_t cur_root = merge_instr[0].root; + for (node_id_t i = 1; i < num_nodes; i++) { + if (merge_instr[i].root != cur_root) { + retval.push_back(cc); + cc.clear(); + cur_root = merge_instr[i].root; + } + cc.insert(merge_instr[i].child); + } + retval.push_back(cc); + return retval; } diff --git a/tools/process_stream.cpp b/tools/process_stream.cpp index 87d2e28c..b5777e04 100644 --- a/tools/process_stream.cpp +++ b/tools/process_stream.cpp @@ -92,8 +92,8 @@ int main(int argc, char **argv) { auto cc_start = std::chrono::steady_clock::now(); driver.prep_query(); auto CC_num = cc_alg.connected_components().size(); - std::chrono::duration insert_time = driver.flush_end - ins_start; std::chrono::duration cc_time = std::chrono::steady_clock::now() - cc_start; + std::chrono::duration insert_time = driver.flush_end - ins_start; std::chrono::duration flush_time = driver.flush_end - driver.flush_start; std::chrono::duration cc_alg_time = cc_alg.cc_alg_end - cc_alg.cc_alg_start; @@ -108,4 +108,21 @@ int main(int argc, char **argv) { std::cout << " Boruvka's Algorithm(sec): " << cc_alg_time.count() << std::endl; std::cout << "Connected Components: " << CC_num << std::endl; std::cout << "Maximum Memory Usage(MiB): " << get_max_mem_used() << std::endl; + + + cc_start = std::chrono::steady_clock::now(); + driver.prep_query(); + CC_num = cc_alg.connected_components().size(); + cc_time = 
std::chrono::steady_clock::now() - cc_start; + insert_time = driver.flush_end - ins_start; + flush_time = driver.flush_end - driver.flush_start; + cc_alg_time = cc_alg.cc_alg_end - cc_alg.cc_alg_start; + + std::cout << "SECOND QUERY" << std::endl; + std::cout << "Total CC query latency: " << cc_time.count() << std::endl; + std::cout << " Flush Gutters(sec): " << flush_time.count() << std::endl; + std::cout << " Boruvka's Algorithm(sec): " << cc_alg_time.count() << std::endl; + std::cout << "Connected Components: " << CC_num << std::endl; + std::cout << "Maximum Memory Usage(MiB): " << get_max_mem_used() << std::endl; + } From f1ae69f7d4bf9d067137c97f0090754afdd4088d Mon Sep 17 00:00:00 2001 From: Evan West Date: Sun, 26 Nov 2023 16:30:37 -0500 Subject: [PATCH 05/37] make seed a mandatory argument of the CC algorithm --- CMakeLists.txt | 2 +- include/cc_sketch_alg.h | 14 ++-- src/cc_sketch_alg.cpp | 77 ++++++++++----------- test/{graph_test.cpp => cc_alg_test.cpp} | 45 ++++++------ tools/process_stream.cpp | 7 +- tools/statistical_testing/graph_testing.cpp | 6 +- tools/test_correctness.cpp | 7 +- 7 files changed, 86 insertions(+), 72 deletions(-) rename test/{graph_test.cpp => cc_alg_test.cpp} (90%) diff --git a/CMakeLists.txt b/CMakeLists.txt index a66c474d..e194f5b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,7 +131,7 @@ target_compile_definitions(GraphZeppelinVerifyCC PUBLIC XXH_INLINE_ALL VERIFY_SA if (BUILD_EXE) add_executable(tests test/test_runner.cpp - test/graph_test.cpp + test/cc_alg_test.cpp test/sketch_test.cpp test/dsu_test.cpp test/util_test.cpp diff --git a/include/cc_sketch_alg.h b/include/cc_sketch_alg.h index 40bf6269..5ce376d6 100644 --- a/include/cc_sketch_alg.h +++ b/include/cc_sketch_alg.h @@ -32,8 +32,8 @@ class UpdateLockedException : public std::exception { * (no self-edges or multi-edges) */ class CCSketchAlg { - protected: - node_id_t num_nodes; + private: + node_id_t num_vertices; size_t seed; bool update_locked = false; // a set containing one "representative" from each supernode @@ -94,11 +94,11 @@ class CCSketchAlg { CCAlgConfiguration config; // constructor for use when reading from a serialized file - CCSketchAlg(node_id_t num_nodes, size_t seed, std::ifstream &binary_stream, + CCSketchAlg(node_id_t num_vertices, size_t seed, std::ifstream &binary_stream, CCAlgConfiguration config); public: - CCSketchAlg(node_id_t num_nodes, CCAlgConfiguration config = CCAlgConfiguration()); + CCSketchAlg(node_id_t num_vertices, size_t seed, CCAlgConfiguration config = CCAlgConfiguration()); ~CCSketchAlg(); // construct a CC algorithm from a serialized file @@ -127,8 +127,8 @@ class CCSketchAlg { num_delta_sketches = num_workers; delta_sketches = new Sketch *[num_delta_sketches]; for (size_t i = 0; i < num_delta_sketches; i++) { - delta_sketches[i] = new Sketch(Sketch::calc_vector_length(num_nodes), seed, - Sketch::calc_cc_samples(num_nodes)); + delta_sketches[i] = new Sketch(Sketch::calc_vector_length(num_vertices), seed, + Sketch::calc_cc_samples(num_vertices)); } } @@ -211,6 +211,6 @@ class CCSketchAlg { size_t last_query_rounds = 0; // getters - inline node_id_t get_num_vertices() { return num_nodes; } + inline node_id_t get_num_vertices() { return num_vertices; } inline size_t get_seed() { return seed; } }; diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index 3a434697..59bc5180 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -6,25 +6,20 @@ #include #include -CCSketchAlg::CCSketchAlg(node_id_t num_nodes, CCAlgConfiguration 
config) - : num_nodes(num_nodes), dsu(num_nodes), config(config) { +CCSketchAlg::CCSketchAlg(node_id_t num_vertices, size_t seed, CCAlgConfiguration config) + : num_vertices(num_vertices), seed(seed), dsu(num_vertices), config(config) { representatives = new std::set(); - sketches = new Sketch *[num_nodes]; - seed = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now().time_since_epoch()) - .count(); - std::mt19937_64 r(seed); - seed = r(); - - vec_t sketch_vec_len = Sketch::calc_vector_length(num_nodes); - size_t sketch_num_samples = Sketch::calc_cc_samples(num_nodes); - for (node_id_t i = 0; i < num_nodes; ++i) { + sketches = new Sketch *[num_vertices]; + + vec_t sketch_vec_len = Sketch::calc_vector_length(num_vertices); + size_t sketch_num_samples = Sketch::calc_cc_samples(num_vertices); + for (node_id_t i = 0; i < num_vertices; ++i) { representatives->insert(i); sketches[i] = new Sketch(sketch_vec_len, seed, sketch_num_samples); } - spanning_forest = new std::unordered_set[num_nodes]; - spanning_forest_mtx = new std::mutex[num_nodes]; + spanning_forest = new std::unordered_set[num_vertices]; + spanning_forest_mtx = new std::mutex[num_vertices]; dsu_valid = true; shared_dsu_valid = true; } @@ -34,38 +29,38 @@ CCSketchAlg *CCSketchAlg::construct_from_serialized_data(const std::string &inpu double sketches_factor; auto binary_in = std::ifstream(input_file, std::ios::binary); size_t seed; - node_id_t num_nodes; + node_id_t num_vertices; binary_in.read((char *)&seed, sizeof(seed)); - binary_in.read((char *)&num_nodes, sizeof(num_nodes)); + binary_in.read((char *)&num_vertices, sizeof(num_vertices)); binary_in.read((char *)&sketches_factor, sizeof(sketches_factor)); config.sketches_factor(sketches_factor); - return new CCSketchAlg(num_nodes, seed, binary_in, config); + return new CCSketchAlg(num_vertices, seed, binary_in, config); } -CCSketchAlg::CCSketchAlg(node_id_t num_nodes, size_t seed, std::ifstream &binary_stream, +CCSketchAlg::CCSketchAlg(node_id_t num_vertices, size_t seed, std::ifstream &binary_stream, CCAlgConfiguration config) - : num_nodes(num_nodes), seed(seed), dsu(num_nodes), config(config) { + : num_vertices(num_vertices), seed(seed), dsu(num_vertices), config(config) { representatives = new std::set(); - sketches = new Sketch *[num_nodes]; + sketches = new Sketch *[num_vertices]; - vec_t sketch_vec_len = Sketch::calc_vector_length(num_nodes); - size_t sketch_num_samples = Sketch::calc_cc_samples(num_nodes); - for (node_id_t i = 0; i < num_nodes; ++i) { + vec_t sketch_vec_len = Sketch::calc_vector_length(num_vertices); + size_t sketch_num_samples = Sketch::calc_cc_samples(num_vertices); + for (node_id_t i = 0; i < num_vertices; ++i) { representatives->insert(i); sketches[i] = new Sketch(sketch_vec_len, seed, binary_stream, sketch_num_samples); } binary_stream.close(); - spanning_forest = new std::unordered_set[num_nodes]; - spanning_forest_mtx = new std::mutex[num_nodes]; + spanning_forest = new std::unordered_set[num_vertices]; + spanning_forest_mtx = new std::mutex[num_vertices]; dsu_valid = false; shared_dsu_valid = false; } CCSketchAlg::~CCSketchAlg() { - for (size_t i = 0; i < num_nodes; ++i) delete sketches[i]; + for (size_t i = 0; i < num_vertices; ++i) delete sketches[i]; delete[] sketches; if (delta_sketches != nullptr) { for (size_t i = 0; i < num_delta_sketches; i++) delete delta_sketches[i]; @@ -136,7 +131,7 @@ bool CCSketchAlg::sample_supernodes(std::vector &merge_instr) { bool modified = false; std::exception_ptr err; #pragma omp parallel for 
default(shared) - for (node_id_t root = 0; root < num_nodes; root++) { + for (node_id_t root = 0; root < num_vertices; root++) { if (merge_instr[root] != root) { // don't query non-roots continue; @@ -185,12 +180,12 @@ void CCSketchAlg::merge_supernodes(const size_t next_round, #pragma omp parallel default(shared) { // some thread local variables - Sketch local_sketch(Sketch::calc_vector_length(num_nodes), seed, - Sketch::calc_cc_samples(num_nodes)); + Sketch local_sketch(Sketch::calc_vector_length(num_vertices), seed, + Sketch::calc_cc_samples(num_vertices)); node_id_t cur_root = 0; bool first_root = true; #pragma omp for - for (node_id_t i = 0; i < num_nodes; i++) { + for (node_id_t i = 0; i < num_vertices; i++) { if (merge_instr[i] == i) continue; node_id_t root = merge_instr[i]; @@ -223,10 +218,10 @@ std::vector> CCSketchAlg::boruvka_emulation() { update_locked = true; cc_alg_start = std::chrono::steady_clock::now(); - std::vector merge_instr(num_nodes); + std::vector merge_instr(num_vertices); dsu.reset(); - for (node_id_t i = 0; i < num_nodes; ++i) { + for (node_id_t i = 0; i < num_vertices; ++i) { merge_instr[i] = i; spanning_forest[i].clear(); } @@ -254,7 +249,7 @@ std::vector> CCSketchAlg::boruvka_emulation() { // calculate updated merge instructions #pragma omp parallel for - for (node_id_t i = 0; i < num_nodes; i++) + for (node_id_t i = 0; i < num_vertices; i++) merge_instr[i] = dsu.find_root(i); // prepare for the next round by merging @@ -281,7 +276,7 @@ std::vector> CCSketchAlg::connected_components() { if (shared_dsu_valid) { cc_alg_start = std::chrono::steady_clock::now(); #ifdef VERIFY_SAMPLES_F - for (node_id_t src = 0; src < num_nodes; ++src) { + for (node_id_t src = 0; src < num_vertices; ++src) { for (const auto &dst : spanning_forest[src]) { verifier->verify_edge({src, dst}); } @@ -311,7 +306,7 @@ std::vector> CCSketchAlg::connected_components() { // get ready for ingesting more from the stream // reset dsu and resume graph workers - for (node_id_t i = 0; i < num_nodes; i++) { + for (node_id_t i = 0; i < num_vertices; i++) { sketches[i]->reset_sample_state(); } @@ -327,7 +322,7 @@ std::vector>> CCSketchAlg::calc_span std::vector>> forest; - for (node_id_t src = 0; src < num_nodes; src++) { + for (node_id_t src = 0; src < num_vertices; src++) { if (spanning_forest[src].size() > 0) { std::vector edge_list; edge_list.reserve(spanning_forest[src].size()); @@ -345,7 +340,7 @@ bool CCSketchAlg::point_query(node_id_t a, node_id_t b) { if (dsu_valid) { cc_alg_start = std::chrono::steady_clock::now(); #ifdef VERIFY_SAMPLES_F - for (node_id_t src = 0; src < num_nodes; ++src) { + for (node_id_t src = 0; src < num_vertices; ++src) { for (const auto &dst : spanning_forest[src]) { verifier->verify_edge({src, dst}); } @@ -372,7 +367,7 @@ bool CCSketchAlg::point_query(node_id_t a, node_id_t b) { // get ready for ingesting more from the stream // reset dsu and resume graph workers - for (node_id_t i = 0; i < num_nodes; i++) { + for (node_id_t i = 0; i < num_vertices; i++) { sketches[i]->reset_sample_state(); } @@ -385,7 +380,7 @@ bool CCSketchAlg::point_query(node_id_t a, node_id_t b) { std::vector> CCSketchAlg::cc_from_dsu() { // calculate connected components using DSU structure std::map> temp; - for (node_id_t i = 0; i < num_nodes; ++i) temp[dsu.find_root(i)].insert(i); + for (node_id_t i = 0; i < num_vertices; ++i) temp[dsu.find_root(i)].insert(i); std::vector> retval; retval.reserve(temp.size()); for (const auto &it : temp) retval.push_back(it.second); @@ -395,9 +390,9 @@ 
std::vector> CCSketchAlg::cc_from_dsu() { void CCSketchAlg::write_binary(const std::string &filename) { auto binary_out = std::fstream(filename, std::ios::out | std::ios::binary); binary_out.write((char *)&seed, sizeof(seed)); - binary_out.write((char *)&num_nodes, sizeof(num_nodes)); + binary_out.write((char *)&num_vertices, sizeof(num_vertices)); binary_out.write((char *)&config._sketches_factor, sizeof(config._sketches_factor)); - for (node_id_t i = 0; i < num_nodes; ++i) { + for (node_id_t i = 0; i < num_vertices; ++i) { sketches[i]->serialize(binary_out); } binary_out.close(); diff --git a/test/graph_test.cpp b/test/cc_alg_test.cpp similarity index 90% rename from test/graph_test.cpp rename to test/cc_alg_test.cpp index 380eddf2..55b567eb 100644 --- a/test/graph_test.cpp +++ b/test/cc_alg_test.cpp @@ -11,6 +11,11 @@ #include "graph_sketch_driver.h" #include "mat_graph_verifier.h" +static size_t get_seed() { + auto now = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(now.time_since_epoch()).count(); +} + /** * For many of these tests (especially for those upon very sparse and small graphs) * we allow for a certain number of failures per test. @@ -23,11 +28,11 @@ // We create this class and instantiate a paramaterized test suite so that we // can run these tests both with the GutterTree and with StandAloneGutters -class GraphTest : public testing::TestWithParam {}; -INSTANTIATE_TEST_SUITE_P(GraphTestSuite, GraphTest, +class CCAlgTest : public testing::TestWithParam {}; +INSTANTIATE_TEST_SUITE_P(CCAlgTestSuite, CCAlgTest, testing::Values(GUTTERTREE, STANDALONE, CACHETREE)); -TEST_P(GraphTest, SmallGraphConnectivity) { +TEST_P(CCAlgTest, SmallGraphConnectivity) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); const std::string fname = __FILE__; size_t pos = fname.find_last_of("\\/"); @@ -35,7 +40,7 @@ TEST_P(GraphTest, SmallGraphConnectivity) { AsciiFileStream stream{curr_dir + "/res/multiples_graph_1024.txt", false}; node_id_t num_nodes = stream.vertices(); - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; cc_alg.set_verifier( std::make_unique(1024, curr_dir + "/res/multiples_graph_1024.txt")); @@ -45,7 +50,7 @@ TEST_P(GraphTest, SmallGraphConnectivity) { ASSERT_EQ(78, cc_alg.connected_components().size()); } -TEST_P(GraphTest, TestCorrectnessOnSmallRandomGraphs) { +TEST_P(CCAlgTest, TestCorrectnessOnSmallRandomGraphs) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); int num_trials = 5; while (num_trials--) { @@ -53,7 +58,7 @@ TEST_P(GraphTest, TestCorrectnessOnSmallRandomGraphs) { AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; cc_alg.set_verifier(std::make_unique(1024, "./cumul_sample.txt")); GraphSketchDriver driver(&cc_alg, &stream, driver_config); @@ -64,7 +69,7 @@ TEST_P(GraphTest, TestCorrectnessOnSmallRandomGraphs) { } } -TEST_P(GraphTest, TestCorrectnessOnSmallSparseGraphs) { +TEST_P(CCAlgTest, TestCorrectnessOnSmallSparseGraphs) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); int num_trials = 5; while (num_trials--) { @@ -72,7 +77,7 @@ TEST_P(GraphTest, TestCorrectnessOnSmallSparseGraphs) { AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; cc_alg.set_verifier(std::make_unique(1024, "./cumul_sample.txt")); GraphSketchDriver driver(&cc_alg, 
&stream, driver_config); @@ -83,7 +88,7 @@ TEST_P(GraphTest, TestCorrectnessOnSmallSparseGraphs) { } } -TEST_P(GraphTest, TestCorrectnessOfReheating) { +TEST_P(CCAlgTest, TestCorrectnessOfReheating) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); int num_trials = 5; while (num_trials--) { @@ -92,7 +97,7 @@ TEST_P(GraphTest, TestCorrectnessOfReheating) { AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; cc_alg.set_verifier(std::make_unique(1024, "./cumul_sample.txt")); GraphSketchDriver driver(&cc_alg, &stream, driver_config); @@ -114,7 +119,7 @@ TEST_P(GraphTest, TestCorrectnessOfReheating) { } // Test the multithreaded system by using multiple worker threads -TEST_P(GraphTest, MultipleWorkers) { +TEST_P(CCAlgTest, MultipleWorkers) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()).worker_threads(8); int num_trials = 5; while (num_trials--) { @@ -122,7 +127,7 @@ TEST_P(GraphTest, MultipleWorkers) { AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; cc_alg.set_verifier(std::make_unique(1024, "./cumul_sample.txt")); GraphSketchDriver driver(&cc_alg, &stream, driver_config); @@ -132,7 +137,7 @@ TEST_P(GraphTest, MultipleWorkers) { } } -TEST_P(GraphTest, TestPointQuery) { +TEST_P(CCAlgTest, TestPointQuery) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); const std::string fname = __FILE__; size_t pos = fname.find_last_of("\\/"); @@ -140,7 +145,7 @@ TEST_P(GraphTest, TestPointQuery) { AsciiFileStream stream{curr_dir + "/res/multiples_graph_1024.txt", false}; node_id_t num_nodes = stream.vertices(); - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; cc_alg.set_verifier( std::make_unique(1024, curr_dir + "/res/multiples_graph_1024.txt")); @@ -164,7 +169,7 @@ TEST_P(GraphTest, TestPointQuery) { } } -TEST(GraphTest, TestQueryDuringStream) { +TEST(CCAlgTest, TestQueryDuringStream) { auto driver_config = DriverConfiguration().gutter_sys(STANDALONE); auto cc_config = CCAlgConfiguration(); generate_stream({1024, 0.002, 0.5, 0, "./sample.txt", "./cumul_sample.txt"}); @@ -174,7 +179,7 @@ TEST(GraphTest, TestQueryDuringStream) { edge_id_t num_edges = stream.edges(); edge_id_t tenth = num_edges / 10; - CCSketchAlg cc_alg{num_nodes, cc_config}; + CCSketchAlg cc_alg{num_nodes, get_seed(), cc_config}; GraphSketchDriver driver(&cc_alg, &stream, driver_config); MatGraphVerifier verify(num_nodes); @@ -210,9 +215,9 @@ TEST(GraphTest, TestQueryDuringStream) { cc_alg.connected_components(); } -TEST(GraphTest, EagerDSUTest) { +TEST(CCAlgTest, EagerDSUTest) { node_id_t num_nodes = 100; - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; MatGraphVerifier verify(num_nodes); // This should be a spanning forest edge @@ -258,7 +263,7 @@ TEST(GraphTest, EagerDSUTest) { cc_alg.connected_components(); } -TEST(GraphTest, MTStreamWithMultipleQueries) { +TEST(CCAlgTest, MTStreamWithMultipleQueries) { for (int t = 1; t <= 3; t++) { auto driver_config = DriverConfiguration().gutter_sys(STANDALONE); @@ -273,7 +278,7 @@ TEST(GraphTest, MTStreamWithMultipleQueries) { std::cerr << num_nodes << " " << num_edges << std::endl; - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; GraphSketchDriver driver(&cc_alg, &stream, driver_config, 4); MatGraphVerifier verify(num_nodes); diff --git 
a/tools/process_stream.cpp b/tools/process_stream.cpp index 87d2e28c..28f46361 100644 --- a/tools/process_stream.cpp +++ b/tools/process_stream.cpp @@ -12,6 +12,11 @@ static double get_max_mem_used() { return (double) data.ru_maxrss / 1024.0; } +static size_t get_seed() { + auto now = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(now.time_since_epoch()).count(); +} + /* * Function which is run in a seperate thread and will query * the graph for the number of updates it has processed @@ -81,7 +86,7 @@ int main(int argc, char **argv) { auto driver_config = DriverConfiguration().gutter_sys(CACHETREE).worker_threads(num_threads); auto cc_config = CCAlgConfiguration().batch_factor(1); - CCSketchAlg cc_alg{num_nodes, cc_config}; + CCSketchAlg cc_alg{num_nodes, get_seed(), cc_config}; GraphSketchDriver driver{&cc_alg, &stream, driver_config, reader_threads}; auto ins_start = std::chrono::steady_clock::now(); diff --git a/tools/statistical_testing/graph_testing.cpp b/tools/statistical_testing/graph_testing.cpp index cddb7053..dee89912 100644 --- a/tools/statistical_testing/graph_testing.cpp +++ b/tools/statistical_testing/graph_testing.cpp @@ -6,11 +6,15 @@ #include "file_graph_verifier.h" static DriverConfiguration driver_config; +static size_t get_seed() { + auto now = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(now.time_since_epoch()).count(); +} static inline int do_run() { AsciiFileStream stream{"./sample.txt"}; node_id_t n = stream.vertices(); - CCSketchAlg cc_alg{n}; + CCSketchAlg cc_alg{n, get_seed()}; cc_alg.set_verifier(std::make_unique(n, "./cumul_sample.txt")); GraphSketchDriver driver(&cc_alg, &stream, driver_config); driver.process_stream_until(END_OF_STREAM); diff --git a/tools/test_correctness.cpp b/tools/test_correctness.cpp index b80ca204..00e7f822 100644 --- a/tools/test_correctness.cpp +++ b/tools/test_correctness.cpp @@ -8,6 +8,11 @@ #include #include +static size_t get_seed() { + auto now = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(now.time_since_epoch()).count(); +} + struct CorrectnessResults { size_t num_failures = 0; std::vector num_round_hist; @@ -43,7 +48,7 @@ CorrectnessResults test_path_correctness(size_t num_vertices, size_t num_graphs, verifier.reset_cc_state(); for (size_t s = 0; s < samples_per_graph; s++) { - CCSketchAlg cc_alg(num_vertices); + CCSketchAlg cc_alg(num_vertices, get_seed()); node_id_t cur_node = copy_vertices[0]; for (size_t i = 1; i < num_vertices; i++) { From ec061c758d8f1bfcb978ee5ed254c17b32c1b400 Mon Sep 17 00:00:00 2001 From: Evan West Date: Tue, 28 Nov 2023 20:29:47 -0500 Subject: [PATCH 06/37] oops forgot some files --- include/return_types.h | 32 ++++++++++++++++++++++++++++++++ src/return_types.cpp | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 include/return_types.h create mode 100644 src/return_types.cpp diff --git a/include/return_types.h b/include/return_types.h new file mode 100644 index 00000000..b1fa1257 --- /dev/null +++ b/include/return_types.h @@ -0,0 +1,32 @@ +// This file defines the query return types from the cc algorithm class +#include +#include +#include +#include + +#include "dsu.h" +#include "types.h" + +// This class defines the connected components of a graph +class ConnectedComponents { + private: + node_id_t *parent_arr; + node_id_t num_vertices; + node_id_t num_cc; + + public: + ConnectedComponents(node_id_t num_vertices, DisjointSetUnion_MT &dsu); + 
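+  // Frees parent_arr, which the constructor allocates with new[]; the class manages
+  // this array by hand rather than through a container.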
~ConnectedComponents(); + + std::vector> get_component_sets(); + bool is_connected(node_id_t a, node_id_t b) { return parent_arr[a] == parent_arr[b]; } + node_id_t size() { return num_cc; } +}; + +// // This class defines a spanning forest of a graph +// class SpanningForest { +// private: + +// public: + +// }; diff --git a/src/return_types.cpp b/src/return_types.cpp new file mode 100644 index 00000000..9698f7a3 --- /dev/null +++ b/src/return_types.cpp @@ -0,0 +1,32 @@ +#include "return_types.h" + +#include + +ConnectedComponents::ConnectedComponents(node_id_t num_vertices, DisjointSetUnion_MT &dsu) + : parent_arr(new node_id_t[num_vertices]), num_vertices(num_vertices) { + + size_t temp_cc = 0; +#pragma omp parallel for + for (node_id_t i = 0; i < num_vertices; i++) { + parent_arr[i] = dsu.find_root(i); + if (parent_arr[i] == i) { +#pragma omp atomic update + temp_cc += 1; + } + } + + num_cc = temp_cc; +} + +ConnectedComponents::~ConnectedComponents() { + delete[] parent_arr; +} + +std::vector> ConnectedComponents::get_component_sets() { + std::map> temp; + for (node_id_t i = 0; i < num_vertices; ++i) temp[parent_arr[i]].insert(i); + std::vector> retval; + retval.reserve(temp.size()); + for (const auto &it : temp) retval.push_back(it.second); + return retval; +} From c23514c2630f728b8d039568131bf2836800c3c4 Mon Sep 17 00:00:00 2001 From: Evan West Date: Wed, 29 Nov 2023 13:40:53 -0500 Subject: [PATCH 07/37] add spanning forest return type --- include/cc_sketch_alg.h | 2 +- include/return_types.h | 16 +++--- src/cc_sketch_alg.cpp | 112 ++++++++++++++++++---------------------- src/return_types.cpp | 18 +++++-- 4 files changed, 75 insertions(+), 73 deletions(-) diff --git a/include/cc_sketch_alg.h b/include/cc_sketch_alg.h index ac464f6e..a58f2043 100644 --- a/include/cc_sketch_alg.h +++ b/include/cc_sketch_alg.h @@ -218,7 +218,7 @@ class CCSketchAlg { * that is, unless you really know what you're doing. 
* @return an adjacency list representation of the spanning forest of the graph */ - std::vector>> calc_spanning_forest(); + SpanningForest calc_spanning_forest(); #ifdef VERIFY_SAMPLES_F std::unique_ptr verifier; diff --git a/include/return_types.h b/include/return_types.h index b1fa1257..d7967c2e 100644 --- a/include/return_types.h +++ b/include/return_types.h @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "dsu.h" @@ -23,10 +24,13 @@ class ConnectedComponents { node_id_t size() { return num_cc; } }; -// // This class defines a spanning forest of a graph -// class SpanningForest { -// private: - -// public: +// This class defines a spanning forest of a graph +class SpanningForest { + private: + std::vector edges; + node_id_t num_vertices; + public: + SpanningForest(node_id_t num_vertices, const std::unordered_set *spanning_forest); -// }; + const std::vector& get_edges() { return edges; } +}; diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index 9b4d0779..78fc54a4 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -453,7 +453,7 @@ inline void CCSketchAlg::create_merge_instructions(std::vector &merg } void CCSketchAlg::boruvka_emulation() { - auto start = std::chrono::steady_clock::now(); + // auto start = std::chrono::steady_clock::now(); update_locked = true; cc_alg_start = std::chrono::steady_clock::now(); @@ -473,27 +473,27 @@ void CCSketchAlg::boruvka_emulation() { } size_t round_num = 0; bool modified = true; - std::cout << std::endl; - std::cout << " pre boruvka processing = " - << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - << std::endl; + // std::cout << std::endl; + // std::cout << " pre boruvka processing = " + // << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + // << std::endl; while (true) { - std::cout << " Round: " << round_num << std::endl; - start = std::chrono::steady_clock::now(); + // std::cout << " Round: " << round_num << std::endl; + // start = std::chrono::steady_clock::now(); modified = perform_boruvka_round(round_num, merge_instr, global_merges); - std::cout << " perform_boruvka_round = " - << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - << std::endl; + // std::cout << " perform_boruvka_round = " + // << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + // << std::endl; if (!modified) break; // calculate updated merge instructions for next round - start = std::chrono::steady_clock::now(); + // start = std::chrono::steady_clock::now(); create_merge_instructions(merge_instr); - std::cout << " create_merge_instructions = " - << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - << std::endl; + // std::cout << " create_merge_instructions = " + // << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + // << std::endl; ++round_num; } last_query_rounds = round_num; @@ -521,11 +521,11 @@ ConnectedComponents CCSketchAlg::connected_components() { bool except = false; std::exception_ptr err; try { - auto start = std::chrono::steady_clock::now(); + // auto start = std::chrono::steady_clock::now(); boruvka_emulation(); - std::cout << " boruvka's algorithm = " - << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - << std::endl; + // std::cout << " boruvka's algorithm = " + // << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + // << std::endl; } catch (...) 
{ except = true; err = std::current_exception(); @@ -548,29 +548,18 @@ ConnectedComponents CCSketchAlg::connected_components() { return cc; } -std::vector>> CCSketchAlg::calc_spanning_forest() { +SpanningForest CCSketchAlg::calc_spanning_forest() { // TODO: Could probably optimize this a bit by writing new code connected_components(); - - std::vector>> forest; - - for (node_id_t src = 0; src < num_vertices; src++) { - if (spanning_forest[src].size() > 0) { - std::vector edge_list; - edge_list.reserve(spanning_forest[src].size()); - for (node_id_t dst : spanning_forest[src]) { - edge_list.push_back(dst); - } - forest.push_back({src, edge_list}); - } - } - return forest; + + return SpanningForest(num_vertices, spanning_forest); } bool CCSketchAlg::point_query(node_id_t a, node_id_t b) { - // DSU check before calling force_flush() + cc_alg_start = std::chrono::steady_clock::now(); + + // if the DSU holds the answer, use that if (dsu_valid) { - cc_alg_start = std::chrono::steady_clock::now(); #ifdef VERIFY_SAMPLES_F for (node_id_t src = 0; src < num_vertices; ++src) { for (const auto &dst : spanning_forest[src]) { @@ -578,37 +567,38 @@ bool CCSketchAlg::point_query(node_id_t a, node_id_t b) { } } #endif - bool retval = (dsu.find_root(a) == dsu.find_root(b)); - cc_alg_end = std::chrono::steady_clock::now(); - return retval; - } + } + // The DSU does not hold the answer, make it so + else { + bool except = false; + std::exception_ptr err; + bool ret; + try { + boruvka_emulation(); + } catch (...) { + except = true; + err = std::current_exception(); + } - bool except = false; - std::exception_ptr err; - bool ret; - try { - boruvka_emulation(); -#ifdef VERIFY_SAMPLES_F - ConnectedComponents cc(num_vertices, dsu); - auto cc_sets = cc.get_component_sets(); - verifier->verify_soln(cc_sets); -#endif - ret = (dsu.find_root(a) == dsu.find_root(b)); - } catch (...) 
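// Aside: a minimal, hypothetical usage sketch (not taken from this patch) of the two return types
// introduced above, assuming the CCSketchAlg interface shown in these hunks (a {num_nodes, seed}
// constructor, connected_components() returning ConnectedComponents, calc_spanning_forest()
// returning SpanningForest). Variable names here are illustrative only.
//
//   CCSketchAlg alg{num_nodes, seed};
//   /* ... ingest the stream, e.g. through a GraphSketchDriver ... */
//   ConnectedComponents cc = alg.connected_components();
//   std::cout << "components: " << cc.size() << std::endl;   // number of connected components
//   bool same = cc.is_connected(0, 5);                        // O(1) check against parent_arr
//   SpanningForest sf = alg.calc_spanning_forest();
//   for (const Edge &e : sf.get_edges())                      // spanning forest as an edge list
//     std::cout << e.src << " " << e.dst << std::endl;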
{ - except = true; - err = std::current_exception(); - } + // get ready for ingesting more from the stream + // reset dsu and resume graph workers + for (node_id_t i = 0; i < num_vertices; i++) { + sketches[i]->reset_sample_state(); + } - // get ready for ingesting more from the stream - // reset dsu and resume graph workers - for (node_id_t i = 0; i < num_vertices; i++) { - sketches[i]->reset_sample_state(); + // check if boruvka errored + if (except) std::rethrow_exception(err); } - // check if boruvka errored - if (except) std::rethrow_exception(err); +#ifdef VERIFY_SAMPLES_F + ConnectedComponents cc(num_vertices, dsu); + auto cc_sets = cc.get_component_sets(); + verifier->verify_soln(cc_sets); +#endif - return ret; + bool retval = (dsu.find_root(a) == dsu.find_root(b)); + cc_alg_end = std::chrono::steady_clock::now(); + return retval; } void CCSketchAlg::write_binary(const std::string &filename) { diff --git a/src/return_types.cpp b/src/return_types.cpp index 9698f7a3..8b2726dc 100644 --- a/src/return_types.cpp +++ b/src/return_types.cpp @@ -2,9 +2,9 @@ #include -ConnectedComponents::ConnectedComponents(node_id_t num_vertices, DisjointSetUnion_MT &dsu) +ConnectedComponents::ConnectedComponents(node_id_t num_vertices, + DisjointSetUnion_MT &dsu) : parent_arr(new node_id_t[num_vertices]), num_vertices(num_vertices) { - size_t temp_cc = 0; #pragma omp parallel for for (node_id_t i = 0; i < num_vertices; i++) { @@ -18,9 +18,7 @@ ConnectedComponents::ConnectedComponents(node_id_t num_vertices, DisjointSetUnio num_cc = temp_cc; } -ConnectedComponents::~ConnectedComponents() { - delete[] parent_arr; -} +ConnectedComponents::~ConnectedComponents() { delete[] parent_arr; } std::vector> ConnectedComponents::get_component_sets() { std::map> temp; @@ -30,3 +28,13 @@ std::vector> ConnectedComponents::get_component_sets() { for (const auto &it : temp) retval.push_back(it.second); return retval; } + +SpanningForest::SpanningForest(node_id_t num_vertices, + const std::unordered_set *spanning_forest) + : num_vertices(num_vertices) { + for (node_id_t src = 0; src < num_vertices; src++) { + for (node_id_t dst : spanning_forest[src]) { + edges.push_back({src, dst}); + } + } +} From 3fbf54e43c2fb8776287d8aa8c1b0e6b132eca84 Mon Sep 17 00:00:00 2001 From: Evan West Date: Fri, 1 Dec 2023 12:58:40 -0500 Subject: [PATCH 08/37] fewer rounds from math --- include/sketch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/sketch.h b/include/sketch.h index 7dc09bce..44db971c 100644 --- a/include/sketch.h +++ b/include/sketch.h @@ -170,10 +170,11 @@ class Sketch { #ifdef L0_SAMPLING static constexpr size_t default_cols_per_sample = 7; + // NOTE: can improve this but leaving for comparison purposes static constexpr double num_samples_div = log2(3) - 1; #else static constexpr size_t default_cols_per_sample = 1; - static constexpr double num_samples_div = log2(3) - 1; + static constexpr double num_samples_div = 1 - log2(2 - 0.8); #endif }; From 93d153ebdfa1e4a70bbe205f6114a2140eda796d Mon Sep 17 00:00:00 2001 From: Evan West Date: Sat, 2 Dec 2023 16:43:17 -0500 Subject: [PATCH 09/37] fix tests --- test/sketch_test.cpp | 51 +++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/test/sketch_test.cpp b/test/sketch_test.cpp index cdc57c13..14847a82 100644 --- a/test/sketch_test.cpp +++ b/test/sketch_test.cpp @@ -2,11 +2,17 @@ #include "bucket.h" #include #include +#include #include "testing_vector.h" +static size_t get_seed() { + auto now = 
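// Aside: a rough worked check (not part of the patch) of the num_samples_div change above,
// assuming num_samples = ceil(log2(n) / num_samples_div) as in Sketch::calc_cc_samples.
//   old: num_samples_div = log2(3) - 1       ~= 0.585  ->  n = 2^20 gives ceil(20 / 0.585) = 35 samples
//   new: num_samples_div = 1 - log2(2 - 0.8) ~= 0.737  ->  n = 2^20 gives ceil(20 / 0.737) = 28 samples
// i.e. the new constant budgets noticeably fewer Boruvka rounds' worth of samples per sketch.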
std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(now.time_since_epoch()).count(); +} + static const int num_columns = 7; TEST(SketchTestSuite, TestSampleResults) { - Sketch sketch1(10, rand(), 1, num_columns); + Sketch sketch1(10, get_seed(), 1, num_columns); ASSERT_EQ(sketch1.sample().result, ZERO); sketch1.update(1); ASSERT_THROW(sketch1.sample(), OutOfSamplesException); @@ -59,7 +65,7 @@ TEST(SketchTestSuite, TestSampleResults) { TEST(SketchTestSuite, GIVENonlyIndexZeroUpdatedTHENitWorks) { // GIVEN only the index 0 is updated - Sketch sketch(40, rand(), 1, num_columns); + Sketch sketch(40, get_seed(), 1, num_columns); sketch.update(0); sketch.update(0); sketch.update(0); @@ -84,7 +90,7 @@ void test_sketch_sample(unsigned long num_sketches, unsigned long sample_incorrect_failures = 0; for (unsigned long i = 0; i < num_sketches; i++) { Testing_Vector test_vec = Testing_Vector(vec_size, num_updates); - Sketch sketch(vec_size, rand() + i * 7, 1, num_columns); + Sketch sketch(vec_size, get_seed() + i * 7, 1, num_columns); auto start_time = std::chrono::steady_clock::now(); for (unsigned long j = 0; j < num_updates; j++){ sketch.update(test_vec.get_update(j)); @@ -151,7 +157,7 @@ void test_sketch_merge(unsigned long num_sketches, unsigned long all_bucket_failures = 0; unsigned long sample_incorrect_failures = 0; for (unsigned long i = 0; i < num_sketches; i++){ - const long seed = rand() + 7 * i; + const long seed = get_seed() + 7 * i; Sketch sketch1(vec_size, seed, 1, num_columns); Sketch sketch2(vec_size, seed, 1, num_columns); Testing_Vector test_vec1 = Testing_Vector(vec_size, num_updates); @@ -209,8 +215,8 @@ TEST(SketchTestSuite, TestSketchMerge) { } TEST(SketchTestSuite, TestSketchRangeMerge) { - Sketch skt1(2048, rand(), 10, 3); - Sketch skt2(2048, rand(), 10, 3); + Sketch skt1(2048, get_seed(), 10, 3); + Sketch skt2(2048, get_seed(), 10, 3); skt1.sample(); skt1.range_merge(skt2, 1, 1); @@ -235,13 +241,14 @@ void test_sketch_large(unsigned long vec_size, unsigned long num_updates) { // therefore we need to ensure that in this test that we don't do more than that num_updates = std::min(num_updates, vec_size / 4); - Sketch sketch(vec_size, rand(), 1, 2 * log2(vec_size)); - //Keep seed for replaying update stream later - unsigned long seed = rand(); - srand(seed); + // Keep seed for replaying update stream later + unsigned long seed = get_seed(); + Sketch sketch(vec_size, seed, 1, 2 * log2(vec_size)); + + std::mt19937_64 gen(seed); auto start_time = std::chrono::steady_clock::now(); for (unsigned long j = 0; j < num_updates; j++){ - sketch.update(static_cast(rand() % vec_size)); + sketch.update(static_cast(gen() % vec_size)); } std::cout << "Updating vector of size " << vec_size << " with " << num_updates << " updates took " << std::chrono::duration( @@ -255,10 +262,10 @@ void test_sketch_large(unsigned long vec_size, unsigned long num_updates) { //Multiple queries shouldn't happen, but if we do get here fail test ASSERT_LT(res_idx, vec_size) << "Sampled index out of bounds"; //Replay update stream, keep track of the sampled index - srand(seed); + gen = std::mt19937_64(seed); bool actual_delta = false; for (unsigned long j = 0; j < num_updates; j++){ - vec_t update_idx = static_cast(rand() % vec_size); + vec_t update_idx = static_cast(gen() % vec_size); if (update_idx == res_idx) { actual_delta = !actual_delta; } @@ -291,7 +298,7 @@ TEST(SketchTestSuite, TestSerialization) { unsigned long vec_size = 1 << 10; unsigned long num_updates = 10000; Testing_Vector 
test_vec = Testing_Vector(vec_size, num_updates); - auto seed = rand(); + auto seed = get_seed(); Sketch sketch(vec_size, seed, 3, num_columns); for (unsigned long j = 0; j < num_updates; j++){ sketch.update(test_vec.get_update(j)); @@ -323,7 +330,7 @@ TEST(SketchTestSuite, TestExhaustiveQuery) { size_t runs = 10; size_t vec_size = 2000; for (size_t i = 0; i < runs; i++) { - Sketch sketch(vec_size, rand() + 7 * i, 1, log2(vec_size)); + Sketch sketch(vec_size, get_seed() + 7 * i, 1, log2(vec_size)); sketch.update(1); sketch.update(2); @@ -355,8 +362,8 @@ TEST(SketchTestSuite, TestExhaustiveQuery) { } TEST(SketchTestSuite, TestSampleInsertGrinder) { - size_t nodes = 1024; - Sketch sketch(Sketch::calc_vector_length(nodes), rand(), Sketch::calc_cc_samples(nodes)); + size_t nodes = 4096; + Sketch sketch(Sketch::calc_vector_length(nodes), get_seed(), Sketch::calc_cc_samples(nodes)); for (size_t src = 0; src < nodes - 1; src++) { for (size_t dst = src + 7; dst < nodes; dst += 7) { @@ -376,12 +383,12 @@ TEST(SketchTestSuite, TestSampleInsertGrinder) { Edge e = inv_concat_pairing_fn(ret.idx); ASSERT_EQ((e.dst - e.src) % 7, 0); } - ASSERT_GE(successes, log2(nodes)); + ASSERT_GE(successes, 2); } TEST(SketchTestSuite, TestSampleDeleteGrinder) { - size_t nodes = 1024; - Sketch sketch(Sketch::calc_vector_length(nodes), rand(), Sketch::calc_cc_samples(nodes)); + size_t nodes = 4096; + Sketch sketch(Sketch::calc_vector_length(nodes), get_seed(), Sketch::calc_cc_samples(nodes)); // insert for (size_t src = 0; src < nodes - 1; src++) { @@ -410,13 +417,13 @@ TEST(SketchTestSuite, TestSampleDeleteGrinder) { ASSERT_EQ((e.dst - e.src) % 7, 0); ASSERT_EQ(e.src % 2, 0); } - ASSERT_GE(successes, log2(nodes)); + ASSERT_GE(successes, 2); } TEST(SketchTestSuite, TestRawBucketUpdate) { size_t successes = 0; for (size_t t = 0; t < 20; t++) { - size_t seed = rand() + 5 * t; + size_t seed = get_seed() + 5 * t; Sketch sk1(4096, seed, 1, 1); Sketch sk2(4096, seed, 1, 1); From 8546f1da03e9ceca8a4be8a5c32a2fb4f80cfe43 Mon Sep 17 00:00:00 2001 From: Daniel DeLayo Date: Mon, 22 Jan 2024 11:54:49 -0500 Subject: [PATCH 10/37] statistical testing --- CMakeLists.txt | 5 + tools/statistical_testing/sketch_testing.cpp | 147 ++++++++++++++++++ .../statistical_testing/sum_sketch_testing.py | 54 +++++++ 3 files changed, 206 insertions(+) create mode 100644 tools/statistical_testing/sketch_testing.cpp create mode 100644 tools/statistical_testing/sum_sketch_testing.py diff --git a/CMakeLists.txt b/CMakeLists.txt index a66c474d..d775131d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -148,6 +148,11 @@ if (BUILD_EXE) test/util/graph_gen.cpp) add_dependencies(statistical_test GraphZeppelinVerifyCC) target_link_libraries(statistical_test PRIVATE GraphZeppelinVerifyCC) + + add_executable(statistical_sketch_test + tools/statistical_testing/sketch_testing.cpp) + add_dependencies(statistical_sketch_test GraphZeppelinVerifyCC) + target_link_libraries(statistical_sketch_test PRIVATE GraphZeppelinVerifyCC) # executables for experiment/benchmarking add_executable(efficient_gen diff --git a/tools/statistical_testing/sketch_testing.cpp b/tools/statistical_testing/sketch_testing.cpp new file mode 100644 index 00000000..3329a429 --- /dev/null +++ b/tools/statistical_testing/sketch_testing.cpp @@ -0,0 +1,147 @@ +#include +#include +#include +#include + +#include "sketch.h" +#include "cc_alg_configuration.h" + +std::random_device dev; +std::mt19937_64 rng(dev()); +using rand_type = std::mt19937_64::result_type; + + +rand_type gen(rand_type n) +{ + 
std::uniform_int_distribution dist(0,n-1); + return dist(rng); +} + +rand_type seed = gen(1ll << 62); + +rand_type gen_seed() +{ + //std::uniform_int_distribution dist(0,1ll << 63); + //return dist(rng); + return seed++; +} + + +enum ResultType { + R_GOOD=0, + R_BAD=1, + R_HASHFAIL=2 +}; + +ResultType test_z(rand_type n, rand_type z) +{ + assert(z >= 1); + assert(z <= n*n); + Sketch sketch(n, gen_seed(), 1, 1); + + // Generate z edges and track them + /*std::unordered_set edges; + while (edges.size() < z) + { + edges.insert(gen(n*n)); + } + + for (const auto& r : edges) + { + sketch.update(r); + } + */ + for (rand_type i = 0; i < z; i++) + sketch.update(i); + // Sample the sketches + SketchSample query_ret = sketch.sample(); + SampleResult ret_code = query_ret.result; + + assert(ret_code != ZERO); + + if (ret_code == GOOD) + { + //if (edges.find(res) == edges.end()) + // return R_HASHFAIL; + return R_GOOD; + } + return R_BAD; +} + +std::pair fit_to_binomial(rand_type ngood, rand_type ntrials) +{ + double p = ngood / (1.0 * ntrials); + double variance = ntrials * p * (1-p); + double stddev = sqrt(variance); + return std::pair(p, stddev/ntrials); +} + +std::pair test_nz_pair(rand_type n, rand_type z) +{ + int ntrials = 500; + int results[3] = {0,0,0}; + for (int i = 0; i < ntrials; i++) + results[test_z(n, z)]++; + //std::cout << "GOOD: " << results[0] << std::endl; + //std::cout << "BAD: " << results[1] << std::endl; + //std::cout << "HASHFAIL: " << results[2] << std::endl; + int ngood = results[0]; + // Fit to binomial + return fit_to_binomial(ngood, ntrials); +} + +void test_n_one(rand_type n, rand_type* good, rand_type max_z) +{ + Sketch sketch(n*n, gen_seed(), 1, 1); + for (rand_type i = 0; i < max_z; i++) + { + sketch.update(i); + // Sample the sketches + SketchSample query_ret = sketch.sample(); + SampleResult ret_code = query_ret.result; + //assert(ret_code != ZERO); + if (ret_code == GOOD) + good[i]++; + sketch.reset_sample_state(); + } +} + +void test_n(rand_type n) +{ + int ntrials = 500; + rand_type max_z = 1+(n*n)/4; + // Default init to 0? 
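// Aside: a worked instance of fit_to_binomial above (numbers are illustrative, not from a real run).
// With ntrials = 500 and ngood = 390: p = 390/500 = 0.78, variance = 500 * 0.78 * 0.22 = 85.8,
// stddev = sqrt(85.8) ~= 9.26, so the reported error bar stddev/ntrials ~= 0.0185 is the standard
// error of the estimated success probability. test_n() then tracks the index minimizing
// p - 3 * (stddev/ntrials), and the companion sum_sketch_testing.py script reports what fraction
// of indices clear a 0.71 target.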
+ rand_type* good = new rand_type[max_z]; + for (int i = 0; i < ntrials; i++) + test_n_one(n, good, max_z); + + double worst_3sigma = 1; + rand_type worst_i = 0; + for (rand_type i = 0; i < max_z; i++) + { + auto pair = fit_to_binomial(good[i], ntrials); + double ans = pair.first; + double stddev = pair.second; + std::cout << i << ": " << ans << " +- " << stddev << std::endl; + if (ans - 3 * stddev < worst_3sigma) + { + worst_i = i; + worst_3sigma = ans-3*stddev; + } + } + auto pair = fit_to_binomial(good[worst_i], ntrials); + double ans = pair.first; + double stddev = pair.second; + std::cout << "WORST" << std::endl; + std::cout << worst_i << ": " << ans << " +- " << stddev << std::endl; + + delete[] good; +} + +int main() +{ + std::cout << CCAlgConfiguration() << std::endl; + rand_type n = 1 << 13; + std::cout << "TESTING: " << n << " TO " << (n*n)/4 << std::endl; + test_n(n); +} diff --git a/tools/statistical_testing/sum_sketch_testing.py b/tools/statistical_testing/sum_sketch_testing.py new file mode 100644 index 00000000..01052777 --- /dev/null +++ b/tools/statistical_testing/sum_sketch_testing.py @@ -0,0 +1,54 @@ +import sys +import re + +prob = r"([0-9]*[.])?[0-9]+" +which = r"[0-9]+" + +pattern = re.compile("(" + which + "): (" + prob + ") \+- (" + prob + ")") + +def parse(filename): + with open(filename) as file: + lines = file.readlines()[:4000000] + stats = [] + for l in lines: + match = pattern.match(l) + if match: + t = (int(match.group(1)), float(match.group(2)), float(match.group(4))) + stats.append(t) + return stats + +def above(stats, target, sigmas): + above = 0 + below = 0 + + + for s in stats: + if (s[1] - sigmas * s[2] > target): + above += 1 + else: + below += 1 + + print (above / (above + below)) + + +def mean(stats, sigmas): + summ = 0 + count = 0 + for s in stats: + count += 1 + summ += s[1] - sigmas * s[2] + print(summ/count) + + +stats = parse(sys.argv[1]) + +above(stats, 0.71, 0) + +mean(stats, 0) + + + + + + + From 395089d1de69eb4ab3f326e52f31ba8536535366 Mon Sep 17 00:00:00 2001 From: Evan West Date: Mon, 5 Feb 2024 13:50:10 -0500 Subject: [PATCH 11/37] Failed sketch merge should leave sketch in bad state --- include/cc_sketch_alg.h | 1 + include/sketch.h | 1 + src/sketch.cpp | 1 + 3 files changed, 3 insertions(+) diff --git a/include/cc_sketch_alg.h b/include/cc_sketch_alg.h index a58f2043..ebf8547d 100644 --- a/include/cc_sketch_alg.h +++ b/include/cc_sketch_alg.h @@ -241,4 +241,5 @@ class CCSketchAlg { // getters inline node_id_t get_num_vertices() { return num_vertices; } inline size_t get_seed() { return seed; } + inline size_t max_rounds() { return sketches[0]->get_num_samples(); } }; diff --git a/include/sketch.h b/include/sketch.h index 44db971c..23f009f0 100644 --- a/include/sketch.h +++ b/include/sketch.h @@ -164,6 +164,7 @@ class Sketch { inline size_t checksum_seed() const { return seed; } inline size_t get_columns() const { return num_columns; } inline size_t get_buckets() const { return num_buckets; } + inline size_t get_num_samples() const { return num_samples; } static size_t calc_bkt_per_col(size_t n) { return ceil(log2(n)) + 1; } static size_t calc_cc_samples(size_t n) { return ceil(log2(n) / num_samples_div); } diff --git a/src/sketch.cpp b/src/sketch.cpp index 9a0306b2..0c687fa1 100644 --- a/src/sketch.cpp +++ b/src/sketch.cpp @@ -156,6 +156,7 @@ void Sketch::merge(const Sketch &other) { void Sketch::range_merge(const Sketch &other, size_t start_sample, size_t n_samples) { if (start_sample + n_samples > num_samples) { assert(false); + 
sample_idx = num_samples; // sketch is in a fail state! return; } From 36c8ff4debba998c59e894aaf560a751b1302fcf Mon Sep 17 00:00:00 2001 From: Evan West Date: Thu, 8 Feb 2024 16:28:17 -0500 Subject: [PATCH 12/37] adjust to begin incorporation with streaming utilities --- CMakeLists.txt | 40 +++------ include/ascii_file_stream.h | 106 ------------------------ include/binary_file_stream.h | 153 ----------------------------------- include/graph_stream.h | 67 --------------- include/types.h | 34 +------- 5 files changed, 15 insertions(+), 385 deletions(-) delete mode 100644 include/ascii_file_stream.h delete mode 100644 include/binary_file_stream.h delete mode 100644 include/graph_stream.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 72ee8ac9..14d811cf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,19 +43,19 @@ else() message (STATUS "GraphZeppelin building executables") endif() -# Get xxHash +# Get GutterTree Project FetchContent_Declare( - xxhash + GutterTree - GIT_REPOSITORY https://github.com/Cyan4973/xxHash.git - GIT_TAG v0.8.0 + GIT_REPOSITORY https://github.com/GraphStreamingProject/GutterTree.git + GIT_TAG main ) -# Get GutterTree Project +# Get StreamingUtilities FetchContent_Declare( - GutterTree + StreamingUtilities - GIT_REPOSITORY https://github.com/GraphStreamingProject/GutterTree.git + GIT_REPOSITORY https://github.com/GraphStreamingProject/StreamingUtilities.git GIT_TAG main ) @@ -72,21 +72,7 @@ if (BUILD_BENCH) FetchContent_MakeAvailable(benchmark) endif() -FetchContent_MakeAvailable(xxHash GutterTree) -##### -# Some additional steps for xxHash as it is unofficial -##### -#xxHash messes with BUILD_SHARED_LIBS if it is empty -set(SAVED_BUILD_SHARED_LIBS "${BUILD_SHARED_LIBS}") - -add_subdirectory( - "${xxhash_SOURCE_DIR}/cmake_unofficial" - "${xxhash_BINARY_DIR}" - EXCLUDE_FROM_ALL -) -#Restore BUILD_SHARED_LIBS -set(BUILD_SHARED_LIBS "${SAVED_BUILD_SHARED_LIBS}" CACHE BOOL "" FORCE) - +FetchContent_MakeAvailable(GutterTree StreamingUtilities) # AVAILABLE COMPILATION DEFINITIONS: # VERIFY_SAMPLES_F Use a deterministic connected-components @@ -107,8 +93,8 @@ add_library(GraphZeppelin src/cc_alg_configuration.cpp src/sketch.cpp src/util.cpp) -add_dependencies(GraphZeppelin GutterTree) -target_link_libraries(GraphZeppelin PUBLIC xxhash GutterTree) +add_dependencies(GraphZeppelin GutterTree StreamingUtilities) +target_link_libraries(GraphZeppelin PUBLIC xxhash GutterTree StreamingUtilities) target_include_directories(GraphZeppelin PUBLIC include/) target_compile_options(GraphZeppelin PUBLIC -fopenmp) target_link_options(GraphZeppelin PUBLIC -fopenmp) @@ -123,8 +109,8 @@ add_library(GraphZeppelinVerifyCC src/util.cpp test/util/file_graph_verifier.cpp test/util/mat_graph_verifier.cpp) -add_dependencies(GraphZeppelinVerifyCC GutterTree) -target_link_libraries(GraphZeppelinVerifyCC PUBLIC xxhash GutterTree) +add_dependencies(GraphZeppelinVerifyCC GutterTree StreamingUtilities) +target_link_libraries(GraphZeppelinVerifyCC PUBLIC xxhash GutterTree StreamingUtilities) target_include_directories(GraphZeppelinVerifyCC PUBLIC include/ include/test/) target_compile_options(GraphZeppelinVerifyCC PUBLIC -fopenmp) target_link_options(GraphZeppelinVerifyCC PUBLIC -fopenmp) @@ -156,7 +142,7 @@ if (BUILD_EXE) src/util.cpp test/util/efficient_gen/edge_gen.cpp test/util/efficient_gen/efficient_gen.cpp) - target_link_libraries(efficient_gen PRIVATE xxhash GraphZeppelinCommon) + target_link_libraries(efficient_gen PRIVATE xxhash GraphZeppelinCommon StreamingUtilities) # executable for 
converting to stream format add_executable(to_binary_format diff --git a/include/ascii_file_stream.h b/include/ascii_file_stream.h deleted file mode 100644 index 2fb10147..00000000 --- a/include/ascii_file_stream.h +++ /dev/null @@ -1,106 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "graph_stream.h" - -class AsciiFileStream : public GraphStream { - public: - AsciiFileStream(std::string file_name, bool has_type = true) - : file_name(file_name), has_type(has_type) { - - bool stream_exists = false; - { - std::fstream check(file_name, std::fstream::in); - stream_exists = check.is_open(); - } - - if (stream_exists) - stream_file.open(file_name, std::fstream::in | std::fstream::out); - else - stream_file.open(file_name, std::fstream::in | std::fstream::out | std::fstream::trunc); - - if (!stream_file.is_open()) - throw StreamException("AsciiFileStream: could not open " + file_name); - - if (stream_exists) - stream_file >> num_vertices >> num_edges; - } - - inline size_t get_update_buffer(GraphStreamUpdate* upd_buf, size_t num_updates) { - assert(upd_buf != nullptr); - - size_t i = 0; - for (; i < num_updates; i++) { - GraphStreamUpdate& upd = upd_buf[i]; - - if (upd_offset >= num_edges || upd_offset >= break_edge_idx) { - upd.type = BREAKPOINT; - upd.edge = {0, 0}; - return i + 1; - } - int type = INSERT; - if (has_type) - stream_file >> type; - stream_file >> upd.edge.src >> upd.edge.dst; - upd.type = type; - ++upd_offset; - } - return i; - } - - // get_update_buffer() is not thread safe - inline bool get_update_is_thread_safe() { return false; } - - inline void write_header(node_id_t num_verts, edge_id_t num_edg) { - stream_file.seekp(0); // seek to beginning - stream_file << num_verts << " " << num_edg << std::endl; - num_vertices = num_verts; - num_edges = num_edg; - } - - inline void write_updates(GraphStreamUpdate* upd_buf, edge_id_t num_updates) { - for (edge_id_t i = 0; i < num_updates; i++) { - auto upd = upd_buf[i]; - if (has_type) - stream_file << (int) upd.type << " "; - stream_file << upd.edge.src << " " << upd.edge.dst << std::endl; - } - } - - inline void set_num_edges(edge_id_t num_edg) { - num_edges = num_edg; - } - - inline void seek(edge_id_t pos) { - if (pos != 0) - throw StreamException("AsciiFileStream: stream does not support seeking by update index"); - stream_file.seekp(0); stream_file.seekg(0); - upd_offset = 0; - } - - inline bool set_break_point(edge_id_t break_idx) { - if (break_idx < upd_offset) return false; - break_edge_idx = break_idx; - return true; - } - - inline void serialize_metadata(std::ostream& out) { - out << AsciiFile << " " << file_name << std::endl; - } - - static GraphStream* construct_from_metadata(std::istream& in) { - std::string file_name_from_stream; - in >> file_name_from_stream; - return new AsciiFileStream(file_name_from_stream); - } - - private: - const std::string file_name; - const bool has_type; - std::fstream stream_file; - edge_id_t break_edge_idx = -1; - edge_id_t upd_offset = 0; -}; diff --git a/include/binary_file_stream.h b/include/binary_file_stream.h deleted file mode 100644 index b3dd9f61..00000000 --- a/include/binary_file_stream.h +++ /dev/null @@ -1,153 +0,0 @@ -#pragma once -#include -#include //open and close - -#include -#include -#include -#include -#include - -#include "graph_stream.h" - -class BinaryFileStream : public GraphStream { - public: - /** - * Open a BinaryFileStream - * @param file_name Name of the stream file - */ - BinaryFileStream(std::string file_name, bool open_read_only = true) - : 
read_only(open_read_only), file_name(file_name) { - if (read_only) - stream_fd = open(file_name.c_str(), O_RDONLY, S_IRUSR); - else - stream_fd = open(file_name.c_str(), O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR); - - if (!stream_fd) - throw StreamException("BinaryFileStream: Could not open stream file " + file_name + - ". Does it exist?"); - - // read header from the input file - if (read_only) { - if (read(stream_fd, (char*)&num_vertices, sizeof(num_vertices)) != sizeof(num_vertices)) - throw StreamException("BinaryFileStream: Could not read number of nodes"); - if (read(stream_fd, (char*)&num_edges, sizeof(num_edges)) != sizeof(num_edges)) - throw StreamException("BinaryFileStream: Could not read number of edges"); - - end_of_file = (num_edges * edge_size) + header_size; - stream_off = header_size; - set_break_point(-1); - } - } - - ~BinaryFileStream() { - if (stream_fd) close(stream_fd); - } - - inline size_t get_update_buffer(GraphStreamUpdate* upd_buf, size_t num_updates) { - assert(upd_buf != nullptr); - - // many threads may execute this line simultaneously creating edge cases - size_t bytes_to_read = num_updates * edge_size; - size_t read_off = stream_off.fetch_add(bytes_to_read, std::memory_order_relaxed); - - // catch these edge cases here - if (read_off + bytes_to_read > break_index) { - bytes_to_read = read_off > break_index ? 0 : break_index - read_off; - stream_off = break_index.load(); - upd_buf[bytes_to_read / edge_size] = {BREAKPOINT, {0, 0}}; - } - // read into the buffer - assert(bytes_to_read % edge_size == 0); - size_t bytes_read = 0; - while (bytes_read < bytes_to_read) { - int r = - pread(stream_fd, upd_buf + bytes_read, bytes_to_read - bytes_read, read_off + bytes_read); - if (r == -1) throw StreamException("BinaryFileStream: Could not perform pread"); - if (r == 0) throw StreamException("BinaryFileStream: pread() got no data"); - bytes_read += r; - } - - size_t upds_read = bytes_to_read / edge_size; - if (upds_read < num_updates) { - GraphStreamUpdate& upd = upd_buf[upds_read]; - upd.type = BREAKPOINT; - upd.edge = {0, 0}; - return upds_read + 1; - } - return upds_read; - } - - // get_update_buffer() is thread safe! 
:) - inline bool get_update_is_thread_safe() { return true; } - - // write the number of nodes and edges to the stream - inline void write_header(node_id_t num_verts, edge_id_t num_edg) { - if (read_only) throw StreamException("BinaryFileStream: stream not open for writing!"); - - lseek(stream_fd, 0, SEEK_SET); - int r1 = write(stream_fd, (char*)&num_verts, sizeof(num_verts)); - int r2 = write(stream_fd, (char*)&num_edg, sizeof(num_edg)); - - if (r1 + r2 != header_size) { - perror("write_header"); - throw StreamException("BinaryFileStream: could not write header to stream file"); - } - - stream_off = header_size; - num_vertices = num_verts; - num_edges = num_edg; - end_of_file = (num_edges * edge_size) + header_size; - } - - // write an edge to the stream - inline void write_updates(GraphStreamUpdate* upd, edge_id_t num_updates) { - if (read_only) throw StreamException("BinaryFileStream: stream not open for writing!"); - - size_t bytes_to_write = num_updates * edge_size; - // size_t write_off = stream_off.fetch_add(bytes_to_write, std::memory_order_relaxed); - - size_t bytes_written = 0; - while (bytes_written < bytes_to_write) { - int r = write(stream_fd, (char*)upd + bytes_written, bytes_to_write - bytes_written); - if (r == -1) throw StreamException("BinaryFileStream: Could not perform write"); - bytes_written += r; - } - } - - // seek to a position in the stream - inline void seek(edge_id_t edge_idx) { stream_off = edge_idx * edge_size + header_size; } - - inline bool set_break_point(edge_id_t break_idx) { - edge_id_t byte_index = END_OF_STREAM; - if (break_idx != END_OF_STREAM) { - byte_index = header_size + break_idx * edge_size; - } - if (byte_index < stream_off) return false; - break_index = byte_index; - if (break_index > end_of_file) break_index = end_of_file; - return true; - } - - inline void serialize_metadata(std::ostream& out) { - out << BinaryFile << " " << file_name << std::endl; - } - - static GraphStream* construct_from_metadata(std::istream& in) { - std::string file_name_from_stream; - in >> file_name_from_stream; - return new BinaryFileStream(file_name_from_stream); - } - - private: - int stream_fd; - edge_id_t end_of_file; - std::atomic stream_off; - std::atomic break_index; - const bool read_only; // is stream read only? 
- const std::string file_name; - - // size of binary encoded edge and buffer read size - static constexpr size_t edge_size = sizeof(GraphStreamUpdate); - static constexpr size_t header_size = sizeof(node_id_t) + sizeof(edge_id_t); -}; diff --git a/include/graph_stream.h b/include/graph_stream.h deleted file mode 100644 index 2cd4a968..00000000 --- a/include/graph_stream.h +++ /dev/null @@ -1,67 +0,0 @@ -#pragma once -#include -#include -#include - -#include "types.h" - -#pragma pack(push,1) -struct GraphStreamUpdate { - uint8_t type; - Edge edge; -}; -#pragma pack(pop) - -static constexpr edge_id_t END_OF_STREAM = (edge_id_t) -1; - -// Enum that defines the types of streams -enum StreamType { - BinaryFile, - AsciiFile, -}; - -class GraphStream { - public: - virtual ~GraphStream() = default; - inline node_id_t vertices() { return num_vertices; } - inline edge_id_t edges() { return num_edges; } - - // Extract a buffer of many updates from the stream - virtual size_t get_update_buffer(GraphStreamUpdate* upd_buf, edge_id_t num_updates) = 0; - - // Query the GraphStream to see if get_update_buffer is thread-safe - // this is implemenation dependent - virtual bool get_update_is_thread_safe() = 0; - - // Move read pointer to new location in stream - // Child classes may choose to throw an error if seek is called - // For example, a GraphStream recieved over the network would - // likely not support seek - virtual void seek(edge_id_t edge_idx) = 0; - - // Query handling - // Call this function to register a query at a future edge index - // This function returns true if the query is correctly registered - virtual bool set_break_point(edge_id_t query_idx) = 0; - - // Serialize GraphStream metadata for distribution - // So that stream reading can happen simultaneously - virtual void serialize_metadata(std::ostream &out) = 0; - - // construct a stream object from serialized metadata - static GraphStream* construct_stream_from_metadata(std::istream &in); - - protected: - node_id_t num_vertices = 0; - edge_id_t num_edges = 0; - private: - static std::unordered_map constructor_map; -}; - -class StreamException : public std::exception { - private: - std::string err_msg; - public: - StreamException(std::string err) : err_msg(err) {} - virtual const char* what() const throw() { return err_msg.c_str(); } -}; diff --git a/include/types.h b/include/types.h index 76e45164..6fea6b26 100644 --- a/include/types.h +++ b/include/types.h @@ -2,43 +2,13 @@ #include #include #include +#include typedef uint64_t col_hash_t; static const auto& vec_hash = XXH3_64bits_withSeed; static const auto& col_hash = XXH3_64bits_withSeed; -// Is a stream update an insertion or a deletion -// BREAKPOINT: special type that indicates that a break point has been reached -// a break point may be either the end of the stream or the index of a query -enum UpdateType { - INSERT = 0, - DELETE = 1, - BREAKPOINT = 2 -}; - -struct Edge { - node_id_t src = 0; - node_id_t dst = 0; - - bool operator< (const Edge&oth) const { - if (src == oth.src) - return dst < oth.dst; - return src < oth.src; - } - bool operator== (const Edge&oth) const { - return src == oth.src && dst == oth.dst; - } -}; -namespace std { - template <> - struct hash { - auto operator()(const Edge&edge) const -> size_t { - std::hash h; - return h(edge.dst) + (31 * h(edge.src)); - } - }; -} - +// Graph Stream Updates are parsed into the GraphUpdate type for more convinient processing struct GraphUpdate { Edge edge; UpdateType type; From 3d0c64382b8c323a09b50a44235b0c04a045504c Mon 
Sep 17 00:00:00 2001 From: Evan West Date: Sat, 10 Feb 2024 19:59:38 -0500 Subject: [PATCH 13/37] fix the bug and remove more streaming stuff from this repo --- CMakeLists.txt | 27 ---- include/graph_sketch_driver.h | 4 +- include/test/efficient_gen.h | 9 -- include/test/graph_gen.h | 26 ---- include/worker_thread_group.h | 2 +- src/cc_sketch_alg.cpp | 4 +- test/cc_alg_test.cpp | 40 +++-- test/util/efficient_gen/edge_gen.cpp | 118 -------------- test/util/efficient_gen/efficient_gen.cpp | 19 --- test/util/graph_gen.cpp | 145 ------------------ test/util/graph_gen_test.cpp | 12 -- tools/statistical_testing/analyze_results.py | 73 --------- tools/statistical_testing/graph_testing.cpp | 96 ------------ .../medium_test_expected.txt | 2 - tools/statistical_testing/requirements.txt | 3 - .../small_test_expected.txt | 1 - tools/statistical_testing/stat_config.txt | 5 - tools/statistical_testing/test_runner.py | 130 ---------------- tools/to_binary_format.cpp | 98 ------------ tools/validate_binary_stream.cpp | 45 ------ 20 files changed, 33 insertions(+), 826 deletions(-) delete mode 100644 include/test/efficient_gen.h delete mode 100644 include/test/graph_gen.h delete mode 100644 test/util/efficient_gen/edge_gen.cpp delete mode 100644 test/util/efficient_gen/efficient_gen.cpp delete mode 100644 test/util/graph_gen.cpp delete mode 100644 test/util/graph_gen_test.cpp delete mode 100644 tools/statistical_testing/analyze_results.py delete mode 100644 tools/statistical_testing/graph_testing.cpp delete mode 100644 tools/statistical_testing/medium_test_expected.txt delete mode 100644 tools/statistical_testing/requirements.txt delete mode 100644 tools/statistical_testing/small_test_expected.txt delete mode 100644 tools/statistical_testing/stat_config.txt delete mode 100644 tools/statistical_testing/test_runner.py delete mode 100644 tools/to_binary_format.cpp delete mode 100644 tools/validate_binary_stream.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 14d811cf..9384c358 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -124,31 +124,10 @@ if (BUILD_EXE) test/dsu_test.cpp test/util_test.cpp test/util/file_graph_verifier.cpp - test/util/graph_gen.cpp - test/util/graph_gen_test.cpp test/util/graph_verifier_test.cpp) add_dependencies(tests GraphZeppelinVerifyCC) target_link_libraries(tests PRIVATE GraphZeppelinVerifyCC) - add_executable(statistical_test - tools/statistical_testing/graph_testing.cpp - test/util/file_graph_verifier.cpp - test/util/graph_gen.cpp) - add_dependencies(statistical_test GraphZeppelinVerifyCC) - target_link_libraries(statistical_test PRIVATE GraphZeppelinVerifyCC) - - # executables for experiment/benchmarking - add_executable(efficient_gen - src/util.cpp - test/util/efficient_gen/edge_gen.cpp - test/util/efficient_gen/efficient_gen.cpp) - target_link_libraries(efficient_gen PRIVATE xxhash GraphZeppelinCommon StreamingUtilities) - - # executable for converting to stream format - add_executable(to_binary_format - tools/to_binary_format.cpp) - target_link_libraries(to_binary_format PRIVATE GraphZeppelinCommon) - # executable for processing a binary graph stream add_executable(process_stream tools/process_stream.cpp) @@ -158,12 +137,6 @@ if (BUILD_EXE) add_executable(test_correctness tools/test_correctness.cpp) target_link_libraries(test_correctness PRIVATE GraphZeppelinVerifyCC) - - # tool for validating that a binary stream appears correct - add_executable(validate_binary_stream - tools/validate_binary_stream.cpp - ) - target_link_libraries(validate_binary_stream PRIVATE 
GraphZeppelin) endif() if (BUILD_BENCH) diff --git a/include/graph_sketch_driver.h b/include/graph_sketch_driver.h index 1ad6f0ef..38f77eba 100644 --- a/include/graph_sketch_driver.h +++ b/include/graph_sketch_driver.h @@ -58,8 +58,8 @@ class GraphSketchDriver { FRIEND_TEST(GraphTest, TestSupernodeRestoreAfterCCFailure); public: GraphSketchDriver(Alg *sketching_alg, GraphStream *stream, DriverConfiguration config, - size_t num_inserters = 1) - : sketching_alg(sketching_alg), stream(stream), num_stream_threads(num_inserters) { + size_t num_stream_threads = 1) + : sketching_alg(sketching_alg), stream(stream), num_stream_threads(num_stream_threads) { sketching_alg->allocate_worker_memory(config.get_worker_threads()); // set the leaf size of the guttering system appropriately if (config.gutter_conf().get_gutter_bytes() == GutteringConfiguration::uninit_param) { diff --git a/include/test/efficient_gen.h b/include/test/efficient_gen.h deleted file mode 100644 index 2a578c00..00000000 --- a/include/test/efficient_gen.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -void write_edges(uint32_t n, double p, const std::string& out_f); -// insert, delete based on a geometric distribution with ratio p -// i.e. p% of edges will be deleted, p^2% will be re-inserted, p^3 will be re-deleted -// until 1 element is left -void insert_delete(double p, const std::string& in_file, const std::string& out_file); - -void write_cumul(const std::string& stream_f, const std::string& cumul_f); diff --git a/include/test/graph_gen.h b/include/test/graph_gen.h deleted file mode 100644 index 24f03359..00000000 --- a/include/test/graph_gen.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once -#include -#include - -typedef struct genSet { - long n; // number of nodes - double p; // prob of edge between nodes - double r; // geometric insertion/removal - int max_appearances; // the maximum number of times an edge can show up - // in the stream. 0 for no limit. - std::string out_file; // file to write stream - std::string cumul_out_file; // file to write cumul graph - genSet(long n, double p, double r, int max_appearances, - std::string out_file, std::string cumul_out_file) - : n(n), p(p), r(r), max_appearances - (max_appearances), out_file(std::move(out_file)), cumul_out_file - (std::move(cumul_out_file)) {} -} GraphGenSettings; - -/** - * Generates a 1024-node graph with approximately 60,000 edge insert/deletes. 
- * Writes stream output to sample.txt - * Writes cumulative output to cumul_sample.txt - */ -void generate_stream(const GraphGenSettings& settings = - {1024,0.03,0.5,0,"./sample.txt", "./cumul_sample.txt"}); diff --git a/include/worker_thread_group.h b/include/worker_thread_group.h index 6575afda..a7ee26a6 100644 --- a/include/worker_thread_group.h +++ b/include/worker_thread_group.h @@ -88,7 +88,7 @@ class WorkerThread { } } } - int id; + const int id; GraphSketchDriver *driver; GutteringSystem *gts; std::condition_variable &flush_condition; diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index 78fc54a4..27855b31 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -109,12 +109,12 @@ void CCSketchAlg::apply_update_batch(int thr_id, node_id_t src_vertex, delta_sketch.update(static_cast(concat_pairing_fn(src_vertex, dst))); } - std::unique_lock(sketches[src_vertex]->mutex); + std::unique_lock lk(sketches[src_vertex]->mutex); sketches[src_vertex]->merge(delta_sketch); } void CCSketchAlg::apply_raw_buckets_update(node_id_t src_vertex, Bucket *raw_buckets) { - std::unique_lock(sketches[src_vertex]->mutex); + std::unique_lock lk(sketches[src_vertex]->mutex); sketches[src_vertex]->merge_raw_bucket_buffer(raw_buckets); } diff --git a/test/cc_alg_test.cpp b/test/cc_alg_test.cpp index 457534fa..a92cd406 100644 --- a/test/cc_alg_test.cpp +++ b/test/cc_alg_test.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -7,7 +8,6 @@ #include "cc_sketch_alg.h" #include "file_graph_verifier.h" -#include "graph_gen.h" #include "graph_sketch_driver.h" #include "mat_graph_verifier.h" @@ -16,6 +16,21 @@ static size_t get_seed() { return std::chrono::duration_cast(now.time_since_epoch()).count(); } +// helper function to generate a dynamic binary stream and its cumulative insert only stream +void generate_stream(size_t seed, node_id_t num_vertices, double density, double delete_portion, + double adtl_portion, size_t rounds, std::string stream_name, + std::string cumul_name) { + // remove old versions of the stream files + std::remove(stream_name.c_str()); + std::remove(cumul_name.c_str()); + + // generate new stream files + DynamicErdosGenerator dy_stream(seed, num_vertices, density, delete_portion, adtl_portion, + rounds); + dy_stream.to_ascii_file(stream_name); + dy_stream.write_cumulative_file(cumul_name); +} + /** * For many of these tests (especially for those upon very sparse and small graphs) * we allow for a certain number of failures per test. 
@@ -54,7 +69,7 @@ TEST_P(CCAlgTest, TestCorrectnessOnSmallRandomGraphs) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); int num_trials = 5; while (num_trials--) { - generate_stream(); + generate_stream(get_seed(), 1024, 0.03, 0.5, 0.005, 3, "sample.txt", "cumul_sample.txt"); AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); @@ -73,7 +88,7 @@ TEST_P(CCAlgTest, TestCorrectnessOnSmallSparseGraphs) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); int num_trials = 5; while (num_trials--) { - generate_stream({1024, 0.002, 0.5, 0, "./sample.txt", "./cumul_sample.txt"}); + generate_stream(get_seed(), 1024, 0.002, 0.5, 0.005, 3, "sample.txt", "cumul_sample.txt"); AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); @@ -92,7 +107,7 @@ TEST_P(CCAlgTest, TestCorrectnessOfReheating) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); int num_trials = 5; while (num_trials--) { - generate_stream({1024, 0.002, 0.5, 0, "./sample.txt", "./cumul_sample.txt"}); + generate_stream(get_seed(), 1024, 0.002, 0.5, 0.005, 3, "sample.txt", "cumul_sample.txt"); AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); @@ -123,11 +138,13 @@ TEST_P(CCAlgTest, MultipleWorkers) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()).worker_threads(8); int num_trials = 5; while (num_trials--) { - generate_stream({1024, 0.002, 0.5, 0, "./sample.txt", "./cumul_sample.txt"}); + size_t seed = get_seed(); + generate_stream(seed, 1024, 0.002, 0.5, 0.5, 3, "sample.txt", "cumul_sample.txt"); AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); - CCSketchAlg cc_alg{num_nodes, get_seed()}; + seed = get_seed(); + CCSketchAlg cc_alg{num_nodes, seed}; cc_alg.set_verifier(std::make_unique(1024, "./cumul_sample.txt")); GraphSketchDriver driver(&cc_alg, &stream, driver_config); @@ -172,18 +189,17 @@ TEST_P(CCAlgTest, TestPointQuery) { TEST(CCAlgTest, TestQueryDuringStream) { auto driver_config = DriverConfiguration().gutter_sys(STANDALONE); auto cc_config = CCAlgConfiguration(); - generate_stream({1024, 0.002, 0.5, 0, "./sample.txt", "./cumul_sample.txt"}); + generate_stream(get_seed(), 1024, 0.03, 0.5, 0.05, 3, "sample.txt", "cumul_sample.txt"); std::ifstream in{"./sample.txt"}; AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); edge_id_t num_edges = stream.edges(); - edge_id_t tenth = num_edges / 10; + edge_id_t tenth = num_edges / 10; CCSketchAlg cc_alg{num_nodes, get_seed(), cc_config}; GraphSketchDriver driver(&cc_alg, &stream, driver_config); MatGraphVerifier verify(num_nodes); - int type; node_id_t a, b; @@ -197,7 +213,7 @@ TEST(CCAlgTest, TestQueryDuringStream) { } verify.reset_cc_state(); - driver.process_stream_until(tenth * (j+1)); + driver.process_stream_until(tenth * (j + 1)); driver.prep_query(); cc_alg.set_verifier(std::make_unique(verify)); cc_alg.connected_components(); @@ -284,7 +300,7 @@ TEST(CCAlgTest, MTStreamWithMultipleQueries) { size_t num_queries = 10; size_t upd_per_query = num_edges / num_queries; - for (size_t i = 0; i < num_queries-1; i++) { + for (size_t i = 0; i < num_queries - 1; i++) { for (size_t j = 0; j < upd_per_query; j++) { GraphStreamUpdate upd; verify_stream.get_update_buffer(&upd, 1); @@ -294,7 +310,7 @@ TEST(CCAlgTest, MTStreamWithMultipleQueries) { verify.reset_cc_state(); cc_alg.set_verifier(std::make_unique(verify)); - driver.process_stream_until(upd_per_query * (i+1)); + 
driver.process_stream_until(upd_per_query * (i + 1)); driver.prep_query(); cc_alg.connected_components(); } diff --git a/test/util/efficient_gen/edge_gen.cpp b/test/util/efficient_gen/edge_gen.cpp deleted file mode 100644 index 5187a756..00000000 --- a/test/util/efficient_gen/edge_gen.cpp +++ /dev/null @@ -1,118 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "../../../include/test/efficient_gen.h" -#include "../../../include/types.h" -#include "../../../include/util.h" - -typedef uint32_t ul; -typedef uint64_t ull; - -std::ofstream& operator<< (std::ofstream &os, const std::pair p) { - os << p.first << " " << p.second; - return os; -} - -void write_edges(ul n, double p, const std::string& out_f) { - ull num_edges = ((ull)n*(n-1))/2; - ull* arr = (ull*) malloc(num_edges*sizeof(ull)); - ul idx = 0; - - std::cout << "Generating possible edges" << std::endl; - for (unsigned i=0; i < n; ++i) { - for (unsigned j=i+1;j < n; ++j) { - arr[idx++] = concat_pairing_fn(i, j); - } - } - - std::cout << "Permuting edges" << std::endl; - std::shuffle(arr,arr+num_edges, std::mt19937(std::random_device()())); - std::ofstream out(out_f); - ull m = (ull) (num_edges*p); - out << n << " " << m << std::endl; - - std::cout << "Writing edges to file" << std::endl; - while (m--) { - Edge e = inv_concat_pairing_fn(arr[m]); - out << e.src << " " << e.dst << std::endl; - } - - out.close(); - free(arr); -} - -void insert_delete(double p, const std::string& in_file, const std::string& out_file) { - std::cout << "Deleting and reinserting some edges" << std::endl; - std::ifstream in(in_file); - std::ofstream out(out_file); - int n; ull m; in >> n >> m; - - ull full_m = m; - ull ins_del_arr[(ull)log2(m)+2]; - std::fill(ins_del_arr,ins_del_arr + (ull)log2(m)+2,0); - ins_del_arr[0] = m; - for (unsigned i = 0; ins_del_arr[i] > 1; ++i) { - ins_del_arr[i+1] = (ul)(ins_del_arr[i]*p); - full_m += ins_del_arr[i+1]; - } - - out << n << " " << full_m << std::endl; - - ull* memoized = (ull*) malloc(ins_del_arr[1]*sizeof(ull)); - ul a,b; - - for (unsigned i=0;i> a >> b; - out << "0 " << a << " " << b << std::endl; - memoized[i] = concat_pairing_fn(a, b); - } - - for (unsigned i=ins_del_arr[1];i> a >> b; - out << "0 " << a << " " << b << std::endl; - } - - for (unsigned i = 1; ins_del_arr[i] >= 1; ++i) { - int temp = i%2; - for (unsigned j=0;j> n >> m; - std::vector> adj(n,std::vector(n,false)); - bool type; - int a,b; - for (ull i=1;i<=m;++i) { - in >> type >> a >> b; - if ((type == INSERT && adj[a][b] == 1) || (type == DELETE && adj[a][b] == 0)) { - std::cerr << "Insertion/deletion error at line " << i - << " in " << stream_f; - return; - } - adj[a][b] = !adj[a][b]; - } - // write cumul output - ull m_cumul = 0; - for (int i = 0; i < n; ++i) { - for (int j = 0; j < n; ++j) { - if (adj[i][j]) ++m_cumul; - } - } - out << n << " " << m_cumul << std::endl; - for (int i = 0; i < n; ++i) { - for (int j = 0; j < n; ++j) { - if (adj[i][j]) out << i << " " << j << std::endl; - } - } -} diff --git a/test/util/efficient_gen/efficient_gen.cpp b/test/util/efficient_gen/efficient_gen.cpp deleted file mode 100644 index 93aa5b30..00000000 --- a/test/util/efficient_gen/efficient_gen.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include -#include "../../../include/test/efficient_gen.h" - -int main() { - int n; double p, r = 0.1; std::string s,t; char c = 0; bool cumul = false; - std::cout << "n: "; std::cin >> n; - std::cout << "p: "; std::cin >> p; - std::cout << "r: "; std::cin >> r; - std::cout << "cumul (y/n): "; std::cin >> 
c; - if (c == 'y' || c == 'Y') cumul = true; - std::cout << "Out file: "; std::cin >> s; - if (cumul) { std::cout << "Cumul out: "; std::cin >> t; } - - auto start = time(nullptr); - write_edges(n, p, "./TEMP_F"); - insert_delete(r,"./TEMP_F", s); - if (cumul) write_cumul(s,t); - std::cout << "Completed in " << time(nullptr)-start << " seconds" << std::endl; -} diff --git a/test/util/graph_gen.cpp b/test/util/graph_gen.cpp deleted file mode 100644 index 8114abc0..00000000 --- a/test/util/graph_gen.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include "graph_gen.h" -#include "types.h" -#include "util.h" - -#include -#include -#include -#include - -#define endl '\n' - -typedef uint32_t ul; -typedef uint64_t ull; - -const ull ULLMAX = std::numeric_limits
    ::max(); - - -std::ofstream& operator<< (std::ofstream &os, const std::pair p) { - os << p.first << " " << p.second; - return os; -} - -void write_edges(long n, double p, const std::string& out_f) { - ul num_edges = (n*(n-1))/2; - ull* arr = (ull*) malloc(num_edges*sizeof(ull)); - ul e = 0; - for (unsigned i = 0; i < n; ++i) { - for (unsigned j = i+1; j < n; ++j) { - arr[e++] = concat_pairing_fn(i, j); - } - } - std::shuffle(arr,arr+num_edges, std::mt19937(std::random_device()())); - std::ofstream out(out_f); - ul m = (ul) (num_edges*p); - out << n << " " << m << endl; - - while (m--) { - Edge e = inv_concat_pairing_fn(arr[m]); - out << e.src << " " << e.dst << endl; - } - out.flush(); - out.close(); - free(arr); -} - -void insert_delete(double p, int max_appearances, const std::string& in_file, - const std::string& out_file) { - std::ifstream in(in_file); - std::ofstream out(out_file); - int n; ul m; in >> n >> m; - long long full_m = m; - ull ins_del_arr[(ul)log2(m)+2]; - std::fill(ins_del_arr,ins_del_arr + (ul)log2(m)+2,0); - ins_del_arr[0] = m; - if (max_appearances == 0) { - for (unsigned i = 0; ins_del_arr[i] > 1; ++i) { - ins_del_arr[i + 1] = (ull) (ins_del_arr[i] * p); - full_m += ins_del_arr[i + 1]; - } - } else { - for (int i = 0; i < max_appearances - 1; ++i) { - ins_del_arr[i + 1] = (ull) (ins_del_arr[i] * p); - full_m += ins_del_arr[i + 1]; - } - } - - out << n << " " << full_m << endl; - - ull* memoized = (ull*) malloc(ins_del_arr[1]*sizeof(ull)); - ul a,b; - - for (unsigned i=0;i> a >> b; - out << "0 " << a << " " << b << endl; - memoized[i] = concat_pairing_fn(a, b); - } - - for (unsigned i=ins_del_arr[1];i> a >> b; - out << "0 " << a << " " << b << endl; - } - - in.close(); - - unsigned stopping = 1; - if (max_appearances == 0) { - for (; ins_del_arr[stopping] >= 1; ++stopping); - } else { - stopping = max_appearances; - } - for (unsigned i = 1; i < stopping; ++i) { - int temp = i % 2; - for (unsigned j = 0; j < ins_del_arr[i]; ++j) { - out << temp << " "; - Edge e = inv_concat_pairing_fn(memoized[j]); - out << e.src << " " << e.dst << endl; - } - } - out.flush(); - out.close(); - free(memoized); -} - -void write_cumul(const std::string& stream_f, const std::string& cumul_f) { - std::ifstream in(stream_f); - std::ofstream out(cumul_f); - int n; ull m; in >> n >> m; - std::vector> adj(n,std::vector(n,false)); - bool type; - int a,b; - for (ull i=1;i<=m;++i) { - in >> type >> a >> b; - if ((type == INSERT && adj[a][b] == 1) || (type == DELETE && adj[a][b] == 0)) { - std::cerr << "Insertion/deletion error at line " << i - << " in " << stream_f; - return; - } - adj[a][b] = !adj[a][b]; - } - - in.close(); - - // write cumul output - ull m_cumul = 0; - for (int i = 0; i < n; ++i) { - for (int j = 0; j < n; ++j) { - if (adj[i][j]) ++m_cumul; - } - } - out << n << " " << m_cumul << endl; - for (int i = 0; i < n; ++i) { - for (int j = 0; j < n; ++j) { - if (adj[i][j]) out << i << " " << j << endl; - } - } - out.flush(); - out.close(); -} - -void generate_stream(const GraphGenSettings& settings) { - write_edges(settings.n, settings.p, "./TEMP_F"); - insert_delete(settings.r, settings.max_appearances, "./TEMP_F", settings - .out_file); - write_cumul(settings.out_file,settings.cumul_out_file); -} diff --git a/test/util/graph_gen_test.cpp b/test/util/graph_gen_test.cpp deleted file mode 100644 index edd4d9f8..00000000 --- a/test/util/graph_gen_test.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include -#include "../../include/test/graph_gen.h" - -TEST(GraphGenTestSuite, TestGeneration) { - 
std::string fname = __FILE__; - size_t pos = fname.find_last_of("\\/"); - std::string curr_dir = (std::string::npos == pos) ? "" : fname.substr(0, pos); - generate_stream(); - struct stat buffer; - ASSERT_FALSE(stat("./sample.txt", &buffer)); - ASSERT_FALSE(stat("./cumul_sample.txt", &buffer)); -} diff --git a/tools/statistical_testing/analyze_results.py b/tools/statistical_testing/analyze_results.py deleted file mode 100644 index 2c284fba..00000000 --- a/tools/statistical_testing/analyze_results.py +++ /dev/null @@ -1,73 +0,0 @@ - -import numpy as np -import argparse -from scipy.stats import ttest_ind, norm - -def check_error(test_name, test_result_file, expected_result_file, confidence=0.95): - print('::::: ', test_name, ' :::::', sep='') - test_file = open(test_result_file) - test_result = np.loadtxt(test_file) - - test_file = open(expected_result_file) - test_expect = np.loadtxt(test_file) - - result_t = test_result.transpose() - test_failures = result_t[0,:] - test_runs = result_t[1,:] - - total_expect_failures = test_expect[0] - total_expect_runs = test_expect[1] - - assert (test_runs == 100).all(), "Each bin must be of size 100" - - # First step: Verify that there is not a dependency between tests and upon the graph - if (test_failures >= 6).any(): - return True, "Dependency between tests or upon input graph found" - - # Second step: Verify that the number of test failures does not deviate from the expectation - total_test_failures = np.sum(test_failures) - total_test_runs = np.sum(test_runs) - - assert total_test_runs == total_expect_runs, "The number of runs must be the same" - pr = total_expect_failures / total_expect_runs - critical_z_val = norm.ppf(1 - (1 - confidence) / 2) - z_test_deviation = np.ceil(critical_z_val * np.sqrt(pr * (1-pr) / total_expect_runs) * total_expect_runs) - print("Number of test failures:", total_test_failures, "{0}%".format(total_test_failures/total_test_runs)) - print("Total number of failures is allowed to deviate by at most", z_test_deviation) - print("Deviation is", total_test_failures - total_expect_failures) - if total_test_failures - z_test_deviation > total_expect_failures: - return True, "Test error is statistically greater than expectation {0}/{1}".format(int(total_test_failures), int(total_test_runs)) - - if total_test_failures + z_test_deviation < total_expect_failures: - return True, "Test error is statistically less than expectation {0}/{1}".format(int(total_test_failures), int(total_test_runs)) - - return False, "No statistical deviation detected {0}/{1}".format(int(total_test_failures), int(total_test_runs)) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Statistical testing on graph tests.') - parser.add_argument('small', metavar="small output", type=str, - help='the file which contains the results from the small graph test') - parser.add_argument('medium', metavar="medium output", type=str, - help='the file which contains the results from the medium graph test') - parser.add_argument('iso', metavar="medium iso output", type=str, - help='the file which contains the results from the medium+iso graph test') - - parser.add_argument('small_exp', metavar="small expect", type=str, - help="the file which contains the results from a correct branch for small graph") - parser.add_argument('medium_exp', metavar="medium expect", type=str, - help="the file which contains the results from a correct branch for medium graph") - parser.add_argument('iso_exp', metavar="medium iso expect", type=str, - help="the file which 
contains the results from a correct branch for medium+iso graph") - args = parser.parse_args() - - stat_result = check_error("small_test", args.small, args.small_exp, 0.1) - print(stat_result[0]) - print(stat_result[1]) - - stat_result = check_error("medium_test", args.medium, args.medium_exp, 0.1) - print(stat_result[0]) - print(stat_result[1]) - - stat_result = check_error("medium_iso_test", args.iso, args.iso_exp, 0.1) - print(stat_result[0]) - print(stat_result[1]) diff --git a/tools/statistical_testing/graph_testing.cpp b/tools/statistical_testing/graph_testing.cpp deleted file mode 100644 index dee89912..00000000 --- a/tools/statistical_testing/graph_testing.cpp +++ /dev/null @@ -1,96 +0,0 @@ -#include -#include "graph_sketch_driver.h" -#include "cc_sketch_alg.h" -#include "ascii_file_stream.h" -#include "graph_gen.h" -#include "file_graph_verifier.h" - -static DriverConfiguration driver_config; -static size_t get_seed() { - auto now = std::chrono::high_resolution_clock::now(); - return std::chrono::duration_cast(now.time_since_epoch()).count(); -} - -static inline int do_run() { - AsciiFileStream stream{"./sample.txt"}; - node_id_t n = stream.vertices(); - CCSketchAlg cc_alg{n, get_seed()}; - cc_alg.set_verifier(std::make_unique(n, "./cumul_sample.txt")); - GraphSketchDriver driver(&cc_alg, &stream, driver_config); - driver.process_stream_until(END_OF_STREAM); - driver.prep_query(); - try { - cc_alg.connected_components(); - } catch (std::exception const &err) { - return 1; - } - return 0; -} - -int small_graph_test(int runs) { - int failures = 0; - for (int i = 0; i < runs; i++) { - generate_stream({1024,0.002,0.5,0,"./sample.txt","./cumul_sample.txt"}); - failures += do_run(); - } - return failures; -} - -int medium_graph_test(int runs) { - int failures = 0; - for (int i = 0; i < runs; i++) { - generate_stream({2048,0.002,0.5,0,"./sample.txt","./cumul_sample.txt"}); - failures += do_run(); - } - return failures; -} - -int main() { - int runs = 100; - int num_trails = 500; - std::vector trial_list; - std::ofstream out; - - // run both with GutterTree and StandAloneGutters - for(int i = 0; i < 2; i++) { - bool use_tree = (bool) i; - - // setup configuration file per buffering - driver_config.gutter_sys(use_tree ? GUTTERTREE : STANDALONE); - driver_config.worker_threads(4); - std::string prefix = use_tree? 
"tree" : "gutters"; - std::string test_name; - - /************* small graph test *************/ - test_name = prefix + "_" + "small_graph_test"; - fprintf(stderr, "%s\n", test_name.c_str()); - out.open("./" + test_name); - for(int i = 0; i < num_trails; i++) { - if (i % 50 == 0) fprintf(stderr, "trial %i\n", i); - int trial_result = small_graph_test(runs); - trial_list.push_back(trial_result); - } - // output the results of these trials - for (unsigned i = 0; i < trial_list.size(); i++) { - out << trial_list[i] << " " << runs << "\n"; - } - trial_list.clear(); - out.close(); - - /************* medium graph test ************/ - test_name = prefix + "_" + "medium_graph_test"; - fprintf(stderr, "%s\n", test_name.c_str()); - out.open("./" + test_name); - for(int i = 0; i < num_trails; i++) { - if (i % 50 == 0) fprintf(stderr, "trial %i\n", i); - int trial_result = medium_graph_test(runs); - trial_list.push_back(trial_result); - } - // output the results of these trials - for (unsigned i = 0; i < trial_list.size(); i++) { - out << trial_list[i] << " " << runs << "\n"; - } - trial_list.clear(); - out.close(); - } -} diff --git a/tools/statistical_testing/medium_test_expected.txt b/tools/statistical_testing/medium_test_expected.txt deleted file mode 100644 index 03e815e8..00000000 --- a/tools/statistical_testing/medium_test_expected.txt +++ /dev/null @@ -1,2 +0,0 @@ -180 50000 - diff --git a/tools/statistical_testing/requirements.txt b/tools/statistical_testing/requirements.txt deleted file mode 100644 index db9b7bba..00000000 --- a/tools/statistical_testing/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -numpy>=1.21.2 -scipy>=1.7.1 -GitPython>=3.1.24 diff --git a/tools/statistical_testing/small_test_expected.txt b/tools/statistical_testing/small_test_expected.txt deleted file mode 100644 index 022b6d48..00000000 --- a/tools/statistical_testing/small_test_expected.txt +++ /dev/null @@ -1 +0,0 @@ -228 50000 diff --git a/tools/statistical_testing/stat_config.txt b/tools/statistical_testing/stat_config.txt deleted file mode 100644 index f4a33f1c..00000000 --- a/tools/statistical_testing/stat_config.txt +++ /dev/null @@ -1,5 +0,0 @@ -build_path=./build -stat_path=./test/statistical_testing -confidence=0.95 -usr= -pwd= diff --git a/tools/statistical_testing/test_runner.py b/tools/statistical_testing/test_runner.py deleted file mode 100644 index 1b4a39d3..00000000 --- a/tools/statistical_testing/test_runner.py +++ /dev/null @@ -1,130 +0,0 @@ -import subprocess -import importlib -import datetime -import smtplib -import git -SMTP_PORT = 465 - -importlib.import_module('analyze_results') -from analyze_results import check_error - -''' -Configure the system by reading from the configuration file -''' -def configure(): - build_path = "./" - stat_path = "./" - confidence = 0.95 - usr = "" - pwd = "" - with open('test/statistical_testing/stat_config.txt') as config: - lines = config.readlines() - for line in lines: - line_pair = line.split('=') - if line_pair[0].rstrip() == 'build_path': - build_path = line_pair[1].rstrip() - elif line_pair[0].rstrip() == 'stat_path': - stat_path = line_pair[1].rstrip() - elif line_pair[0].rstrip() == 'confidence': - confidence = float(line_pair[1].rstrip()) - elif line_pair[0].rstrip() == 'usr': - usr = line_pair[1].rstrip() - elif line_pair[0].rstrip() == 'pwd': - pwd = line_pair[1].rstrip() - else: - print("Error: unknown configuration parameter", line_pair[0]) - exit(1) - - return build_path, stat_path, confidence, usr, pwd - -''' -Run the statistical_testing executables 
-''' -def run_test(build_path): - subprocess.run(build_path + '/statistical_test', stdout=subprocess.DEVNULL, check=True) - -''' -Format the results of the test and raise an error if necessary -''' -def log_result(test_name, err, err_dsc): - if err: - return 'ERROR Test: ' + test_name + ' = ' + err_dsc - else: - return 'PASSED Test: ' + test_name + ' = ' + err_dsc -''' -Send an email containing the log -''' -def send_email(err_found, log, usr, pwd): - server_ssl = smtplib.SMTP_SSL('smtp.gmail.com', SMTP_PORT) - server_ssl.ehlo() - - today = datetime.datetime.today() - - server_ssl.login(usr, pwd) - subject = '' - if err_found: - subject = 'ERROR: ' - subject += 'Statistical Testing Log {0}/{1}/{2}'.format(str(today.month), str(today.day), str(today.year)) - - msg = "\r\n".join([ - "From: "+usr, - "To: graph.stat.testing@gmail.com", - "Subject:"+subject, - "", - log - ]) - server_ssl.sendmail(usr, "graph.stat.testing@gmail.com", msg) - server_ssl.quit() - -if __name__ == "__main__": - # Setup - build_path, stat_path, confidence, usr, pwd = configure() - assert usr != '' and pwd != '', "must specifiy user and password in configuration file" - - try: - repo = git.Repo("./") - buf_repo = git.Repo(build_path + "/GutterTree/src/GutterTree") - except: - print("Must run code at root directory of StreamingRepo and must have GutterTree code present in build dir") - exit(1) - head = repo.heads[0] - stream_commit_hash = head.commit.hexsha - stream_commit_msg = head.commit.message - - head = buf_repo.heads[0] - buffer_commit_hash = head.commit.hexsha - buffer_commit_msg = head.commit.message - - log = "StreamRepo Commit: " + stream_commit_hash + "\n" + stream_commit_msg + "\n" - log += "GutterTree Commit: " + buffer_commit_hash + "\n" + buffer_commit_msg + "\n" - - # Run the tests - run_test(build_path) - - for pre in ["tree", "gutters"]: - if pre == "tree": - log += "GutterTree\n" - else: - log += "StandAloneGutters\n" - - # Collect statistical results - # test_name, test_result_file, expected_result_file - try: - print("small test") - small_err, small_dsc = check_error('small test', pre + 'small_graph_test', stat_path + '/small_test_expected.txt') - except Exception as err: - small_err = True - small_dsc = "test threw expection: {0}".format(err) - try: - print("medium test") - medium_err, medium_dsc = check_error('medium test', pre + 'medium_graph_test', stat_path + '/medium_test_expected.txt') - except Exception as err: - medium_err = True - medium_dsc = "test threw expection: {0}".format(err) - - # Create a log, and send email - log += log_result('small test', small_err, small_dsc) + "\n" - log += log_result('medium test', medium_err, medium_dsc) + "\n" - - print("Sending email!") - send_email(small_err or medium_err, log, usr, pwd) diff --git a/tools/to_binary_format.cpp b/tools/to_binary_format.cpp deleted file mode 100644 index 290fde6b..00000000 --- a/tools/to_binary_format.cpp +++ /dev/null @@ -1,98 +0,0 @@ -#include -#include -#include -#include -#include -#include - -int main(int argc, char **argv) { - if (argc < 3 || argc > 5) { - std::cout << "Incorrect number of arguments. 
" - "Expected [2-4] but got " << argc-1 << std::endl; - std::cout << "Arguments are: ascii_stream out_file_name [--update_type] [--verbose]" << std::endl; - std::cout << "ascii_stream: The file to parse into binary format" << std::endl; - std::cout << "out_file_name: Where the binary stream will be written" << std::endl; - std::cout << "--update_type: If present then ascii stream indicates insertions vs deletions" << std::endl; - std::cout << "--silent: If present then no warnings are printed when stream corrections are made" << std::endl; - exit(EXIT_FAILURE); - } - - std::ifstream txt_file(argv[1]); - if (!txt_file) { - std::cerr << "ERROR: could not open input file!" << std::endl; - exit(EXIT_FAILURE); - } - std::ofstream out_file(argv[2], std::ios_base::binary | std::ios_base::out); - if (!out_file) { - std::cerr << "ERROR: could not open output file! " << argv[2] << ": " << strerror(errno) << std::endl; - exit(EXIT_FAILURE); - } - - bool update_type = false; - bool silent = false; - for (int i = 3; i < argc; i++) { - if (std::string(argv[i]) == "--update_type") - update_type = true; - else if (std::string(argv[i]) == "--silent") { - silent = true; - } - else { - std::cerr << "Did not recognize argument: " << argv[i] << " Expected '--update_type' or '--silent'"; - return EXIT_FAILURE; - } - } - - node_id_t num_nodes; - edge_id_t num_edges; - - txt_file >> num_nodes >> num_edges; - - std::cout << "Parsed ascii stream header. . ." << std::endl; - std::cout << "Number of nodes: " << num_nodes << std::endl; - std::cout << "Number of updates: " << num_edges << std::endl; - if (update_type) - std::cout << "Assuming that update format is: upd_type src dst" << std::endl; - else - std::cout << "Assuming that update format is: src dst" << std::endl; - - - out_file.write((char *) &num_nodes, sizeof(num_nodes)); - out_file.write((char *) &num_edges, sizeof(num_edges)); - - std::vector> adj_mat(num_nodes); - for (node_id_t i = 0; i < num_nodes; ++i) - adj_mat[i] = std::vector(num_nodes - i); - - bool u; - node_id_t src; - node_id_t dst; - - while(num_edges--) { - u = false; - if (update_type) - txt_file >> u >> src >> dst; - else - txt_file >> src >> dst; - - if (src > dst) { - if (!silent && u != adj_mat[dst][src - dst]) { - std::cout << "WARNING: update " << u << " " << src << " " << dst; - std::cout << " is double insert or delete before insert. Correcting." << std::endl; - } - u = adj_mat[dst][src - dst]; - adj_mat[dst][src - dst] = !adj_mat[dst][src - dst]; - } else { - if (!silent && u != adj_mat[src][dst - src]) { - std::cout << "WARNING: update " << u << " " << src << " " << dst; - std::cout << " is double insert or delete before insert. Correcting." << std::endl; - } - u = adj_mat[src][dst - src]; - adj_mat[src][dst - src] = !adj_mat[src][dst - src]; - } - - out_file.write((char *) &u, sizeof(u)); - out_file.write((char *) &src, sizeof(src)); - out_file.write((char *) &dst, sizeof(dst)); - } -} - diff --git a/tools/validate_binary_stream.cpp b/tools/validate_binary_stream.cpp deleted file mode 100644 index 41227832..00000000 --- a/tools/validate_binary_stream.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include - -int main(int argc, char **argv) { - if (argc != 2) { - std::cout << "Incorrect Number of Arguments!" 
<< std::endl; - std::cout << "Arguments: stream_file" << std::endl; - exit(EXIT_FAILURE); - } - - BinaryFileStream stream(argv[1]); - node_id_t nodes = stream.vertices(); - size_t edges = stream.edges(); - - std::cout << "Attempting to validate stream " << argv[1] << std::endl; - std::cout << "Number of nodes = " << nodes << std::endl; - std::cout << "Number of updates = " << edges << std::endl; - - // validate the src and dst of each node in the stream and ensure there are enough of them - bool err = false; - for (size_t e = 0; e < edges; e++) { - GraphStreamUpdate upd; - try { - stream.get_update_buffer(&upd, 1); - } catch (...) { - std::cerr << "ERROR: Could not get edge at index: " << e << std::endl; - err = true; - std::rethrow_exception(std::current_exception()); - break; - } - Edge edge = upd.edge; - UpdateType u = static_cast(upd.type); - std::cerr << u << " " << edge.src << " " << edge.dst << std::endl; - if (edge.src >= nodes || edge.dst >= nodes || (u != INSERT && u != DELETE) || - edge.src == edge.dst) { - std::cerr << "ERROR: edge idx:" << e << "=(" << edge.src << "," << edge.dst << "), " << u - << std::endl; - err = true; - } - if (e % 1000000000 == 0 && e != 0) std::cout << e << std::endl; - } - - if (!err) std::cout << "Stream validated!" << std::endl; - if (err) std::cout << "Stream invalid!" << std::endl; -} - From 2534e219047f849107c70e282093a17b84d85531 Mon Sep 17 00:00:00 2001 From: Evan West Date: Mon, 12 Feb 2024 12:13:18 -0500 Subject: [PATCH 14/37] swap unnecessary unique_lock for lock_guard --- src/cc_sketch_alg.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index 27855b31..ecd67fcb 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -109,12 +109,12 @@ void CCSketchAlg::apply_update_batch(int thr_id, node_id_t src_vertex, delta_sketch.update(static_cast(concat_pairing_fn(src_vertex, dst))); } - std::unique_lock lk(sketches[src_vertex]->mutex); + std::lock_guard lk(sketches[src_vertex]->mutex); sketches[src_vertex]->merge(delta_sketch); } void CCSketchAlg::apply_raw_buckets_update(node_id_t src_vertex, Bucket *raw_buckets) { - std::unique_lock lk(sketches[src_vertex]->mutex); + std::lock_guard lk(sketches[src_vertex]->mutex); sketches[src_vertex]->merge_raw_bucket_buffer(raw_buckets); } @@ -152,7 +152,7 @@ inline bool CCSketchAlg::sample_supernode(Sketch &skt) { auto src = std::min(e.src, e.dst); auto dst = std::max(e.src, e.dst); { - std::unique_lock lk(spanning_forest_mtx[src]); + std::lock_guard lk(spanning_forest_mtx[src]); spanning_forest[src].insert(dst); } } @@ -207,7 +207,7 @@ inline node_id_t find_last_partition_of_root(const std::vector &merg // merge the global and return if it is safe to query now inline bool merge_global(const size_t cur_round, const Sketch &local_sketch, GlobalMergeData &global) { - std::unique_lock lk(global.mtx); + std::lock_guard lk(global.mtx); global.sketch.range_merge(local_sketch, cur_round, 1); ++global.num_merge_done; assert(global.num_merge_done <= global.num_merge_needed); @@ -333,7 +333,7 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, if (!root_from_left) { // Resolved root_from_left, so we are the first thread to encounter this root // set the number of threads that will merge into this component - std::unique_lock lk(global_merges[global_id].mtx); + std::lock_guard lk(global_merges[global_id].mtx); global_merges[global_id].num_merge_needed = global_id - thr_id + 1; } bool query_ready = merge_global(cur_round, 
local_sketch, global_merges[global_id]); From 057f91b1ea57d3300863f4eecbee6104d528fee6 Mon Sep 17 00:00:00 2001 From: Daniel DeLayo Date: Mon, 12 Feb 2024 15:42:00 -0500 Subject: [PATCH 15/37] move to tools --- tools/{statistical_testing => }/sum_sketch_testing.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) rename tools/{statistical_testing => }/sum_sketch_testing.py (88%) diff --git a/tools/statistical_testing/sum_sketch_testing.py b/tools/sum_sketch_testing.py similarity index 88% rename from tools/statistical_testing/sum_sketch_testing.py rename to tools/sum_sketch_testing.py index 01052777..55b666e7 100644 --- a/tools/statistical_testing/sum_sketch_testing.py +++ b/tools/sum_sketch_testing.py @@ -27,6 +27,7 @@ def above(stats, target, sigmas): above += 1 else: below += 1 + print("BELOW") print (above / (above + below)) @@ -42,9 +43,11 @@ def mean(stats, sigmas): stats = parse(sys.argv[1]) -above(stats, 0.71, 0) +above(stats, 0.76, 0) +#above(stats, 0.78, 1) +#above(stats, 0.78, 2) -mean(stats, 0) +#mean(stats, 3) From d669088bbc76e05bef99a4c7ac99286937b32aa2 Mon Sep 17 00:00:00 2001 From: Daniel DeLayo Date: Thu, 15 Feb 2024 15:01:12 -0500 Subject: [PATCH 16/37] move to tools 2 --- CMakeLists.txt | 2 +- tools/{statistical_testing => }/sketch_testing.cpp | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tools/{statistical_testing => }/sketch_testing.cpp (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index a326e6e7..3c695baa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -152,7 +152,7 @@ if (BUILD_EXE) target_link_libraries(statistical_test PRIVATE GraphZeppelinVerifyCC) add_executable(statistical_sketch_test - tools/statistical_testing/sketch_testing.cpp) + tools/sketch_testing.cpp) add_dependencies(statistical_sketch_test GraphZeppelinVerifyCC) target_link_libraries(statistical_sketch_test PRIVATE GraphZeppelinVerifyCC) diff --git a/tools/statistical_testing/sketch_testing.cpp b/tools/sketch_testing.cpp similarity index 100% rename from tools/statistical_testing/sketch_testing.cpp rename to tools/sketch_testing.cpp From c607b581a2c62ee8e2a4d19612c6513bba94a0fc Mon Sep 17 00:00:00 2001 From: Evan West Date: Thu, 15 Feb 2024 16:13:00 -0500 Subject: [PATCH 17/37] Make documentation accurate again --- README.md | 56 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 5884f01a..625f5464 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ This is the source code of GraphZeppelin: a compact, fast, and scalable graph pr The full experiments for our SIGMOD paper can be found in our [Experiments Repository](https://github.com/GraphStreamingProject/ZeppelinExperiments). Our experiments were replicated by the SIGMOD reproducibility committee, details can be found in the [reproducibility report](https://reproducibility.sigmod.org/rep_rep/2023/Dayan-SIGMODReproReport26.pdf). +Since submitting to SIGMOD, GraphZeppelin has been continually updated improve robustness, performance, and reduce memory consumption. + ## Installing and Running GraphZeppelin ### Requirements - Unix OS (not Mac, tested on Ubuntu) @@ -16,28 +18,43 @@ The full experiments for our SIGMOD paper can be found in our [Experiments Repos This library can easily be included with other cmake projects using FetchContent or ExternalProject. 
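+
+For example, a minimal FetchContent sketch might look like the following (the repository URL and `GIT_TAG` are placeholders to adjust for your own setup; `GraphZeppelin` is the library target this project's CMakeLists defines):
+```
+include(FetchContent)
+
+# Download GraphZeppelin and make its targets available to this project
+FetchContent_Declare(
+  GraphZeppelin
+  GIT_REPOSITORY https://github.com/GraphStreamingProject/GraphZeppelin.git  # placeholder URL
+  GIT_TAG        main                                                        # placeholder: pin a release or commit
+)
+FetchContent_MakeAvailable(GraphZeppelin)
+
+# Link the GraphZeppelin library into your own executable
+add_executable(my_app main.cpp)
+target_link_libraries(my_app PRIVATE GraphZeppelin)
+```
+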
-### Basic Example
+## Minimal Example
 ```
-#include <binary_graph_stream.h>
-#include <graph.h>
+#include <binary_file_stream.h>
+#include <cc_sketch_alg.h>
+#include <cc_alg_configuration.h>
+#include <graph_sketch_driver.h>
 
 std::string file_name = "/path/to/binary/stream";
 
 int main() {
-  BinaryGraphStream stream(file_name, 1024*32); // Create a stream object for parsing a stream 'file_name' with 32 KiB buffer
-  node_id_t num_nodes = stream.nodes();         // Extract the number of nodes from the stream
-  size_t num_updates = stream.edges();          // Extract the number of edge updates from the stream
-  Graph g{num_nodes};                           // Create a empty graph with 'num_nodes' nodes
-
-  for (size_t e = 0; e < num_updates; e++)      // Loop through all the updates in the stream
-    g.update(stream.get_edge());                // Update the graph by applying the next edge update
-
-  auto CC = g.connected_components();           // Extract the connected components in the graph defined by the stream
+  BinaryFileStream stream(file_name);           // Create a stream object for parsing a graph stream 'file_name'
+  node_id_t num_vertices = stream.vertices();   // Extract the number of graph vertices from the stream
+  CCSketchAlg cc_alg{                           // Create connected components sketch algorithm
+    num_vertices,                               // vertices in graph
+    size_t(time(NULL)),                         // seed
+    CCAlgConfiguration()                        // configuration
+  };
+  GraphSketchDriver<CCSketchAlg> driver{        // Create a driver to manage the CC algorithm
+    &cc_alg,                                    // algorithm to update
+    &stream,                                    // stream to read
+    DriverConfiguration()                       // configuration
+  };
+  driver.process_stream_until(END_OF_STREAM);   // Tell the driver to process the entire graph stream
+  driver.prep_query();                          // Ensure that all updates have been processed
+  auto CC = cc_alg.connected_components();      // Extract the connected components
 }
 ```
 A more detailed example can be found in `tools/process_stream.cpp`.
 
-### Binary Stream Format
+## Configuration
+GraphZeppelin has a number of parameters for both the driver and the sketch algorithm. Driver parameters include the number of worker threads and which GutteringSystem to use; algorithm parameters include the desired batch size.
+To achieve high performance, it is important to set these parameters correctly. See `tools/process_stream.cpp`.
+
+The driver options are set with the `DriverConfiguration` object (see `include/driver_configuration.h`).
+Algorithm configuration varies by algorithm; the connected components options are managed with the `CCAlgConfiguration` object (see `include/cc_alg_configuration.h`).
+
+## Binary Stream Format
 GraphZeppelin uses a binary stream format for efficient file parsing. The format of these files is as follows.
 ```
 ...
@@ -50,17 +67,14 @@ Each edge_update has the following format:
 |    1 byte    |   4 bytes  |   4 bytes  |
 ```
-The UpdateType is 0 to indicate an insertion of the associated edge and 1 to indicate a deletion.
+Where UpdateType is 0 to indicate an insertion and 1 to indicate a deletion.
 
-### Other Stream Formats
-Other file formats can be used by writing a simple file parser that passes graph `update()` the expected edge update format `GraphUpdate := std::pair`. See our unit tests under `/test/graph_test.cpp` for examples of string based stream parsing.
 
-If receiving edge updates over the network it is equally straightforward to define a stream format that will receive, parse, and provide those updates to the graph `update()` function.
+See our [StreamingUtilities](https://github.com/GraphStreamingProject/StreamingUtilities) repository for more details.
 
-## Configuration
-GraphZeppelin has a number of parameters. These can be defined with the `GraphConfiguration` object. 
Key parameters include the number of graph workers and the guttering system to use for buffering updates. +## GutteringSystems +To achieve high update throughput, GraphZeppelin buffers updates in what we call a GutteringSystem. Choosing the correct GutteringSystem is important for performance. If you expect storage to include on disk data-structures, choose the `GutterTree`. Otherwise, choose the `CacheTree`. -See `include/graph_configuration.h` for more details. +For more details see the [GutteringSystems](https://github.com/GraphStreamingProject/GutterTree) repository. ## Debugging You can enable the symbol table and turn off compiler optimizations for debugging with tools like `gdb` or `valgrind` by performing the following steps From bdfed7ffacd89f40afd8d82b73844ac4f9666f18 Mon Sep 17 00:00:00 2001 From: Evan West Date: Thu, 15 Feb 2024 21:39:41 -0500 Subject: [PATCH 18/37] GraphSketchDriver doc in README --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 625f5464..b6965a22 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,10 @@ Where UpdateType is 0 to indicate an insertion and 1 to indicate a deletion. See our [StreamingUtilities](https://github.com/GraphStreamingProject/StreamingUtilities) repository for more details. + +## GraphSketchDriver +The `GraphSketchDriver` is responsible for managing the flow of data through the various components of our system. It is templatized by the specific sketch algorithm one is running. If using GraphZeppelin on a single machine, we recommend using the `GraphSketchDriver` for any vertex-based sketch algorithm. When implementing a new algorithm, the class must provide an interface to the driver. This interface is described at the top of `include/graph_sketch_driver.h` and is depicted in the Data Flow Documentation. + ## GutteringSystems To achieve high update throughput, GraphZeppelin buffers updates in what we call a GutteringSystem. Choosing the correct GutteringSystem is important for performance. If you expect storage to include on disk data-structures, choose the `GutterTree`. Otherwise, choose the `CacheTree`. From 085e5ed8f1fa65f796a6c0f6277feaeeb30637d2 Mon Sep 17 00:00:00 2001 From: Evan West Date: Thu, 15 Feb 2024 21:50:24 -0500 Subject: [PATCH 19/37] get rid of doxy and add initial control flow documentation --- .gitignore | 8 +- Doxyfile | 2608 ------------------------------------------ README.md | 2 +- docs/control_flow.md | 34 + 4 files changed, 36 insertions(+), 2616 deletions(-) delete mode 100755 Doxyfile create mode 100644 docs/control_flow.md diff --git a/.gitignore b/.gitignore index 2e1a7d7e..04ab3644 100644 --- a/.gitignore +++ b/.gitignore @@ -48,16 +48,10 @@ # other IDEs /.vscode -#Doxygen docs -/docs/ - -# Our configuration file -streaming.conf -test/statistical_testing/stat_config.txt - # Mac Files *.DS_Store # Python stuff for statistical testing *__pycache__/ *test_env/ + diff --git a/Doxyfile b/Doxyfile deleted file mode 100755 index e5fdb804..00000000 --- a/Doxyfile +++ /dev/null @@ -1,2608 +0,0 @@ -# Doxyfile 1.8.20 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project. -# -# All text after a double hash (##) is considered a comment and is placed in -# front of the TAG it is preceding. -# -# All text after a single hash (#) is considered a comment and will be ignored. -# The format is: -# TAG = value [value, ...] -# For lists, items can also be appended using: -# TAG += value [value, ...] 
-# Values that contain spaces should be placed between quotes (\" \"). - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the configuration -# file that follow. The default is UTF-8 which is also the encoding used for all -# text before the first occurrence of this tag. Doxygen uses libiconv (or the -# iconv built into libc) for the transcoding. See -# https://www.gnu.org/software/libiconv/ for the list of possible encodings. -# The default value is: UTF-8. - -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by -# double-quotes, unless you are using Doxywizard) that should identify the -# project for which the documentation is generated. This name is used in the -# title of most generated pages and in a few other places. -# The default value is: My Project. - -PROJECT_NAME = "My Project" - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. This -# could be handy for archiving the generated documentation or if some version -# control system is used. - -PROJECT_NUMBER = - -# Using the PROJECT_BRIEF tag one can provide an optional one line description -# for a project that appears at the top of each page and should give viewer a -# quick idea about the purpose of the project. Keep the description short. - -PROJECT_BRIEF = - -# With the PROJECT_LOGO tag one can specify a logo or an icon that is included -# in the documentation. The maximum height of the logo should not exceed 55 -# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy -# the logo to the output directory. - -PROJECT_LOGO = - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path -# into which the generated documentation will be written. If a relative path is -# entered, it will be relative to the location where doxygen was started. If -# left blank the current directory will be used. - -OUTPUT_DIRECTORY = "docs/" - -# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this -# option can be useful when feeding doxygen a huge amount of source files, where -# putting all generated files in the same directory would otherwise causes -# performance problems for the file system. -# The default value is: NO. - -CREATE_SUBDIRS = NO - -# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII -# characters to appear in the names of generated files. If set to NO, non-ASCII -# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode -# U+3044. -# The default value is: NO. - -ALLOW_UNICODE_NAMES = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. 
-# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese. -# The default value is: English. - -OUTPUT_LANGUAGE = English - -# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all generated output in the proper direction. -# Possible values are: None, LTR, RTL and Context. -# The default value is: None. - -OUTPUT_TEXT_DIRECTION = None - -# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member -# descriptions after the members that are listed in the file and class -# documentation (similar to Javadoc). Set to NO to disable this. -# The default value is: YES. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief -# description of a member or function before the detailed description -# -# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. -# The default value is: YES. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator that is -# used to form the text in various listings. Each string in this list, if found -# as the leading text of the brief description, will be stripped from the text -# and the result, after processing the whole list, is used as the annotated -# text. Otherwise, the brief description is used as-is. If left blank, the -# following values are used ($name is automatically replaced with the name of -# the entity):The $name class, The $name widget, The $name file, is, provides, -# specifies, contains, represents, a, an and the. - -ABBREVIATE_BRIEF = "The $name class" \ - "The $name widget" \ - "The $name file" \ - is \ - provides \ - specifies \ - contains \ - represents \ - a \ - an \ - the - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# doxygen will generate a detailed section even if there is only a brief -# description. -# The default value is: NO. - -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. -# The default value is: NO. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path -# before files name in the file list and in the header files. If set to NO the -# shortest path that makes the file name unique will be used -# The default value is: YES. - -FULL_PATH_NAMES = YES - -# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. -# Stripping is only done if one of the specified strings matches the left-hand -# part of the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the path to -# strip. 
-# -# Note that you can specify absolute paths here, but also relative paths, which -# will be relative from the directory where doxygen is started. -# This tag requires that the tag FULL_PATH_NAMES is set to YES. - -STRIP_FROM_PATH = - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the -# path mentioned in the documentation of a class, which tells the reader which -# header file to include in order to use a class. If left blank only the name of -# the header file containing the class definition is used. Otherwise one should -# specify the list of include paths that are normally passed to the compiler -# using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but -# less readable) file names. This can be useful is your file systems doesn't -# support long names like on DOS, Mac, or CD-ROM. -# The default value is: NO. - -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the -# first line (until the first dot) of a Javadoc-style comment as the brief -# description. If set to NO, the Javadoc-style will behave just like regular Qt- -# style comments (thus requiring an explicit @brief command for a brief -# description.) -# The default value is: NO. - -JAVADOC_AUTOBRIEF = NO - -# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line -# such as -# /*************** -# as being the beginning of a Javadoc-style comment "banner". If set to NO, the -# Javadoc-style will behave just like regular comments and it will not be -# interpreted by doxygen. -# The default value is: NO. - -JAVADOC_BANNER = NO - -# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first -# line (until the first dot) of a Qt-style comment as the brief description. If -# set to NO, the Qt-style will behave just like regular Qt-style comments (thus -# requiring an explicit \brief command for a brief description.) -# The default value is: NO. - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a -# multi-line C++ special comment block (i.e. a block of //! or /// comments) as -# a brief description. This used to be the default behavior. The new default is -# to treat a multi-line C++ comment block as a detailed description. Set this -# tag to YES if you prefer the old behavior instead. -# -# Note that setting this tag to YES also means that rational rose comments are -# not recognized any more. -# The default value is: NO. - -MULTILINE_CPP_IS_BRIEF = NO - -# By default Python docstrings are displayed as preformatted text and doxygen's -# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the -# doxygen's special commands can be used and the contents of the docstring -# documentation blocks is shown as doxygen documentation. -# The default value is: YES. - -PYTHON_DOCSTRING = YES - -# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the -# documentation from any documented member that it re-implements. -# The default value is: YES. - -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new -# page for each member. If set to NO, the documentation of a member will be part -# of the file/class/namespace that contains it. -# The default value is: NO. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen -# uses this value to replace tabs by spaces in code fragments. 
-# Minimum value: 1, maximum value: 16, default value: 4. - -TAB_SIZE = 4 - -# This tag can be used to specify a number of aliases that act as commands in -# the documentation. An alias has the form: -# name=value -# For example adding -# "sideeffect=@par Side Effects:\n" -# will allow you to put the command \sideeffect (or @sideeffect) in the -# documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines (in the resulting output). You can put ^^ in the value part of an -# alias to insert a newline as if a physical newline was in the original file. -# When you need a literal { or } or , in the value part of an alias you have to -# escape them by means of a backslash (\), this can lead to conflicts with the -# commands \{ and \} for these it is advised to use the version @{ and @} or use -# a double escape (\\{ and \\}) - -ALIASES = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources -# only. Doxygen will then generate output that is more tailored for C. For -# instance, some of the names that are used will be different. The list of all -# members will be omitted, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_FOR_C = NO - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or -# Python sources only. Doxygen will then generate output that is more tailored -# for that language. For instance, namespaces will be presented as packages, -# qualified scopes will look different, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources. Doxygen will then generate output that is tailored for Fortran. -# The default value is: NO. - -OPTIMIZE_FOR_FORTRAN = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. Doxygen will then generate output that is tailored for VHDL. -# The default value is: NO. - -OPTIMIZE_OUTPUT_VHDL = NO - -# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice -# sources only. Doxygen will then generate output that is more tailored for that -# language. For instance, namespaces will be presented as modules, types will be -# separated into more groups, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_SLICE = NO - -# Doxygen selects the parser to use depending on the extension of the files it -# parses. With this tag you can assign which parser to use for a given -# extension. Doxygen has a built-in mapping, but you can override or extend it -# using this tag. The format is ext=language, where ext is a file extension, and -# language is one of the parsers supported by doxygen: IDL, Java, JavaScript, -# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL, -# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: -# FortranFree, unknown formatted Fortran: Fortran. In the later case the parser -# tries to guess whether the code is fixed or free formatted code, this is the -# default for Fortran type files). For instance to make doxygen treat .inc files -# as Fortran files (default is PHP), and .f files as C (default is Fortran), -# use: inc=Fortran f=C. -# -# Note: For files without extension you can use no_extension as a placeholder. -# -# Note that for custom extensions you also need to set FILE_PATTERNS otherwise -# the files are not read by doxygen. 
- -EXTENSION_MAPPING = - -# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments -# according to the Markdown format, which allows for more readable -# documentation. See https://daringfireball.net/projects/markdown/ for details. -# The output of markdown processing is further processed by doxygen, so you can -# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in -# case of backward compatibilities issues. -# The default value is: YES. - -MARKDOWN_SUPPORT = YES - -# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up -# to that level are automatically included in the table of contents, even if -# they do not have an id attribute. -# Note: This feature currently applies only to Markdown headings. -# Minimum value: 0, maximum value: 99, default value: 5. -# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. - -TOC_INCLUDE_HEADINGS = 5 - -# When enabled doxygen tries to link words that correspond to documented -# classes, or namespaces to their corresponding documentation. Such a link can -# be prevented in individual cases by putting a % sign in front of the word or -# globally by setting AUTOLINK_SUPPORT to NO. -# The default value is: YES. - -AUTOLINK_SUPPORT = YES - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should set this -# tag to YES in order to let doxygen match functions declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); -# versus func(std::string) {}). This also make the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. -# The default value is: NO. - -BUILTIN_STL_SUPPORT = NO - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. -# The default value is: NO. - -CPP_CLI_SUPPORT = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: -# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen -# will parse them like normal C++ but will assume all classes use public instead -# of private inheritance when no explicit protection keyword is present. -# The default value is: NO. - -SIP_SUPPORT = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate -# getter and setter methods for a property. Setting this option to YES will make -# doxygen to replace the get and set methods by a property in the documentation. -# This will only work if the methods are indeed getting or setting a simple -# type. If this is not the case, or you want to show the methods anyway, you -# should set this option to NO. -# The default value is: YES. - -IDL_PROPERTY_SUPPORT = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. -# The default value is: NO. - -DISTRIBUTE_GROUP_DOC = NO - -# If one adds a struct or class to a group and this option is enabled, then also -# any nested class or struct is added to the same group. By default this option -# is disabled and one has to add nested compounds explicitly via \ingroup. -# The default value is: NO. 
- -GROUP_NESTED_COMPOUNDS = NO - -# Set the SUBGROUPING tag to YES to allow class member groups of the same type -# (for instance a group of public functions) to be put as a subgroup of that -# type (e.g. under the Public Functions section). Set it to NO to prevent -# subgrouping. Alternatively, this can be done per class using the -# \nosubgrouping command. -# The default value is: YES. - -SUBGROUPING = YES - -# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions -# are shown inside the group in which they are included (e.g. using \ingroup) -# instead of on a separate page (for HTML and Man pages) or section (for LaTeX -# and RTF). -# -# Note that this feature does not work in combination with -# SEPARATE_MEMBER_PAGES. -# The default value is: NO. - -INLINE_GROUPED_CLASSES = NO - -# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions -# with only public data fields or simple typedef fields will be shown inline in -# the documentation of the scope in which they are defined (i.e. file, -# namespace, or group documentation), provided this scope is documented. If set -# to NO, structs, classes, and unions are shown on a separate page (for HTML and -# Man pages) or section (for LaTeX and RTF). -# The default value is: NO. - -INLINE_SIMPLE_STRUCTS = NO - -# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or -# enum is documented as struct, union, or enum with the name of the typedef. So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically be -# useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. -# The default value is: NO. - -TYPEDEF_HIDES_STRUCT = NO - -# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This -# cache is used to resolve symbols given their name and scope. Since this can be -# an expensive process and often the same symbol appears multiple times in the -# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small -# doxygen will become slower. If the cache is too large, memory is wasted. The -# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range -# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 -# symbols. At the end of a run doxygen will report the cache usage and suggest -# the optimal cache size from a speed point of view. -# Minimum value: 0, maximum value: 9, default value: 0. - -LOOKUP_CACHE_SIZE = 0 - -# The NUM_PROC_THREADS specifies the number threads doxygen is allowed to use -# during processing. When set to 0 doxygen will based this on the number of -# cores available in the system. You can set it explicitly to a value larger -# than 0 to get more control over the balance between CPU load and processing -# speed. At this moment only the input processing can be done using multiple -# threads. Since this is still an experimental feature the default is set to 1, -# which efficively disables parallel processing. Please report any issues you -# encounter. Generating dot graphs in parallel is controlled by the -# DOT_NUM_THREADS setting. -# Minimum value: 0, maximum value: 32, default value: 1. 
[Flattened diff span: every line in this hunk carries the diff's `-` (removal) prefix and belongs to a Doxygen configuration file being deleted. The removed settings are the stock Doxygen options for entity extraction and visibility (EXTRACT_*, HIDE_*, SORT_*), warning and progress messages (WARN_*), input files and filters (INPUT, FILE_PATTERNS, EXCLUDE_*, INPUT_FILTER), source browsing (SOURCE_BROWSER, REFERENCES_*), the alphabetical class index, and HTML output (HTML_*, docset/HTML Help/Qt Help/Eclipse help generation, MathJax, and the built-in search engine); the hunk is cut off mid-way through the SEARCHENGINE description.]