From 9ebdb6988dd2157823a41c805cfbbe394860230a Mon Sep 17 00:00:00 2001 From: Evan West Date: Fri, 10 Nov 2023 17:43:24 -0500 Subject: [PATCH 01/37] progress on better query algorithm. Not working yet --- CMakeLists.txt | 14 +- include/cc_alg_configuration.h | 3 - include/cc_sketch_alg.h | 44 +++-- src/cc_sketch_alg.cpp | 314 ++++++++++++++++++++++++--------- src/sketch.cpp | 3 + 5 files changed, 271 insertions(+), 107 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a66c474d..cdbd929d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,10 +28,10 @@ else() message(STATUS "${CMAKE_CXX_COMPILER_ID} not recognized, no flags added") endif() -#add_compile_options(-fsanitize=address) -#add_link_options(-fsanitize=address) -#add_compile_options(-fsanitize=undefined) -#add_link_options(-fsanitize=undefined) +add_compile_options(-fsanitize=address) +add_link_options(-fsanitize=address) +add_compile_options(-fsanitize=undefined) +add_link_options(-fsanitize=undefined) # Check if this project is the top directory or build type is Debug # If so, build executables, otherwise, only build libraries @@ -184,3 +184,9 @@ if (BUILD_BENCH) add_dependencies(bench_cc GraphZeppelin benchmark) target_link_libraries(bench_cc GraphZeppelin benchmark::benchmark xxhash) endif() + + +add_executable(omp_test + test.cpp +) +target_link_libraries(omp_test PRIVATE GraphZeppelin) \ No newline at end of file diff --git a/include/cc_alg_configuration.h b/include/cc_alg_configuration.h index e27182ca..52da61c6 100644 --- a/include/cc_alg_configuration.h +++ b/include/cc_alg_configuration.h @@ -31,9 +31,6 @@ class CCAlgConfiguration { friend std::ostream& operator<< (std::ostream &out, const CCAlgConfiguration &conf); - // no use of equal operator - CCAlgConfiguration& operator=(const CCAlgConfiguration &) = delete; - // moving and copying allowed CCAlgConfiguration(const CCAlgConfiguration &oth) = default; CCAlgConfiguration (CCAlgConfiguration &&) = default; diff --git a/include/cc_sketch_alg.h b/include/cc_sketch_alg.h index 40bf6269..1183c48c 100644 --- a/include/cc_sketch_alg.h +++ b/include/cc_sketch_alg.h @@ -1,6 +1,6 @@ #pragma once -#include // REMOVE LATER +#include #include #include #include @@ -27,12 +27,39 @@ class UpdateLockedException : public std::exception { } }; +struct MergeInstr { + node_id_t root; + node_id_t child; + + inline bool operator< (const MergeInstr &oth) const { + if (root == oth.root) + return child < oth.child; + return root < oth.root; + } +}; + +struct alignas(64) GlobalMergeData { + Sketch sketch; + std::mutex mtx; + size_t num_merge_needed = -1; + size_t num_merge_done = 0; + + GlobalMergeData(node_id_t num_nodes, size_t seed) + : sketch(Sketch::calc_vector_length(num_nodes), seed, Sketch::calc_cc_samples(num_nodes)) {} + + GlobalMergeData(const GlobalMergeData&& other) + : sketch(other.sketch) { + num_merge_needed = other.num_merge_needed; + num_merge_done = other.num_merge_done; + } +}; + /** * Algorithm for computing connected components on undirected graph streams * (no self-edges or multi-edges) */ class CCSketchAlg { - protected: + private: node_id_t num_nodes; size_t seed; bool update_locked = false; @@ -61,21 +88,14 @@ class CCSketchAlg { * @param query an array of sketch sample results * @param reps an array containing node indices for the representative of each supernode */ - bool sample_supernodes(std::vector &merge_instr); + bool sample_supernode(Sketch &skt); /** * @param reps set containing the roots of each supernode * @param merge_instr a list of lists of 
supernodes to be merged */ - void merge_supernodes(const size_t next_round, - const std::vector &merge_instr); - - /** - * @param reps set containing the roots of each supernode - * @param merge_instr an array where each vertex indicates its supernode root - */ - void undo_merge_supernodes(const size_t cur_round, - const std::vector &merge_instr); + bool perform_boruvka_round(const size_t cur_round, const std::vector &merge_instr, + std::vector &global_merges); /** * Main parallel algorithm utilizing Boruvka and L_0 sampling. diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index 3a434697..a236b609 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -5,6 +5,7 @@ #include #include #include +#include CCSketchAlg::CCSketchAlg(node_id_t num_nodes, CCAlgConfiguration config) : num_nodes(num_nodes), dsu(num_nodes), config(config) { @@ -131,138 +132,275 @@ void CCSketchAlg::update(GraphUpdate upd) { sketches[edge.dst]->update(static_cast(concat_pairing_fn(edge.src, edge.dst))); } -bool CCSketchAlg::sample_supernodes(std::vector &merge_instr) { - bool except = false; +// sample from a sketch that represents a supernode of vertices +// that is, 1 or more vertices merged together during Boruvka +inline bool CCSketchAlg::sample_supernode(Sketch &skt) { bool modified = false; - std::exception_ptr err; -#pragma omp parallel for default(shared) - for (node_id_t root = 0; root < num_nodes; root++) { - if (merge_instr[root] != root) { - // don't query non-roots - continue; - } + SketchSample sample = skt.sample(); - SketchSample sample_result; - - // wrap in a try/catch because exiting through exception is undefined behavior in OMP - try { - sample_result = sketches[root]->sample(); - } catch (...) { - except = true; - err = std::current_exception(); - } + Edge e = inv_concat_pairing_fn(sample.idx); + SampleResult result_type = sample.result; - Edge e = inv_concat_pairing_fn(sample_result.idx); - SampleResult result_type = sample_result.result; + // std::cout << "Sample: " << result_type << " e:" << e.src << " " << e.dst << std::endl; - if (result_type == FAIL) { - modified = true; - } else if (result_type == GOOD) { - DSUMergeRet m_ret = dsu.merge(e.src, e.dst); - if (m_ret.merged) { + if (result_type == FAIL) { + modified = true; + } else if (result_type == GOOD) { + DSUMergeRet m_ret = dsu.merge(e.src, e.dst); + if (m_ret.merged) { #ifdef VERIFY_SAMPLES_F - verifier->verify_edge(e); + verifier->verify_edge(e); #endif - modified = true; - // Update spanning forest - auto src = std::min(e.src, e.dst); - auto dst = std::max(e.src, e.dst); - { - std::unique_lock lk(spanning_forest_mtx[src]); - spanning_forest[src].insert(dst); - } - } + modified = true; + // Update spanning forest + auto src = std::min(e.src, e.dst); + auto dst = std::max(e.src, e.dst); + { + std::unique_lock lk(spanning_forest_mtx[src]); + spanning_forest[src].insert(dst); } } - - // Did one of our threads produce an exception? - if (except) std::rethrow_exception(err); + } + return modified; } -void CCSketchAlg::merge_supernodes(const size_t next_round, - const std::vector &merge_instr) { -#pragma omp parallel default(shared) +/* + * Returns the ith half-open range in the division of [0, length] into divisions segments. 
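+ * For example, with the ceil-based formula below: length = 10 and divisions = 4 give
+ * the ranges [0,3), [3,5), [5,8), [8,10), which cover [0,10) with no gaps or overlaps.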
+ */ +inline std::pair get_ith_partition(node_id_t length, size_t i, + size_t divisions) { + double div_factor = (double)length / divisions; + return {ceil(div_factor * i), ceil(div_factor * (i + 1))}; +} + +/* + * Returns the half-open range idx that contains idx + * Inverse of get_ith_partition + */ +inline size_t get_partition_idx(node_id_t length, node_id_t idx, size_t divisions) { + double div_factor = (double)length / divisions; + return idx / div_factor; +} + +inline node_id_t find_last_partition_of_root(const std::vector &merge_instr, + const node_id_t root, node_id_t min_hint, + size_t num_threads) { + node_id_t max = merge_instr.size() - 1; + node_id_t min = min_hint; + MergeInstr target = {root, (node_id_t) -1}; + + while (min < max) { + node_id_t mid = min + (max - min) / 2; + + if (merge_instr[mid] < target) { + min = mid + 1; + } else { + max = mid; + } + } + + if (merge_instr[min].root != root) + min = min - 1; + + assert(merge_instr[min].root == root); + assert(min == merge_instr.size() - 1 || merge_instr[min + 1].root > root); + return get_partition_idx(merge_instr.size(), min, num_threads); +} + +// merge the global and return if it is safe to query now +inline bool merge_global(const size_t cur_round, const Sketch &local_sketch, + GlobalMergeData &global) { + std::unique_lock lk(global.mtx); + global.sketch.range_merge(local_sketch, cur_round, 1); + ++global.num_merge_done; + assert(global.num_merge_done <= global.num_merge_needed); + + return global.num_merge_done >= global.num_merge_needed; +} + +bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, + const std::vector &merge_instr, + std::vector &global_merges) { + bool modified = false; + bool except = false; + std::exception_ptr err; + for (size_t i = 0; i < global_merges.size(); i++) { + global_merges[i].sketch.zero_contents(); + global_merges[i].num_merge_needed = -1; + global_merges[i].num_merge_done = 0; + } + + std::atomic num_query; + num_query = 0; + +#pragma omp parallel default(shared) num_threads(8) { // some thread local variables Sketch local_sketch(Sketch::calc_vector_length(num_nodes), seed, Sketch::calc_cc_samples(num_nodes)); - node_id_t cur_root = 0; - bool first_root = true; -#pragma omp for - for (node_id_t i = 0; i < num_nodes; i++) { - if (merge_instr[i] == i) continue; - - node_id_t root = merge_instr[i]; - if (root != cur_root || first_root) { - if (!first_root) { - std::unique_lock lk(sketches[cur_root]->mutex); - sketches[cur_root]->range_merge(local_sketch, next_round, 1); + + size_t thr_id = omp_get_thread_num(); + size_t num_threads = omp_get_num_threads(); + std::pair partition = get_ith_partition(num_nodes, thr_id, num_threads); + node_id_t start = partition.first; + node_id_t end = partition.second; + assert(start < end); + +#pragma omp critical + std::cout << thr_id << ": " << start << " " << end << std::endl; + + // node_id_t left_root = merge_instr[start].root; + // node_id_t right_root = merge_instr[end - 1].root; + + bool root_from_left = false; + if (start > 0) { + root_from_left = merge_instr[start - 1].root == merge_instr[start].root; + } + bool root_exits_right = false; + if (end < num_nodes) { + root_exits_right = merge_instr[end - 1].root == merge_instr[end].root; + } + + node_id_t cur_root = merge_instr[start].root; +#pragma omp critical + { + for (node_id_t i = start; i < end; i++) { + node_id_t root = merge_instr[i].root; + node_id_t child = merge_instr[i].child; + std::cout << thr_id << ": " << child << " into " << root << std::endl; + std::cout << 
"root_from_left " << root_from_left << " root_exits_right " << root_exits_right << std::endl; + + if (root != cur_root) { + if (root_from_left) { + // we hold the global for this merge + bool query_ready = merge_global(cur_round, local_sketch, global_merges[thr_id]); + if (query_ready) { + try { + num_query += 1; + if (sample_supernode(global_merges[thr_id].sketch) && !modified) modified = true; + } catch (...) { + except = true; + err = std::current_exception(); + } + } + + // set root_from_left to false + root_from_left = false; + } else { + // This is an entirely local computation + // std::cout << std::endl; + try { + num_query += 1; + if (sample_supernode(local_sketch) && !modified) modified = true; + } catch (...) { + except = true; + err = std::current_exception(); + } } + cur_root = root; local_sketch.zero_contents(); - first_root = false; } - local_sketch.range_merge(*sketches[i], next_round, 1); + // std::cout << " " << child; + local_sketch.range_merge(*sketches[child], cur_round, 1); } - if (!first_root) { - std::unique_lock lk(sketches[cur_root]->mutex); - sketches[cur_root]->range_merge(local_sketch, next_round, 1); + if (root_exits_right || root_from_left) { + // global merge where we may or may not own it + size_t global_id = find_last_partition_of_root(merge_instr, cur_root, start, num_threads); + if (!root_from_left) { + // Resolved root_from_left, so we are the first thread to encounter this root + // set the number of threads that will merge into this component + std::unique_lock lk(global_merges[global_id].mtx); + global_merges[global_id].num_merge_needed = global_id - thr_id + 1; + } + bool query_ready = merge_global(cur_round, local_sketch, global_merges[global_id]); + if (query_ready) { + try { + num_query += 1; + if (sample_supernode(global_merges[thr_id].sketch) && !modified) modified = true; + } catch (...) { + except = true; + err = std::current_exception(); + } + } + } else { + // This is an entirely local computation + // std::cout << std::endl; + try { + num_query += 1; + if (sample_supernode(local_sketch) && !modified) modified = true; + } catch (...) { + except = true; + err = std::current_exception(); + } } } -} + } + + std::cout << "Number of roots queried = " << num_query << std::endl; -void CCSketchAlg::undo_merge_supernodes(const size_t cur_round, - const std::vector &merge_instr) { - if (cur_round > 0) merge_supernodes(cur_round, merge_instr); + if (except) { + // if one of our threads produced an exception throw it here + std::rethrow_exception(err); + } + + return modified; } std::vector> CCSketchAlg::boruvka_emulation() { update_locked = true; cc_alg_start = std::chrono::steady_clock::now(); - std::vector merge_instr(num_nodes); + std::vector merge_instr(num_nodes); + + size_t num_threads = omp_get_max_threads(); + std::vector global_merges; + global_merges.reserve(num_threads); + for (size_t i = 0; i < num_threads; i++) { + global_merges.emplace_back(num_nodes, seed); + } dsu.reset(); for (node_id_t i = 0; i < num_nodes; ++i) { - merge_instr[i] = i; + merge_instr[i] = {i, i}; spanning_forest[i].clear(); } size_t round_num = 0; bool modified = true; while (true) { - // auto start = std::chrono::steady_clock::now(); - try { - modified = sample_supernodes(merge_instr); - } catch (...) 
{ - undo_merge_supernodes(round_num, merge_instr); - std::rethrow_exception(std::current_exception()); - } - // std::cout << "sample: " - // << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - // << std::endl; - - // start = std::chrono::steady_clock::now(); - undo_merge_supernodes(round_num, merge_instr); - // std::cout << "undo merge: " - // << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - // << std::endl; + auto start = std::chrono::steady_clock::now(); + modified = perform_boruvka_round(round_num, merge_instr, global_merges); + std::cout << "round: " << round_num << " = " + << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + << std::endl; if (!modified) break; - // calculate updated merge instructions + // calculate updated merge instructions for next round + start = std::chrono::steady_clock::now(); #pragma omp parallel for for (node_id_t i = 0; i < num_nodes; i++) - merge_instr[i] = dsu.find_root(i); - - // prepare for the next round by merging - // start = std::chrono::steady_clock::now(); - merge_supernodes(round_num + 1, merge_instr); - // std::cout << "merge: " - // << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - // << std::endl; + merge_instr[i] = {dsu.find_root(i), i}; + + std::sort(merge_instr.begin(), merge_instr.end()); + + size_t num_roots = 1; + size_t cur_root = merge_instr[0].root; + for (size_t i = 1; i < num_nodes; i++) { + if (merge_instr[i].root != cur_root) { + num_roots += 1; + cur_root = merge_instr[i].root; + } + } + std::cout << "Number of roots = " << num_roots << std::endl; + + + std::cout << "post round processing = " + << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + << std::endl; ++round_num; } last_query_rounds = round_num; diff --git a/src/sketch.cpp b/src/sketch.cpp index 6edbea29..9a0306b2 100644 --- a/src/sketch.cpp +++ b/src/sketch.cpp @@ -159,6 +159,9 @@ void Sketch::range_merge(const Sketch &other, size_t start_sample, size_t n_samp return; } + // update sample idx to point at beginning of this range if before it + sample_idx = std::max(sample_idx, start_sample); + // merge deterministic buffer buckets[num_buckets - 1].alpha ^= other.buckets[num_buckets - 1].alpha; buckets[num_buckets - 1].gamma ^= other.buckets[num_buckets - 1].gamma; From 23c94ff358f4df382b5db524f49120d8631c1d79 Mon Sep 17 00:00:00 2001 From: Evan West Date: Sat, 11 Nov 2023 14:18:21 -0500 Subject: [PATCH 02/37] working but somewhat slow --- CMakeLists.txt | 14 +++----- src/cc_sketch_alg.cpp | 56 +++++++++++++------------------ test/sketch_test.cpp | 2 ++ test/util/file_graph_verifier.cpp | 19 ++++++++++- 4 files changed, 47 insertions(+), 44 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cdbd929d..a66c474d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,10 +28,10 @@ else() message(STATUS "${CMAKE_CXX_COMPILER_ID} not recognized, no flags added") endif() -add_compile_options(-fsanitize=address) -add_link_options(-fsanitize=address) -add_compile_options(-fsanitize=undefined) -add_link_options(-fsanitize=undefined) +#add_compile_options(-fsanitize=address) +#add_link_options(-fsanitize=address) +#add_compile_options(-fsanitize=undefined) +#add_link_options(-fsanitize=undefined) # Check if this project is the top directory or build type is Debug # If so, build executables, otherwise, only build libraries @@ -184,9 +184,3 @@ if (BUILD_BENCH) add_dependencies(bench_cc GraphZeppelin benchmark) target_link_libraries(bench_cc 
GraphZeppelin benchmark::benchmark xxhash) endif() - - -add_executable(omp_test - test.cpp -) -target_link_libraries(omp_test PRIVATE GraphZeppelin) \ No newline at end of file diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index a236b609..439b838e 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -141,7 +141,7 @@ inline bool CCSketchAlg::sample_supernode(Sketch &skt) { Edge e = inv_concat_pairing_fn(sample.idx); SampleResult result_type = sample.result; - // std::cout << "Sample: " << result_type << " e:" << e.src << " " << e.dst << std::endl; + // std::cout << " " << result_type << " e:" << e.src << " " << e.dst << std::endl; if (result_type == FAIL) { modified = true; @@ -231,9 +231,6 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, global_merges[i].num_merge_done = 0; } - std::atomic num_query; - num_query = 0; - #pragma omp parallel default(shared) num_threads(8) { // some thread local variables @@ -247,9 +244,6 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, node_id_t end = partition.second; assert(start < end); -#pragma omp critical - std::cout << thr_id << ": " << start << " " << end << std::endl; - // node_id_t left_root = merge_instr[start].root; // node_id_t right_root = merge_instr[end - 1].root; @@ -263,21 +257,22 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, } node_id_t cur_root = merge_instr[start].root; -#pragma omp critical - { + + // std::cout << thr_id << std::endl; + // std::cout << " Component " << cur_root << ":"; for (node_id_t i = start; i < end; i++) { node_id_t root = merge_instr[i].root; node_id_t child = merge_instr[i].child; - std::cout << thr_id << ": " << child << " into " << root << std::endl; - std::cout << "root_from_left " << root_from_left << " root_exits_right " << root_exits_right << std::endl; if (root != cur_root) { if (root_from_left) { // we hold the global for this merge + // std::cout << " merge global (we own)" << std::endl; bool query_ready = merge_global(cur_round, local_sketch, global_merges[thr_id]); if (query_ready) { + // std::cout << "Performing query!"; try { - num_query += 1; + // num_query += 1; if (sample_supernode(global_merges[thr_id].sketch) && !modified) modified = true; } catch (...) { except = true; @@ -289,9 +284,9 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, root_from_left = false; } else { // This is an entirely local computation - // std::cout << std::endl; + // std::cout << " query local"; try { - num_query += 1; + // num_query += 1; if (sample_supernode(local_sketch) && !modified) modified = true; } catch (...) 
{ except = true; @@ -300,6 +295,7 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, } cur_root = root; + // std::cout << " Component " << cur_root << ":"; local_sketch.zero_contents(); } @@ -310,6 +306,7 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, if (root_exits_right || root_from_left) { // global merge where we may or may not own it size_t global_id = find_last_partition_of_root(merge_instr, cur_root, start, num_threads); + // std::cout << " merge global (" << global_id << ")" << std::endl; if (!root_from_left) { // Resolved root_from_left, so we are the first thread to encounter this root // set the number of threads that will merge into this component @@ -318,9 +315,10 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, } bool query_ready = merge_global(cur_round, local_sketch, global_merges[global_id]); if (query_ready) { + // std::cout << "Performing query!"; try { - num_query += 1; - if (sample_supernode(global_merges[thr_id].sketch) && !modified) modified = true; + // num_query += 1; + if (sample_supernode(global_merges[global_id].sketch) && !modified) modified = true; } catch (...) { except = true; err = std::current_exception(); @@ -328,9 +326,9 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, } } else { // This is an entirely local computation - // std::cout << std::endl; + // std::cout << " query local"; try { - num_query += 1; + // num_query += 1; if (sample_supernode(local_sketch) && !modified) modified = true; } catch (...) { except = true; @@ -338,9 +336,8 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, } } } - } - std::cout << "Number of roots queried = " << num_query << std::endl; + // std::cout << "Number of roots queried = " << num_query << std::endl; if (except) { // if one of our threads produced an exception throw it here @@ -385,20 +382,13 @@ std::vector> CCSketchAlg::boruvka_emulation() { for (node_id_t i = 0; i < num_nodes; i++) merge_instr[i] = {dsu.find_root(i), i}; - std::sort(merge_instr.begin(), merge_instr.end()); - - size_t num_roots = 1; - size_t cur_root = merge_instr[0].root; - for (size_t i = 1; i < num_nodes; i++) { - if (merge_instr[i].root != cur_root) { - num_roots += 1; - cur_root = merge_instr[i].root; - } - } - std::cout << "Number of roots = " << num_roots << std::endl; - + std::cout << " finding roots = " + << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + << std::endl; - std::cout << "post round processing = " + start = std::chrono::steady_clock::now(); + std::sort(merge_instr.begin(), merge_instr.end()); + std::cout << " sorting = " << std::chrono::duration(std::chrono::steady_clock::now() - start).count() << std::endl; ++round_num; diff --git a/test/sketch_test.cpp b/test/sketch_test.cpp index cdc57c13..f9bfccb8 100644 --- a/test/sketch_test.cpp +++ b/test/sketch_test.cpp @@ -444,6 +444,8 @@ TEST(SketchTestSuite, TestRawBucketUpdate) { sk2.reset_sample_state(); sample = sk2.sample(); ASSERT_EQ(sample.result, ZERO); + + delete[] copy_data; } ASSERT_GT(successes, 0); } diff --git a/test/util/file_graph_verifier.cpp b/test/util/file_graph_verifier.cpp index eb5e4eed..a212be3c 100644 --- a/test/util/file_graph_verifier.cpp +++ b/test/util/file_graph_verifier.cpp @@ -69,8 +69,25 @@ void FileGraphVerifier::verify_soln(std::vector> &retval) { auto temp {retval}; std::sort(temp.begin(),temp.end()); std::sort(kruskal_ref.begin(),kruskal_ref.end()); - if (kruskal_ref != temp) + if (kruskal_ref != temp) { + std::cout << "Provided CC:" 
<< std::endl; + for (auto cc : temp) { + for (auto v : cc) { + std::cout << " " << v; + } + std::cout << std::endl; + } + + std::cout << "Expected CC:" << std::endl; + for (auto cc : kruskal_ref) { + for (auto v : cc) { + std::cout << " " << v; + } + std::cout << std::endl; + } + throw IncorrectCCException(); + } std::cout << "Solution ok: " << retval.size() << " CCs found." << std::endl; } From 3aaaa64fad66f046b82de677a644f6d04b99f20c Mon Sep 17 00:00:00 2001 From: Evan West Date: Mon, 13 Nov 2023 19:01:53 -0500 Subject: [PATCH 03/37] improved query performance --- CMakeLists.txt | 1 + include/cc_sketch_alg.h | 6 ++++++ src/cc_sketch_alg.cpp | 36 ++++++++++++++++++++++++++++++++---- 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a66c474d..975f1a9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -184,3 +184,4 @@ if (BUILD_BENCH) add_dependencies(bench_cc GraphZeppelin benchmark) target_link_libraries(bench_cc GraphZeppelin benchmark::benchmark xxhash) endif() + diff --git a/include/cc_sketch_alg.h b/include/cc_sketch_alg.h index 1183c48c..d466dab8 100644 --- a/include/cc_sketch_alg.h +++ b/include/cc_sketch_alg.h @@ -83,6 +83,12 @@ class CCSketchAlg { Sketch **delta_sketches = nullptr; size_t num_delta_sketches; + /** + * Run the first round of Boruvka. We can do things faster here because we know there will + * be no merging we have to do. + */ + bool run_round_zero(); + /** * Update the query array with new samples * @param query an array of sketch sample results diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index 439b838e..a0bc318a 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -219,9 +219,36 @@ inline bool merge_global(const size_t cur_round, const Sketch &local_sketch, return global.num_merge_done >= global.num_merge_needed; } +// faster query procedure optimized for when we know there is no merging to do (i.e. round 0) +inline bool CCSketchAlg::run_round_zero() { + bool modified = false; + bool except = false; + std::exception_ptr err; +#pragma omp parallel for + for (node_id_t i = 0; i < num_nodes; i++) { + try { + // num_query += 1; + if (sample_supernode(*sketches[i]) && !modified) modified = true; + } catch (...) 
{ + except = true; + err = std::current_exception(); + } + } + if (except) { + // if one of our threads produced an exception throw it here + std::rethrow_exception(err); + } + + return modified; +} + bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, const std::vector &merge_instr, std::vector &global_merges) { + if (cur_round == 0) { + return run_round_zero(); + } + bool modified = false; bool except = false; std::exception_ptr err; @@ -231,7 +258,7 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, global_merges[i].num_merge_done = 0; } -#pragma omp parallel default(shared) num_threads(8) +#pragma omp parallel default(shared) { // some thread local variables Sketch local_sketch(Sketch::calc_vector_length(num_nodes), seed, @@ -379,9 +406,10 @@ std::vector> CCSketchAlg::boruvka_emulation() { // calculate updated merge instructions for next round start = std::chrono::steady_clock::now(); #pragma omp parallel for - for (node_id_t i = 0; i < num_nodes; i++) - merge_instr[i] = {dsu.find_root(i), i}; - + for (node_id_t i = 0; i < num_nodes; i++) { + node_id_t child = merge_instr[i].child; + merge_instr[i].root = dsu.find_root(child); + } std::cout << " finding roots = " << std::chrono::duration(std::chrono::steady_clock::now() - start).count() << std::endl; From ab2c6eca2e0c9cdfbbc13f6592110d0cd8751255 Mon Sep 17 00:00:00 2001 From: Evan West Date: Fri, 17 Nov 2023 13:48:49 -0500 Subject: [PATCH 04/37] remove sort bottleneck. Begin investigating post_processing as bottleneck --- include/cc_sketch_alg.h | 6 ++ src/cc_sketch_alg.cpp | 153 +++++++++++++++++++++++++++++++++------ tools/process_stream.cpp | 19 ++++- 3 files changed, 153 insertions(+), 25 deletions(-) diff --git a/include/cc_sketch_alg.h b/include/cc_sketch_alg.h index d466dab8..038ff751 100644 --- a/include/cc_sketch_alg.h +++ b/include/cc_sketch_alg.h @@ -96,6 +96,12 @@ class CCSketchAlg { */ bool sample_supernode(Sketch &skt); + + /** + * Calculate the instructions for what vertices to merge to form each component + */ + void create_merge_instructions(std::vector &merge_instr); + /** * @param reps set containing the roots of each supernode * @param merge_instr a list of lists of supernodes to be merged diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index a0bc318a..fcbe562c 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -6,6 +6,7 @@ #include #include #include +#include CCSketchAlg::CCSketchAlg(node_id_t num_nodes, CCAlgConfiguration config) : num_nodes(num_nodes), dsu(num_nodes), config(config) { @@ -269,7 +270,7 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, std::pair partition = get_ith_partition(num_nodes, thr_id, num_threads); node_id_t start = partition.first; node_id_t end = partition.second; - assert(start < end); + assert(start <= end); // node_id_t left_root = merge_instr[start].root; // node_id_t right_root = merge_instr[end - 1].root; @@ -374,7 +375,90 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, return modified; } +inline void CCSketchAlg::create_merge_instructions(std::vector &merge_instr) { + std::vector cc_prefix(num_nodes, 0); + node_id_t range_sums[omp_get_max_threads()]; + +#pragma omp parallel default(shared) + { + // thread local variables + std::unordered_map> local_ccs; + std::vector local_cc_idx; + + size_t thr_id = omp_get_thread_num(); + size_t num_threads = omp_get_num_threads(); + std::pair partition = get_ith_partition(num_nodes, thr_id, num_threads); + node_id_t start = partition.first; + node_id_t end 
= partition.second; + + for (node_id_t i = start; i < end; i++) { + node_id_t child = merge_instr[i].child; + node_id_t root = dsu.find_root(child); + if (local_ccs.count(root) == 0) { + local_ccs[root] = {child}; + } else { + local_ccs[root].push_back(child); + } + } + + // each thread loops over its local_ccs and updates cc_prefix + for (auto const &cc : local_ccs) { + node_id_t root = cc.first; + const std::vector &vertices = cc.second; + + node_id_t idx; +#pragma omp atomic capture + {idx = cc_prefix[root]; cc_prefix[root] += vertices.size(); } + + local_cc_idx.push_back(idx); + } +#pragma omp barrier + + // perform a prefix sum over cc_prefix + for (node_id_t i = start + 1; i < end; i++) { + cc_prefix[i] += cc_prefix[i-1]; + } +#pragma omp barrier + + // perform single threaded prefix sum of the resulting sums from each thread +#pragma omp single + { + range_sums[0] = 0; + for (int t = 1; t < omp_get_num_threads(); t++) { + node_id_t cur = get_ith_partition(num_nodes, t - 1, num_threads).second - 1; + range_sums[t] = cc_prefix[cur] + range_sums[t - 1]; + } + } + + // in parallel finish the prefix sums + if (thr_id > 0) { + for (node_id_t i = start; i < end; i++) { + cc_prefix[i] += range_sums[thr_id]; + } + } +#pragma omp barrier + + // Finally, write the local_ccs to the correct portion of the merge_instr array + node_id_t i = 0; + for (auto const &cc : local_ccs) { + node_id_t root = cc.first; + const std::vector &vertices = cc.second; + node_id_t thr_idx = local_cc_idx[i]; + + node_id_t placement = thr_idx; + if (root > 0) + placement += cc_prefix[root - 1]; + + for (size_t j = 0; j < vertices.size(); j++) { + merge_instr[placement + j] = {root, vertices[j]}; + } + i++; + } + } +} + std::vector> CCSketchAlg::boruvka_emulation() { + auto start = std::chrono::steady_clock::now(); update_locked = true; cc_alg_start = std::chrono::steady_clock::now(); @@ -394,10 +478,16 @@ std::vector> CCSketchAlg::boruvka_emulation() { } size_t round_num = 0; bool modified = true; + std::cout << std::endl; + std::cout << " pre boruvka processing = " + << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + << std::endl; + while (true) { - auto start = std::chrono::steady_clock::now(); + std::cout << " Round: " << round_num << std::endl; + start = std::chrono::steady_clock::now(); modified = perform_boruvka_round(round_num, merge_instr, global_merges); - std::cout << "round: " << round_num << " = " + std::cout << " perform_boruvka_round = " << std::chrono::duration(std::chrono::steady_clock::now() - start).count() << std::endl; @@ -405,37 +495,32 @@ std::vector> CCSketchAlg::boruvka_emulation() { // calculate updated merge instructions for next round start = std::chrono::steady_clock::now(); -#pragma omp parallel for - for (node_id_t i = 0; i < num_nodes; i++) { - node_id_t child = merge_instr[i].child; - merge_instr[i].root = dsu.find_root(child); - } - std::cout << " finding roots = " - << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - << std::endl; - - start = std::chrono::steady_clock::now(); - std::sort(merge_instr.begin(), merge_instr.end()); - std::cout << " sorting = " + create_merge_instructions(merge_instr); + std::cout << " create_merge_instructions = " << std::chrono::duration(std::chrono::steady_clock::now() - start).count() << std::endl; ++round_num; } + start = std::chrono::steady_clock::now(); last_query_rounds = round_num; dsu_valid = true; shared_dsu_valid = true; auto retval = cc_from_dsu(); - cc_alg_end = 
std::chrono::steady_clock::now(); update_locked = false; + std::cout << " post boruvka processing = " + << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + << std::endl; + return retval; } std::vector> CCSketchAlg::connected_components() { + cc_alg_start = std::chrono::steady_clock::now(); + // if the DSU holds the answer, use that if (shared_dsu_valid) { - cc_alg_start = std::chrono::steady_clock::now(); #ifdef VERIFY_SAMPLES_F for (node_id_t src = 0; src < num_nodes; ++src) { for (const auto &dst : spanning_forest[src]) { @@ -455,6 +540,7 @@ std::vector> CCSketchAlg::connected_components() { bool except = false; std::exception_ptr err; + auto start = std::chrono::steady_clock::now(); try { ret = boruvka_emulation(); #ifdef VERIFY_SAMPLES_F @@ -464,9 +550,11 @@ std::vector> CCSketchAlg::connected_components() { except = true; err = std::current_exception(); } + std::cout << "boruvka_emulation = " + << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + << std::endl; - // get ready for ingesting more from the stream - // reset dsu and resume graph workers + // get ready for ingesting more from the stream by resetting the sketches sample state for (node_id_t i = 0; i < num_nodes; i++) { sketches[i]->reset_sample_state(); } @@ -474,6 +562,7 @@ std::vector> CCSketchAlg::connected_components() { // check if boruvka error'd if (except) std::rethrow_exception(err); + cc_alg_end = std::chrono::steady_clock::now(); return ret; } @@ -538,13 +627,29 @@ bool CCSketchAlg::point_query(node_id_t a, node_id_t b) { return ret; } -std::vector> CCSketchAlg::cc_from_dsu() { +inline std::vector> CCSketchAlg::cc_from_dsu() { // calculate connected components using DSU structure - std::map> temp; - for (node_id_t i = 0; i < num_nodes; ++i) temp[dsu.find_root(i)].insert(i); + std::vector merge_instr(num_nodes); + for (node_id_t i = 0; i < num_nodes; ++i) { + merge_instr[i] = {i, i}; + } + + create_merge_instructions(merge_instr); + std::vector> retval; - retval.reserve(temp.size()); - for (const auto &it : temp) retval.push_back(it.second); + std::set cc; + cc.insert(merge_instr[0].child); + node_id_t cur_root = merge_instr[0].root; + for (node_id_t i = 1; i < num_nodes; i++) { + if (merge_instr[i].root != cur_root) { + retval.push_back(cc); + cc.clear(); + cur_root = merge_instr[i].root; + } + cc.insert(merge_instr[i].child); + } + retval.push_back(cc); + return retval; } diff --git a/tools/process_stream.cpp b/tools/process_stream.cpp index 87d2e28c..b5777e04 100644 --- a/tools/process_stream.cpp +++ b/tools/process_stream.cpp @@ -92,8 +92,8 @@ int main(int argc, char **argv) { auto cc_start = std::chrono::steady_clock::now(); driver.prep_query(); auto CC_num = cc_alg.connected_components().size(); - std::chrono::duration insert_time = driver.flush_end - ins_start; std::chrono::duration cc_time = std::chrono::steady_clock::now() - cc_start; + std::chrono::duration insert_time = driver.flush_end - ins_start; std::chrono::duration flush_time = driver.flush_end - driver.flush_start; std::chrono::duration cc_alg_time = cc_alg.cc_alg_end - cc_alg.cc_alg_start; @@ -108,4 +108,21 @@ int main(int argc, char **argv) { std::cout << " Boruvka's Algorithm(sec): " << cc_alg_time.count() << std::endl; std::cout << "Connected Components: " << CC_num << std::endl; std::cout << "Maximum Memory Usage(MiB): " << get_max_mem_used() << std::endl; + + + cc_start = std::chrono::steady_clock::now(); + driver.prep_query(); + CC_num = cc_alg.connected_components().size(); + cc_time = 
std::chrono::steady_clock::now() - cc_start; + insert_time = driver.flush_end - ins_start; + flush_time = driver.flush_end - driver.flush_start; + cc_alg_time = cc_alg.cc_alg_end - cc_alg.cc_alg_start; + + std::cout << "SECOND QUERY" << std::endl; + std::cout << "Total CC query latency: " << cc_time.count() << std::endl; + std::cout << " Flush Gutters(sec): " << flush_time.count() << std::endl; + std::cout << " Boruvka's Algorithm(sec): " << cc_alg_time.count() << std::endl; + std::cout << "Connected Components: " << CC_num << std::endl; + std::cout << "Maximum Memory Usage(MiB): " << get_max_mem_used() << std::endl; + } From f1ae69f7d4bf9d067137c97f0090754afdd4088d Mon Sep 17 00:00:00 2001 From: Evan West Date: Sun, 26 Nov 2023 16:30:37 -0500 Subject: [PATCH 05/37] make seed a mandatory argument of the CC algorithm --- CMakeLists.txt | 2 +- include/cc_sketch_alg.h | 14 ++-- src/cc_sketch_alg.cpp | 77 ++++++++++----------- test/{graph_test.cpp => cc_alg_test.cpp} | 45 ++++++------ tools/process_stream.cpp | 7 +- tools/statistical_testing/graph_testing.cpp | 6 +- tools/test_correctness.cpp | 7 +- 7 files changed, 86 insertions(+), 72 deletions(-) rename test/{graph_test.cpp => cc_alg_test.cpp} (90%) diff --git a/CMakeLists.txt b/CMakeLists.txt index a66c474d..e194f5b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,7 +131,7 @@ target_compile_definitions(GraphZeppelinVerifyCC PUBLIC XXH_INLINE_ALL VERIFY_SA if (BUILD_EXE) add_executable(tests test/test_runner.cpp - test/graph_test.cpp + test/cc_alg_test.cpp test/sketch_test.cpp test/dsu_test.cpp test/util_test.cpp diff --git a/include/cc_sketch_alg.h b/include/cc_sketch_alg.h index 40bf6269..5ce376d6 100644 --- a/include/cc_sketch_alg.h +++ b/include/cc_sketch_alg.h @@ -32,8 +32,8 @@ class UpdateLockedException : public std::exception { * (no self-edges or multi-edges) */ class CCSketchAlg { - protected: - node_id_t num_nodes; + private: + node_id_t num_vertices; size_t seed; bool update_locked = false; // a set containing one "representative" from each supernode @@ -94,11 +94,11 @@ class CCSketchAlg { CCAlgConfiguration config; // constructor for use when reading from a serialized file - CCSketchAlg(node_id_t num_nodes, size_t seed, std::ifstream &binary_stream, + CCSketchAlg(node_id_t num_vertices, size_t seed, std::ifstream &binary_stream, CCAlgConfiguration config); public: - CCSketchAlg(node_id_t num_nodes, CCAlgConfiguration config = CCAlgConfiguration()); + CCSketchAlg(node_id_t num_vertices, size_t seed, CCAlgConfiguration config = CCAlgConfiguration()); ~CCSketchAlg(); // construct a CC algorithm from a serialized file @@ -127,8 +127,8 @@ class CCSketchAlg { num_delta_sketches = num_workers; delta_sketches = new Sketch *[num_delta_sketches]; for (size_t i = 0; i < num_delta_sketches; i++) { - delta_sketches[i] = new Sketch(Sketch::calc_vector_length(num_nodes), seed, - Sketch::calc_cc_samples(num_nodes)); + delta_sketches[i] = new Sketch(Sketch::calc_vector_length(num_vertices), seed, + Sketch::calc_cc_samples(num_vertices)); } } @@ -211,6 +211,6 @@ class CCSketchAlg { size_t last_query_rounds = 0; // getters - inline node_id_t get_num_vertices() { return num_nodes; } + inline node_id_t get_num_vertices() { return num_vertices; } inline size_t get_seed() { return seed; } }; diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index 3a434697..59bc5180 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -6,25 +6,20 @@ #include #include -CCSketchAlg::CCSketchAlg(node_id_t num_nodes, CCAlgConfiguration 
config) - : num_nodes(num_nodes), dsu(num_nodes), config(config) { +CCSketchAlg::CCSketchAlg(node_id_t num_vertices, size_t seed, CCAlgConfiguration config) + : num_vertices(num_vertices), seed(seed), dsu(num_vertices), config(config) { representatives = new std::set(); - sketches = new Sketch *[num_nodes]; - seed = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now().time_since_epoch()) - .count(); - std::mt19937_64 r(seed); - seed = r(); - - vec_t sketch_vec_len = Sketch::calc_vector_length(num_nodes); - size_t sketch_num_samples = Sketch::calc_cc_samples(num_nodes); - for (node_id_t i = 0; i < num_nodes; ++i) { + sketches = new Sketch *[num_vertices]; + + vec_t sketch_vec_len = Sketch::calc_vector_length(num_vertices); + size_t sketch_num_samples = Sketch::calc_cc_samples(num_vertices); + for (node_id_t i = 0; i < num_vertices; ++i) { representatives->insert(i); sketches[i] = new Sketch(sketch_vec_len, seed, sketch_num_samples); } - spanning_forest = new std::unordered_set[num_nodes]; - spanning_forest_mtx = new std::mutex[num_nodes]; + spanning_forest = new std::unordered_set[num_vertices]; + spanning_forest_mtx = new std::mutex[num_vertices]; dsu_valid = true; shared_dsu_valid = true; } @@ -34,38 +29,38 @@ CCSketchAlg *CCSketchAlg::construct_from_serialized_data(const std::string &inpu double sketches_factor; auto binary_in = std::ifstream(input_file, std::ios::binary); size_t seed; - node_id_t num_nodes; + node_id_t num_vertices; binary_in.read((char *)&seed, sizeof(seed)); - binary_in.read((char *)&num_nodes, sizeof(num_nodes)); + binary_in.read((char *)&num_vertices, sizeof(num_vertices)); binary_in.read((char *)&sketches_factor, sizeof(sketches_factor)); config.sketches_factor(sketches_factor); - return new CCSketchAlg(num_nodes, seed, binary_in, config); + return new CCSketchAlg(num_vertices, seed, binary_in, config); } -CCSketchAlg::CCSketchAlg(node_id_t num_nodes, size_t seed, std::ifstream &binary_stream, +CCSketchAlg::CCSketchAlg(node_id_t num_vertices, size_t seed, std::ifstream &binary_stream, CCAlgConfiguration config) - : num_nodes(num_nodes), seed(seed), dsu(num_nodes), config(config) { + : num_vertices(num_vertices), seed(seed), dsu(num_vertices), config(config) { representatives = new std::set(); - sketches = new Sketch *[num_nodes]; + sketches = new Sketch *[num_vertices]; - vec_t sketch_vec_len = Sketch::calc_vector_length(num_nodes); - size_t sketch_num_samples = Sketch::calc_cc_samples(num_nodes); - for (node_id_t i = 0; i < num_nodes; ++i) { + vec_t sketch_vec_len = Sketch::calc_vector_length(num_vertices); + size_t sketch_num_samples = Sketch::calc_cc_samples(num_vertices); + for (node_id_t i = 0; i < num_vertices; ++i) { representatives->insert(i); sketches[i] = new Sketch(sketch_vec_len, seed, binary_stream, sketch_num_samples); } binary_stream.close(); - spanning_forest = new std::unordered_set[num_nodes]; - spanning_forest_mtx = new std::mutex[num_nodes]; + spanning_forest = new std::unordered_set[num_vertices]; + spanning_forest_mtx = new std::mutex[num_vertices]; dsu_valid = false; shared_dsu_valid = false; } CCSketchAlg::~CCSketchAlg() { - for (size_t i = 0; i < num_nodes; ++i) delete sketches[i]; + for (size_t i = 0; i < num_vertices; ++i) delete sketches[i]; delete[] sketches; if (delta_sketches != nullptr) { for (size_t i = 0; i < num_delta_sketches; i++) delete delta_sketches[i]; @@ -136,7 +131,7 @@ bool CCSketchAlg::sample_supernodes(std::vector &merge_instr) { bool modified = false; std::exception_ptr err; #pragma omp parallel for 
default(shared) - for (node_id_t root = 0; root < num_nodes; root++) { + for (node_id_t root = 0; root < num_vertices; root++) { if (merge_instr[root] != root) { // don't query non-roots continue; @@ -185,12 +180,12 @@ void CCSketchAlg::merge_supernodes(const size_t next_round, #pragma omp parallel default(shared) { // some thread local variables - Sketch local_sketch(Sketch::calc_vector_length(num_nodes), seed, - Sketch::calc_cc_samples(num_nodes)); + Sketch local_sketch(Sketch::calc_vector_length(num_vertices), seed, + Sketch::calc_cc_samples(num_vertices)); node_id_t cur_root = 0; bool first_root = true; #pragma omp for - for (node_id_t i = 0; i < num_nodes; i++) { + for (node_id_t i = 0; i < num_vertices; i++) { if (merge_instr[i] == i) continue; node_id_t root = merge_instr[i]; @@ -223,10 +218,10 @@ std::vector> CCSketchAlg::boruvka_emulation() { update_locked = true; cc_alg_start = std::chrono::steady_clock::now(); - std::vector merge_instr(num_nodes); + std::vector merge_instr(num_vertices); dsu.reset(); - for (node_id_t i = 0; i < num_nodes; ++i) { + for (node_id_t i = 0; i < num_vertices; ++i) { merge_instr[i] = i; spanning_forest[i].clear(); } @@ -254,7 +249,7 @@ std::vector> CCSketchAlg::boruvka_emulation() { // calculate updated merge instructions #pragma omp parallel for - for (node_id_t i = 0; i < num_nodes; i++) + for (node_id_t i = 0; i < num_vertices; i++) merge_instr[i] = dsu.find_root(i); // prepare for the next round by merging @@ -281,7 +276,7 @@ std::vector> CCSketchAlg::connected_components() { if (shared_dsu_valid) { cc_alg_start = std::chrono::steady_clock::now(); #ifdef VERIFY_SAMPLES_F - for (node_id_t src = 0; src < num_nodes; ++src) { + for (node_id_t src = 0; src < num_vertices; ++src) { for (const auto &dst : spanning_forest[src]) { verifier->verify_edge({src, dst}); } @@ -311,7 +306,7 @@ std::vector> CCSketchAlg::connected_components() { // get ready for ingesting more from the stream // reset dsu and resume graph workers - for (node_id_t i = 0; i < num_nodes; i++) { + for (node_id_t i = 0; i < num_vertices; i++) { sketches[i]->reset_sample_state(); } @@ -327,7 +322,7 @@ std::vector>> CCSketchAlg::calc_span std::vector>> forest; - for (node_id_t src = 0; src < num_nodes; src++) { + for (node_id_t src = 0; src < num_vertices; src++) { if (spanning_forest[src].size() > 0) { std::vector edge_list; edge_list.reserve(spanning_forest[src].size()); @@ -345,7 +340,7 @@ bool CCSketchAlg::point_query(node_id_t a, node_id_t b) { if (dsu_valid) { cc_alg_start = std::chrono::steady_clock::now(); #ifdef VERIFY_SAMPLES_F - for (node_id_t src = 0; src < num_nodes; ++src) { + for (node_id_t src = 0; src < num_vertices; ++src) { for (const auto &dst : spanning_forest[src]) { verifier->verify_edge({src, dst}); } @@ -372,7 +367,7 @@ bool CCSketchAlg::point_query(node_id_t a, node_id_t b) { // get ready for ingesting more from the stream // reset dsu and resume graph workers - for (node_id_t i = 0; i < num_nodes; i++) { + for (node_id_t i = 0; i < num_vertices; i++) { sketches[i]->reset_sample_state(); } @@ -385,7 +380,7 @@ bool CCSketchAlg::point_query(node_id_t a, node_id_t b) { std::vector> CCSketchAlg::cc_from_dsu() { // calculate connected components using DSU structure std::map> temp; - for (node_id_t i = 0; i < num_nodes; ++i) temp[dsu.find_root(i)].insert(i); + for (node_id_t i = 0; i < num_vertices; ++i) temp[dsu.find_root(i)].insert(i); std::vector> retval; retval.reserve(temp.size()); for (const auto &it : temp) retval.push_back(it.second); @@ -395,9 +390,9 @@ 
std::vector> CCSketchAlg::cc_from_dsu() { void CCSketchAlg::write_binary(const std::string &filename) { auto binary_out = std::fstream(filename, std::ios::out | std::ios::binary); binary_out.write((char *)&seed, sizeof(seed)); - binary_out.write((char *)&num_nodes, sizeof(num_nodes)); + binary_out.write((char *)&num_vertices, sizeof(num_vertices)); binary_out.write((char *)&config._sketches_factor, sizeof(config._sketches_factor)); - for (node_id_t i = 0; i < num_nodes; ++i) { + for (node_id_t i = 0; i < num_vertices; ++i) { sketches[i]->serialize(binary_out); } binary_out.close(); diff --git a/test/graph_test.cpp b/test/cc_alg_test.cpp similarity index 90% rename from test/graph_test.cpp rename to test/cc_alg_test.cpp index 380eddf2..55b567eb 100644 --- a/test/graph_test.cpp +++ b/test/cc_alg_test.cpp @@ -11,6 +11,11 @@ #include "graph_sketch_driver.h" #include "mat_graph_verifier.h" +static size_t get_seed() { + auto now = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(now.time_since_epoch()).count(); +} + /** * For many of these tests (especially for those upon very sparse and small graphs) * we allow for a certain number of failures per test. @@ -23,11 +28,11 @@ // We create this class and instantiate a paramaterized test suite so that we // can run these tests both with the GutterTree and with StandAloneGutters -class GraphTest : public testing::TestWithParam {}; -INSTANTIATE_TEST_SUITE_P(GraphTestSuite, GraphTest, +class CCAlgTest : public testing::TestWithParam {}; +INSTANTIATE_TEST_SUITE_P(CCAlgTestSuite, CCAlgTest, testing::Values(GUTTERTREE, STANDALONE, CACHETREE)); -TEST_P(GraphTest, SmallGraphConnectivity) { +TEST_P(CCAlgTest, SmallGraphConnectivity) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); const std::string fname = __FILE__; size_t pos = fname.find_last_of("\\/"); @@ -35,7 +40,7 @@ TEST_P(GraphTest, SmallGraphConnectivity) { AsciiFileStream stream{curr_dir + "/res/multiples_graph_1024.txt", false}; node_id_t num_nodes = stream.vertices(); - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; cc_alg.set_verifier( std::make_unique(1024, curr_dir + "/res/multiples_graph_1024.txt")); @@ -45,7 +50,7 @@ TEST_P(GraphTest, SmallGraphConnectivity) { ASSERT_EQ(78, cc_alg.connected_components().size()); } -TEST_P(GraphTest, TestCorrectnessOnSmallRandomGraphs) { +TEST_P(CCAlgTest, TestCorrectnessOnSmallRandomGraphs) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); int num_trials = 5; while (num_trials--) { @@ -53,7 +58,7 @@ TEST_P(GraphTest, TestCorrectnessOnSmallRandomGraphs) { AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; cc_alg.set_verifier(std::make_unique(1024, "./cumul_sample.txt")); GraphSketchDriver driver(&cc_alg, &stream, driver_config); @@ -64,7 +69,7 @@ TEST_P(GraphTest, TestCorrectnessOnSmallRandomGraphs) { } } -TEST_P(GraphTest, TestCorrectnessOnSmallSparseGraphs) { +TEST_P(CCAlgTest, TestCorrectnessOnSmallSparseGraphs) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); int num_trials = 5; while (num_trials--) { @@ -72,7 +77,7 @@ TEST_P(GraphTest, TestCorrectnessOnSmallSparseGraphs) { AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; cc_alg.set_verifier(std::make_unique(1024, "./cumul_sample.txt")); GraphSketchDriver driver(&cc_alg, 
&stream, driver_config); @@ -83,7 +88,7 @@ TEST_P(GraphTest, TestCorrectnessOnSmallSparseGraphs) { } } -TEST_P(GraphTest, TestCorrectnessOfReheating) { +TEST_P(CCAlgTest, TestCorrectnessOfReheating) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); int num_trials = 5; while (num_trials--) { @@ -92,7 +97,7 @@ TEST_P(GraphTest, TestCorrectnessOfReheating) { AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; cc_alg.set_verifier(std::make_unique(1024, "./cumul_sample.txt")); GraphSketchDriver driver(&cc_alg, &stream, driver_config); @@ -114,7 +119,7 @@ TEST_P(GraphTest, TestCorrectnessOfReheating) { } // Test the multithreaded system by using multiple worker threads -TEST_P(GraphTest, MultipleWorkers) { +TEST_P(CCAlgTest, MultipleWorkers) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()).worker_threads(8); int num_trials = 5; while (num_trials--) { @@ -122,7 +127,7 @@ TEST_P(GraphTest, MultipleWorkers) { AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; cc_alg.set_verifier(std::make_unique(1024, "./cumul_sample.txt")); GraphSketchDriver driver(&cc_alg, &stream, driver_config); @@ -132,7 +137,7 @@ TEST_P(GraphTest, MultipleWorkers) { } } -TEST_P(GraphTest, TestPointQuery) { +TEST_P(CCAlgTest, TestPointQuery) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); const std::string fname = __FILE__; size_t pos = fname.find_last_of("\\/"); @@ -140,7 +145,7 @@ TEST_P(GraphTest, TestPointQuery) { AsciiFileStream stream{curr_dir + "/res/multiples_graph_1024.txt", false}; node_id_t num_nodes = stream.vertices(); - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; cc_alg.set_verifier( std::make_unique(1024, curr_dir + "/res/multiples_graph_1024.txt")); @@ -164,7 +169,7 @@ TEST_P(GraphTest, TestPointQuery) { } } -TEST(GraphTest, TestQueryDuringStream) { +TEST(CCAlgTest, TestQueryDuringStream) { auto driver_config = DriverConfiguration().gutter_sys(STANDALONE); auto cc_config = CCAlgConfiguration(); generate_stream({1024, 0.002, 0.5, 0, "./sample.txt", "./cumul_sample.txt"}); @@ -174,7 +179,7 @@ TEST(GraphTest, TestQueryDuringStream) { edge_id_t num_edges = stream.edges(); edge_id_t tenth = num_edges / 10; - CCSketchAlg cc_alg{num_nodes, cc_config}; + CCSketchAlg cc_alg{num_nodes, get_seed(), cc_config}; GraphSketchDriver driver(&cc_alg, &stream, driver_config); MatGraphVerifier verify(num_nodes); @@ -210,9 +215,9 @@ TEST(GraphTest, TestQueryDuringStream) { cc_alg.connected_components(); } -TEST(GraphTest, EagerDSUTest) { +TEST(CCAlgTest, EagerDSUTest) { node_id_t num_nodes = 100; - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; MatGraphVerifier verify(num_nodes); // This should be a spanning forest edge @@ -258,7 +263,7 @@ TEST(GraphTest, EagerDSUTest) { cc_alg.connected_components(); } -TEST(GraphTest, MTStreamWithMultipleQueries) { +TEST(CCAlgTest, MTStreamWithMultipleQueries) { for (int t = 1; t <= 3; t++) { auto driver_config = DriverConfiguration().gutter_sys(STANDALONE); @@ -273,7 +278,7 @@ TEST(GraphTest, MTStreamWithMultipleQueries) { std::cerr << num_nodes << " " << num_edges << std::endl; - CCSketchAlg cc_alg{num_nodes}; + CCSketchAlg cc_alg{num_nodes, get_seed()}; GraphSketchDriver driver(&cc_alg, &stream, driver_config, 4); MatGraphVerifier verify(num_nodes); diff --git 
a/tools/process_stream.cpp b/tools/process_stream.cpp index 87d2e28c..28f46361 100644 --- a/tools/process_stream.cpp +++ b/tools/process_stream.cpp @@ -12,6 +12,11 @@ static double get_max_mem_used() { return (double) data.ru_maxrss / 1024.0; } +static size_t get_seed() { + auto now = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(now.time_since_epoch()).count(); +} + /* * Function which is run in a seperate thread and will query * the graph for the number of updates it has processed @@ -81,7 +86,7 @@ int main(int argc, char **argv) { auto driver_config = DriverConfiguration().gutter_sys(CACHETREE).worker_threads(num_threads); auto cc_config = CCAlgConfiguration().batch_factor(1); - CCSketchAlg cc_alg{num_nodes, cc_config}; + CCSketchAlg cc_alg{num_nodes, get_seed(), cc_config}; GraphSketchDriver driver{&cc_alg, &stream, driver_config, reader_threads}; auto ins_start = std::chrono::steady_clock::now(); diff --git a/tools/statistical_testing/graph_testing.cpp b/tools/statistical_testing/graph_testing.cpp index cddb7053..dee89912 100644 --- a/tools/statistical_testing/graph_testing.cpp +++ b/tools/statistical_testing/graph_testing.cpp @@ -6,11 +6,15 @@ #include "file_graph_verifier.h" static DriverConfiguration driver_config; +static size_t get_seed() { + auto now = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(now.time_since_epoch()).count(); +} static inline int do_run() { AsciiFileStream stream{"./sample.txt"}; node_id_t n = stream.vertices(); - CCSketchAlg cc_alg{n}; + CCSketchAlg cc_alg{n, get_seed()}; cc_alg.set_verifier(std::make_unique(n, "./cumul_sample.txt")); GraphSketchDriver driver(&cc_alg, &stream, driver_config); driver.process_stream_until(END_OF_STREAM); diff --git a/tools/test_correctness.cpp b/tools/test_correctness.cpp index b80ca204..00e7f822 100644 --- a/tools/test_correctness.cpp +++ b/tools/test_correctness.cpp @@ -8,6 +8,11 @@ #include #include +static size_t get_seed() { + auto now = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(now.time_since_epoch()).count(); +} + struct CorrectnessResults { size_t num_failures = 0; std::vector num_round_hist; @@ -43,7 +48,7 @@ CorrectnessResults test_path_correctness(size_t num_vertices, size_t num_graphs, verifier.reset_cc_state(); for (size_t s = 0; s < samples_per_graph; s++) { - CCSketchAlg cc_alg(num_vertices); + CCSketchAlg cc_alg(num_vertices, get_seed()); node_id_t cur_node = copy_vertices[0]; for (size_t i = 1; i < num_vertices; i++) { From ec061c758d8f1bfcb978ee5ed254c17b32c1b400 Mon Sep 17 00:00:00 2001 From: Evan West Date: Tue, 28 Nov 2023 20:29:47 -0500 Subject: [PATCH 06/37] oops forgot some files --- include/return_types.h | 32 ++++++++++++++++++++++++++++++++ src/return_types.cpp | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 include/return_types.h create mode 100644 src/return_types.cpp diff --git a/include/return_types.h b/include/return_types.h new file mode 100644 index 00000000..b1fa1257 --- /dev/null +++ b/include/return_types.h @@ -0,0 +1,32 @@ +// This file defines the query return types from the cc algorithm class +#include +#include +#include +#include + +#include "dsu.h" +#include "types.h" + +// This class defines the connected components of a graph +class ConnectedComponents { + private: + node_id_t *parent_arr; + node_id_t num_vertices; + node_id_t num_cc; + + public: + ConnectedComponents(node_id_t num_vertices, DisjointSetUnion_MT &dsu); + 
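+  // Frees parent_arr, which the constructor allocates with new[]; the class manages
+  // this array by hand rather than through a container.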
~ConnectedComponents(); + + std::vector> get_component_sets(); + bool is_connected(node_id_t a, node_id_t b) { return parent_arr[a] == parent_arr[b]; } + node_id_t size() { return num_cc; } +}; + +// // This class defines a spanning forest of a graph +// class SpanningForest { +// private: + +// public: + +// }; diff --git a/src/return_types.cpp b/src/return_types.cpp new file mode 100644 index 00000000..9698f7a3 --- /dev/null +++ b/src/return_types.cpp @@ -0,0 +1,32 @@ +#include "return_types.h" + +#include + +ConnectedComponents::ConnectedComponents(node_id_t num_vertices, DisjointSetUnion_MT &dsu) + : parent_arr(new node_id_t[num_vertices]), num_vertices(num_vertices) { + + size_t temp_cc = 0; +#pragma omp parallel for + for (node_id_t i = 0; i < num_vertices; i++) { + parent_arr[i] = dsu.find_root(i); + if (parent_arr[i] == i) { +#pragma omp atomic update + temp_cc += 1; + } + } + + num_cc = temp_cc; +} + +ConnectedComponents::~ConnectedComponents() { + delete[] parent_arr; +} + +std::vector> ConnectedComponents::get_component_sets() { + std::map> temp; + for (node_id_t i = 0; i < num_vertices; ++i) temp[parent_arr[i]].insert(i); + std::vector> retval; + retval.reserve(temp.size()); + for (const auto &it : temp) retval.push_back(it.second); + return retval; +} From c23514c2630f728b8d039568131bf2836800c3c4 Mon Sep 17 00:00:00 2001 From: Evan West Date: Wed, 29 Nov 2023 13:40:53 -0500 Subject: [PATCH 07/37] add spanning forest return type --- include/cc_sketch_alg.h | 2 +- include/return_types.h | 16 +++--- src/cc_sketch_alg.cpp | 112 ++++++++++++++++++---------------------- src/return_types.cpp | 18 +++++-- 4 files changed, 75 insertions(+), 73 deletions(-) diff --git a/include/cc_sketch_alg.h b/include/cc_sketch_alg.h index ac464f6e..a58f2043 100644 --- a/include/cc_sketch_alg.h +++ b/include/cc_sketch_alg.h @@ -218,7 +218,7 @@ class CCSketchAlg { * that is, unless you really know what you're doing. 
* @return an adjacency list representation of the spanning forest of the graph */ - std::vector>> calc_spanning_forest(); + SpanningForest calc_spanning_forest(); #ifdef VERIFY_SAMPLES_F std::unique_ptr verifier; diff --git a/include/return_types.h b/include/return_types.h index b1fa1257..d7967c2e 100644 --- a/include/return_types.h +++ b/include/return_types.h @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "dsu.h" @@ -23,10 +24,13 @@ class ConnectedComponents { node_id_t size() { return num_cc; } }; -// // This class defines a spanning forest of a graph -// class SpanningForest { -// private: - -// public: +// This class defines a spanning forest of a graph +class SpanningForest { + private: + std::vector edges; + node_id_t num_vertices; + public: + SpanningForest(node_id_t num_vertices, const std::unordered_set *spanning_forest); -// }; + const std::vector& get_edges() { return edges; } +}; diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index 9b4d0779..78fc54a4 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -453,7 +453,7 @@ inline void CCSketchAlg::create_merge_instructions(std::vector &merg } void CCSketchAlg::boruvka_emulation() { - auto start = std::chrono::steady_clock::now(); + // auto start = std::chrono::steady_clock::now(); update_locked = true; cc_alg_start = std::chrono::steady_clock::now(); @@ -473,27 +473,27 @@ void CCSketchAlg::boruvka_emulation() { } size_t round_num = 0; bool modified = true; - std::cout << std::endl; - std::cout << " pre boruvka processing = " - << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - << std::endl; + // std::cout << std::endl; + // std::cout << " pre boruvka processing = " + // << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + // << std::endl; while (true) { - std::cout << " Round: " << round_num << std::endl; - start = std::chrono::steady_clock::now(); + // std::cout << " Round: " << round_num << std::endl; + // start = std::chrono::steady_clock::now(); modified = perform_boruvka_round(round_num, merge_instr, global_merges); - std::cout << " perform_boruvka_round = " - << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - << std::endl; + // std::cout << " perform_boruvka_round = " + // << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + // << std::endl; if (!modified) break; // calculate updated merge instructions for next round - start = std::chrono::steady_clock::now(); + // start = std::chrono::steady_clock::now(); create_merge_instructions(merge_instr); - std::cout << " create_merge_instructions = " - << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - << std::endl; + // std::cout << " create_merge_instructions = " + // << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + // << std::endl; ++round_num; } last_query_rounds = round_num; @@ -521,11 +521,11 @@ ConnectedComponents CCSketchAlg::connected_components() { bool except = false; std::exception_ptr err; try { - auto start = std::chrono::steady_clock::now(); + // auto start = std::chrono::steady_clock::now(); boruvka_emulation(); - std::cout << " boruvka's algorithm = " - << std::chrono::duration(std::chrono::steady_clock::now() - start).count() - << std::endl; + // std::cout << " boruvka's algorithm = " + // << std::chrono::duration(std::chrono::steady_clock::now() - start).count() + // << std::endl; } catch (...) 
{ except = true; err = std::current_exception(); @@ -548,29 +548,18 @@ ConnectedComponents CCSketchAlg::connected_components() { return cc; } -std::vector>> CCSketchAlg::calc_spanning_forest() { +SpanningForest CCSketchAlg::calc_spanning_forest() { // TODO: Could probably optimize this a bit by writing new code connected_components(); - - std::vector>> forest; - - for (node_id_t src = 0; src < num_vertices; src++) { - if (spanning_forest[src].size() > 0) { - std::vector edge_list; - edge_list.reserve(spanning_forest[src].size()); - for (node_id_t dst : spanning_forest[src]) { - edge_list.push_back(dst); - } - forest.push_back({src, edge_list}); - } - } - return forest; + + return SpanningForest(num_vertices, spanning_forest); } bool CCSketchAlg::point_query(node_id_t a, node_id_t b) { - // DSU check before calling force_flush() + cc_alg_start = std::chrono::steady_clock::now(); + + // if the DSU holds the answer, use that if (dsu_valid) { - cc_alg_start = std::chrono::steady_clock::now(); #ifdef VERIFY_SAMPLES_F for (node_id_t src = 0; src < num_vertices; ++src) { for (const auto &dst : spanning_forest[src]) { @@ -578,37 +567,38 @@ bool CCSketchAlg::point_query(node_id_t a, node_id_t b) { } } #endif - bool retval = (dsu.find_root(a) == dsu.find_root(b)); - cc_alg_end = std::chrono::steady_clock::now(); - return retval; - } + } + // The DSU does not hold the answer, make it so + else { + bool except = false; + std::exception_ptr err; + bool ret; + try { + boruvka_emulation(); + } catch (...) { + except = true; + err = std::current_exception(); + } - bool except = false; - std::exception_ptr err; - bool ret; - try { - boruvka_emulation(); -#ifdef VERIFY_SAMPLES_F - ConnectedComponents cc(num_vertices, dsu); - auto cc_sets = cc.get_component_sets(); - verifier->verify_soln(cc_sets); -#endif - ret = (dsu.find_root(a) == dsu.find_root(b)); - } catch (...) 
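// Aside: a minimal, hypothetical usage sketch (not taken from this patch) of the two return types
// introduced above, assuming the CCSketchAlg interface shown in these hunks (a {num_nodes, seed}
// constructor, connected_components() returning ConnectedComponents, calc_spanning_forest()
// returning SpanningForest). Variable names here are illustrative only.
//
//   CCSketchAlg alg{num_nodes, seed};
//   /* ... ingest the stream, e.g. through a GraphSketchDriver ... */
//   ConnectedComponents cc = alg.connected_components();
//   std::cout << "components: " << cc.size() << std::endl;   // number of connected components
//   bool same = cc.is_connected(0, 5);                        // O(1) check against parent_arr
//   SpanningForest sf = alg.calc_spanning_forest();
//   for (const Edge &e : sf.get_edges())                      // spanning forest as an edge list
//     std::cout << e.src << " " << e.dst << std::endl;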
{ - except = true; - err = std::current_exception(); - } + // get ready for ingesting more from the stream + // reset dsu and resume graph workers + for (node_id_t i = 0; i < num_vertices; i++) { + sketches[i]->reset_sample_state(); + } - // get ready for ingesting more from the stream - // reset dsu and resume graph workers - for (node_id_t i = 0; i < num_vertices; i++) { - sketches[i]->reset_sample_state(); + // check if boruvka errored + if (except) std::rethrow_exception(err); } - // check if boruvka errored - if (except) std::rethrow_exception(err); +#ifdef VERIFY_SAMPLES_F + ConnectedComponents cc(num_vertices, dsu); + auto cc_sets = cc.get_component_sets(); + verifier->verify_soln(cc_sets); +#endif - return ret; + bool retval = (dsu.find_root(a) == dsu.find_root(b)); + cc_alg_end = std::chrono::steady_clock::now(); + return retval; } void CCSketchAlg::write_binary(const std::string &filename) { diff --git a/src/return_types.cpp b/src/return_types.cpp index 9698f7a3..8b2726dc 100644 --- a/src/return_types.cpp +++ b/src/return_types.cpp @@ -2,9 +2,9 @@ #include -ConnectedComponents::ConnectedComponents(node_id_t num_vertices, DisjointSetUnion_MT &dsu) +ConnectedComponents::ConnectedComponents(node_id_t num_vertices, + DisjointSetUnion_MT &dsu) : parent_arr(new node_id_t[num_vertices]), num_vertices(num_vertices) { - size_t temp_cc = 0; #pragma omp parallel for for (node_id_t i = 0; i < num_vertices; i++) { @@ -18,9 +18,7 @@ ConnectedComponents::ConnectedComponents(node_id_t num_vertices, DisjointSetUnio num_cc = temp_cc; } -ConnectedComponents::~ConnectedComponents() { - delete[] parent_arr; -} +ConnectedComponents::~ConnectedComponents() { delete[] parent_arr; } std::vector> ConnectedComponents::get_component_sets() { std::map> temp; @@ -30,3 +28,13 @@ std::vector> ConnectedComponents::get_component_sets() { for (const auto &it : temp) retval.push_back(it.second); return retval; } + +SpanningForest::SpanningForest(node_id_t num_vertices, + const std::unordered_set *spanning_forest) + : num_vertices(num_vertices) { + for (node_id_t src = 0; src < num_vertices; src++) { + for (node_id_t dst : spanning_forest[src]) { + edges.push_back({src, dst}); + } + } +} From 3fbf54e43c2fb8776287d8aa8c1b0e6b132eca84 Mon Sep 17 00:00:00 2001 From: Evan West Date: Fri, 1 Dec 2023 12:58:40 -0500 Subject: [PATCH 08/37] fewer rounds from math --- include/sketch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/sketch.h b/include/sketch.h index 7dc09bce..44db971c 100644 --- a/include/sketch.h +++ b/include/sketch.h @@ -170,10 +170,11 @@ class Sketch { #ifdef L0_SAMPLING static constexpr size_t default_cols_per_sample = 7; + // NOTE: can improve this but leaving for comparison purposes static constexpr double num_samples_div = log2(3) - 1; #else static constexpr size_t default_cols_per_sample = 1; - static constexpr double num_samples_div = log2(3) - 1; + static constexpr double num_samples_div = 1 - log2(2 - 0.8); #endif }; From 93d153ebdfa1e4a70bbe205f6114a2140eda796d Mon Sep 17 00:00:00 2001 From: Evan West Date: Sat, 2 Dec 2023 16:43:17 -0500 Subject: [PATCH 09/37] fix tests --- test/sketch_test.cpp | 51 +++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/test/sketch_test.cpp b/test/sketch_test.cpp index cdc57c13..14847a82 100644 --- a/test/sketch_test.cpp +++ b/test/sketch_test.cpp @@ -2,11 +2,17 @@ #include "bucket.h" #include #include +#include #include "testing_vector.h" +static size_t get_seed() { + auto now = 
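// Aside: a rough worked check (not part of the patch) of the num_samples_div change above,
// assuming num_samples = ceil(log2(n) / num_samples_div) as in Sketch::calc_cc_samples.
//   old: num_samples_div = log2(3) - 1       ~= 0.585  ->  n = 2^20 gives ceil(20 / 0.585) = 35 samples
//   new: num_samples_div = 1 - log2(2 - 0.8) ~= 0.737  ->  n = 2^20 gives ceil(20 / 0.737) = 28 samples
// i.e. the new constant budgets noticeably fewer Boruvka rounds' worth of samples per sketch.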
std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(now.time_since_epoch()).count(); +} + static const int num_columns = 7; TEST(SketchTestSuite, TestSampleResults) { - Sketch sketch1(10, rand(), 1, num_columns); + Sketch sketch1(10, get_seed(), 1, num_columns); ASSERT_EQ(sketch1.sample().result, ZERO); sketch1.update(1); ASSERT_THROW(sketch1.sample(), OutOfSamplesException); @@ -59,7 +65,7 @@ TEST(SketchTestSuite, TestSampleResults) { TEST(SketchTestSuite, GIVENonlyIndexZeroUpdatedTHENitWorks) { // GIVEN only the index 0 is updated - Sketch sketch(40, rand(), 1, num_columns); + Sketch sketch(40, get_seed(), 1, num_columns); sketch.update(0); sketch.update(0); sketch.update(0); @@ -84,7 +90,7 @@ void test_sketch_sample(unsigned long num_sketches, unsigned long sample_incorrect_failures = 0; for (unsigned long i = 0; i < num_sketches; i++) { Testing_Vector test_vec = Testing_Vector(vec_size, num_updates); - Sketch sketch(vec_size, rand() + i * 7, 1, num_columns); + Sketch sketch(vec_size, get_seed() + i * 7, 1, num_columns); auto start_time = std::chrono::steady_clock::now(); for (unsigned long j = 0; j < num_updates; j++){ sketch.update(test_vec.get_update(j)); @@ -151,7 +157,7 @@ void test_sketch_merge(unsigned long num_sketches, unsigned long all_bucket_failures = 0; unsigned long sample_incorrect_failures = 0; for (unsigned long i = 0; i < num_sketches; i++){ - const long seed = rand() + 7 * i; + const long seed = get_seed() + 7 * i; Sketch sketch1(vec_size, seed, 1, num_columns); Sketch sketch2(vec_size, seed, 1, num_columns); Testing_Vector test_vec1 = Testing_Vector(vec_size, num_updates); @@ -209,8 +215,8 @@ TEST(SketchTestSuite, TestSketchMerge) { } TEST(SketchTestSuite, TestSketchRangeMerge) { - Sketch skt1(2048, rand(), 10, 3); - Sketch skt2(2048, rand(), 10, 3); + Sketch skt1(2048, get_seed(), 10, 3); + Sketch skt2(2048, get_seed(), 10, 3); skt1.sample(); skt1.range_merge(skt2, 1, 1); @@ -235,13 +241,14 @@ void test_sketch_large(unsigned long vec_size, unsigned long num_updates) { // therefore we need to ensure that in this test that we don't do more than that num_updates = std::min(num_updates, vec_size / 4); - Sketch sketch(vec_size, rand(), 1, 2 * log2(vec_size)); - //Keep seed for replaying update stream later - unsigned long seed = rand(); - srand(seed); + // Keep seed for replaying update stream later + unsigned long seed = get_seed(); + Sketch sketch(vec_size, seed, 1, 2 * log2(vec_size)); + + std::mt19937_64 gen(seed); auto start_time = std::chrono::steady_clock::now(); for (unsigned long j = 0; j < num_updates; j++){ - sketch.update(static_cast(rand() % vec_size)); + sketch.update(static_cast(gen() % vec_size)); } std::cout << "Updating vector of size " << vec_size << " with " << num_updates << " updates took " << std::chrono::duration( @@ -255,10 +262,10 @@ void test_sketch_large(unsigned long vec_size, unsigned long num_updates) { //Multiple queries shouldn't happen, but if we do get here fail test ASSERT_LT(res_idx, vec_size) << "Sampled index out of bounds"; //Replay update stream, keep track of the sampled index - srand(seed); + gen = std::mt19937_64(seed); bool actual_delta = false; for (unsigned long j = 0; j < num_updates; j++){ - vec_t update_idx = static_cast(rand() % vec_size); + vec_t update_idx = static_cast(gen() % vec_size); if (update_idx == res_idx) { actual_delta = !actual_delta; } @@ -291,7 +298,7 @@ TEST(SketchTestSuite, TestSerialization) { unsigned long vec_size = 1 << 10; unsigned long num_updates = 10000; Testing_Vector 
test_vec = Testing_Vector(vec_size, num_updates); - auto seed = rand(); + auto seed = get_seed(); Sketch sketch(vec_size, seed, 3, num_columns); for (unsigned long j = 0; j < num_updates; j++){ sketch.update(test_vec.get_update(j)); @@ -323,7 +330,7 @@ TEST(SketchTestSuite, TestExhaustiveQuery) { size_t runs = 10; size_t vec_size = 2000; for (size_t i = 0; i < runs; i++) { - Sketch sketch(vec_size, rand() + 7 * i, 1, log2(vec_size)); + Sketch sketch(vec_size, get_seed() + 7 * i, 1, log2(vec_size)); sketch.update(1); sketch.update(2); @@ -355,8 +362,8 @@ TEST(SketchTestSuite, TestExhaustiveQuery) { } TEST(SketchTestSuite, TestSampleInsertGrinder) { - size_t nodes = 1024; - Sketch sketch(Sketch::calc_vector_length(nodes), rand(), Sketch::calc_cc_samples(nodes)); + size_t nodes = 4096; + Sketch sketch(Sketch::calc_vector_length(nodes), get_seed(), Sketch::calc_cc_samples(nodes)); for (size_t src = 0; src < nodes - 1; src++) { for (size_t dst = src + 7; dst < nodes; dst += 7) { @@ -376,12 +383,12 @@ TEST(SketchTestSuite, TestSampleInsertGrinder) { Edge e = inv_concat_pairing_fn(ret.idx); ASSERT_EQ((e.dst - e.src) % 7, 0); } - ASSERT_GE(successes, log2(nodes)); + ASSERT_GE(successes, 2); } TEST(SketchTestSuite, TestSampleDeleteGrinder) { - size_t nodes = 1024; - Sketch sketch(Sketch::calc_vector_length(nodes), rand(), Sketch::calc_cc_samples(nodes)); + size_t nodes = 4096; + Sketch sketch(Sketch::calc_vector_length(nodes), get_seed(), Sketch::calc_cc_samples(nodes)); // insert for (size_t src = 0; src < nodes - 1; src++) { @@ -410,13 +417,13 @@ TEST(SketchTestSuite, TestSampleDeleteGrinder) { ASSERT_EQ((e.dst - e.src) % 7, 0); ASSERT_EQ(e.src % 2, 0); } - ASSERT_GE(successes, log2(nodes)); + ASSERT_GE(successes, 2); } TEST(SketchTestSuite, TestRawBucketUpdate) { size_t successes = 0; for (size_t t = 0; t < 20; t++) { - size_t seed = rand() + 5 * t; + size_t seed = get_seed() + 5 * t; Sketch sk1(4096, seed, 1, 1); Sketch sk2(4096, seed, 1, 1); From 8546f1da03e9ceca8a4be8a5c32a2fb4f80cfe43 Mon Sep 17 00:00:00 2001 From: Daniel DeLayo Date: Mon, 22 Jan 2024 11:54:49 -0500 Subject: [PATCH 10/37] statistical testing --- CMakeLists.txt | 5 + tools/statistical_testing/sketch_testing.cpp | 147 ++++++++++++++++++ .../statistical_testing/sum_sketch_testing.py | 54 +++++++ 3 files changed, 206 insertions(+) create mode 100644 tools/statistical_testing/sketch_testing.cpp create mode 100644 tools/statistical_testing/sum_sketch_testing.py diff --git a/CMakeLists.txt b/CMakeLists.txt index a66c474d..d775131d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -148,6 +148,11 @@ if (BUILD_EXE) test/util/graph_gen.cpp) add_dependencies(statistical_test GraphZeppelinVerifyCC) target_link_libraries(statistical_test PRIVATE GraphZeppelinVerifyCC) + + add_executable(statistical_sketch_test + tools/statistical_testing/sketch_testing.cpp) + add_dependencies(statistical_sketch_test GraphZeppelinVerifyCC) + target_link_libraries(statistical_sketch_test PRIVATE GraphZeppelinVerifyCC) # executables for experiment/benchmarking add_executable(efficient_gen diff --git a/tools/statistical_testing/sketch_testing.cpp b/tools/statistical_testing/sketch_testing.cpp new file mode 100644 index 00000000..3329a429 --- /dev/null +++ b/tools/statistical_testing/sketch_testing.cpp @@ -0,0 +1,147 @@ +#include +#include +#include +#include + +#include "sketch.h" +#include "cc_alg_configuration.h" + +std::random_device dev; +std::mt19937_64 rng(dev()); +using rand_type = std::mt19937_64::result_type; + + +rand_type gen(rand_type n) +{ + 
std::uniform_int_distribution dist(0,n-1); + return dist(rng); +} + +rand_type seed = gen(1ll << 62); + +rand_type gen_seed() +{ + //std::uniform_int_distribution dist(0,1ll << 63); + //return dist(rng); + return seed++; +} + + +enum ResultType { + R_GOOD=0, + R_BAD=1, + R_HASHFAIL=2 +}; + +ResultType test_z(rand_type n, rand_type z) +{ + assert(z >= 1); + assert(z <= n*n); + Sketch sketch(n, gen_seed(), 1, 1); + + // Generate z edges and track them + /*std::unordered_set edges; + while (edges.size() < z) + { + edges.insert(gen(n*n)); + } + + for (const auto& r : edges) + { + sketch.update(r); + } + */ + for (rand_type i = 0; i < z; i++) + sketch.update(i); + // Sample the sketches + SketchSample query_ret = sketch.sample(); + SampleResult ret_code = query_ret.result; + + assert(ret_code != ZERO); + + if (ret_code == GOOD) + { + //if (edges.find(res) == edges.end()) + // return R_HASHFAIL; + return R_GOOD; + } + return R_BAD; +} + +std::pair fit_to_binomial(rand_type ngood, rand_type ntrials) +{ + double p = ngood / (1.0 * ntrials); + double variance = ntrials * p * (1-p); + double stddev = sqrt(variance); + return std::pair(p, stddev/ntrials); +} + +std::pair test_nz_pair(rand_type n, rand_type z) +{ + int ntrials = 500; + int results[3] = {0,0,0}; + for (int i = 0; i < ntrials; i++) + results[test_z(n, z)]++; + //std::cout << "GOOD: " << results[0] << std::endl; + //std::cout << "BAD: " << results[1] << std::endl; + //std::cout << "HASHFAIL: " << results[2] << std::endl; + int ngood = results[0]; + // Fit to binomial + return fit_to_binomial(ngood, ntrials); +} + +void test_n_one(rand_type n, rand_type* good, rand_type max_z) +{ + Sketch sketch(n*n, gen_seed(), 1, 1); + for (rand_type i = 0; i < max_z; i++) + { + sketch.update(i); + // Sample the sketches + SketchSample query_ret = sketch.sample(); + SampleResult ret_code = query_ret.result; + //assert(ret_code != ZERO); + if (ret_code == GOOD) + good[i]++; + sketch.reset_sample_state(); + } +} + +void test_n(rand_type n) +{ + int ntrials = 500; + rand_type max_z = 1+(n*n)/4; + // Default init to 0? 
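// Aside: a worked instance of fit_to_binomial above (numbers are illustrative, not from a real run).
// With ntrials = 500 and ngood = 390: p = 390/500 = 0.78, variance = 500 * 0.78 * 0.22 = 85.8,
// stddev = sqrt(85.8) ~= 9.26, so the reported error bar stddev/ntrials ~= 0.0185 is the standard
// error of the estimated success probability. test_n() then tracks the index minimizing
// p - 3 * (stddev/ntrials), and the companion sum_sketch_testing.py script reports what fraction
// of indices clear a 0.71 target.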
+ rand_type* good = new rand_type[max_z]; + for (int i = 0; i < ntrials; i++) + test_n_one(n, good, max_z); + + double worst_3sigma = 1; + rand_type worst_i = 0; + for (rand_type i = 0; i < max_z; i++) + { + auto pair = fit_to_binomial(good[i], ntrials); + double ans = pair.first; + double stddev = pair.second; + std::cout << i << ": " << ans << " +- " << stddev << std::endl; + if (ans - 3 * stddev < worst_3sigma) + { + worst_i = i; + worst_3sigma = ans-3*stddev; + } + } + auto pair = fit_to_binomial(good[worst_i], ntrials); + double ans = pair.first; + double stddev = pair.second; + std::cout << "WORST" << std::endl; + std::cout << worst_i << ": " << ans << " +- " << stddev << std::endl; + + delete[] good; +} + +int main() +{ + std::cout << CCAlgConfiguration() << std::endl; + rand_type n = 1 << 13; + std::cout << "TESTING: " << n << " TO " << (n*n)/4 << std::endl; + test_n(n); +} diff --git a/tools/statistical_testing/sum_sketch_testing.py b/tools/statistical_testing/sum_sketch_testing.py new file mode 100644 index 00000000..01052777 --- /dev/null +++ b/tools/statistical_testing/sum_sketch_testing.py @@ -0,0 +1,54 @@ +import sys +import re + +prob = r"([0-9]*[.])?[0-9]+" +which = r"[0-9]+" + +pattern = re.compile("(" + which + "): (" + prob + ") \+- (" + prob + ")") + +def parse(filename): + with open(filename) as file: + lines = file.readlines()[:4000000] + stats = [] + for l in lines: + match = pattern.match(l) + if match: + t = (int(match.group(1)), float(match.group(2)), float(match.group(4))) + stats.append(t) + return stats + +def above(stats, target, sigmas): + above = 0 + below = 0 + + + for s in stats: + if (s[1] - sigmas * s[2] > target): + above += 1 + else: + below += 1 + + print (above / (above + below)) + + +def mean(stats, sigmas): + summ = 0 + count = 0 + for s in stats: + count += 1 + summ += s[1] - sigmas * s[2] + print(summ/count) + + +stats = parse(sys.argv[1]) + +above(stats, 0.71, 0) + +mean(stats, 0) + + + + + + + From 395089d1de69eb4ab3f326e52f31ba8536535366 Mon Sep 17 00:00:00 2001 From: Evan West Date: Mon, 5 Feb 2024 13:50:10 -0500 Subject: [PATCH 11/37] Failed sketch merge should leave sketch in bad state --- include/cc_sketch_alg.h | 1 + include/sketch.h | 1 + src/sketch.cpp | 1 + 3 files changed, 3 insertions(+) diff --git a/include/cc_sketch_alg.h b/include/cc_sketch_alg.h index a58f2043..ebf8547d 100644 --- a/include/cc_sketch_alg.h +++ b/include/cc_sketch_alg.h @@ -241,4 +241,5 @@ class CCSketchAlg { // getters inline node_id_t get_num_vertices() { return num_vertices; } inline size_t get_seed() { return seed; } + inline size_t max_rounds() { return sketches[0]->get_num_samples(); } }; diff --git a/include/sketch.h b/include/sketch.h index 44db971c..23f009f0 100644 --- a/include/sketch.h +++ b/include/sketch.h @@ -164,6 +164,7 @@ class Sketch { inline size_t checksum_seed() const { return seed; } inline size_t get_columns() const { return num_columns; } inline size_t get_buckets() const { return num_buckets; } + inline size_t get_num_samples() const { return num_samples; } static size_t calc_bkt_per_col(size_t n) { return ceil(log2(n)) + 1; } static size_t calc_cc_samples(size_t n) { return ceil(log2(n) / num_samples_div); } diff --git a/src/sketch.cpp b/src/sketch.cpp index 9a0306b2..0c687fa1 100644 --- a/src/sketch.cpp +++ b/src/sketch.cpp @@ -156,6 +156,7 @@ void Sketch::merge(const Sketch &other) { void Sketch::range_merge(const Sketch &other, size_t start_sample, size_t n_samples) { if (start_sample + n_samples > num_samples) { assert(false); + 
sample_idx = num_samples; // sketch is in a fail state! return; } From 36c8ff4debba998c59e894aaf560a751b1302fcf Mon Sep 17 00:00:00 2001 From: Evan West Date: Thu, 8 Feb 2024 16:28:17 -0500 Subject: [PATCH 12/37] adjust to begin incorporation with streaming utilities --- CMakeLists.txt | 40 +++------ include/ascii_file_stream.h | 106 ------------------------ include/binary_file_stream.h | 153 ----------------------------------- include/graph_stream.h | 67 --------------- include/types.h | 34 +------- 5 files changed, 15 insertions(+), 385 deletions(-) delete mode 100644 include/ascii_file_stream.h delete mode 100644 include/binary_file_stream.h delete mode 100644 include/graph_stream.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 72ee8ac9..14d811cf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,19 +43,19 @@ else() message (STATUS "GraphZeppelin building executables") endif() -# Get xxHash +# Get GutterTree Project FetchContent_Declare( - xxhash + GutterTree - GIT_REPOSITORY https://github.com/Cyan4973/xxHash.git - GIT_TAG v0.8.0 + GIT_REPOSITORY https://github.com/GraphStreamingProject/GutterTree.git + GIT_TAG main ) -# Get GutterTree Project +# Get StreamingUtilities FetchContent_Declare( - GutterTree + StreamingUtilities - GIT_REPOSITORY https://github.com/GraphStreamingProject/GutterTree.git + GIT_REPOSITORY https://github.com/GraphStreamingProject/StreamingUtilities.git GIT_TAG main ) @@ -72,21 +72,7 @@ if (BUILD_BENCH) FetchContent_MakeAvailable(benchmark) endif() -FetchContent_MakeAvailable(xxHash GutterTree) -##### -# Some additional steps for xxHash as it is unofficial -##### -#xxHash messes with BUILD_SHARED_LIBS if it is empty -set(SAVED_BUILD_SHARED_LIBS "${BUILD_SHARED_LIBS}") - -add_subdirectory( - "${xxhash_SOURCE_DIR}/cmake_unofficial" - "${xxhash_BINARY_DIR}" - EXCLUDE_FROM_ALL -) -#Restore BUILD_SHARED_LIBS -set(BUILD_SHARED_LIBS "${SAVED_BUILD_SHARED_LIBS}" CACHE BOOL "" FORCE) - +FetchContent_MakeAvailable(GutterTree StreamingUtilities) # AVAILABLE COMPILATION DEFINITIONS: # VERIFY_SAMPLES_F Use a deterministic connected-components @@ -107,8 +93,8 @@ add_library(GraphZeppelin src/cc_alg_configuration.cpp src/sketch.cpp src/util.cpp) -add_dependencies(GraphZeppelin GutterTree) -target_link_libraries(GraphZeppelin PUBLIC xxhash GutterTree) +add_dependencies(GraphZeppelin GutterTree StreamingUtilities) +target_link_libraries(GraphZeppelin PUBLIC xxhash GutterTree StreamingUtilities) target_include_directories(GraphZeppelin PUBLIC include/) target_compile_options(GraphZeppelin PUBLIC -fopenmp) target_link_options(GraphZeppelin PUBLIC -fopenmp) @@ -123,8 +109,8 @@ add_library(GraphZeppelinVerifyCC src/util.cpp test/util/file_graph_verifier.cpp test/util/mat_graph_verifier.cpp) -add_dependencies(GraphZeppelinVerifyCC GutterTree) -target_link_libraries(GraphZeppelinVerifyCC PUBLIC xxhash GutterTree) +add_dependencies(GraphZeppelinVerifyCC GutterTree StreamingUtilities) +target_link_libraries(GraphZeppelinVerifyCC PUBLIC xxhash GutterTree StreamingUtilities) target_include_directories(GraphZeppelinVerifyCC PUBLIC include/ include/test/) target_compile_options(GraphZeppelinVerifyCC PUBLIC -fopenmp) target_link_options(GraphZeppelinVerifyCC PUBLIC -fopenmp) @@ -156,7 +142,7 @@ if (BUILD_EXE) src/util.cpp test/util/efficient_gen/edge_gen.cpp test/util/efficient_gen/efficient_gen.cpp) - target_link_libraries(efficient_gen PRIVATE xxhash GraphZeppelinCommon) + target_link_libraries(efficient_gen PRIVATE xxhash GraphZeppelinCommon StreamingUtilities) # executable for 
converting to stream format add_executable(to_binary_format diff --git a/include/ascii_file_stream.h b/include/ascii_file_stream.h deleted file mode 100644 index 2fb10147..00000000 --- a/include/ascii_file_stream.h +++ /dev/null @@ -1,106 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "graph_stream.h" - -class AsciiFileStream : public GraphStream { - public: - AsciiFileStream(std::string file_name, bool has_type = true) - : file_name(file_name), has_type(has_type) { - - bool stream_exists = false; - { - std::fstream check(file_name, std::fstream::in); - stream_exists = check.is_open(); - } - - if (stream_exists) - stream_file.open(file_name, std::fstream::in | std::fstream::out); - else - stream_file.open(file_name, std::fstream::in | std::fstream::out | std::fstream::trunc); - - if (!stream_file.is_open()) - throw StreamException("AsciiFileStream: could not open " + file_name); - - if (stream_exists) - stream_file >> num_vertices >> num_edges; - } - - inline size_t get_update_buffer(GraphStreamUpdate* upd_buf, size_t num_updates) { - assert(upd_buf != nullptr); - - size_t i = 0; - for (; i < num_updates; i++) { - GraphStreamUpdate& upd = upd_buf[i]; - - if (upd_offset >= num_edges || upd_offset >= break_edge_idx) { - upd.type = BREAKPOINT; - upd.edge = {0, 0}; - return i + 1; - } - int type = INSERT; - if (has_type) - stream_file >> type; - stream_file >> upd.edge.src >> upd.edge.dst; - upd.type = type; - ++upd_offset; - } - return i; - } - - // get_update_buffer() is not thread safe - inline bool get_update_is_thread_safe() { return false; } - - inline void write_header(node_id_t num_verts, edge_id_t num_edg) { - stream_file.seekp(0); // seek to beginning - stream_file << num_verts << " " << num_edg << std::endl; - num_vertices = num_verts; - num_edges = num_edg; - } - - inline void write_updates(GraphStreamUpdate* upd_buf, edge_id_t num_updates) { - for (edge_id_t i = 0; i < num_updates; i++) { - auto upd = upd_buf[i]; - if (has_type) - stream_file << (int) upd.type << " "; - stream_file << upd.edge.src << " " << upd.edge.dst << std::endl; - } - } - - inline void set_num_edges(edge_id_t num_edg) { - num_edges = num_edg; - } - - inline void seek(edge_id_t pos) { - if (pos != 0) - throw StreamException("AsciiFileStream: stream does not support seeking by update index"); - stream_file.seekp(0); stream_file.seekg(0); - upd_offset = 0; - } - - inline bool set_break_point(edge_id_t break_idx) { - if (break_idx < upd_offset) return false; - break_edge_idx = break_idx; - return true; - } - - inline void serialize_metadata(std::ostream& out) { - out << AsciiFile << " " << file_name << std::endl; - } - - static GraphStream* construct_from_metadata(std::istream& in) { - std::string file_name_from_stream; - in >> file_name_from_stream; - return new AsciiFileStream(file_name_from_stream); - } - - private: - const std::string file_name; - const bool has_type; - std::fstream stream_file; - edge_id_t break_edge_idx = -1; - edge_id_t upd_offset = 0; -}; diff --git a/include/binary_file_stream.h b/include/binary_file_stream.h deleted file mode 100644 index b3dd9f61..00000000 --- a/include/binary_file_stream.h +++ /dev/null @@ -1,153 +0,0 @@ -#pragma once -#include -#include //open and close - -#include -#include -#include -#include -#include - -#include "graph_stream.h" - -class BinaryFileStream : public GraphStream { - public: - /** - * Open a BinaryFileStream - * @param file_name Name of the stream file - */ - BinaryFileStream(std::string file_name, bool open_read_only = true) - : 
read_only(open_read_only), file_name(file_name) { - if (read_only) - stream_fd = open(file_name.c_str(), O_RDONLY, S_IRUSR); - else - stream_fd = open(file_name.c_str(), O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR); - - if (!stream_fd) - throw StreamException("BinaryFileStream: Could not open stream file " + file_name + - ". Does it exist?"); - - // read header from the input file - if (read_only) { - if (read(stream_fd, (char*)&num_vertices, sizeof(num_vertices)) != sizeof(num_vertices)) - throw StreamException("BinaryFileStream: Could not read number of nodes"); - if (read(stream_fd, (char*)&num_edges, sizeof(num_edges)) != sizeof(num_edges)) - throw StreamException("BinaryFileStream: Could not read number of edges"); - - end_of_file = (num_edges * edge_size) + header_size; - stream_off = header_size; - set_break_point(-1); - } - } - - ~BinaryFileStream() { - if (stream_fd) close(stream_fd); - } - - inline size_t get_update_buffer(GraphStreamUpdate* upd_buf, size_t num_updates) { - assert(upd_buf != nullptr); - - // many threads may execute this line simultaneously creating edge cases - size_t bytes_to_read = num_updates * edge_size; - size_t read_off = stream_off.fetch_add(bytes_to_read, std::memory_order_relaxed); - - // catch these edge cases here - if (read_off + bytes_to_read > break_index) { - bytes_to_read = read_off > break_index ? 0 : break_index - read_off; - stream_off = break_index.load(); - upd_buf[bytes_to_read / edge_size] = {BREAKPOINT, {0, 0}}; - } - // read into the buffer - assert(bytes_to_read % edge_size == 0); - size_t bytes_read = 0; - while (bytes_read < bytes_to_read) { - int r = - pread(stream_fd, upd_buf + bytes_read, bytes_to_read - bytes_read, read_off + bytes_read); - if (r == -1) throw StreamException("BinaryFileStream: Could not perform pread"); - if (r == 0) throw StreamException("BinaryFileStream: pread() got no data"); - bytes_read += r; - } - - size_t upds_read = bytes_to_read / edge_size; - if (upds_read < num_updates) { - GraphStreamUpdate& upd = upd_buf[upds_read]; - upd.type = BREAKPOINT; - upd.edge = {0, 0}; - return upds_read + 1; - } - return upds_read; - } - - // get_update_buffer() is thread safe! 
:) - inline bool get_update_is_thread_safe() { return true; } - - // write the number of nodes and edges to the stream - inline void write_header(node_id_t num_verts, edge_id_t num_edg) { - if (read_only) throw StreamException("BinaryFileStream: stream not open for writing!"); - - lseek(stream_fd, 0, SEEK_SET); - int r1 = write(stream_fd, (char*)&num_verts, sizeof(num_verts)); - int r2 = write(stream_fd, (char*)&num_edg, sizeof(num_edg)); - - if (r1 + r2 != header_size) { - perror("write_header"); - throw StreamException("BinaryFileStream: could not write header to stream file"); - } - - stream_off = header_size; - num_vertices = num_verts; - num_edges = num_edg; - end_of_file = (num_edges * edge_size) + header_size; - } - - // write an edge to the stream - inline void write_updates(GraphStreamUpdate* upd, edge_id_t num_updates) { - if (read_only) throw StreamException("BinaryFileStream: stream not open for writing!"); - - size_t bytes_to_write = num_updates * edge_size; - // size_t write_off = stream_off.fetch_add(bytes_to_write, std::memory_order_relaxed); - - size_t bytes_written = 0; - while (bytes_written < bytes_to_write) { - int r = write(stream_fd, (char*)upd + bytes_written, bytes_to_write - bytes_written); - if (r == -1) throw StreamException("BinaryFileStream: Could not perform write"); - bytes_written += r; - } - } - - // seek to a position in the stream - inline void seek(edge_id_t edge_idx) { stream_off = edge_idx * edge_size + header_size; } - - inline bool set_break_point(edge_id_t break_idx) { - edge_id_t byte_index = END_OF_STREAM; - if (break_idx != END_OF_STREAM) { - byte_index = header_size + break_idx * edge_size; - } - if (byte_index < stream_off) return false; - break_index = byte_index; - if (break_index > end_of_file) break_index = end_of_file; - return true; - } - - inline void serialize_metadata(std::ostream& out) { - out << BinaryFile << " " << file_name << std::endl; - } - - static GraphStream* construct_from_metadata(std::istream& in) { - std::string file_name_from_stream; - in >> file_name_from_stream; - return new BinaryFileStream(file_name_from_stream); - } - - private: - int stream_fd; - edge_id_t end_of_file; - std::atomic stream_off; - std::atomic break_index; - const bool read_only; // is stream read only? 
- const std::string file_name; - - // size of binary encoded edge and buffer read size - static constexpr size_t edge_size = sizeof(GraphStreamUpdate); - static constexpr size_t header_size = sizeof(node_id_t) + sizeof(edge_id_t); -}; diff --git a/include/graph_stream.h b/include/graph_stream.h deleted file mode 100644 index 2cd4a968..00000000 --- a/include/graph_stream.h +++ /dev/null @@ -1,67 +0,0 @@ -#pragma once -#include -#include -#include - -#include "types.h" - -#pragma pack(push,1) -struct GraphStreamUpdate { - uint8_t type; - Edge edge; -}; -#pragma pack(pop) - -static constexpr edge_id_t END_OF_STREAM = (edge_id_t) -1; - -// Enum that defines the types of streams -enum StreamType { - BinaryFile, - AsciiFile, -}; - -class GraphStream { - public: - virtual ~GraphStream() = default; - inline node_id_t vertices() { return num_vertices; } - inline edge_id_t edges() { return num_edges; } - - // Extract a buffer of many updates from the stream - virtual size_t get_update_buffer(GraphStreamUpdate* upd_buf, edge_id_t num_updates) = 0; - - // Query the GraphStream to see if get_update_buffer is thread-safe - // this is implemenation dependent - virtual bool get_update_is_thread_safe() = 0; - - // Move read pointer to new location in stream - // Child classes may choose to throw an error if seek is called - // For example, a GraphStream recieved over the network would - // likely not support seek - virtual void seek(edge_id_t edge_idx) = 0; - - // Query handling - // Call this function to register a query at a future edge index - // This function returns true if the query is correctly registered - virtual bool set_break_point(edge_id_t query_idx) = 0; - - // Serialize GraphStream metadata for distribution - // So that stream reading can happen simultaneously - virtual void serialize_metadata(std::ostream &out) = 0; - - // construct a stream object from serialized metadata - static GraphStream* construct_stream_from_metadata(std::istream &in); - - protected: - node_id_t num_vertices = 0; - edge_id_t num_edges = 0; - private: - static std::unordered_map constructor_map; -}; - -class StreamException : public std::exception { - private: - std::string err_msg; - public: - StreamException(std::string err) : err_msg(err) {} - virtual const char* what() const throw() { return err_msg.c_str(); } -}; diff --git a/include/types.h b/include/types.h index 76e45164..6fea6b26 100644 --- a/include/types.h +++ b/include/types.h @@ -2,43 +2,13 @@ #include #include #include +#include typedef uint64_t col_hash_t; static const auto& vec_hash = XXH3_64bits_withSeed; static const auto& col_hash = XXH3_64bits_withSeed; -// Is a stream update an insertion or a deletion -// BREAKPOINT: special type that indicates that a break point has been reached -// a break point may be either the end of the stream or the index of a query -enum UpdateType { - INSERT = 0, - DELETE = 1, - BREAKPOINT = 2 -}; - -struct Edge { - node_id_t src = 0; - node_id_t dst = 0; - - bool operator< (const Edge&oth) const { - if (src == oth.src) - return dst < oth.dst; - return src < oth.src; - } - bool operator== (const Edge&oth) const { - return src == oth.src && dst == oth.dst; - } -}; -namespace std { - template <> - struct hash { - auto operator()(const Edge&edge) const -> size_t { - std::hash h; - return h(edge.dst) + (31 * h(edge.src)); - } - }; -} - +// Graph Stream Updates are parsed into the GraphUpdate type for more convinient processing struct GraphUpdate { Edge edge; UpdateType type; From 3d0c64382b8c323a09b50a44235b0c04a045504c Mon 
Sep 17 00:00:00 2001 From: Evan West Date: Sat, 10 Feb 2024 19:59:38 -0500 Subject: [PATCH 13/37] fix the bug and remove more streaming stuff from this repo --- CMakeLists.txt | 27 ---- include/graph_sketch_driver.h | 4 +- include/test/efficient_gen.h | 9 -- include/test/graph_gen.h | 26 ---- include/worker_thread_group.h | 2 +- src/cc_sketch_alg.cpp | 4 +- test/cc_alg_test.cpp | 40 +++-- test/util/efficient_gen/edge_gen.cpp | 118 -------------- test/util/efficient_gen/efficient_gen.cpp | 19 --- test/util/graph_gen.cpp | 145 ------------------ test/util/graph_gen_test.cpp | 12 -- tools/statistical_testing/analyze_results.py | 73 --------- tools/statistical_testing/graph_testing.cpp | 96 ------------ .../medium_test_expected.txt | 2 - tools/statistical_testing/requirements.txt | 3 - .../small_test_expected.txt | 1 - tools/statistical_testing/stat_config.txt | 5 - tools/statistical_testing/test_runner.py | 130 ---------------- tools/to_binary_format.cpp | 98 ------------ tools/validate_binary_stream.cpp | 45 ------ 20 files changed, 33 insertions(+), 826 deletions(-) delete mode 100644 include/test/efficient_gen.h delete mode 100644 include/test/graph_gen.h delete mode 100644 test/util/efficient_gen/edge_gen.cpp delete mode 100644 test/util/efficient_gen/efficient_gen.cpp delete mode 100644 test/util/graph_gen.cpp delete mode 100644 test/util/graph_gen_test.cpp delete mode 100644 tools/statistical_testing/analyze_results.py delete mode 100644 tools/statistical_testing/graph_testing.cpp delete mode 100644 tools/statistical_testing/medium_test_expected.txt delete mode 100644 tools/statistical_testing/requirements.txt delete mode 100644 tools/statistical_testing/small_test_expected.txt delete mode 100644 tools/statistical_testing/stat_config.txt delete mode 100644 tools/statistical_testing/test_runner.py delete mode 100644 tools/to_binary_format.cpp delete mode 100644 tools/validate_binary_stream.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 14d811cf..9384c358 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -124,31 +124,10 @@ if (BUILD_EXE) test/dsu_test.cpp test/util_test.cpp test/util/file_graph_verifier.cpp - test/util/graph_gen.cpp - test/util/graph_gen_test.cpp test/util/graph_verifier_test.cpp) add_dependencies(tests GraphZeppelinVerifyCC) target_link_libraries(tests PRIVATE GraphZeppelinVerifyCC) - add_executable(statistical_test - tools/statistical_testing/graph_testing.cpp - test/util/file_graph_verifier.cpp - test/util/graph_gen.cpp) - add_dependencies(statistical_test GraphZeppelinVerifyCC) - target_link_libraries(statistical_test PRIVATE GraphZeppelinVerifyCC) - - # executables for experiment/benchmarking - add_executable(efficient_gen - src/util.cpp - test/util/efficient_gen/edge_gen.cpp - test/util/efficient_gen/efficient_gen.cpp) - target_link_libraries(efficient_gen PRIVATE xxhash GraphZeppelinCommon StreamingUtilities) - - # executable for converting to stream format - add_executable(to_binary_format - tools/to_binary_format.cpp) - target_link_libraries(to_binary_format PRIVATE GraphZeppelinCommon) - # executable for processing a binary graph stream add_executable(process_stream tools/process_stream.cpp) @@ -158,12 +137,6 @@ if (BUILD_EXE) add_executable(test_correctness tools/test_correctness.cpp) target_link_libraries(test_correctness PRIVATE GraphZeppelinVerifyCC) - - # tool for validating that a binary stream appears correct - add_executable(validate_binary_stream - tools/validate_binary_stream.cpp - ) - target_link_libraries(validate_binary_stream PRIVATE 
GraphZeppelin) endif() if (BUILD_BENCH) diff --git a/include/graph_sketch_driver.h b/include/graph_sketch_driver.h index 1ad6f0ef..38f77eba 100644 --- a/include/graph_sketch_driver.h +++ b/include/graph_sketch_driver.h @@ -58,8 +58,8 @@ class GraphSketchDriver { FRIEND_TEST(GraphTest, TestSupernodeRestoreAfterCCFailure); public: GraphSketchDriver(Alg *sketching_alg, GraphStream *stream, DriverConfiguration config, - size_t num_inserters = 1) - : sketching_alg(sketching_alg), stream(stream), num_stream_threads(num_inserters) { + size_t num_stream_threads = 1) + : sketching_alg(sketching_alg), stream(stream), num_stream_threads(num_stream_threads) { sketching_alg->allocate_worker_memory(config.get_worker_threads()); // set the leaf size of the guttering system appropriately if (config.gutter_conf().get_gutter_bytes() == GutteringConfiguration::uninit_param) { diff --git a/include/test/efficient_gen.h b/include/test/efficient_gen.h deleted file mode 100644 index 2a578c00..00000000 --- a/include/test/efficient_gen.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -void write_edges(uint32_t n, double p, const std::string& out_f); -// insert, delete based on a geometric distribution with ratio p -// i.e. p% of edges will be deleted, p^2% will be re-inserted, p^3 will be re-deleted -// until 1 element is left -void insert_delete(double p, const std::string& in_file, const std::string& out_file); - -void write_cumul(const std::string& stream_f, const std::string& cumul_f); diff --git a/include/test/graph_gen.h b/include/test/graph_gen.h deleted file mode 100644 index 24f03359..00000000 --- a/include/test/graph_gen.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once -#include -#include - -typedef struct genSet { - long n; // number of nodes - double p; // prob of edge between nodes - double r; // geometric insertion/removal - int max_appearances; // the maximum number of times an edge can show up - // in the stream. 0 for no limit. - std::string out_file; // file to write stream - std::string cumul_out_file; // file to write cumul graph - genSet(long n, double p, double r, int max_appearances, - std::string out_file, std::string cumul_out_file) - : n(n), p(p), r(r), max_appearances - (max_appearances), out_file(std::move(out_file)), cumul_out_file - (std::move(cumul_out_file)) {} -} GraphGenSettings; - -/** - * Generates a 1024-node graph with approximately 60,000 edge insert/deletes. 
- * Writes stream output to sample.txt - * Writes cumulative output to cumul_sample.txt - */ -void generate_stream(const GraphGenSettings& settings = - {1024,0.03,0.5,0,"./sample.txt", "./cumul_sample.txt"}); diff --git a/include/worker_thread_group.h b/include/worker_thread_group.h index 6575afda..a7ee26a6 100644 --- a/include/worker_thread_group.h +++ b/include/worker_thread_group.h @@ -88,7 +88,7 @@ class WorkerThread { } } } - int id; + const int id; GraphSketchDriver *driver; GutteringSystem *gts; std::condition_variable &flush_condition; diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index 78fc54a4..27855b31 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -109,12 +109,12 @@ void CCSketchAlg::apply_update_batch(int thr_id, node_id_t src_vertex, delta_sketch.update(static_cast(concat_pairing_fn(src_vertex, dst))); } - std::unique_lock(sketches[src_vertex]->mutex); + std::unique_lock lk(sketches[src_vertex]->mutex); sketches[src_vertex]->merge(delta_sketch); } void CCSketchAlg::apply_raw_buckets_update(node_id_t src_vertex, Bucket *raw_buckets) { - std::unique_lock(sketches[src_vertex]->mutex); + std::unique_lock lk(sketches[src_vertex]->mutex); sketches[src_vertex]->merge_raw_bucket_buffer(raw_buckets); } diff --git a/test/cc_alg_test.cpp b/test/cc_alg_test.cpp index 457534fa..a92cd406 100644 --- a/test/cc_alg_test.cpp +++ b/test/cc_alg_test.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -7,7 +8,6 @@ #include "cc_sketch_alg.h" #include "file_graph_verifier.h" -#include "graph_gen.h" #include "graph_sketch_driver.h" #include "mat_graph_verifier.h" @@ -16,6 +16,21 @@ static size_t get_seed() { return std::chrono::duration_cast(now.time_since_epoch()).count(); } +// helper function to generate a dynamic binary stream and its cumulative insert only stream +void generate_stream(size_t seed, node_id_t num_vertices, double density, double delete_portion, + double adtl_portion, size_t rounds, std::string stream_name, + std::string cumul_name) { + // remove old versions of the stream files + std::remove(stream_name.c_str()); + std::remove(cumul_name.c_str()); + + // generate new stream files + DynamicErdosGenerator dy_stream(seed, num_vertices, density, delete_portion, adtl_portion, + rounds); + dy_stream.to_ascii_file(stream_name); + dy_stream.write_cumulative_file(cumul_name); +} + /** * For many of these tests (especially for those upon very sparse and small graphs) * we allow for a certain number of failures per test. 
@@ -54,7 +69,7 @@ TEST_P(CCAlgTest, TestCorrectnessOnSmallRandomGraphs) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); int num_trials = 5; while (num_trials--) { - generate_stream(); + generate_stream(get_seed(), 1024, 0.03, 0.5, 0.005, 3, "sample.txt", "cumul_sample.txt"); AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); @@ -73,7 +88,7 @@ TEST_P(CCAlgTest, TestCorrectnessOnSmallSparseGraphs) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); int num_trials = 5; while (num_trials--) { - generate_stream({1024, 0.002, 0.5, 0, "./sample.txt", "./cumul_sample.txt"}); + generate_stream(get_seed(), 1024, 0.002, 0.5, 0.005, 3, "sample.txt", "cumul_sample.txt"); AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); @@ -92,7 +107,7 @@ TEST_P(CCAlgTest, TestCorrectnessOfReheating) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()); int num_trials = 5; while (num_trials--) { - generate_stream({1024, 0.002, 0.5, 0, "./sample.txt", "./cumul_sample.txt"}); + generate_stream(get_seed(), 1024, 0.002, 0.5, 0.005, 3, "sample.txt", "cumul_sample.txt"); AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); @@ -123,11 +138,13 @@ TEST_P(CCAlgTest, MultipleWorkers) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()).worker_threads(8); int num_trials = 5; while (num_trials--) { - generate_stream({1024, 0.002, 0.5, 0, "./sample.txt", "./cumul_sample.txt"}); + size_t seed = get_seed(); + generate_stream(seed, 1024, 0.002, 0.5, 0.5, 3, "sample.txt", "cumul_sample.txt"); AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); - CCSketchAlg cc_alg{num_nodes, get_seed()}; + seed = get_seed(); + CCSketchAlg cc_alg{num_nodes, seed}; cc_alg.set_verifier(std::make_unique(1024, "./cumul_sample.txt")); GraphSketchDriver driver(&cc_alg, &stream, driver_config); @@ -172,18 +189,17 @@ TEST_P(CCAlgTest, TestPointQuery) { TEST(CCAlgTest, TestQueryDuringStream) { auto driver_config = DriverConfiguration().gutter_sys(STANDALONE); auto cc_config = CCAlgConfiguration(); - generate_stream({1024, 0.002, 0.5, 0, "./sample.txt", "./cumul_sample.txt"}); + generate_stream(get_seed(), 1024, 0.03, 0.5, 0.05, 3, "sample.txt", "cumul_sample.txt"); std::ifstream in{"./sample.txt"}; AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); edge_id_t num_edges = stream.edges(); - edge_id_t tenth = num_edges / 10; + edge_id_t tenth = num_edges / 10; CCSketchAlg cc_alg{num_nodes, get_seed(), cc_config}; GraphSketchDriver driver(&cc_alg, &stream, driver_config); MatGraphVerifier verify(num_nodes); - int type; node_id_t a, b; @@ -197,7 +213,7 @@ TEST(CCAlgTest, TestQueryDuringStream) { } verify.reset_cc_state(); - driver.process_stream_until(tenth * (j+1)); + driver.process_stream_until(tenth * (j + 1)); driver.prep_query(); cc_alg.set_verifier(std::make_unique(verify)); cc_alg.connected_components(); @@ -284,7 +300,7 @@ TEST(CCAlgTest, MTStreamWithMultipleQueries) { size_t num_queries = 10; size_t upd_per_query = num_edges / num_queries; - for (size_t i = 0; i < num_queries-1; i++) { + for (size_t i = 0; i < num_queries - 1; i++) { for (size_t j = 0; j < upd_per_query; j++) { GraphStreamUpdate upd; verify_stream.get_update_buffer(&upd, 1); @@ -294,7 +310,7 @@ TEST(CCAlgTest, MTStreamWithMultipleQueries) { verify.reset_cc_state(); cc_alg.set_verifier(std::make_unique(verify)); - driver.process_stream_until(upd_per_query * (i+1)); + 
driver.process_stream_until(upd_per_query * (i + 1)); driver.prep_query(); cc_alg.connected_components(); } diff --git a/test/util/efficient_gen/edge_gen.cpp b/test/util/efficient_gen/edge_gen.cpp deleted file mode 100644 index 5187a756..00000000 --- a/test/util/efficient_gen/edge_gen.cpp +++ /dev/null @@ -1,118 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "../../../include/test/efficient_gen.h" -#include "../../../include/types.h" -#include "../../../include/util.h" - -typedef uint32_t ul; -typedef uint64_t ull; - -std::ofstream& operator<< (std::ofstream &os, const std::pair p) { - os << p.first << " " << p.second; - return os; -} - -void write_edges(ul n, double p, const std::string& out_f) { - ull num_edges = ((ull)n*(n-1))/2; - ull* arr = (ull*) malloc(num_edges*sizeof(ull)); - ul idx = 0; - - std::cout << "Generating possible edges" << std::endl; - for (unsigned i=0; i < n; ++i) { - for (unsigned j=i+1;j < n; ++j) { - arr[idx++] = concat_pairing_fn(i, j); - } - } - - std::cout << "Permuting edges" << std::endl; - std::shuffle(arr,arr+num_edges, std::mt19937(std::random_device()())); - std::ofstream out(out_f); - ull m = (ull) (num_edges*p); - out << n << " " << m << std::endl; - - std::cout << "Writing edges to file" << std::endl; - while (m--) { - Edge e = inv_concat_pairing_fn(arr[m]); - out << e.src << " " << e.dst << std::endl; - } - - out.close(); - free(arr); -} - -void insert_delete(double p, const std::string& in_file, const std::string& out_file) { - std::cout << "Deleting and reinserting some edges" << std::endl; - std::ifstream in(in_file); - std::ofstream out(out_file); - int n; ull m; in >> n >> m; - - ull full_m = m; - ull ins_del_arr[(ull)log2(m)+2]; - std::fill(ins_del_arr,ins_del_arr + (ull)log2(m)+2,0); - ins_del_arr[0] = m; - for (unsigned i = 0; ins_del_arr[i] > 1; ++i) { - ins_del_arr[i+1] = (ul)(ins_del_arr[i]*p); - full_m += ins_del_arr[i+1]; - } - - out << n << " " << full_m << std::endl; - - ull* memoized = (ull*) malloc(ins_del_arr[1]*sizeof(ull)); - ul a,b; - - for (unsigned i=0;i> a >> b; - out << "0 " << a << " " << b << std::endl; - memoized[i] = concat_pairing_fn(a, b); - } - - for (unsigned i=ins_del_arr[1];i> a >> b; - out << "0 " << a << " " << b << std::endl; - } - - for (unsigned i = 1; ins_del_arr[i] >= 1; ++i) { - int temp = i%2; - for (unsigned j=0;j> n >> m; - std::vector> adj(n,std::vector(n,false)); - bool type; - int a,b; - for (ull i=1;i<=m;++i) { - in >> type >> a >> b; - if ((type == INSERT && adj[a][b] == 1) || (type == DELETE && adj[a][b] == 0)) { - std::cerr << "Insertion/deletion error at line " << i - << " in " << stream_f; - return; - } - adj[a][b] = !adj[a][b]; - } - // write cumul output - ull m_cumul = 0; - for (int i = 0; i < n; ++i) { - for (int j = 0; j < n; ++j) { - if (adj[i][j]) ++m_cumul; - } - } - out << n << " " << m_cumul << std::endl; - for (int i = 0; i < n; ++i) { - for (int j = 0; j < n; ++j) { - if (adj[i][j]) out << i << " " << j << std::endl; - } - } -} diff --git a/test/util/efficient_gen/efficient_gen.cpp b/test/util/efficient_gen/efficient_gen.cpp deleted file mode 100644 index 93aa5b30..00000000 --- a/test/util/efficient_gen/efficient_gen.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include -#include "../../../include/test/efficient_gen.h" - -int main() { - int n; double p, r = 0.1; std::string s,t; char c = 0; bool cumul = false; - std::cout << "n: "; std::cin >> n; - std::cout << "p: "; std::cin >> p; - std::cout << "r: "; std::cin >> r; - std::cout << "cumul (y/n): "; std::cin >> 
c; - if (c == 'y' || c == 'Y') cumul = true; - std::cout << "Out file: "; std::cin >> s; - if (cumul) { std::cout << "Cumul out: "; std::cin >> t; } - - auto start = time(nullptr); - write_edges(n, p, "./TEMP_F"); - insert_delete(r,"./TEMP_F", s); - if (cumul) write_cumul(s,t); - std::cout << "Completed in " << time(nullptr)-start << " seconds" << std::endl; -} diff --git a/test/util/graph_gen.cpp b/test/util/graph_gen.cpp deleted file mode 100644 index 8114abc0..00000000 --- a/test/util/graph_gen.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include "graph_gen.h" -#include "types.h" -#include "util.h" - -#include -#include -#include -#include - -#define endl '\n' - -typedef uint32_t ul; -typedef uint64_t ull; - -const ull ULLMAX = std::numeric_limits
    ::max(); - - -std::ofstream& operator<< (std::ofstream &os, const std::pair p) { - os << p.first << " " << p.second; - return os; -} - -void write_edges(long n, double p, const std::string& out_f) { - ul num_edges = (n*(n-1))/2; - ull* arr = (ull*) malloc(num_edges*sizeof(ull)); - ul e = 0; - for (unsigned i = 0; i < n; ++i) { - for (unsigned j = i+1; j < n; ++j) { - arr[e++] = concat_pairing_fn(i, j); - } - } - std::shuffle(arr,arr+num_edges, std::mt19937(std::random_device()())); - std::ofstream out(out_f); - ul m = (ul) (num_edges*p); - out << n << " " << m << endl; - - while (m--) { - Edge e = inv_concat_pairing_fn(arr[m]); - out << e.src << " " << e.dst << endl; - } - out.flush(); - out.close(); - free(arr); -} - -void insert_delete(double p, int max_appearances, const std::string& in_file, - const std::string& out_file) { - std::ifstream in(in_file); - std::ofstream out(out_file); - int n; ul m; in >> n >> m; - long long full_m = m; - ull ins_del_arr[(ul)log2(m)+2]; - std::fill(ins_del_arr,ins_del_arr + (ul)log2(m)+2,0); - ins_del_arr[0] = m; - if (max_appearances == 0) { - for (unsigned i = 0; ins_del_arr[i] > 1; ++i) { - ins_del_arr[i + 1] = (ull) (ins_del_arr[i] * p); - full_m += ins_del_arr[i + 1]; - } - } else { - for (int i = 0; i < max_appearances - 1; ++i) { - ins_del_arr[i + 1] = (ull) (ins_del_arr[i] * p); - full_m += ins_del_arr[i + 1]; - } - } - - out << n << " " << full_m << endl; - - ull* memoized = (ull*) malloc(ins_del_arr[1]*sizeof(ull)); - ul a,b; - - for (unsigned i=0;i> a >> b; - out << "0 " << a << " " << b << endl; - memoized[i] = concat_pairing_fn(a, b); - } - - for (unsigned i=ins_del_arr[1];i> a >> b; - out << "0 " << a << " " << b << endl; - } - - in.close(); - - unsigned stopping = 1; - if (max_appearances == 0) { - for (; ins_del_arr[stopping] >= 1; ++stopping); - } else { - stopping = max_appearances; - } - for (unsigned i = 1; i < stopping; ++i) { - int temp = i % 2; - for (unsigned j = 0; j < ins_del_arr[i]; ++j) { - out << temp << " "; - Edge e = inv_concat_pairing_fn(memoized[j]); - out << e.src << " " << e.dst << endl; - } - } - out.flush(); - out.close(); - free(memoized); -} - -void write_cumul(const std::string& stream_f, const std::string& cumul_f) { - std::ifstream in(stream_f); - std::ofstream out(cumul_f); - int n; ull m; in >> n >> m; - std::vector> adj(n,std::vector(n,false)); - bool type; - int a,b; - for (ull i=1;i<=m;++i) { - in >> type >> a >> b; - if ((type == INSERT && adj[a][b] == 1) || (type == DELETE && adj[a][b] == 0)) { - std::cerr << "Insertion/deletion error at line " << i - << " in " << stream_f; - return; - } - adj[a][b] = !adj[a][b]; - } - - in.close(); - - // write cumul output - ull m_cumul = 0; - for (int i = 0; i < n; ++i) { - for (int j = 0; j < n; ++j) { - if (adj[i][j]) ++m_cumul; - } - } - out << n << " " << m_cumul << endl; - for (int i = 0; i < n; ++i) { - for (int j = 0; j < n; ++j) { - if (adj[i][j]) out << i << " " << j << endl; - } - } - out.flush(); - out.close(); -} - -void generate_stream(const GraphGenSettings& settings) { - write_edges(settings.n, settings.p, "./TEMP_F"); - insert_delete(settings.r, settings.max_appearances, "./TEMP_F", settings - .out_file); - write_cumul(settings.out_file,settings.cumul_out_file); -} diff --git a/test/util/graph_gen_test.cpp b/test/util/graph_gen_test.cpp deleted file mode 100644 index edd4d9f8..00000000 --- a/test/util/graph_gen_test.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include -#include "../../include/test/graph_gen.h" - -TEST(GraphGenTestSuite, TestGeneration) { - 
std::string fname = __FILE__; - size_t pos = fname.find_last_of("\\/"); - std::string curr_dir = (std::string::npos == pos) ? "" : fname.substr(0, pos); - generate_stream(); - struct stat buffer; - ASSERT_FALSE(stat("./sample.txt", &buffer)); - ASSERT_FALSE(stat("./cumul_sample.txt", &buffer)); -} diff --git a/tools/statistical_testing/analyze_results.py b/tools/statistical_testing/analyze_results.py deleted file mode 100644 index 2c284fba..00000000 --- a/tools/statistical_testing/analyze_results.py +++ /dev/null @@ -1,73 +0,0 @@ - -import numpy as np -import argparse -from scipy.stats import ttest_ind, norm - -def check_error(test_name, test_result_file, expected_result_file, confidence=0.95): - print('::::: ', test_name, ' :::::', sep='') - test_file = open(test_result_file) - test_result = np.loadtxt(test_file) - - test_file = open(expected_result_file) - test_expect = np.loadtxt(test_file) - - result_t = test_result.transpose() - test_failures = result_t[0,:] - test_runs = result_t[1,:] - - total_expect_failures = test_expect[0] - total_expect_runs = test_expect[1] - - assert (test_runs == 100).all(), "Each bin must be of size 100" - - # First step: Verify that there is not a dependency between tests and upon the graph - if (test_failures >= 6).any(): - return True, "Dependency between tests or upon input graph found" - - # Second step: Verify that the number of test failures does not deviate from the expectation - total_test_failures = np.sum(test_failures) - total_test_runs = np.sum(test_runs) - - assert total_test_runs == total_expect_runs, "The number of runs must be the same" - pr = total_expect_failures / total_expect_runs - critical_z_val = norm.ppf(1 - (1 - confidence) / 2) - z_test_deviation = np.ceil(critical_z_val * np.sqrt(pr * (1-pr) / total_expect_runs) * total_expect_runs) - print("Number of test failures:", total_test_failures, "{0}%".format(total_test_failures/total_test_runs)) - print("Total number of failures is allowed to deviate by at most", z_test_deviation) - print("Deviation is", total_test_failures - total_expect_failures) - if total_test_failures - z_test_deviation > total_expect_failures: - return True, "Test error is statistically greater than expectation {0}/{1}".format(int(total_test_failures), int(total_test_runs)) - - if total_test_failures + z_test_deviation < total_expect_failures: - return True, "Test error is statistically less than expectation {0}/{1}".format(int(total_test_failures), int(total_test_runs)) - - return False, "No statistical deviation detected {0}/{1}".format(int(total_test_failures), int(total_test_runs)) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Statistical testing on graph tests.') - parser.add_argument('small', metavar="small output", type=str, - help='the file which contains the results from the small graph test') - parser.add_argument('medium', metavar="medium output", type=str, - help='the file which contains the results from the medium graph test') - parser.add_argument('iso', metavar="medium iso output", type=str, - help='the file which contains the results from the medium+iso graph test') - - parser.add_argument('small_exp', metavar="small expect", type=str, - help="the file which contains the results from a correct branch for small graph") - parser.add_argument('medium_exp', metavar="medium expect", type=str, - help="the file which contains the results from a correct branch for medium graph") - parser.add_argument('iso_exp', metavar="medium iso expect", type=str, - help="the file which 
contains the results from a correct branch for medium+iso graph") - args = parser.parse_args() - - stat_result = check_error("small_test", args.small, args.small_exp, 0.1) - print(stat_result[0]) - print(stat_result[1]) - - stat_result = check_error("medium_test", args.medium, args.medium_exp, 0.1) - print(stat_result[0]) - print(stat_result[1]) - - stat_result = check_error("medium_iso_test", args.iso, args.iso_exp, 0.1) - print(stat_result[0]) - print(stat_result[1]) diff --git a/tools/statistical_testing/graph_testing.cpp b/tools/statistical_testing/graph_testing.cpp deleted file mode 100644 index dee89912..00000000 --- a/tools/statistical_testing/graph_testing.cpp +++ /dev/null @@ -1,96 +0,0 @@ -#include -#include "graph_sketch_driver.h" -#include "cc_sketch_alg.h" -#include "ascii_file_stream.h" -#include "graph_gen.h" -#include "file_graph_verifier.h" - -static DriverConfiguration driver_config; -static size_t get_seed() { - auto now = std::chrono::high_resolution_clock::now(); - return std::chrono::duration_cast(now.time_since_epoch()).count(); -} - -static inline int do_run() { - AsciiFileStream stream{"./sample.txt"}; - node_id_t n = stream.vertices(); - CCSketchAlg cc_alg{n, get_seed()}; - cc_alg.set_verifier(std::make_unique(n, "./cumul_sample.txt")); - GraphSketchDriver driver(&cc_alg, &stream, driver_config); - driver.process_stream_until(END_OF_STREAM); - driver.prep_query(); - try { - cc_alg.connected_components(); - } catch (std::exception const &err) { - return 1; - } - return 0; -} - -int small_graph_test(int runs) { - int failures = 0; - for (int i = 0; i < runs; i++) { - generate_stream({1024,0.002,0.5,0,"./sample.txt","./cumul_sample.txt"}); - failures += do_run(); - } - return failures; -} - -int medium_graph_test(int runs) { - int failures = 0; - for (int i = 0; i < runs; i++) { - generate_stream({2048,0.002,0.5,0,"./sample.txt","./cumul_sample.txt"}); - failures += do_run(); - } - return failures; -} - -int main() { - int runs = 100; - int num_trails = 500; - std::vector trial_list; - std::ofstream out; - - // run both with GutterTree and StandAloneGutters - for(int i = 0; i < 2; i++) { - bool use_tree = (bool) i; - - // setup configuration file per buffering - driver_config.gutter_sys(use_tree ? GUTTERTREE : STANDALONE); - driver_config.worker_threads(4); - std::string prefix = use_tree? 
"tree" : "gutters"; - std::string test_name; - - /************* small graph test *************/ - test_name = prefix + "_" + "small_graph_test"; - fprintf(stderr, "%s\n", test_name.c_str()); - out.open("./" + test_name); - for(int i = 0; i < num_trails; i++) { - if (i % 50 == 0) fprintf(stderr, "trial %i\n", i); - int trial_result = small_graph_test(runs); - trial_list.push_back(trial_result); - } - // output the results of these trials - for (unsigned i = 0; i < trial_list.size(); i++) { - out << trial_list[i] << " " << runs << "\n"; - } - trial_list.clear(); - out.close(); - - /************* medium graph test ************/ - test_name = prefix + "_" + "medium_graph_test"; - fprintf(stderr, "%s\n", test_name.c_str()); - out.open("./" + test_name); - for(int i = 0; i < num_trails; i++) { - if (i % 50 == 0) fprintf(stderr, "trial %i\n", i); - int trial_result = medium_graph_test(runs); - trial_list.push_back(trial_result); - } - // output the results of these trials - for (unsigned i = 0; i < trial_list.size(); i++) { - out << trial_list[i] << " " << runs << "\n"; - } - trial_list.clear(); - out.close(); - } -} diff --git a/tools/statistical_testing/medium_test_expected.txt b/tools/statistical_testing/medium_test_expected.txt deleted file mode 100644 index 03e815e8..00000000 --- a/tools/statistical_testing/medium_test_expected.txt +++ /dev/null @@ -1,2 +0,0 @@ -180 50000 - diff --git a/tools/statistical_testing/requirements.txt b/tools/statistical_testing/requirements.txt deleted file mode 100644 index db9b7bba..00000000 --- a/tools/statistical_testing/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -numpy>=1.21.2 -scipy>=1.7.1 -GitPython>=3.1.24 diff --git a/tools/statistical_testing/small_test_expected.txt b/tools/statistical_testing/small_test_expected.txt deleted file mode 100644 index 022b6d48..00000000 --- a/tools/statistical_testing/small_test_expected.txt +++ /dev/null @@ -1 +0,0 @@ -228 50000 diff --git a/tools/statistical_testing/stat_config.txt b/tools/statistical_testing/stat_config.txt deleted file mode 100644 index f4a33f1c..00000000 --- a/tools/statistical_testing/stat_config.txt +++ /dev/null @@ -1,5 +0,0 @@ -build_path=./build -stat_path=./test/statistical_testing -confidence=0.95 -usr= -pwd= diff --git a/tools/statistical_testing/test_runner.py b/tools/statistical_testing/test_runner.py deleted file mode 100644 index 1b4a39d3..00000000 --- a/tools/statistical_testing/test_runner.py +++ /dev/null @@ -1,130 +0,0 @@ -import subprocess -import importlib -import datetime -import smtplib -import git -SMTP_PORT = 465 - -importlib.import_module('analyze_results') -from analyze_results import check_error - -''' -Configure the system by reading from the configuration file -''' -def configure(): - build_path = "./" - stat_path = "./" - confidence = 0.95 - usr = "" - pwd = "" - with open('test/statistical_testing/stat_config.txt') as config: - lines = config.readlines() - for line in lines: - line_pair = line.split('=') - if line_pair[0].rstrip() == 'build_path': - build_path = line_pair[1].rstrip() - elif line_pair[0].rstrip() == 'stat_path': - stat_path = line_pair[1].rstrip() - elif line_pair[0].rstrip() == 'confidence': - confidence = float(line_pair[1].rstrip()) - elif line_pair[0].rstrip() == 'usr': - usr = line_pair[1].rstrip() - elif line_pair[0].rstrip() == 'pwd': - pwd = line_pair[1].rstrip() - else: - print("Error: unknown configuration parameter", line_pair[0]) - exit(1) - - return build_path, stat_path, confidence, usr, pwd - -''' -Run the statistical_testing executables 
-''' -def run_test(build_path): - subprocess.run(build_path + '/statistical_test', stdout=subprocess.DEVNULL, check=True) - -''' -Format the results of the test and raise an error if necessary -''' -def log_result(test_name, err, err_dsc): - if err: - return 'ERROR Test: ' + test_name + ' = ' + err_dsc - else: - return 'PASSED Test: ' + test_name + ' = ' + err_dsc -''' -Send an email containing the log -''' -def send_email(err_found, log, usr, pwd): - server_ssl = smtplib.SMTP_SSL('smtp.gmail.com', SMTP_PORT) - server_ssl.ehlo() - - today = datetime.datetime.today() - - server_ssl.login(usr, pwd) - subject = '' - if err_found: - subject = 'ERROR: ' - subject += 'Statistical Testing Log {0}/{1}/{2}'.format(str(today.month), str(today.day), str(today.year)) - - msg = "\r\n".join([ - "From: "+usr, - "To: graph.stat.testing@gmail.com", - "Subject:"+subject, - "", - log - ]) - server_ssl.sendmail(usr, "graph.stat.testing@gmail.com", msg) - server_ssl.quit() - -if __name__ == "__main__": - # Setup - build_path, stat_path, confidence, usr, pwd = configure() - assert usr != '' and pwd != '', "must specifiy user and password in configuration file" - - try: - repo = git.Repo("./") - buf_repo = git.Repo(build_path + "/GutterTree/src/GutterTree") - except: - print("Must run code at root directory of StreamingRepo and must have GutterTree code present in build dir") - exit(1) - head = repo.heads[0] - stream_commit_hash = head.commit.hexsha - stream_commit_msg = head.commit.message - - head = buf_repo.heads[0] - buffer_commit_hash = head.commit.hexsha - buffer_commit_msg = head.commit.message - - log = "StreamRepo Commit: " + stream_commit_hash + "\n" + stream_commit_msg + "\n" - log += "GutterTree Commit: " + buffer_commit_hash + "\n" + buffer_commit_msg + "\n" - - # Run the tests - run_test(build_path) - - for pre in ["tree", "gutters"]: - if pre == "tree": - log += "GutterTree\n" - else: - log += "StandAloneGutters\n" - - # Collect statistical results - # test_name, test_result_file, expected_result_file - try: - print("small test") - small_err, small_dsc = check_error('small test', pre + 'small_graph_test', stat_path + '/small_test_expected.txt') - except Exception as err: - small_err = True - small_dsc = "test threw expection: {0}".format(err) - try: - print("medium test") - medium_err, medium_dsc = check_error('medium test', pre + 'medium_graph_test', stat_path + '/medium_test_expected.txt') - except Exception as err: - medium_err = True - medium_dsc = "test threw expection: {0}".format(err) - - # Create a log, and send email - log += log_result('small test', small_err, small_dsc) + "\n" - log += log_result('medium test', medium_err, medium_dsc) + "\n" - - print("Sending email!") - send_email(small_err or medium_err, log, usr, pwd) diff --git a/tools/to_binary_format.cpp b/tools/to_binary_format.cpp deleted file mode 100644 index 290fde6b..00000000 --- a/tools/to_binary_format.cpp +++ /dev/null @@ -1,98 +0,0 @@ -#include -#include -#include -#include -#include -#include - -int main(int argc, char **argv) { - if (argc < 3 || argc > 5) { - std::cout << "Incorrect number of arguments. 
" - "Expected [2-4] but got " << argc-1 << std::endl; - std::cout << "Arguments are: ascii_stream out_file_name [--update_type] [--verbose]" << std::endl; - std::cout << "ascii_stream: The file to parse into binary format" << std::endl; - std::cout << "out_file_name: Where the binary stream will be written" << std::endl; - std::cout << "--update_type: If present then ascii stream indicates insertions vs deletions" << std::endl; - std::cout << "--silent: If present then no warnings are printed when stream corrections are made" << std::endl; - exit(EXIT_FAILURE); - } - - std::ifstream txt_file(argv[1]); - if (!txt_file) { - std::cerr << "ERROR: could not open input file!" << std::endl; - exit(EXIT_FAILURE); - } - std::ofstream out_file(argv[2], std::ios_base::binary | std::ios_base::out); - if (!out_file) { - std::cerr << "ERROR: could not open output file! " << argv[2] << ": " << strerror(errno) << std::endl; - exit(EXIT_FAILURE); - } - - bool update_type = false; - bool silent = false; - for (int i = 3; i < argc; i++) { - if (std::string(argv[i]) == "--update_type") - update_type = true; - else if (std::string(argv[i]) == "--silent") { - silent = true; - } - else { - std::cerr << "Did not recognize argument: " << argv[i] << " Expected '--update_type' or '--silent'"; - return EXIT_FAILURE; - } - } - - node_id_t num_nodes; - edge_id_t num_edges; - - txt_file >> num_nodes >> num_edges; - - std::cout << "Parsed ascii stream header. . ." << std::endl; - std::cout << "Number of nodes: " << num_nodes << std::endl; - std::cout << "Number of updates: " << num_edges << std::endl; - if (update_type) - std::cout << "Assuming that update format is: upd_type src dst" << std::endl; - else - std::cout << "Assuming that update format is: src dst" << std::endl; - - - out_file.write((char *) &num_nodes, sizeof(num_nodes)); - out_file.write((char *) &num_edges, sizeof(num_edges)); - - std::vector> adj_mat(num_nodes); - for (node_id_t i = 0; i < num_nodes; ++i) - adj_mat[i] = std::vector(num_nodes - i); - - bool u; - node_id_t src; - node_id_t dst; - - while(num_edges--) { - u = false; - if (update_type) - txt_file >> u >> src >> dst; - else - txt_file >> src >> dst; - - if (src > dst) { - if (!silent && u != adj_mat[dst][src - dst]) { - std::cout << "WARNING: update " << u << " " << src << " " << dst; - std::cout << " is double insert or delete before insert. Correcting." << std::endl; - } - u = adj_mat[dst][src - dst]; - adj_mat[dst][src - dst] = !adj_mat[dst][src - dst]; - } else { - if (!silent && u != adj_mat[src][dst - src]) { - std::cout << "WARNING: update " << u << " " << src << " " << dst; - std::cout << " is double insert or delete before insert. Correcting." << std::endl; - } - u = adj_mat[src][dst - src]; - adj_mat[src][dst - src] = !adj_mat[src][dst - src]; - } - - out_file.write((char *) &u, sizeof(u)); - out_file.write((char *) &src, sizeof(src)); - out_file.write((char *) &dst, sizeof(dst)); - } -} - diff --git a/tools/validate_binary_stream.cpp b/tools/validate_binary_stream.cpp deleted file mode 100644 index 41227832..00000000 --- a/tools/validate_binary_stream.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include - -int main(int argc, char **argv) { - if (argc != 2) { - std::cout << "Incorrect Number of Arguments!" 
<< std::endl; - std::cout << "Arguments: stream_file" << std::endl; - exit(EXIT_FAILURE); - } - - BinaryFileStream stream(argv[1]); - node_id_t nodes = stream.vertices(); - size_t edges = stream.edges(); - - std::cout << "Attempting to validate stream " << argv[1] << std::endl; - std::cout << "Number of nodes = " << nodes << std::endl; - std::cout << "Number of updates = " << edges << std::endl; - - // validate the src and dst of each node in the stream and ensure there are enough of them - bool err = false; - for (size_t e = 0; e < edges; e++) { - GraphStreamUpdate upd; - try { - stream.get_update_buffer(&upd, 1); - } catch (...) { - std::cerr << "ERROR: Could not get edge at index: " << e << std::endl; - err = true; - std::rethrow_exception(std::current_exception()); - break; - } - Edge edge = upd.edge; - UpdateType u = static_cast(upd.type); - std::cerr << u << " " << edge.src << " " << edge.dst << std::endl; - if (edge.src >= nodes || edge.dst >= nodes || (u != INSERT && u != DELETE) || - edge.src == edge.dst) { - std::cerr << "ERROR: edge idx:" << e << "=(" << edge.src << "," << edge.dst << "), " << u - << std::endl; - err = true; - } - if (e % 1000000000 == 0 && e != 0) std::cout << e << std::endl; - } - - if (!err) std::cout << "Stream validated!" << std::endl; - if (err) std::cout << "Stream invalid!" << std::endl; -} - From 2534e219047f849107c70e282093a17b84d85531 Mon Sep 17 00:00:00 2001 From: Evan West Date: Mon, 12 Feb 2024 12:13:18 -0500 Subject: [PATCH 14/37] swap unnecessary unique_lock for lock_guard --- src/cc_sketch_alg.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index 27855b31..ecd67fcb 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -109,12 +109,12 @@ void CCSketchAlg::apply_update_batch(int thr_id, node_id_t src_vertex, delta_sketch.update(static_cast(concat_pairing_fn(src_vertex, dst))); } - std::unique_lock lk(sketches[src_vertex]->mutex); + std::lock_guard lk(sketches[src_vertex]->mutex); sketches[src_vertex]->merge(delta_sketch); } void CCSketchAlg::apply_raw_buckets_update(node_id_t src_vertex, Bucket *raw_buckets) { - std::unique_lock lk(sketches[src_vertex]->mutex); + std::lock_guard lk(sketches[src_vertex]->mutex); sketches[src_vertex]->merge_raw_bucket_buffer(raw_buckets); } @@ -152,7 +152,7 @@ inline bool CCSketchAlg::sample_supernode(Sketch &skt) { auto src = std::min(e.src, e.dst); auto dst = std::max(e.src, e.dst); { - std::unique_lock lk(spanning_forest_mtx[src]); + std::lock_guard lk(spanning_forest_mtx[src]); spanning_forest[src].insert(dst); } } @@ -207,7 +207,7 @@ inline node_id_t find_last_partition_of_root(const std::vector &merg // merge the global and return if it is safe to query now inline bool merge_global(const size_t cur_round, const Sketch &local_sketch, GlobalMergeData &global) { - std::unique_lock lk(global.mtx); + std::lock_guard lk(global.mtx); global.sketch.range_merge(local_sketch, cur_round, 1); ++global.num_merge_done; assert(global.num_merge_done <= global.num_merge_needed); @@ -333,7 +333,7 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, if (!root_from_left) { // Resolved root_from_left, so we are the first thread to encounter this root // set the number of threads that will merge into this component - std::unique_lock lk(global_merges[global_id].mtx); + std::lock_guard lk(global_merges[global_id].mtx); global_merges[global_id].num_merge_needed = global_id - thr_id + 1; } bool query_ready = merge_global(cur_round, 
local_sketch, global_merges[global_id]); From 057f91b1ea57d3300863f4eecbee6104d528fee6 Mon Sep 17 00:00:00 2001 From: Daniel DeLayo Date: Mon, 12 Feb 2024 15:42:00 -0500 Subject: [PATCH 15/37] move to tools --- tools/{statistical_testing => }/sum_sketch_testing.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) rename tools/{statistical_testing => }/sum_sketch_testing.py (88%) diff --git a/tools/statistical_testing/sum_sketch_testing.py b/tools/sum_sketch_testing.py similarity index 88% rename from tools/statistical_testing/sum_sketch_testing.py rename to tools/sum_sketch_testing.py index 01052777..55b666e7 100644 --- a/tools/statistical_testing/sum_sketch_testing.py +++ b/tools/sum_sketch_testing.py @@ -27,6 +27,7 @@ def above(stats, target, sigmas): above += 1 else: below += 1 + print("BELOW") print (above / (above + below)) @@ -42,9 +43,11 @@ def mean(stats, sigmas): stats = parse(sys.argv[1]) -above(stats, 0.71, 0) +above(stats, 0.76, 0) +#above(stats, 0.78, 1) +#above(stats, 0.78, 2) -mean(stats, 0) +#mean(stats, 3) From d669088bbc76e05bef99a4c7ac99286937b32aa2 Mon Sep 17 00:00:00 2001 From: Daniel DeLayo Date: Thu, 15 Feb 2024 15:01:12 -0500 Subject: [PATCH 16/37] move to tools 2 --- CMakeLists.txt | 2 +- tools/{statistical_testing => }/sketch_testing.cpp | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tools/{statistical_testing => }/sketch_testing.cpp (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index a326e6e7..3c695baa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -152,7 +152,7 @@ if (BUILD_EXE) target_link_libraries(statistical_test PRIVATE GraphZeppelinVerifyCC) add_executable(statistical_sketch_test - tools/statistical_testing/sketch_testing.cpp) + tools/sketch_testing.cpp) add_dependencies(statistical_sketch_test GraphZeppelinVerifyCC) target_link_libraries(statistical_sketch_test PRIVATE GraphZeppelinVerifyCC) diff --git a/tools/statistical_testing/sketch_testing.cpp b/tools/sketch_testing.cpp similarity index 100% rename from tools/statistical_testing/sketch_testing.cpp rename to tools/sketch_testing.cpp From c607b581a2c62ee8e2a4d19612c6513bba94a0fc Mon Sep 17 00:00:00 2001 From: Evan West Date: Thu, 15 Feb 2024 16:13:00 -0500 Subject: [PATCH 17/37] Make documentation accurate again --- README.md | 56 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 5884f01a..625f5464 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ This is the source code of GraphZeppelin: a compact, fast, and scalable graph pr The full experiments for our SIGMOD paper can be found in our [Experiments Repository](https://github.com/GraphStreamingProject/ZeppelinExperiments). Our experiments were replicated by the SIGMOD reproducibility committee, details can be found in the [reproducibility report](https://reproducibility.sigmod.org/rep_rep/2023/Dayan-SIGMODReproReport26.pdf). +Since submitting to SIGMOD, GraphZeppelin has been continually updated improve robustness, performance, and reduce memory consumption. + ## Installing and Running GraphZeppelin ### Requirements - Unix OS (not Mac, tested on Ubuntu) @@ -16,28 +18,43 @@ The full experiments for our SIGMOD paper can be found in our [Experiments Repos This library can easily be included with other cmake projects using FetchContent or ExternalProject. 
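+
+For example, a minimal FetchContent sketch might look like the following (the repository URL and `GIT_TAG` are placeholders to adjust for your own setup; `GraphZeppelin` is the library target this project's CMakeLists defines):
+```
+include(FetchContent)
+
+# Download GraphZeppelin and make its targets available to this project
+FetchContent_Declare(
+  GraphZeppelin
+  GIT_REPOSITORY https://github.com/GraphStreamingProject/GraphZeppelin.git  # placeholder URL
+  GIT_TAG        main                                                        # placeholder: pin a release or commit
+)
+FetchContent_MakeAvailable(GraphZeppelin)
+
+# Link the GraphZeppelin library into your own executable
+add_executable(my_app main.cpp)
+target_link_libraries(my_app PRIVATE GraphZeppelin)
+```
+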
-### Basic Example
+## Minimal Example
 ```
-#include <binary_graph_stream.h>
-#include <graph.h>
+#include <binary_file_stream.h>
+#include <cc_sketch_alg.h>
+#include <cc_alg_configuration.h>
+#include <graph_sketch_driver.h>
 
 std::string file_name = "/path/to/binary/stream";
 
 int main() {
-  BinaryGraphStream stream(file_name, 1024*32); // Create a stream object for parsing a stream 'file_name' with 32 KiB buffer
-  node_id_t num_nodes = stream.nodes();         // Extract the number of nodes from the stream
-  size_t num_updates = stream.edges();          // Extract the number of edge updates from the stream
-  Graph g{num_nodes};                           // Create a empty graph with 'num_nodes' nodes
-
-  for (size_t e = 0; e < num_updates; e++)      // Loop through all the updates in the stream
-    g.update(stream.get_edge());                // Update the graph by applying the next edge update
-
-  auto CC = g.connected_components();           // Extract the connected components in the graph defined by the stream
+  BinaryFileStream stream(file_name);           // Create a stream object for parsing a graph stream 'file_name'
+  node_id_t num_vertices = stream.vertices();   // Extract the number of graph vertices from the stream
+  CCSketchAlg cc_alg{                           // Create connected components sketch algorithm
+    num_vertices,                               // vertices in graph
+    size_t(time(NULL)),                         // seed
+    CCAlgConfiguration()                        // configuration
+  };
+  GraphSketchDriver<CCSketchAlg> driver{        // Create a driver to manage the CC algorithm
+    &cc_alg,                                    // algorithm to update
+    &stream,                                    // stream to read
+    DriverConfiguration()                       // configuration
+  };
+  driver.process_stream_until(END_OF_STREAM);   // Tell the driver to process the entire graph stream
+  driver.prep_query();                          // Ensure that all updates have been processed
+  auto CC = cc_alg.connected_components();      // Extract the connected components
 }
 ```
 A more detailed example can be found in `tools/process_stream.cpp`.
 
-### Binary Stream Format
+## Configuration
+GraphZeppelin has a number of parameters for both the driver and the sketch algorithm. Driver parameters include the number of worker threads and which GutteringSystem to use; algorithm parameters include the desired batch size.
+To achieve high performance, it is important to set these parameters correctly. See `tools/process_stream.cpp`.
+
+The driver options are set with the `DriverConfiguration` object (see `include/driver_configuration.h`).
+Algorithm configuration varies by algorithm; the connected components options are managed with the `CCAlgConfiguration` object (see `include/cc_alg_configuration.h`).
+
+## Binary Stream Format
 GraphZeppelin uses a binary stream format for efficient file parsing. The format of these files is as follows.
 ```
 ...
@@ -50,17 +67,14 @@ Each edge_update has the following format:
 |    1 byte    |   4 bytes  |   4 bytes  |
 ```
-The UpdateType is 0 to indicate an insertion of the associated edge and 1 to indicate a deletion.
+Where UpdateType is 0 to indicate an insertion and 1 to indicate a deletion.
 
-### Other Stream Formats
-Other file formats can be used by writing a simple file parser that passes graph `update()` the expected edge update format `GraphUpdate := std::pair`. See our unit tests under `/test/graph_test.cpp` for examples of string based stream parsing.
 
-If receiving edge updates over the network it is equally straightforward to define a stream format that will receive, parse, and provide those updates to the graph `update()` function.
+See our [StreamingUtilities](https://github.com/GraphStreamingProject/StreamingUtilities) repository for more details.
 
-## Configuration
-GraphZeppelin has a number of parameters. These can be defined with the `GraphConfiguration` object. 
Key parameters include the number of graph workers and the guttering system to use for buffering updates. +## GutteringSystems +To achieve high update throughput, GraphZeppelin buffers updates in what we call a GutteringSystem. Choosing the correct GutteringSystem is important for performance. If you expect storage to include on disk data-structures, choose the `GutterTree`. Otherwise, choose the `CacheTree`. -See `include/graph_configuration.h` for more details. +For more details see the [GutteringSystems](https://github.com/GraphStreamingProject/GutterTree) repository. ## Debugging You can enable the symbol table and turn off compiler optimizations for debugging with tools like `gdb` or `valgrind` by performing the following steps From bdfed7ffacd89f40afd8d82b73844ac4f9666f18 Mon Sep 17 00:00:00 2001 From: Evan West Date: Thu, 15 Feb 2024 21:39:41 -0500 Subject: [PATCH 18/37] GraphSketchDriver doc in README --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 625f5464..b6965a22 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,10 @@ Where UpdateType is 0 to indicate an insertion and 1 to indicate a deletion. See our [StreamingUtilities](https://github.com/GraphStreamingProject/StreamingUtilities) repository for more details. + +## GraphSketchDriver +The `GraphSketchDriver` is responsible for managing the flow of data through the various components of our system. It is templatized by the specific sketch algorithm one is running. If using GraphZeppelin on a single machine, we recommend using the `GraphSketchDriver` for any vertex-based sketch algorithm. When implementing a new algorithm, the class must provide an interface to the driver. This interface is described at the top of `include/graph_sketch_driver.h` and is depicted in the Data Flow Documentation. + ## GutteringSystems To achieve high update throughput, GraphZeppelin buffers updates in what we call a GutteringSystem. Choosing the correct GutteringSystem is important for performance. If you expect storage to include on disk data-structures, choose the `GutterTree`. Otherwise, choose the `CacheTree`. From 085e5ed8f1fa65f796a6c0f6277feaeeb30637d2 Mon Sep 17 00:00:00 2001 From: Evan West Date: Thu, 15 Feb 2024 21:50:24 -0500 Subject: [PATCH 19/37] get rid of doxy and add initial control flow documentation --- .gitignore | 8 +- Doxyfile | 2608 ------------------------------------------ README.md | 2 +- docs/control_flow.md | 34 + 4 files changed, 36 insertions(+), 2616 deletions(-) delete mode 100755 Doxyfile create mode 100644 docs/control_flow.md diff --git a/.gitignore b/.gitignore index 2e1a7d7e..04ab3644 100644 --- a/.gitignore +++ b/.gitignore @@ -48,16 +48,10 @@ # other IDEs /.vscode -#Doxygen docs -/docs/ - -# Our configuration file -streaming.conf -test/statistical_testing/stat_config.txt - # Mac Files *.DS_Store # Python stuff for statistical testing *__pycache__/ *test_env/ + diff --git a/Doxyfile b/Doxyfile deleted file mode 100755 index e5fdb804..00000000 --- a/Doxyfile +++ /dev/null @@ -1,2608 +0,0 @@ -# Doxyfile 1.8.20 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project. -# -# All text after a double hash (##) is considered a comment and is placed in -# front of the TAG it is preceding. -# -# All text after a single hash (#) is considered a comment and will be ignored. -# The format is: -# TAG = value [value, ...] -# For lists, items can also be appended using: -# TAG += value [value, ...] 
-# Values that contain spaces should be placed between quotes (\" \"). - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the configuration -# file that follow. The default is UTF-8 which is also the encoding used for all -# text before the first occurrence of this tag. Doxygen uses libiconv (or the -# iconv built into libc) for the transcoding. See -# https://www.gnu.org/software/libiconv/ for the list of possible encodings. -# The default value is: UTF-8. - -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by -# double-quotes, unless you are using Doxywizard) that should identify the -# project for which the documentation is generated. This name is used in the -# title of most generated pages and in a few other places. -# The default value is: My Project. - -PROJECT_NAME = "My Project" - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. This -# could be handy for archiving the generated documentation or if some version -# control system is used. - -PROJECT_NUMBER = - -# Using the PROJECT_BRIEF tag one can provide an optional one line description -# for a project that appears at the top of each page and should give viewer a -# quick idea about the purpose of the project. Keep the description short. - -PROJECT_BRIEF = - -# With the PROJECT_LOGO tag one can specify a logo or an icon that is included -# in the documentation. The maximum height of the logo should not exceed 55 -# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy -# the logo to the output directory. - -PROJECT_LOGO = - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path -# into which the generated documentation will be written. If a relative path is -# entered, it will be relative to the location where doxygen was started. If -# left blank the current directory will be used. - -OUTPUT_DIRECTORY = "docs/" - -# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this -# option can be useful when feeding doxygen a huge amount of source files, where -# putting all generated files in the same directory would otherwise causes -# performance problems for the file system. -# The default value is: NO. - -CREATE_SUBDIRS = NO - -# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII -# characters to appear in the names of generated files. If set to NO, non-ASCII -# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode -# U+3044. -# The default value is: NO. - -ALLOW_UNICODE_NAMES = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. 
-# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese. -# The default value is: English. - -OUTPUT_LANGUAGE = English - -# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all generated output in the proper direction. -# Possible values are: None, LTR, RTL and Context. -# The default value is: None. - -OUTPUT_TEXT_DIRECTION = None - -# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member -# descriptions after the members that are listed in the file and class -# documentation (similar to Javadoc). Set to NO to disable this. -# The default value is: YES. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief -# description of a member or function before the detailed description -# -# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. -# The default value is: YES. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator that is -# used to form the text in various listings. Each string in this list, if found -# as the leading text of the brief description, will be stripped from the text -# and the result, after processing the whole list, is used as the annotated -# text. Otherwise, the brief description is used as-is. If left blank, the -# following values are used ($name is automatically replaced with the name of -# the entity):The $name class, The $name widget, The $name file, is, provides, -# specifies, contains, represents, a, an and the. - -ABBREVIATE_BRIEF = "The $name class" \ - "The $name widget" \ - "The $name file" \ - is \ - provides \ - specifies \ - contains \ - represents \ - a \ - an \ - the - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# doxygen will generate a detailed section even if there is only a brief -# description. -# The default value is: NO. - -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. -# The default value is: NO. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path -# before files name in the file list and in the header files. If set to NO the -# shortest path that makes the file name unique will be used -# The default value is: YES. - -FULL_PATH_NAMES = YES - -# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. -# Stripping is only done if one of the specified strings matches the left-hand -# part of the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the path to -# strip. 
-# -# Note that you can specify absolute paths here, but also relative paths, which -# will be relative from the directory where doxygen is started. -# This tag requires that the tag FULL_PATH_NAMES is set to YES. - -STRIP_FROM_PATH = - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the -# path mentioned in the documentation of a class, which tells the reader which -# header file to include in order to use a class. If left blank only the name of -# the header file containing the class definition is used. Otherwise one should -# specify the list of include paths that are normally passed to the compiler -# using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but -# less readable) file names. This can be useful is your file systems doesn't -# support long names like on DOS, Mac, or CD-ROM. -# The default value is: NO. - -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the -# first line (until the first dot) of a Javadoc-style comment as the brief -# description. If set to NO, the Javadoc-style will behave just like regular Qt- -# style comments (thus requiring an explicit @brief command for a brief -# description.) -# The default value is: NO. - -JAVADOC_AUTOBRIEF = NO - -# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line -# such as -# /*************** -# as being the beginning of a Javadoc-style comment "banner". If set to NO, the -# Javadoc-style will behave just like regular comments and it will not be -# interpreted by doxygen. -# The default value is: NO. - -JAVADOC_BANNER = NO - -# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first -# line (until the first dot) of a Qt-style comment as the brief description. If -# set to NO, the Qt-style will behave just like regular Qt-style comments (thus -# requiring an explicit \brief command for a brief description.) -# The default value is: NO. - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a -# multi-line C++ special comment block (i.e. a block of //! or /// comments) as -# a brief description. This used to be the default behavior. The new default is -# to treat a multi-line C++ comment block as a detailed description. Set this -# tag to YES if you prefer the old behavior instead. -# -# Note that setting this tag to YES also means that rational rose comments are -# not recognized any more. -# The default value is: NO. - -MULTILINE_CPP_IS_BRIEF = NO - -# By default Python docstrings are displayed as preformatted text and doxygen's -# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the -# doxygen's special commands can be used and the contents of the docstring -# documentation blocks is shown as doxygen documentation. -# The default value is: YES. - -PYTHON_DOCSTRING = YES - -# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the -# documentation from any documented member that it re-implements. -# The default value is: YES. - -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new -# page for each member. If set to NO, the documentation of a member will be part -# of the file/class/namespace that contains it. -# The default value is: NO. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen -# uses this value to replace tabs by spaces in code fragments. 
-# Minimum value: 1, maximum value: 16, default value: 4. - -TAB_SIZE = 4 - -# This tag can be used to specify a number of aliases that act as commands in -# the documentation. An alias has the form: -# name=value -# For example adding -# "sideeffect=@par Side Effects:\n" -# will allow you to put the command \sideeffect (or @sideeffect) in the -# documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines (in the resulting output). You can put ^^ in the value part of an -# alias to insert a newline as if a physical newline was in the original file. -# When you need a literal { or } or , in the value part of an alias you have to -# escape them by means of a backslash (\), this can lead to conflicts with the -# commands \{ and \} for these it is advised to use the version @{ and @} or use -# a double escape (\\{ and \\}) - -ALIASES = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources -# only. Doxygen will then generate output that is more tailored for C. For -# instance, some of the names that are used will be different. The list of all -# members will be omitted, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_FOR_C = NO - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or -# Python sources only. Doxygen will then generate output that is more tailored -# for that language. For instance, namespaces will be presented as packages, -# qualified scopes will look different, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources. Doxygen will then generate output that is tailored for Fortran. -# The default value is: NO. - -OPTIMIZE_FOR_FORTRAN = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. Doxygen will then generate output that is tailored for VHDL. -# The default value is: NO. - -OPTIMIZE_OUTPUT_VHDL = NO - -# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice -# sources only. Doxygen will then generate output that is more tailored for that -# language. For instance, namespaces will be presented as modules, types will be -# separated into more groups, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_SLICE = NO - -# Doxygen selects the parser to use depending on the extension of the files it -# parses. With this tag you can assign which parser to use for a given -# extension. Doxygen has a built-in mapping, but you can override or extend it -# using this tag. The format is ext=language, where ext is a file extension, and -# language is one of the parsers supported by doxygen: IDL, Java, JavaScript, -# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL, -# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: -# FortranFree, unknown formatted Fortran: Fortran. In the later case the parser -# tries to guess whether the code is fixed or free formatted code, this is the -# default for Fortran type files). For instance to make doxygen treat .inc files -# as Fortran files (default is PHP), and .f files as C (default is Fortran), -# use: inc=Fortran f=C. -# -# Note: For files without extension you can use no_extension as a placeholder. -# -# Note that for custom extensions you also need to set FILE_PATTERNS otherwise -# the files are not read by doxygen. 
- -EXTENSION_MAPPING = - -# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments -# according to the Markdown format, which allows for more readable -# documentation. See https://daringfireball.net/projects/markdown/ for details. -# The output of markdown processing is further processed by doxygen, so you can -# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in -# case of backward compatibilities issues. -# The default value is: YES. - -MARKDOWN_SUPPORT = YES - -# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up -# to that level are automatically included in the table of contents, even if -# they do not have an id attribute. -# Note: This feature currently applies only to Markdown headings. -# Minimum value: 0, maximum value: 99, default value: 5. -# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. - -TOC_INCLUDE_HEADINGS = 5 - -# When enabled doxygen tries to link words that correspond to documented -# classes, or namespaces to their corresponding documentation. Such a link can -# be prevented in individual cases by putting a % sign in front of the word or -# globally by setting AUTOLINK_SUPPORT to NO. -# The default value is: YES. - -AUTOLINK_SUPPORT = YES - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should set this -# tag to YES in order to let doxygen match functions declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); -# versus func(std::string) {}). This also make the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. -# The default value is: NO. - -BUILTIN_STL_SUPPORT = NO - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. -# The default value is: NO. - -CPP_CLI_SUPPORT = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: -# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen -# will parse them like normal C++ but will assume all classes use public instead -# of private inheritance when no explicit protection keyword is present. -# The default value is: NO. - -SIP_SUPPORT = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate -# getter and setter methods for a property. Setting this option to YES will make -# doxygen to replace the get and set methods by a property in the documentation. -# This will only work if the methods are indeed getting or setting a simple -# type. If this is not the case, or you want to show the methods anyway, you -# should set this option to NO. -# The default value is: YES. - -IDL_PROPERTY_SUPPORT = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. -# The default value is: NO. - -DISTRIBUTE_GROUP_DOC = NO - -# If one adds a struct or class to a group and this option is enabled, then also -# any nested class or struct is added to the same group. By default this option -# is disabled and one has to add nested compounds explicitly via \ingroup. -# The default value is: NO. 
- -GROUP_NESTED_COMPOUNDS = NO - -# Set the SUBGROUPING tag to YES to allow class member groups of the same type -# (for instance a group of public functions) to be put as a subgroup of that -# type (e.g. under the Public Functions section). Set it to NO to prevent -# subgrouping. Alternatively, this can be done per class using the -# \nosubgrouping command. -# The default value is: YES. - -SUBGROUPING = YES - -# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions -# are shown inside the group in which they are included (e.g. using \ingroup) -# instead of on a separate page (for HTML and Man pages) or section (for LaTeX -# and RTF). -# -# Note that this feature does not work in combination with -# SEPARATE_MEMBER_PAGES. -# The default value is: NO. - -INLINE_GROUPED_CLASSES = NO - -# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions -# with only public data fields or simple typedef fields will be shown inline in -# the documentation of the scope in which they are defined (i.e. file, -# namespace, or group documentation), provided this scope is documented. If set -# to NO, structs, classes, and unions are shown on a separate page (for HTML and -# Man pages) or section (for LaTeX and RTF). -# The default value is: NO. - -INLINE_SIMPLE_STRUCTS = NO - -# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or -# enum is documented as struct, union, or enum with the name of the typedef. So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically be -# useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. -# The default value is: NO. - -TYPEDEF_HIDES_STRUCT = NO - -# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This -# cache is used to resolve symbols given their name and scope. Since this can be -# an expensive process and often the same symbol appears multiple times in the -# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small -# doxygen will become slower. If the cache is too large, memory is wasted. The -# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range -# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 -# symbols. At the end of a run doxygen will report the cache usage and suggest -# the optimal cache size from a speed point of view. -# Minimum value: 0, maximum value: 9, default value: 0. - -LOOKUP_CACHE_SIZE = 0 - -# The NUM_PROC_THREADS specifies the number threads doxygen is allowed to use -# during processing. When set to 0 doxygen will based this on the number of -# cores available in the system. You can set it explicitly to a value larger -# than 0 to get more control over the balance between CPU load and processing -# speed. At this moment only the input processing can be done using multiple -# threads. Since this is still an experimental feature the default is set to 1, -# which efficively disables parallel processing. Please report any issues you -# encounter. Generating dot graphs in parallel is controlled by the -# DOT_NUM_THREADS setting. -# Minimum value: 0, maximum value: 32, default value: 1. 
[Flattened diff span: every line in this hunk carries the diff's `-` (removal) prefix and belongs to a Doxygen configuration file being deleted. The removed settings are the stock Doxygen options for entity extraction and visibility (EXTRACT_*, HIDE_*, SORT_*), warning and progress messages (WARN_*), input files and filters (INPUT, FILE_PATTERNS, EXCLUDE_*, INPUT_FILTER), source browsing (SOURCE_BROWSER, REFERENCES_*), the alphabetical class index, and HTML output (HTML_*, docset/HTML Help/Qt Help/Eclipse help generation, MathJax, and the built-in search engine); the hunk is cut off mid-way through the SEARCHENGINE description.]