diff --git a/CMakeLists.txt b/CMakeLists.txt index 51c1051..642f891 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,7 +76,8 @@ set(BUILD_SHARED_LIBS "${SAVED_BUILD_SHARED_LIBS}" CACHE BOOL "" FORCE) add_library(StreamingUtilities src/static_erdos_generator.cpp - src/dynamic_erdos_generator.cpp) + src/dynamic_erdos_generator.cpp + src/single_cut_generator.cpp) add_dependencies(StreamingUtilities xxhash GraphZeppelinCommon) target_link_libraries(StreamingUtilities PUBLIC xxhash GraphZeppelinCommon) target_include_directories(StreamingUtilities PUBLIC include/) @@ -93,6 +94,11 @@ if (BUILD_EXE) add_dependencies(run_erdos_gen StreamingUtilities) target_link_libraries(run_erdos_gen PRIVATE StreamingUtilities) + add_executable(run_single_cut_gen + tools/run_single_cut_gen.cpp) + add_dependencies(run_single_cut_gen StreamingUtilities) + target_link_libraries(run_single_cut_gen PRIVATE StreamingUtilities) + add_executable(stream_file_converter tools/stream_file_converter.cpp) add_dependencies(stream_file_converter StreamingUtilities) @@ -102,5 +108,11 @@ if (BUILD_EXE) tools/stream_validator.cpp) add_dependencies(stream_validator StreamingUtilities) target_link_libraries(stream_validator PRIVATE StreamingUtilities) + + add_executable(binary_to_static + tools/to_static.cpp) + add_dependencies(binary_to_static StreamingUtilities) + target_link_libraries(binary_to_static PUBLIC StreamingUtilities) + endif() diff --git a/include/single_cut_generator.h b/include/single_cut_generator.h new file mode 100644 index 0000000..70d208a --- /dev/null +++ b/include/single_cut_generator.h @@ -0,0 +1,34 @@ +#pragma once +#include "stream_types.h" +#include +#include +#include + +// Single cut stream generator +// Builds two connected components then repeatedly updates edges in the cut between the two +// Several rounds of adding n edges and then removing n edges, and adding and removing a single edge n times +class SingleCutGenerator { + private: + std::vector updates; /* update sequence filled by the constructor; get_next_edge() indexes into it */ + node_id_t 
num_vertices; + edge_id_t num_edges; + edge_id_t edge_idx = 0; /* read cursor into updates: advanced by get_next_edge(), reset to 0 by to_binary_file() and to_ascii_file() */ + public: + /* + * Constructor + * @param num_vertices number of vertices in the graph + * @param rounds Number of rounds + */ + SingleCutGenerator(node_id_t num_vertices, size_t rounds = 0); + + // these functions write all the stream edges to a file + void to_binary_file(std::string file_name); + void to_ascii_file(std::string file_name); + void write_cumulative_file(std::string file_name); /* NOTE(review): declared here, but no definition is visible in the part of this patch shown - confirm it is defined somewhere */ + + GraphStreamUpdate get_next_edge(); + + // getters + node_id_t get_num_vertices() { return num_vertices; } + edge_id_t get_num_edges() { return num_edges; } +}; \ No newline at end of file diff --git a/src/single_cut_generator.cpp b/src/single_cut_generator.cpp new file mode 100644 index 0000000..0417e8b --- /dev/null +++ b/src/single_cut_generator.cpp @@ -0,0 +1,79 @@ +#include "single_cut_generator.h" +#include "ascii_file_stream.h" +#include "binary_file_stream.h" + /* Constructor: throws StreamException unless num_vertices is a power of 2; rounds == 0 is replaced by num_vertices / 8; num_edges is set to num_vertices-2 + 2*rounds*num_vertices and updates is reserved to that size before being filled. NOTE(review): the power-of-2 test compares floating-point log2 with its truncation to size_t - confirm it behaves as intended for every node_id_t value, including 0. */ +SingleCutGenerator::SingleCutGenerator(node_id_t num_vertices, size_t rounds) + : num_vertices(num_vertices) { + if (log2(num_vertices) - size_t(log2(num_vertices)) != 0) { + throw StreamException("SingleCutGenerator: Number of vertices must be a power of 2!"); + } + if (rounds == 0) // Default number of rounds is n/8 + rounds = num_vertices / 8; + num_edges = num_vertices-2 + 2*rounds*num_vertices; + + updates.reserve(num_edges); + // Build two large components + GraphStreamUpdate update; + update.type = INSERT; /* NOTE(review): the text between "u=0; u" and "write_header" below appears to have been lost in extraction; the remainder of the constructor and the beginning of the file-writing helper are not visible here */ + for (node_id_t u=0; uwrite_header(gen.get_num_vertices(), gen.get_num_edges()); + + for (edge_id_t i = 0; i < gen.get_num_edges(); i++) { + upds[buffer_size++] = gen.get_next_edge(); + if (buffer_size >= buffer_capacity) { /* buffer full: write it out and start refilling from index 0 */ + stream->write_updates(upds, buffer_size); + buffer_size = 0; + } + } + if (buffer_size > 0) { /* write whatever is left over from the last partial buffer */ + stream->write_updates(upds, buffer_size); + } +} + /* to_binary_file: rewinds edge_idx to 0, then passes a BinaryFileStream opened on file_name (second constructor argument false) to write_to_file together with this generator. */ +void SingleCutGenerator::to_binary_file(std::string file_name) { + edge_idx = 0; + BinaryFileStream output_stream(file_name, false); + write_to_file(&output_stream, 
*this); +} /* to_ascii_file: same steps as to_binary_file, but with an AsciiFileStream opened on file_name (second constructor argument true). */ +void SingleCutGenerator::to_ascii_file(std::string file_name) { + edge_idx = 0; + AsciiFileStream output_stream(file_name, true); + write_to_file(&output_stream, *this); +} + +GraphStreamUpdate SingleCutGenerator::get_next_edge() { return updates[edge_idx++]; } /* returns updates[edge_idx] and then increments edge_idx; no bounds check is performed here */ diff --git a/tools/run_single_cut_gen.cpp b/tools/run_single_cut_gen.cpp new file mode 100644 index 0000000..f65bfdf --- /dev/null +++ b/tools/run_single_cut_gen.cpp @@ -0,0 +1,16 @@ +#include +#include +#include + +#include "single_cut_generator.h" + +int main() { /* builds a SingleCutGenerator for 8192 vertices with the default rounds argument, prints its vertex and edge counts, and writes it with to_binary_file(); the file name is "scut_" + int(log2(num_vertices)) + "_stream_binary" */ + std::cout << "SINGLE CUT STREAM" << std::endl; + SingleCutGenerator scut_stream(8192); + std::cout << "num_vertices = " << scut_stream.get_num_vertices() << std::endl; + std::cout << "num_edges = " << scut_stream.get_num_edges() << std::endl; + + // write out to binary stream file + std::string file_name = "scut_" + std::to_string(int(log2(scut_stream.get_num_vertices()))) + "_stream_binary"; + scut_stream.to_binary_file(file_name); +} diff --git a/tools/to_static.cpp b/tools/to_static.cpp new file mode 100644 index 0000000..e8cc731 --- /dev/null +++ b/tools/to_static.cpp @@ -0,0 +1,56 @@ +#include "binary_file_stream.h" + +#include +#include +#include +#include + +/* + * Converts a binary graph stream to a static ascii edge list + */ + +int main(int argc, char **argv) { /* requires exactly two command-line arguments (argc == 3); otherwise prints usage and exits with EXIT_FAILURE */ + if (argc != 3) { + std::cout << "Incorrect number of arguments. 
Expected two but got " << argc - 1 << std::endl; + std::cout << "Arguments are: input_stream, output_file" << std::endl; + exit(EXIT_FAILURE); + } + std::string input = argv[1]; + std::string output = argv[2]; + + assert(input != output); /* NOTE(review): assert is compiled out when NDEBUG is defined, so this guard may not run in release builds; it also compares the path strings only */ + + BinaryFileStream stream(input); + std::ofstream out_file(output); + + node_id_t num_nodes = stream.vertices(); + long m = stream.edges(); /* number of updates to read from the stream; counted down by the loop below */ + + std::vector> adj_mat(num_nodes); + for (node_id_t i = 0; i < num_nodes; i++) adj_mat[i] = std::vector(num_nodes - i); /* row i holds num_nodes - i entries; column j stands for vertex i + j, so only pairs with src at most dst are stored */ + + while (m--) { + GraphStreamUpdate upd; + stream.get_update_buffer(&upd, 1); /* reads one update at a time */ + node_id_t src = upd.edge.src; + node_id_t dst = upd.edge.dst; + if (src > dst) std::swap(src, dst); + dst = dst - src; /* convert dst to a column offset within row src */ + adj_mat[src][dst] = !adj_mat[src][dst]; /* every update toggles the entry; upd.type is not read */ + } + + std::cout << "Updating adjacency matrix done. Writing static graph to file." << std::endl; + uint64_t edges = 0; /* first pass: count the entries still set so the count can be written in the header line */ + for (node_id_t i = 0; i < num_nodes; i++) { + for (node_id_t j = 0; j < num_nodes - i; j++) { + if (adj_mat[i][j]) edges++; + } + } + + out_file << num_nodes << " " << edges << std::endl; /* header line: vertex count, a space, edge count */ + for (node_id_t i = 0; i < num_nodes; i++) { + for (node_id_t j = 0; j < num_nodes - i; j++) { + if (adj_mat[i][j]) out_file << i << "\t" << j + i << std::endl; /* one tab-separated edge per line; j + i turns the column offset back into a vertex id */ + } + } +}