Skip to content

Added code to generate single cut graph stream #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ set(BUILD_SHARED_LIBS "${SAVED_BUILD_SHARED_LIBS}" CACHE BOOL "" FORCE)

add_library(StreamingUtilities
src/static_erdos_generator.cpp
src/dynamic_erdos_generator.cpp)
src/dynamic_erdos_generator.cpp
src/single_cut_generator.cpp)
add_dependencies(StreamingUtilities xxhash GraphZeppelinCommon)
target_link_libraries(StreamingUtilities PUBLIC xxhash GraphZeppelinCommon)
target_include_directories(StreamingUtilities PUBLIC include/)
Expand All @@ -93,6 +94,11 @@ if (BUILD_EXE)
add_dependencies(run_erdos_gen StreamingUtilities)
target_link_libraries(run_erdos_gen PRIVATE StreamingUtilities)

# Command-line tool built from tools/run_single_cut_gen.cpp; it links against
# the StreamingUtilities library, which provides the single cut generator.
add_executable(run_single_cut_gen
tools/run_single_cut_gen.cpp)
# Explicitly order the build so StreamingUtilities is built before this tool.
add_dependencies(run_single_cut_gen StreamingUtilities)
target_link_libraries(run_single_cut_gen PRIVATE StreamingUtilities)

add_executable(stream_file_converter
tools/stream_file_converter.cpp)
add_dependencies(stream_file_converter StreamingUtilities)
Expand All @@ -102,5 +108,11 @@ if (BUILD_EXE)
tools/stream_validator.cpp)
add_dependencies(stream_validator StreamingUtilities)
target_link_libraries(stream_validator PRIVATE StreamingUtilities)

# Command-line tool built from tools/to_static.cpp that converts a binary
# graph stream into a static ascii edge list.
add_executable(binary_to_static
tools/to_static.cpp)
# Explicitly order the build so StreamingUtilities is built before this tool.
add_dependencies(binary_to_static StreamingUtilities)
# PRIVATE matches the other executables: nothing links against an executable,
# so there is no link interface to propagate.
target_link_libraries(binary_to_static PRIVATE StreamingUtilities)

endif()

34 changes: 34 additions & 0 deletions include/single_cut_generator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#pragma once
#include "stream_types.h"
#include <cmath>
#include <string>
#include <vector>

// Single cut stream generator
// Builds two connected components (a path over the first half of the vertices
// and a path over the second half) and then repeatedly updates the edges
// (u, u + n/2) in the cut between the two.
// Each round inserts all n/2 cut edges, deletes all n/2 of them, and then
// inserts and immediately deletes each of the n/2 cut edges one at a time.
class SingleCutGenerator {
 private:
  std::vector<GraphStreamUpdate> updates;  // every update of the stream, in emission order
  node_id_t num_vertices;                  // number of vertices in the graph
  edge_id_t num_edges;                     // total number of updates in the stream
  edge_id_t edge_idx = 0;                  // index of the update get_next_edge() returns next
 public:
  /*
   * Constructor
   * @param num_vertices  number of vertices in the graph
   * @param rounds        Number of rounds. Passing 0 selects the default of
   *                      num_vertices / 8 rounds.
   */
  SingleCutGenerator(node_id_t num_vertices, size_t rounds = 0);

  // these functions write all the stream edges to a file
  void to_binary_file(std::string file_name);
  void to_ascii_file(std::string file_name);
  // NOTE(review): no definition of write_cumulative_file is visible alongside
  // the other member definitions -- confirm it is implemented before calling.
  void write_cumulative_file(std::string file_name);

  /*
   * Returns the update at the current position in the stream and advances the
   * position by one.
   */
  GraphStreamUpdate get_next_edge();

  // getters (const so they can be used through const references)
  node_id_t get_num_vertices() const { return num_vertices; }
  edge_id_t get_num_edges() const { return num_edges; }
};
79 changes: 79 additions & 0 deletions src/single_cut_generator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#include "single_cut_generator.h"
#include "ascii_file_stream.h"
#include "binary_file_stream.h"

/*
 * Builds the complete single cut update stream in memory.
 *
 * The stream first inserts two paths, one over vertices [0, n/2) and one over
 * vertices [n/2, n). It then runs `rounds` rounds over the cut edges
 * (u, u + n/2): insert all of them, delete all of them, and finally insert and
 * immediately delete each of them in turn.
 *
 * @param num_vertices  number of vertices n; must be a power of 2 and at least 2
 * @param rounds        number of rounds; 0 selects the default of n / 8
 * @throws StreamException if num_vertices is not a power of 2 or is less than 2
 */
SingleCutGenerator::SingleCutGenerator(node_id_t num_vertices, size_t rounds)
    : num_vertices(num_vertices) {
  // Exact integer power-of-two test: a power of two has a single set bit, so
  // clearing its lowest set bit leaves zero. Avoids floating point log2().
  if (num_vertices == 0 || (num_vertices & (num_vertices - 1)) != 0) {
    throw StreamException("SingleCutGenerator: Number of vertices must be a power of 2!");
  }
  // With a single vertex the expressions n/2 - 1 and n - 2 below would wrap
  // around, so each of the two components needs at least one vertex.
  if (num_vertices < 2) {
    throw StreamException("SingleCutGenerator: Number of vertices must be at least 2!");
  }
  if (rounds == 0) {  // Default number of rounds is n/8
    rounds = num_vertices / 8;
  }

  const node_id_t half = num_vertices / 2;

  // n - 2 path edges, plus 2n updates per round (n/2 inserts, n/2 deletes and
  // n/2 insert/delete pairs).
  num_edges = static_cast<edge_id_t>(num_vertices) - 2 +
              2 * static_cast<edge_id_t>(rounds) * num_vertices;
  updates.reserve(num_edges);

  // Build two large components
  GraphStreamUpdate update;
  update.type = INSERT;
  for (node_id_t u = 0; u < half - 1; u++) {
    update.edge = {u, u + 1};
    updates.push_back(update);
    update.edge = {u + half, u + 1 + half};
    updates.push_back(update);
  }

  // Repeatedly add and remove edges between the two components rounds times
  for (size_t round = 0; round < rounds; round++) {
    std::cout << "GENERATING ROUND " << round << " OF " << rounds << std::endl;

    // First insert a bunch of edges across the cut and then delete them
    update.type = INSERT;
    for (node_id_t u = 0; u < half; u++) {
      update.edge = {u, u + half};
      updates.push_back(update);
    }
    update.type = DELETE;
    for (node_id_t u = 0; u < half; u++) {
      update.edge = {u, u + half};
      updates.push_back(update);
    }

    // Next repeatedly insert and delete one edge across the cut
    for (node_id_t u = 0; u < half; u++) {
      update.edge = {u, u + half};
      update.type = INSERT;
      updates.push_back(update);
      update.type = DELETE;
      updates.push_back(update);
    }
  }
}

/*
 * Writes the header and then every update produced by the generator to the
 * given stream, batching updates so the stream is written in chunks.
 *
 * @param stream  destination stream; receives write_header() once and then
 *                write_updates() once per batch
 * @param gen     generator to drain; get_next_edge() is called
 *                gen.get_num_edges() times
 */
void write_to_file(GraphStream *stream, SingleCutGenerator &gen) {
  // constexpr so the batch buffer is a standard fixed-size array rather than a
  // variable-length array, which is only a compiler extension in C++.
  constexpr size_t buffer_capacity = 4096;
  GraphStreamUpdate upds[buffer_capacity];
  size_t buffer_size = 0;

  const edge_id_t total_edges = gen.get_num_edges();
  stream->write_header(gen.get_num_vertices(), total_edges);

  for (edge_id_t i = 0; i < total_edges; i++) {
    upds[buffer_size++] = gen.get_next_edge();
    if (buffer_size >= buffer_capacity) {
      // Buffer is full: flush this batch and start refilling from the front.
      stream->write_updates(upds, buffer_size);
      buffer_size = 0;
    }
  }
  // Flush the final, partially filled batch.
  if (buffer_size > 0) {
    stream->write_updates(upds, buffer_size);
  }
}

void SingleCutGenerator::to_binary_file(std::string file_name) {
edge_idx = 0;
BinaryFileStream output_stream(file_name, false);
write_to_file(&output_stream, *this);
}
void SingleCutGenerator::to_ascii_file(std::string file_name) {
edge_idx = 0;
AsciiFileStream output_stream(file_name, true);
write_to_file(&output_stream, *this);
}

/*
 * Returns the update at the current position in the stream and advances the
 * position by one.
 * @throws StreamException if every update has already been returned, instead
 *         of reading past the end of the stored updates
 */
GraphStreamUpdate SingleCutGenerator::get_next_edge() {
  if (edge_idx >= updates.size()) {
    throw StreamException("SingleCutGenerator: No more updates in the stream!");
  }
  return updates[edge_idx++];
}
16 changes: 16 additions & 0 deletions tools/run_single_cut_gen.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#include <iostream>
#include <cmath>
#include <string>

#include "single_cut_generator.h"

int main() {
std::cout << "SINGLE CUT STREAM" << std::endl;
SingleCutGenerator scut_stream(8192);
std::cout << "num_vertices = " << scut_stream.get_num_vertices() << std::endl;
std::cout << "num_edges = " << scut_stream.get_num_edges() << std::endl;

// write out to binary stream file
std::string file_name = "scut_" + std::to_string(int(log2(scut_stream.get_num_vertices()))) + "_stream_binary";
scut_stream.to_binary_file(file_name);
}
56 changes: 56 additions & 0 deletions tools/to_static.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#include "binary_file_stream.h"

#include <cassert>
#include <fstream>
#include <iostream>
#include <vector>

/*
* Converts a binary graph stream to a static ascii edge list
*/

int main(int argc, char **argv) {
if (argc != 3) {
std::cout << "Incorrect number of arguments. Expected two but got " << argc - 1 << std::endl;
std::cout << "Arguments are: input_stream, output_file" << std::endl;
exit(EXIT_FAILURE);
}
std::string input = argv[1];
std::string output = argv[2];

assert(input != output);

BinaryFileStream stream(input);
std::ofstream out_file(output);

node_id_t num_nodes = stream.vertices();
long m = stream.edges();

std::vector<std::vector<bool>> adj_mat(num_nodes);
for (node_id_t i = 0; i < num_nodes; i++) adj_mat[i] = std::vector<bool>(num_nodes - i);

while (m--) {
GraphStreamUpdate upd;
stream.get_update_buffer(&upd, 1);
node_id_t src = upd.edge.src;
node_id_t dst = upd.edge.dst;
if (src > dst) std::swap(src, dst);
dst = dst - src;
adj_mat[src][dst] = !adj_mat[src][dst];
}

std::cout << "Updating adjacency matrix done. Writing static graph to file." << std::endl;
uint64_t edges = 0;
for (node_id_t i = 0; i < num_nodes; i++) {
for (node_id_t j = 0; j < num_nodes - i; j++) {
if (adj_mat[i][j]) edges++;
}
}

out_file << num_nodes << " " << edges << std::endl;
for (node_id_t i = 0; i < num_nodes; i++) {
for (node_id_t j = 0; j < num_nodes - i; j++) {
if (adj_mat[i][j]) out_file << i << "\t" << j + i << std::endl;
}
}
}