diff --git a/libs/libvtrutil/src/vtr_ndmatrix.h b/libs/libvtrutil/src/vtr_ndmatrix.h index dc69c7f60b..57571cc865 100644 --- a/libs/libvtrutil/src/vtr_ndmatrix.h +++ b/libs/libvtrutil/src/vtr_ndmatrix.h @@ -34,12 +34,12 @@ class NdMatrixProxy { * @param dim_stride: The stride of this dimension (i.e. how many element in memory between indicies of this dimension) * @param start: Pointer to the start of the sub-matrix this proxy represents */ - NdMatrixProxy(const size_t* dim_sizes, const size_t* dim_strides, T* start) + NdMatrixProxy(const size_t* dim_sizes, const size_t* dim_strides, T* start) : dim_sizes_(dim_sizes) , dim_strides_(dim_strides) , start_(start) {} - NdMatrixProxy& operator=(const NdMatrixProxy& other) = delete; + NdMatrixProxy& operator=(const NdMatrixProxy& other) = delete; ///@brief const [] operator const NdMatrixProxy operator[](size_t index) const { @@ -76,12 +76,12 @@ class NdMatrixProxy { * @param dim_stride: The stride of this dimension (i.e. how many element in memory between indicies of this dimension) * @param start: Pointer to the start of the sub-matrix this proxy represents */ - NdMatrixProxy(const size_t* dim_sizes, const size_t* dim_stride, T* start) + NdMatrixProxy(const size_t* dim_sizes, const size_t* dim_stride, T* start) : dim_sizes_(dim_sizes) , dim_strides_(dim_stride) , start_(start) {} - NdMatrixProxy& operator=(const NdMatrixProxy& other) = delete; + NdMatrixProxy& operator=(const NdMatrixProxy& other) = delete; ///@brief const [] operator const T& operator[](size_t index) const { @@ -407,3 +407,4 @@ using Matrix = NdMatrix; } // namespace vtr #endif + diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index 6900fa80bd..609b85df75 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -552,7 +552,6 @@ void SetupPackerOpts(const t_options& Options, //TODO: document? 
PackerOpts->global_clocks = true; /* DEFAULT */ - PackerOpts->hill_climbing_flag = false; /* DEFAULT */ PackerOpts->allow_unrelated_clustering = Options.allow_unrelated_clustering; PackerOpts->connection_driven = Options.connection_driven_clustering; diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index ab261e6b7b..b89c21ca4e 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -706,7 +706,6 @@ static void ShowPackerOpts(const t_packer_opts& PackerOpts) { } VTR_LOG("PackerOpts.connection_driven: %s", (PackerOpts.connection_driven ? "true\n" : "false\n")); VTR_LOG("PackerOpts.global_clocks: %s", (PackerOpts.global_clocks ? "true\n" : "false\n")); - VTR_LOG("PackerOpts.hill_climbing_flag: %s", (PackerOpts.hill_climbing_flag ? "true\n" : "false\n")); VTR_LOG("PackerOpts.inter_cluster_net_delay: %f\n", PackerOpts.inter_cluster_net_delay); VTR_LOG("PackerOpts.timing_driven: %s", (PackerOpts.timing_driven ? "true\n" : "false\n")); VTR_LOG("PackerOpts.target_external_pin_util: %s", vtr::join(PackerOpts.target_external_pin_util, " ").c_str()); diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index df280c52c5..e33c1ac004 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -790,7 +790,6 @@ struct t_packer_opts { std::string sdc_file_name; std::string output_file; bool global_clocks; - bool hill_climbing_flag; bool timing_driven; enum e_cluster_seed cluster_seed_type; float alpha; diff --git a/vpr/src/pack/attraction_groups.cpp b/vpr/src/pack/attraction_groups.cpp index 1cf6f428e3..8d151c6c6a 100644 --- a/vpr/src/pack/attraction_groups.cpp +++ b/vpr/src/pack/attraction_groups.cpp @@ -1,4 +1,5 @@ #include "attraction_groups.h" +#include "globals.h" AttractionInfo::AttractionInfo(bool attraction_groups_on) { const auto& floorplanning_ctx = g_vpr_ctx.floorplanning(); diff --git a/vpr/src/pack/attraction_groups.h b/vpr/src/pack/attraction_groups.h index 813d6e0fb1..ae2409cf77 100644 --- 
a/vpr/src/pack/attraction_groups.h +++ b/vpr/src/pack/attraction_groups.h @@ -10,8 +10,7 @@ #include "vtr_strong_id.h" #include "vtr_vector.h" -#include "atom_netlist.h" -#include "globals.h" +#include "atom_netlist_fwd.h" /** * @file @@ -78,7 +77,7 @@ class AttractionInfo { void add_attraction_group(const AttractionGroup& group_info); - int num_attraction_groups(); + int num_attraction_groups() const; int get_att_group_pulls() const; @@ -118,7 +117,7 @@ inline void AttractionInfo::set_atom_attraction_group(const AtomBlockId atom_id, attraction_groups[group_id].group_atoms.push_back(atom_id); } -inline int AttractionInfo::num_attraction_groups() { +inline int AttractionInfo::num_attraction_groups() const { return attraction_groups.size(); } diff --git a/vpr/src/pack/cluster_legalizer.h b/vpr/src/pack/cluster_legalizer.h index 1b0756cce3..61de0587a7 100644 --- a/vpr/src/pack/cluster_legalizer.h +++ b/vpr/src/pack/cluster_legalizer.h @@ -421,6 +421,13 @@ class ClusterLegalizer { return cluster.pr; } + /// @brief Gets the current number of molecules in the cluster. + inline size_t get_num_molecules_in_cluster(LegalizationClusterId cluster_id) const { + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + const LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + return cluster.molecules.size(); + } + /// @brief Gets the ID of the cluster that contains the given atom block. 
inline LegalizationClusterId get_atom_cluster(AtomBlockId blk_id) const { VTR_ASSERT_SAFE(blk_id.is_valid() && (size_t)blk_id < atom_cluster_.size()); diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 75a1ce82a5..0978817a0c 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -1,6 +1,6 @@ #include "cluster_util.h" #include -#include +#include #include "PreClusterTimingGraphResolver.h" #include "PreClusterDelayCalculator.h" @@ -14,8 +14,6 @@ #include "tatum/TimingReporter.hpp" #include "tatum/echo_writer.hpp" #include "vpr_context.h" -#include "vtr_math.h" -#include "SetupGrid.h" /**********************************/ /* Global variables in clustering */ @@ -125,12 +123,7 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts, } } -void free_clustering_data(const t_packer_opts& packer_opts, - t_clustering_data& clustering_data) { - - if (packer_opts.hill_climbing_flag) - delete[] clustering_data.hill_climbing_inputs_avail; - +void free_clustering_data(t_clustering_data& clustering_data) { delete[] clustering_data.unclustered_list_head; delete[] clustering_data.memory_pool; } @@ -161,8 +154,7 @@ void print_pack_status_header() { VTR_LOG("------------------- -------------------------- ---------\n"); } -void print_pack_status(int num_clb, - int tot_num_molecules, +void print_pack_status(int tot_num_molecules, int num_molecules_processed, int& mols_since_last_print, int device_width, @@ -178,7 +170,10 @@ void print_pack_status(int num_clb, int int_molecule_increment = (int)(print_frequency * tot_num_molecules); - if (mols_since_last_print == int_molecule_increment) { + int num_clusters_created = cluster_legalizer.clusters().size(); + + if (mols_since_last_print >= int_molecule_increment || + num_molecules_processed == tot_num_molecules) { VTR_LOG( "%6d/%-6d %3d%% " "%26d " @@ -186,13 +181,16 @@ void print_pack_status(int num_clb, num_molecules_processed, tot_num_molecules, int_percentage, - num_clb, + 
num_clusters_created, device_width, device_height); VTR_LOG("\n"); fflush(stdout); mols_since_last_print = 0; + // FIXME: This really should not be here. This has nothing to do with + // printing the pack status! Abstract this into the candidate + // selector class. if (attraction_groups.num_attraction_groups() > 0) { rebuild_attraction_groups(attraction_groups, cluster_legalizer); } @@ -326,8 +324,6 @@ void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, const Prepacker& prepacker, t_clustering_data& clustering_data, - std::unordered_map& net_output_feeds_driving_block_input, - int& unclustered_list_head_size, int num_molecules) { /* Allocates the main data structures used for clustering and properly * * initializes them. */ @@ -335,7 +331,7 @@ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, /* alloc and load list of molecules to pack */ clustering_data.unclustered_list_head = new t_molecule_link[max_molecule_stats.num_used_ext_inputs + 1]; - unclustered_list_head_size = max_molecule_stats.num_used_ext_inputs + 1; + clustering_data.unclustered_list_head_size = max_molecule_stats.num_used_ext_inputs + 1; for (int i = 0; i <= max_molecule_stats.num_used_ext_inputs; i++) { clustering_data.unclustered_list_head[i] = t_molecule_link(); @@ -366,20 +362,6 @@ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, next_ptr++; } - - /* load net info */ - for (AtomNetId net : atom_ctx.nlist.nets()) { - AtomPinId driver_pin = atom_ctx.nlist.net_driver(net); - AtomBlockId driver_block = atom_ctx.nlist.pin_block(driver_pin); - - for (AtomPinId sink_pin : atom_ctx.nlist.net_sinks(net)) { - AtomBlockId sink_block = atom_ctx.nlist.pin_block(sink_pin); - - if (driver_block == sink_block) { - net_output_feeds_driving_block_input[net]++; - } - } - } } /*****************************************/ @@ -521,164 +503,6 @@ void 
update_connection_gain_values(const AtomNetId net_id, } } -void try_fill_cluster(ClusterLegalizer& cluster_legalizer, - const Prepacker& prepacker, - const t_packer_opts& packer_opts, - t_pack_molecule*& prev_molecule, - t_pack_molecule*& next_molecule, - int& num_same_molecules, - t_cluster_progress_stats& cluster_stats, - int num_clb, - const LegalizationClusterId legalization_cluster_id, - AttractionInfo& attraction_groups, - vtr::vector>& clb_inter_blk_nets, - bool allow_unrelated_clustering, - const int& high_fanout_threshold, - const std::unordered_set& is_clock, - const std::unordered_set& is_global, - const std::shared_ptr& timing_info, - e_block_pack_status& block_pack_status, - t_molecule_link* unclustered_list_head, - const int& unclustered_list_head_size, - std::unordered_map& net_output_feeds_driving_block_input, - std::map>& primitive_candidate_block_types) { - const AtomContext& atom_ctx = g_vpr_ctx.atom(); - const DeviceContext& device_ctx = g_vpr_ctx.device(); - - block_pack_status = cluster_legalizer.add_mol_to_cluster(next_molecule, - legalization_cluster_id); - - auto blk_id = next_molecule->atom_block_ids[next_molecule->root]; - VTR_ASSERT(blk_id); - - std::string blk_name = atom_ctx.nlist.block_name(blk_id); - const t_model* blk_model = atom_ctx.nlist.block_model(blk_id); - - if (block_pack_status != e_block_pack_status::BLK_PASSED) { - if (packer_opts.pack_verbosity > 2) { - if (block_pack_status == e_block_pack_status::BLK_FAILED_ROUTE) { - VTR_LOG("\tNO_ROUTE: '%s' (%s)", blk_name.c_str(), blk_model->name); - VTR_LOGV(next_molecule->pack_pattern, " molecule %s molecule_size %zu", - next_molecule->pack_pattern->name, next_molecule->atom_block_ids.size()); - VTR_LOG("\n"); - fflush(stdout); - } else if (block_pack_status == e_block_pack_status::BLK_FAILED_FLOORPLANNING) { - VTR_LOG("\tFAILED_FLOORPLANNING_CONSTRAINTS_CHECK: '%s' (%s)", blk_name.c_str(), blk_model->name); - VTR_LOG("\n"); - } else { - VTR_LOG("\tFAILED_FEASIBILITY_CHECK: '%s' 
(%s)", blk_name.c_str(), blk_model->name, block_pack_status); - VTR_LOGV(next_molecule->pack_pattern, " molecule %s molecule_size %zu", - next_molecule->pack_pattern->name, next_molecule->atom_block_ids.size()); - VTR_LOG("\n"); - fflush(stdout); - } - } - - next_molecule = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), - attraction_groups, - allow_unrelated_clustering, - packer_opts.prioritize_transitive_connectivity, - packer_opts.transitive_fanout_threshold, - packer_opts.feasible_block_array_size, - &cluster_stats.num_unrelated_clustering_attempts, - prepacker, - cluster_legalizer, - clb_inter_blk_nets, - legalization_cluster_id, - packer_opts.pack_verbosity, - unclustered_list_head, - unclustered_list_head_size, - primitive_candidate_block_types); - if (prev_molecule == next_molecule) { - num_same_molecules++; - } - return; - } - - /* Continue packing by filling smallest cluster */ - if (packer_opts.pack_verbosity > 2) { - VTR_LOG("\tPASSED: '%s' (%s)", blk_name.c_str(), blk_model->name); - VTR_LOGV(next_molecule->pack_pattern, " molecule %s molecule_size %zu", - next_molecule->pack_pattern->name, next_molecule->atom_block_ids.size()); - VTR_LOG("\n"); - } - - fflush(stdout); - - //Since molecule passed, update num_molecules_processed - cluster_stats.num_molecules_processed++; - cluster_stats.mols_since_last_print++; - print_pack_status(num_clb, cluster_stats.num_molecules, - cluster_stats.num_molecules_processed, - cluster_stats.mols_since_last_print, - device_ctx.grid.width(), - device_ctx.grid.height(), - attraction_groups, - cluster_legalizer); - - update_cluster_stats(next_molecule, - cluster_legalizer, - is_clock, //Set of all clocks - is_global, //Set of all global signals (currently clocks) - packer_opts.global_clocks, packer_opts.alpha, packer_opts.beta, packer_opts.timing_driven, - packer_opts.connection_driven, - high_fanout_threshold, - *timing_info, - attraction_groups, - net_output_feeds_driving_block_input); 
- cluster_stats.num_unrelated_clustering_attempts = 0; - - if (packer_opts.timing_driven) { - cluster_stats.blocks_since_last_analysis++; /* historically, timing slacks were recomputed after X number of blocks were packed, but this doesn't significantly alter results so I (jluu) did not port the code */ - } - next_molecule = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), - attraction_groups, - allow_unrelated_clustering, - packer_opts.prioritize_transitive_connectivity, - packer_opts.transitive_fanout_threshold, - packer_opts.feasible_block_array_size, - &cluster_stats.num_unrelated_clustering_attempts, - prepacker, - cluster_legalizer, - clb_inter_blk_nets, - legalization_cluster_id, - packer_opts.pack_verbosity, - unclustered_list_head, - unclustered_list_head_size, - primitive_candidate_block_types); - - if (prev_molecule == next_molecule) { - num_same_molecules++; - } -} - -void store_cluster_info_and_free(const t_packer_opts& packer_opts, - const LegalizationClusterId legalization_cluster_id, - const t_logical_block_type_ptr logic_block_type, - const t_pb_type* le_pb_type, - std::vector& le_count, - const ClusterLegalizer& cluster_legalizer, - vtr::vector>& clb_inter_blk_nets) { - const AtomContext& atom_ctx = g_vpr_ctx.atom(); - - /* store info that will be used later in packing from pb_stats and free the rest */ - t_pb* cur_pb = cluster_legalizer.get_cluster_pb(legalization_cluster_id); - t_pb_stats* pb_stats = cur_pb->pb_stats; - for (const AtomNetId mnet_id : pb_stats->marked_nets) { - int external_terminals = atom_ctx.nlist.net_pins(mnet_id).size() - pb_stats->num_pins_of_net_in_pb[mnet_id]; - /* Check if external terminals of net is within the fanout limit and that there exists external terminals */ - if (external_terminals < packer_opts.transitive_fanout_threshold && external_terminals > 0) { - clb_inter_blk_nets[legalization_cluster_id].push_back(mnet_id); - } - } - - // update the data structure holding the LE 
counts - update_le_count(cur_pb, logic_block_type, le_pb_type, le_count); - - //print clustering progress incrementally - //print_pack_status(num_clb, num_molecules, num_molecules_processed, mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height()); -} - /*****************************************/ void update_timing_gain_values(const AtomNetId net_id, t_pb* cur_pb, @@ -686,7 +510,7 @@ void update_timing_gain_values(const AtomNetId net_id, enum e_net_relation_to_clustered_block net_relation_to_clustered_block, const SetupTimingInfo& timing_info, const std::unordered_set& is_global, - std::unordered_map& net_output_feeds_driving_block_input) { + const std::unordered_set& net_output_feeds_driving_block_input) { /*This function is called when the timing_gain values on the atom net* *net_id requires updating. */ float timinggain; @@ -696,7 +520,7 @@ void update_timing_gain_values(const AtomNetId net_id, /* Check if this atom net lists its driving atom block twice. If so, avoid * * double counting this atom block by skipping the first (driving) pin. */ auto pins = atom_ctx.nlist.net_pins(net_id); - if (net_output_feeds_driving_block_input[net_id] != 0) + if (net_output_feeds_driving_block_input.count(net_id) != 0) pins = atom_ctx.nlist.net_sinks(net_id); if (net_relation_to_clustered_block == OUTPUT @@ -747,7 +571,7 @@ void mark_and_update_partial_gain(const AtomNetId net_id, const SetupTimingInfo& timing_info, const std::unordered_set& is_global, const int high_fanout_net_threshold, - std::unordered_map& net_output_feeds_driving_block_input) { + const std::unordered_set& net_output_feeds_driving_block_input) { const AtomContext& atom_ctx = g_vpr_ctx.atom(); t_pb* cur_pb = atom_ctx.lookup.atom_pb(clustered_blk_id)->parent_pb; @@ -781,7 +605,7 @@ void mark_and_update_partial_gain(const AtomNetId net_id, * If so, avoid double counting by skipping the first (driving) pin. 
*/ auto pins = atom_ctx.nlist.net_pins(net_id); - if (net_output_feeds_driving_block_input[net_id] != 0) + if (net_output_feeds_driving_block_input.count(net_id) != 0) //We implicitly assume here that net_output_feeds_driver_block_input[net_id] is 2 //(i.e. the net loops back to the block only once) pins = atom_ctx.nlist.net_sinks(net_id); @@ -891,7 +715,7 @@ void update_cluster_stats(const t_pack_molecule* molecule, const int high_fanout_net_threshold, const SetupTimingInfo& timing_info, AttractionInfo& attraction_groups, - std::unordered_map& net_output_feeds_driving_block_input) { + const std::unordered_set& net_output_feeds_driving_block_input) { int molecule_size; int iblock; @@ -995,115 +819,6 @@ void update_cluster_stats(const t_pack_molecule* molecule, } } -void start_new_cluster(ClusterLegalizer& cluster_legalizer, - LegalizationClusterId& legalization_cluster_id, - t_pack_molecule* molecule, - std::map& num_used_type_instances, - const float target_device_utilization, - const t_arch* arch, - const std::string& device_layout_name, - const std::map>& primitive_candidate_block_types, - int verbosity, - bool balance_block_type_utilization) { - - const AtomContext& atom_ctx = g_vpr_ctx.atom(); - DeviceContext& mutable_device_ctx = g_vpr_ctx.mutable_device(); - const DeviceContext& device_ctx = g_vpr_ctx.mutable_device(); - - /* Allocate a dummy initial cluster and load a atom block as a seed and check if it is legal */ - AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; - const std::string& root_atom_name = atom_ctx.nlist.block_name(root_atom); - const t_model* root_model = atom_ctx.nlist.block_model(root_atom); - - auto itr = primitive_candidate_block_types.find(root_model); - VTR_ASSERT(itr != primitive_candidate_block_types.end()); - std::vector candidate_types = itr->second; - - if (balance_block_type_utilization) { - //We sort the candidate types in ascending order by their current utilization. 
- //This means that the packer will prefer to use types with lower utilization. - //This is a naive approach to try balancing utilization when multiple types can - //support the same primitive(s). - std::stable_sort(candidate_types.begin(), candidate_types.end(), - [&](t_logical_block_type_ptr lhs, t_logical_block_type_ptr rhs) { - int lhs_num_instances = 0; - int rhs_num_instances = 0; - // Count number of instances for each type - for (auto type : lhs->equivalent_tiles) - lhs_num_instances += device_ctx.grid.num_instances(type, -1); - for (auto type : rhs->equivalent_tiles) - rhs_num_instances += device_ctx.grid.num_instances(type, -1); - - float lhs_util = vtr::safe_ratio(num_used_type_instances[lhs], lhs_num_instances); - float rhs_util = vtr::safe_ratio(num_used_type_instances[rhs], rhs_num_instances); - //Lower util first - return lhs_util < rhs_util; - }); - } - - if (verbosity > 2) { - VTR_LOG("\tSeed: '%s' (%s)", root_atom_name.c_str(), root_model->name); - VTR_LOGV(molecule->pack_pattern, " molecule_type %s molecule_size %zu", - molecule->pack_pattern->name, molecule->atom_block_ids.size()); - VTR_LOG("\n"); - } - - //Try packing into each candidate type - bool success = false; - t_logical_block_type_ptr block_type; - LegalizationClusterId new_cluster_id; - for (auto type : candidate_types) { - //Try packing into each mode - e_block_pack_status pack_result = e_block_pack_status::BLK_STATUS_UNDEFINED; - for (int j = 0; j < type->pb_graph_head->pb_type->num_modes && !success; j++) { - std::tie(pack_result, new_cluster_id) = cluster_legalizer.start_new_cluster(molecule, type, j); - success = (pack_result == e_block_pack_status::BLK_PASSED); - } - - if (success) { - VTR_LOGV(verbosity > 2, "\tPASSED_SEED: Block Type %s\n", type->name.c_str()); - // If clustering succeeds return the new_cluster_id and type. 
- legalization_cluster_id = new_cluster_id; - block_type = type; - break; - } else { - VTR_LOGV(verbosity > 2, "\tFAILED_SEED: Block Type %s\n", type->name.c_str()); - } - } - - if (!success) { - //Explored all candidates - if (molecule->type == MOLECULE_FORCED_PACK) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "Can not find any logic block that can implement molecule.\n" - "\tPattern %s %s\n", - molecule->pack_pattern->name, - root_atom_name.c_str()); - } else { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "Can not find any logic block that can implement molecule.\n" - "\tAtom %s (%s)\n", - root_atom_name.c_str(), root_model->name); - } - } - - VTR_ASSERT(success); - - //Successfully create cluster - num_used_type_instances[block_type]++; - - /* Expand FPGA size if needed */ - // Check used type instances against the possible equivalent physical locations - unsigned int num_instances = 0; - for (auto equivalent_tile : block_type->equivalent_tiles) { - num_instances += device_ctx.grid.num_instances(equivalent_tile, -1); - } - - if (num_used_type_instances[block_type] > num_instances) { - mutable_device_ctx.grid = create_device_grid(device_layout_name, arch->grid_layouts, num_used_type_instances, target_device_utilization); - } -} - t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, AttractionInfo& attraction_groups, const enum e_gain_type gain_mode, @@ -1114,7 +829,7 @@ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, bool prioritize_transitive_connectivity, int transitive_fanout_threshold, const int feasible_block_array_size, - std::map>& primitive_candidate_block_types) { + const std::map>& primitive_candidate_block_types) { /* * This routine populates a list of feasible blocks outside the cluster, then returns the best candidate for the cluster. * If there are no feasible blocks it returns a nullptr. 
@@ -1269,7 +984,7 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, AttractionInfo& attraction_groups, const int feasible_block_array_size, LegalizationClusterId legalization_cluster_id, - std::map>& primitive_candidate_block_types) { + const std::map>& primitive_candidate_block_types) { const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; auto cluster_type = cluster_legalizer.get_cluster_type(legalization_cluster_id); @@ -1301,7 +1016,7 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, const auto& atom_model = atom_nlist.block_model(atom_id); auto itr = primitive_candidate_block_types.find(atom_model); VTR_ASSERT(itr != primitive_candidate_block_types.end()); - std::vector& candidate_types = itr->second; + const std::vector& candidate_types = itr->second; //Only consider molecules that are unpacked and of the correct type if (!cluster_legalizer.is_atom_clustered(atom_id) @@ -1322,7 +1037,7 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, const auto& atom_model = atom_nlist.block_model(atom_id); auto itr = primitive_candidate_block_types.find(atom_model); VTR_ASSERT(itr != primitive_candidate_block_types.end()); - std::vector& candidate_types = itr->second; + const std::vector& candidate_types = itr->second; //Only consider molecules that are unpacked and of the correct type if (!cluster_legalizer.is_atom_clustered(atom_id) @@ -1353,7 +1068,7 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, const auto& atom_model = atom_nlist.block_model(blk_id); auto itr = primitive_candidate_block_types.find(atom_model); VTR_ASSERT(itr != primitive_candidate_block_types.end()); - std::vector& candidate_types = itr->second; + const std::vector& candidate_types = itr->second; //Only consider molecules that are unpacked and of the correct type if (!cluster_legalizer.is_atom_clustered(blk_id) @@ -1414,7 +1129,7 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, int verbosity, 
t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size, - std::map>& primitive_candidate_block_types) { + const std::map>& primitive_candidate_block_types) { /* Finds the block with the greatest gain that satisfies the * input, clock and capacity constraints of a cluster that are * passed in. If no suitable block is found it returns nullptr. @@ -1654,6 +1369,26 @@ std::map> identify_primiti return model_candidates; } +std::unordered_set identify_net_output_feeds_driving_block_input(const AtomNetlist& atom_netlist) { + std::unordered_set net_output_feeds_driving_block_input; + + for (AtomNetId net : atom_netlist.nets()) { + AtomPinId driver_pin = atom_netlist.net_driver(net); + AtomBlockId driver_block = atom_netlist.pin_block(driver_pin); + + for (AtomPinId sink_pin : atom_netlist.net_sinks(net)) { + AtomBlockId sink_block = atom_netlist.pin_block(sink_pin); + + if (driver_block == sink_block) { + net_output_feeds_driving_block_input.insert(net); + break; + } + } + } + + return net_output_feeds_driving_block_input; +} + size_t update_pb_type_count(const t_pb* pb, std::map& pb_type_count, size_t depth) { size_t max_depth = depth; @@ -1723,7 +1458,7 @@ void print_pb_type_count(const ClusteredNetlist& clb_nlist) { VTR_LOG("\n"); } -t_logical_block_type_ptr identify_logic_block_type(std::map>& primitive_candidate_block_types) { +t_logical_block_type_ptr identify_logic_block_type(const std::map>& primitive_candidate_block_types) { std::string lut_name = ".names"; for (auto& model : primitive_candidate_block_types) { @@ -1759,7 +1494,12 @@ t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type) { return nullptr; } -void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count) { +void update_le_count(const t_pb* pb, + const t_logical_block_type_ptr logic_block_type, + const t_pb_type* le_pb_type, + int& num_logic_le, + int& num_reg_le, + int& 
num_logic_and_reg_le) { // if this cluster doesn't contain LEs or there // are no les in this architecture, ignore it if (!logic_block_type || pb->pb_graph_node != logic_block_type->pb_graph_head || !le_pb_type) @@ -1785,15 +1525,15 @@ void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_ auto has_used_adder = pb_used_for_blif_model(&parent_pb->child_pbs[0][ile], adder); auto has_used_ff = pb_used_for_blif_model(&parent_pb->child_pbs[0][ile], ff); - // First type of LEs: used for logic and registers if ((has_used_lut || has_used_adder) && has_used_ff) { - le_count[0]++; - // Second type of LEs: used for logic only + // First type of LEs: used for logic and registers + num_logic_and_reg_le++; } else if (has_used_lut || has_used_adder) { - le_count[1]++; - // Third type of LEs: used for registers only + // Second type of LEs: used for logic only + num_logic_le++; } else if (has_used_ff) { - le_count[2]++; + // Third type of LEs: used for registers only + num_reg_le++; } } } @@ -1825,12 +1565,19 @@ bool pb_used_for_blif_model(const t_pb* pb, const std::string& blif_model_name) return false; } -void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type) { +void print_le_count(int num_logic_le, + int num_reg_le, + int num_logic_and_reg_le, + const t_pb_type* le_pb_type) { + VTR_ASSERT(le_pb_type != nullptr); + + int num_total_le = num_logic_and_reg_le + num_logic_le + num_reg_le; + VTR_LOG("\nLogic Element (%s) detailed count:\n", le_pb_type->name); - VTR_LOG(" Total number of Logic Elements used : %d\n", le_count[0] + le_count[1] + le_count[2]); - VTR_LOG(" LEs used for logic and registers : %d\n", le_count[0]); - VTR_LOG(" LEs used for logic only : %d\n", le_count[1]); - VTR_LOG(" LEs used for registers only : %d\n\n", le_count[2]); + VTR_LOG(" Total number of Logic Elements used : %d\n", num_total_le); + VTR_LOG(" LEs used for logic and registers : %d\n", num_logic_and_reg_le); + VTR_LOG(" LEs used for logic only : %d\n", 
num_logic_le); + VTR_LOG(" LEs used for registers only : %d\n\n", num_reg_le); } t_pb* get_top_level_pb(t_pb* pb) { diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h index c794daf066..ca54c4046a 100644 --- a/vpr/src/pack/cluster_util.h +++ b/vpr/src/pack/cluster_util.h @@ -1,6 +1,7 @@ #ifndef CLUSTER_UTIL_H #define CLUSTER_UTIL_H +#include #include #include "cluster_legalizer.h" #include "pack_types.h" @@ -63,18 +64,9 @@ struct t_molecule_stats { int num_used_ext_outputs = 0; //Number of *used external* output pins across all primitives in molecule }; -struct t_cluster_progress_stats { - int num_molecules = 0; - int num_molecules_processed = 0; - int mols_since_last_print = 0; - int blocks_since_last_analysis = 0; - int num_unrelated_clustering_attempts = 0; -}; - /* Useful data structures for creating or modifying clusters */ struct t_clustering_data { - int* hill_climbing_inputs_avail; - + int unclustered_list_head_size = 0; /* Keeps a linked list of the unclustered blocks to speed up looking for * * unclustered blocks with a certain number of *external* inputs. * * [0..lut_size]. Unclustered_list_head[i] points to the head of the * @@ -83,16 +75,6 @@ struct t_clustering_data { //Maintaining a linked list of free molecule data for speed t_molecule_link* memory_pool = nullptr; - - /* Does the atom block that drives the output of this atom net also appear as a * - * receiver (input) pin of the atom net? If so, then by how much? - * - * This is used in the gain routines to avoid double counting the connections from * - * the current cluster to other blocks (hence yielding better clusterings). * - * The only time an atom block should connect to the same atom net * - * twice is when one connection is an output and the other is an input, * - * so this should take care of all multiple connections. 
*/ - std::unordered_map net_output_feeds_driving_block_input; }; /***********************************/ @@ -112,8 +94,7 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts, /* * @brief Free the clustering data structures. */ -void free_clustering_data(const t_packer_opts& packer_opts, - t_clustering_data& clustering_data); +void free_clustering_data(t_clustering_data& clustering_data); /* * @brief Check clustering legality and output it. @@ -154,8 +135,6 @@ void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, const Prepacker& prepacker, t_clustering_data& clustering_data, - std::unordered_map& net_output_feeds_driving_block_input, - int& unclustered_list_head_size, int num_molecules); /* @@ -195,8 +174,7 @@ void print_pack_status_header(); /* * @brief Incrementally print progress updates during clustering. */ -void print_pack_status(int num_clb, - int tot_num_molecules, +void print_pack_status(int tot_num_molecules, int num_molecules_processed, int& mols_since_last_print, int device_width, @@ -212,42 +190,6 @@ void print_pack_status(int num_clb, void rebuild_attraction_groups(AttractionInfo& attraction_groups, const ClusterLegalizer& cluster_legalizer); -/* - * @brief Try to pack next_molecule into the given cluster. If this succeeds - * prepares the next_molecule with a new value to pack next iteration. - * - * This method will print the pack status and update the cluster stats. 
- */ -void try_fill_cluster(ClusterLegalizer& cluster_legalizer, - const Prepacker& prepacker, - const t_packer_opts& packer_opts, - t_pack_molecule*& prev_molecule, - t_pack_molecule*& next_molecule, - int& num_same_molecules, - t_cluster_progress_stats& cluster_stats, - int num_clb, - const LegalizationClusterId legalization_cluster_id, - AttractionInfo& attraction_groups, - vtr::vector>& clb_inter_blk_nets, - bool allow_unrelated_clustering, - const int& high_fanout_threshold, - const std::unordered_set& is_clock, - const std::unordered_set& is_global, - const std::shared_ptr& timing_info, - e_block_pack_status& block_pack_status, - t_molecule_link* unclustered_list_head, - const int& unclustered_list_head_size, - std::unordered_map& net_output_feeds_driving_block_input, - std::map>& primitive_candidate_block_types); - -void store_cluster_info_and_free(const t_packer_opts& packer_opts, - const LegalizationClusterId clb_index, - const t_logical_block_type_ptr logic_block_type, - const t_pb_type* le_pb_type, - std::vector& le_count, - const ClusterLegalizer& cluster_legalizer, - vtr::vector>& clb_inter_blk_nets); - void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clustered_blk_id, t_pb* cur_pb, @@ -260,7 +202,7 @@ void update_timing_gain_values(const AtomNetId net_id, enum e_net_relation_to_clustered_block net_relation_to_clustered_block, const SetupTimingInfo& timing_info, const std::unordered_set& is_global, - std::unordered_map& net_output_feeds_driving_block_input); + const std::unordered_set& net_output_feeds_driving_block_input); /* * @brief Updates the marked data structures, and if gain_flag is GAIN, the gain @@ -281,7 +223,7 @@ void mark_and_update_partial_gain(const AtomNetId net_id, const SetupTimingInfo& timing_info, const std::unordered_set& is_global, const int high_fanout_net_threshold, - std::unordered_map& net_output_feeds_driving_block_input); + const std::unordered_set& net_output_feeds_driving_block_input); /* * 
@brief Updates the total gain array to reflect the desired tradeoff between @@ -309,24 +251,7 @@ void update_cluster_stats(const t_pack_molecule* molecule, const int high_fanout_net_threshold, const SetupTimingInfo& timing_info, AttractionInfo& attraction_groups, - std::unordered_map& net_output_feeds_driving_block_input); - -/* - * @brief Given a starting seed block, start_new_cluster determines the next - * cluster type to use. - * - * It expands the FPGA if it cannot find a legal cluster for the atom block - */ -void start_new_cluster(ClusterLegalizer& cluster_legalizer, - LegalizationClusterId& legalization_cluster_id, - t_pack_molecule* molecule, - std::map& num_used_type_instances, - const float target_device_utilization, - const t_arch* arch, - const std::string& device_layout_name, - const std::map>& primitive_candidate_block_types, - int verbosity, - bool balance_block_type_utilization); + const std::unordered_set& net_output_feeds_driving_block_input); /* * @brief Get candidate molecule to pack into currently open cluster @@ -351,7 +276,7 @@ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, bool prioritize_transitive_connectivity, int transitive_fanout_threshold, const int feasible_block_array_size, - std::map>& primitive_candidate_block_types); + const std::map>& primitive_candidate_block_types); /* * @brief Add molecules with strong connectedness to the current cluster to the @@ -392,7 +317,7 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, AttractionInfo& attraction_groups, const int feasible_block_array_size, LegalizationClusterId clb_index, - std::map>& primitive_candidate_block_types); + const std::map>& primitive_candidate_block_types); /* * @brief Add molecules based on transitive connections (eg. 
2 hops away) with @@ -421,7 +346,7 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, int verbosity, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size, - std::map>& primitive_candidate_block_types); + const std::map>& primitive_candidate_block_types); /* * @brief Calculates molecule statistics for a single molecule. @@ -455,6 +380,12 @@ void load_transitive_fanout_candidates(LegalizationClusterId cluster_index, std::map> identify_primitive_candidate_block_types(); +/** + * @brief Identify which nets in the atom netlist are driven by the same atom + * block that they appear as a receiver (input) pin of. + */ +std::unordered_set identify_net_output_feeds_driving_block_input(const AtomNetlist& atom_netlist); + /** * @brief This function update the pb_type_count data structure by incrementing * the number of used pb_types in the given packed cluster t_pb @@ -465,7 +396,12 @@ size_t update_pb_type_count(const t_pb* pb, std::map& pb_type_c * @brief This function updates the le_count data structure from the given * packed cluster. */ -void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count); +void update_le_count(const t_pb* pb, + const t_logical_block_type_ptr logic_block_type, + const t_pb_type* le_pb_type, + int& num_logic_le, + int& num_reg_le, + int& num_logic_and_reg_le); void print_pb_type_count_recurr(t_pb_type* type, size_t max_name_chars, size_t curr_depth, std::map& pb_type_count); @@ -478,7 +414,7 @@ void print_pb_type_count(const ClusteredNetlist& clb_nlist); * @brief This function identifies the logic block type which is defined by the * block type which has a lut primitive. 
*/ -t_logical_block_type_ptr identify_logic_block_type(std::map>& primitive_candidate_block_types); +t_logical_block_type_ptr identify_logic_block_type(const std::map>& primitive_candidate_block_types); /* * @brief This function returns the pb_type that is similar to Logic Element (LE) @@ -499,7 +435,10 @@ bool pb_used_for_blif_model(const t_pb* pb, const std::string& blif_model_name); /* * @brief Print the LE count data strurture. */ -void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type); +void print_le_count(int num_logic_le, + int num_reg_le, + int num_logic_and_reg_le, + const t_pb_type* le_pb_type); /* * @brief Given a pointer to a pb in a cluster, this routine returns a pointer diff --git a/vpr/src/pack/greedy_clusterer.cpp b/vpr/src/pack/greedy_clusterer.cpp index 98ca424362..d861289706 100644 --- a/vpr/src/pack/greedy_clusterer.cpp +++ b/vpr/src/pack/greedy_clusterer.cpp @@ -37,22 +37,45 @@ */ #include "greedy_clusterer.h" +#include #include +#include #include +#include "SetupGrid.h" #include "atom_netlist.h" #include "attraction_groups.h" #include "cluster_legalizer.h" #include "cluster_util.h" #include "constraints_report.h" #include "greedy_seed_selector.h" +#include "pack_types.h" #include "physical_types.h" #include "prepack.h" +#include "vpr_context.h" +#include "vtr_math.h" #include "vtr_vector.h" +namespace { + +/** + * @brief Struct to hold statistics on the progress of clustering. + */ +struct t_cluster_progress_stats { + // The total number of molecules in the design. + int num_molecules = 0; + // The number of molecules which have been clustered. + int num_molecules_processed = 0; + // The number of molecules clustered since the last time the status was + // logged. 
+ int mols_since_last_print = 0; +}; + +} // namespace + GreedyClusterer::GreedyClusterer(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, const AtomNetlist& atom_netlist, - const t_arch* arch, + const t_arch& arch, const t_pack_high_fanout_thresholds& high_fanout_thresholds, const std::unordered_set& is_clock, const std::unordered_set& is_global) @@ -63,113 +86,60 @@ GreedyClusterer::GreedyClusterer(const t_packer_opts& packer_opts, high_fanout_thresholds_(high_fanout_thresholds), is_clock_(is_clock), is_global_(is_global), - primitive_candidate_block_types_(identify_primitive_candidate_block_types()) {} + primitive_candidate_block_types_(identify_primitive_candidate_block_types()), + log_verbosity_(packer_opts.pack_verbosity), + net_output_feeds_driving_block_input_(identify_net_output_feeds_driving_block_input(atom_netlist)) { + +} std::map GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer, Prepacker& prepacker, bool allow_unrelated_clustering, bool balance_block_type_utilization, - AttractionInfo& attraction_groups) { - - /* Does the actual work of clustering multiple netlist blocks * - * into clusters. */ - - /* Algorithm employed - * 1. Find type that can legally hold block and create cluster with pb info - * 2. Populate started cluster - * 3. Repeat 1 until no more blocks need to be clustered - * - */ - - /* This routine returns a map that details the number of used block type instances. - * The bool floorplan_regions_overfull also acts as a return value - it is set to - * true when one or more floorplan regions have more blocks assigned to them than - * they can fit. - */ + AttractionInfo& attraction_groups, + DeviceContext& mutable_device_ctx) { + // This routine returns a map that details the number of used block type + // instances. 
+ std::map num_used_type_instances; /**************************************************************** * Initialization *****************************************************************/ - t_clustering_data clustering_data; - t_cluster_progress_stats cluster_stats; - - //int num_molecules, num_molecules_processed, mols_since_last_print, blocks_since_last_analysis, - int num_blocks_hill_added; - - const int verbosity = packer_opts_.pack_verbosity; - - int unclustered_list_head_size; - std::unordered_map net_output_feeds_driving_block_input; - cluster_stats.num_molecules_processed = 0; - cluster_stats.mols_since_last_print = 0; - - std::map num_used_type_instances; - - enum e_block_pack_status block_pack_status; - - t_pack_molecule *next_molecule, *prev_molecule; - - auto& device_ctx = g_vpr_ctx.mutable_device(); + // The clustering stats holds information used for logging the progress + // of the clustering to the user. + t_cluster_progress_stats clustering_stats; + clustering_stats.num_molecules = prepacker.get_num_molecules(); + // TODO: Create a ClusteringTimingManager class. + // This code relies on the prepacker, once the prepacker is moved to + // the constructor, this code can also move to the constructor. std::shared_ptr clustering_delay_calc; std::shared_ptr timing_info; + // Default criticalities set to zero (e.g. if not timing driven) + vtr::vector atom_criticality(atom_netlist_.blocks().size(), 0.f); + if (packer_opts_.timing_driven) { + calc_init_packing_timing(packer_opts_, analysis_opts_, prepacker, + clustering_delay_calc, timing_info, atom_criticality); + } - // this data structure tracks the number of Logic Elements (LEs) used. It is - // populated only for architectures which has LEs. The architecture is assumed - // to have LEs only iff it has a logic block that contains LUT primitives and is - // the first pb_block to have more than one instance from the top of the hierarchy - // (All parent pb_block have one instance only and one mode only). 
Index 0 holds - // the number of LEs that are used for both logic (LUTs/adders) and registers. - // Index 1 holds the number of LEs that are used for logic (LUTs/adders) only. - // Index 2 holds the number of LEs that are used for registers only. - std::vector le_count(3, 0); - - int total_clb_num = 0; + // Calculate the max molecule stats, which is used for gain calculation. + const t_molecule_stats max_molecule_stats = prepacker.calc_max_molecule_stats(atom_netlist_); + // Initialize the information for the greedy candidate selector. + // TODO: Abstract into a candidate selector class. /* TODO: This is memory inefficient, fix if causes problems */ /* Store stats on nets used by packed block, useful for determining transitively connected blocks * (eg. [A1, A2, ..]->[B1, B2, ..]->C implies cluster [A1, A2, ...] and C have a weak link) */ vtr::vector> clb_inter_blk_nets(atom_netlist_.blocks().size()); - - const t_molecule_stats max_molecule_stats = prepacker.calc_max_molecule_stats(atom_netlist_); - - cluster_stats.num_molecules = prepacker.get_num_molecules(); - - if (packer_opts_.hill_climbing_flag) { - size_t max_cluster_size = cluster_legalizer.get_max_cluster_size(); - clustering_data.hill_climbing_inputs_avail = new int[max_cluster_size + 1]; - for (size_t i = 0; i < max_cluster_size + 1; i++) - clustering_data.hill_climbing_inputs_avail[i] = 0; - } else { - clustering_data.hill_climbing_inputs_avail = nullptr; /* if used, die hard */ - } - -#if 0 - check_for_duplicate_inputs (); -#endif - + // FIXME: This should be abstracted into a selector class. This is only used + // for gain calculation and selecting candidate molecules. 
+ t_clustering_data clustering_data; alloc_and_init_clustering(max_molecule_stats, prepacker, - clustering_data, net_output_feeds_driving_block_input, - unclustered_list_head_size, cluster_stats.num_molecules); - - // find the cluster type that has lut primitives - auto logic_block_type = identify_logic_block_type(primitive_candidate_block_types_); - // find a LE pb_type within the found logic_block_type - auto le_pb_type = identify_le_block_type(logic_block_type); - - cluster_stats.blocks_since_last_analysis = 0; - num_blocks_hill_added = 0; - - //Default criticalities set to zero (e.g. if not timing driven) - vtr::vector atom_criticality(atom_netlist_.blocks().size(), 0.); - - if (packer_opts_.timing_driven) { - calc_init_packing_timing(packer_opts_, analysis_opts_, prepacker, - clustering_delay_calc, timing_info, atom_criticality); - } + clustering_data, + clustering_stats.num_molecules); // Create the greedy seed selector. GreedySeedSelector seed_selector(atom_netlist_, @@ -179,195 +149,470 @@ GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer, atom_criticality); // Pick the first seed molecule. - t_pack_molecule* istart = seed_selector.get_next_seed(prepacker, - cluster_legalizer); - - print_pack_status_header(); + t_pack_molecule* seed_mol = seed_selector.get_next_seed(prepacker, + cluster_legalizer); /**************************************************************** * Clustering *****************************************************************/ - while (istart != nullptr) { - bool is_cluster_legal = false; + print_pack_status_header(); + + // Continue clustering as long as a valid seed is returned from the seed + // selector. + while (seed_mol != nullptr) { + // Check to ensure that this molecule is unclustered. + VTR_ASSERT(!cluster_legalizer.is_mol_clustered(seed_mol)); + // The basic algorithm: // 1) Try to put all the molecules in that you can without doing the // full intra-lb route. Then do full legalization at the end. 
// 2) If the legalization at the end fails, try again, but this time // do full legalization for each molecule added to the cluster. - const ClusterLegalizationStrategy legalization_strategies[] = {ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE, - ClusterLegalizationStrategy::FULL}; - for (const ClusterLegalizationStrategy strategy : legalization_strategies) { - // If the cluster is legal, no need to try a stronger cluster legalizer - // mode. - if (is_cluster_legal) - break; - // Set the legalization strategy of the cluster legalizer. - cluster_legalizer.set_legalization_strategy(strategy); - - LegalizationClusterId legalization_cluster_id; - - VTR_LOGV(verbosity > 2, "Complex block %d:\n", total_clb_num); - - start_new_cluster(cluster_legalizer, - legalization_cluster_id, - istart, - num_used_type_instances, - packer_opts_.target_device_utilization, - arch_, packer_opts_.device_layout, - primitive_candidate_block_types_, - verbosity, - balance_block_type_utilization); - - //initial molecule in cluster has been processed - cluster_stats.num_molecules_processed++; - cluster_stats.mols_since_last_print++; - print_pack_status(total_clb_num, - cluster_stats.num_molecules, - cluster_stats.num_molecules_processed, - cluster_stats.mols_since_last_print, - device_ctx.grid.width(), - device_ctx.grid.height(), - attraction_groups, - cluster_legalizer); - - VTR_LOGV(verbosity > 2, - "Complex block %d: '%s' (%s) ", total_clb_num, - cluster_legalizer.get_cluster_pb(legalization_cluster_id)->name, - cluster_legalizer.get_cluster_type(legalization_cluster_id)->name.c_str()); - VTR_LOGV(verbosity > 2, "."); - //Progress dot for seed-block - fflush(stdout); - - int high_fanout_threshold = high_fanout_thresholds_.get_threshold(cluster_legalizer.get_cluster_type(legalization_cluster_id)->name); - update_cluster_stats(istart, + + // Try to grow a cluster from the seed molecule without doing intra-lb + // route for each molecule (i.e. 
just use faster but not fully + // conservative legality checks). + LegalizationClusterId new_cluster_id = try_grow_cluster(seed_mol, + ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE, + cluster_legalizer, + prepacker, + allow_unrelated_clustering, + balance_block_type_utilization, + *timing_info, + clb_inter_blk_nets, + clustering_data, + attraction_groups, + num_used_type_instances, + mutable_device_ctx); + + if (!new_cluster_id.is_valid()) { + // If the previous strategy failed, try to grow the cluster again, + // but this time perform full legalization for each molecule added + // to the cluster. + new_cluster_id = try_grow_cluster(seed_mol, + ClusterLegalizationStrategy::FULL, + cluster_legalizer, + prepacker, + allow_unrelated_clustering, + balance_block_type_utilization, + *timing_info, + clb_inter_blk_nets, + clustering_data, + attraction_groups, + num_used_type_instances, + mutable_device_ctx); + } + + // Ensure that at the seed was packed successfully. + VTR_ASSERT(new_cluster_id.is_valid()); + VTR_ASSERT(cluster_legalizer.is_mol_clustered(seed_mol)); + + // Update the clustering progress stats. + size_t num_molecules_in_cluster = cluster_legalizer.get_num_molecules_in_cluster(new_cluster_id); + clustering_stats.num_molecules_processed += num_molecules_in_cluster; + clustering_stats.mols_since_last_print += num_molecules_in_cluster; + + // Print the current progress of the packing after a cluster has been + // successfully created. + print_pack_status(clustering_stats.num_molecules, + clustering_stats.num_molecules_processed, + clustering_stats.mols_since_last_print, + mutable_device_ctx.grid.width(), + mutable_device_ctx.grid.height(), + attraction_groups, + cluster_legalizer); + + // Pick new seed. + seed_mol = seed_selector.get_next_seed(prepacker, + cluster_legalizer); + } + + // If this architecture has LE physical block, report its usage. + report_le_physical_block_usage(cluster_legalizer); + + // Free the clustering data. 
+ // FIXME: This struct should use standard data structures so it does not + // have to be freed like this. This is also specific to the candidate + // gain calculation. + free_clustering_data(clustering_data); + + return num_used_type_instances; +} + +LegalizationClusterId GreedyClusterer::try_grow_cluster( + t_pack_molecule* seed_mol, + ClusterLegalizationStrategy strategy, + ClusterLegalizer& cluster_legalizer, + Prepacker& prepacker, + bool allow_unrelated_clustering, + bool balance_block_type_utilization, + SetupTimingInfo& timing_info, + vtr::vector>& clb_inter_blk_nets, + t_clustering_data& clustering_data, + AttractionInfo& attraction_groups, + std::map& num_used_type_instances, + DeviceContext& mutable_device_ctx) { + + // Check to ensure that this molecule is unclustered. + VTR_ASSERT(!cluster_legalizer.is_mol_clustered(seed_mol)); + + // Set the legalization strategy of the cluster legalizer. + cluster_legalizer.set_legalization_strategy(strategy); + + // Use the seed to start a new cluster. 
+ LegalizationClusterId legalization_cluster_id = start_new_cluster(seed_mol, + cluster_legalizer, + balance_block_type_utilization, + num_used_type_instances, + mutable_device_ctx); + + int high_fanout_threshold = high_fanout_thresholds_.get_threshold(cluster_legalizer.get_cluster_type(legalization_cluster_id)->name); + update_cluster_stats(seed_mol, + cluster_legalizer, + is_clock_, //Set of clock nets + is_global_, //Set of global nets (currently all clocks) + packer_opts_.global_clocks, + packer_opts_.alpha, packer_opts_.beta, + packer_opts_.timing_driven, packer_opts_.connection_driven, + high_fanout_threshold, + timing_info, + attraction_groups, + net_output_feeds_driving_block_input_); + + int num_unrelated_clustering_attempts = 0; + t_pack_molecule *candidate_mol; + candidate_mol = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), + attraction_groups, + allow_unrelated_clustering, + packer_opts_.prioritize_transitive_connectivity, + packer_opts_.transitive_fanout_threshold, + packer_opts_.feasible_block_array_size, + &num_unrelated_clustering_attempts, + prepacker, + cluster_legalizer, + clb_inter_blk_nets, + legalization_cluster_id, + log_verbosity_, + clustering_data.unclustered_list_head, + clustering_data.unclustered_list_head_size, + primitive_candidate_block_types_); + + /* + * When attraction groups are created, the purpose is to pack more densely by adding more molecules + * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are + * not on), the cluster keeps being packed until the get_molecule routines return either a repeated + * molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the + * cluster until a nullptr is returned. So, the number of repeated molecules allowed is increased to a + * large value. 
+ */ + int max_num_repeated_molecules = 1; + if (attraction_groups.num_attraction_groups() > 0) + max_num_repeated_molecules = attraction_groups_max_repeated_molecules_; + + // Continuously try to cluster candidate molecules into the cluster + // until one of the following occurs: + // 1) No candidate molecule is proposed. + // 2) The same candidate was proposed multiple times. + int num_repeated_molecules = 0; + while (candidate_mol != nullptr && num_repeated_molecules < max_num_repeated_molecules) { + // Try to cluster the candidate molecule into the cluster. + bool success = try_add_candidate_mol_to_cluster(candidate_mol, + legalization_cluster_id, + cluster_legalizer); + + // If the candidate molecule was clustered successfully, update + // the cluster stats. + if (success) { + update_cluster_stats(candidate_mol, cluster_legalizer, - is_clock_, //Set of clock nets - is_global_, //Set of global nets (currently all clocks) + is_clock_, //Set of all clocks + is_global_, //Set of all global signals (currently clocks) packer_opts_.global_clocks, - packer_opts_.alpha, packer_opts_.beta, - packer_opts_.timing_driven, packer_opts_.connection_driven, + packer_opts_.alpha, + packer_opts_.beta, + packer_opts_.timing_driven, + packer_opts_.connection_driven, high_fanout_threshold, - *timing_info, - attraction_groups, - net_output_feeds_driving_block_input); - total_clb_num++; - - if (packer_opts_.timing_driven) { - cluster_stats.blocks_since_last_analysis++; - /*it doesn't make sense to do a timing analysis here since there* - *is only one atom block clustered it would not change anything */ - } - cluster_stats.num_unrelated_clustering_attempts = 0; - next_molecule = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), - attraction_groups, - allow_unrelated_clustering, - packer_opts_.prioritize_transitive_connectivity, - packer_opts_.transitive_fanout_threshold, - packer_opts_.feasible_block_array_size, - 
&cluster_stats.num_unrelated_clustering_attempts, - prepacker, - cluster_legalizer, - clb_inter_blk_nets, - legalization_cluster_id, - verbosity, - clustering_data.unclustered_list_head, - unclustered_list_head_size, - primitive_candidate_block_types_); - prev_molecule = istart; - - /* - * When attraction groups are created, the purpose is to pack more densely by adding more molecules - * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are - * not on), the cluster keeps being packed until the get_molecule routines return either a repeated - * molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the - * cluster until a nullptr is returned. So, the number of repeated molecules allowed is increased to a - * large value. - */ - int max_num_repeated_molecules = 0; - if (attraction_groups.num_attraction_groups() > 0) { - max_num_repeated_molecules = attraction_groups_max_repeated_molecules_; - } else { - max_num_repeated_molecules = 1; - } - int num_repeated_molecules = 0; - - while (next_molecule != nullptr && num_repeated_molecules < max_num_repeated_molecules) { - prev_molecule = next_molecule; - - try_fill_cluster(cluster_legalizer, - prepacker, - packer_opts_, - prev_molecule, - next_molecule, - num_repeated_molecules, - cluster_stats, - total_clb_num, - legalization_cluster_id, - attraction_groups, - clb_inter_blk_nets, - allow_unrelated_clustering, - high_fanout_threshold, - is_clock_, - is_global_, timing_info, - block_pack_status, - clustering_data.unclustered_list_head, - unclustered_list_head_size, - net_output_feeds_driving_block_input, - primitive_candidate_block_types_); - } - - if (strategy == ClusterLegalizationStrategy::FULL) { - // If the legalizer fully legalized for every molecule added, - // the cluster should be legal. 
- is_cluster_legal = true; - } else { - // If the legalizer did not check everything for every molecule, - // need to check that the full cluster is legal (need to perform - // intra-lb routing). - is_cluster_legal = cluster_legalizer.check_cluster_legality(legalization_cluster_id); - } - - if (is_cluster_legal) { - // Pick new seed. - istart = seed_selector.get_next_seed(prepacker, - cluster_legalizer); - // Update cluster stats. - if (packer_opts_.timing_driven && num_blocks_hill_added > 0) - cluster_stats.blocks_since_last_analysis += num_blocks_hill_added; - - store_cluster_info_and_free(packer_opts_, legalization_cluster_id, logic_block_type, le_pb_type, le_count, cluster_legalizer, clb_inter_blk_nets); - // Since the cluster will no longer be added to beyond this point, - // clean the cluster of any data not strictly necessary for - // creating the clustered netlist. - cluster_legalizer.clean_cluster(legalization_cluster_id); - } else { - // If the cluster is not legal, requeue used mols. - num_used_type_instances[cluster_legalizer.get_cluster_type(legalization_cluster_id)]--; - total_clb_num--; - // Destroy the illegal cluster. - cluster_legalizer.destroy_cluster(legalization_cluster_id); - cluster_legalizer.compress(); - } + attraction_groups, + net_output_feeds_driving_block_input_); + num_unrelated_clustering_attempts = 0; } + + // Get the next candidate molecule. 
+ t_pack_molecule* prev_candidate_mol = candidate_mol; + candidate_mol = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), + attraction_groups, + allow_unrelated_clustering, + packer_opts_.prioritize_transitive_connectivity, + packer_opts_.transitive_fanout_threshold, + packer_opts_.feasible_block_array_size, + &num_unrelated_clustering_attempts, + prepacker, + cluster_legalizer, + clb_inter_blk_nets, + legalization_cluster_id, + log_verbosity_, + clustering_data.unclustered_list_head, + clustering_data.unclustered_list_head_size, + primitive_candidate_block_types_); + + // If the next candidate molecule is the same as the previous + // candidate molecule, increment the number of repreated + // molecules counter. + if (candidate_mol == prev_candidate_mol) + num_repeated_molecules++; } - // if this architecture has LE physical block, report its usage - if (le_pb_type) { - print_le_count(le_count, le_pb_type); + // Ensure that the cluster is legal. When the cluster legalization + // strategy is full, it must be legal. + if (strategy != ClusterLegalizationStrategy::FULL) { + // If the legalizer did not check everything for every molecule, + // need to check that the full cluster is legal (need to perform + // intra-lb routing). + bool is_cluster_legal = cluster_legalizer.check_cluster_legality(legalization_cluster_id); + + if (!is_cluster_legal) { + // If the cluster is not legal, undo the cluster. + // Update the used type instances. + num_used_type_instances[cluster_legalizer.get_cluster_type(legalization_cluster_id)]--; + // Destroy the illegal cluster. + cluster_legalizer.destroy_cluster(legalization_cluster_id); + cluster_legalizer.compress(); + // Cluster failed to grow. + return LegalizationClusterId(); + } } - // Ensure that we have kept track of the number of clusters correctly. - // TODO: The total_clb_num variable could probably just be replaced by - // clusters().size(). 
- VTR_ASSERT(cluster_legalizer.clusters().size() == (size_t)total_clb_num); + VTR_ASSERT(legalization_cluster_id.is_valid()); + + // Legal cluster was created. Store cluster info and clean cluster. + + // store info that will be used later in packing from pb_stats. + // FIXME: If this is used for gain, it should be moved into the selector + // class. Perhaps a finalize_cluster_gain method. + t_pb* cur_pb = cluster_legalizer.get_cluster_pb(legalization_cluster_id); + t_pb_stats* pb_stats = cur_pb->pb_stats; + for (const AtomNetId mnet_id : pb_stats->marked_nets) { + int external_terminals = atom_netlist_.net_pins(mnet_id).size() - pb_stats->num_pins_of_net_in_pb[mnet_id]; + // Check if external terminals of net is within the fanout limit and + // that there exists external terminals. + if (external_terminals < packer_opts_.transitive_fanout_threshold && external_terminals > 0) { + clb_inter_blk_nets[legalization_cluster_id].push_back(mnet_id); + } + } - // Free the clustering data. - // FIXME: This struct should use standard data structures so it does not - // have to be freed like this. - free_clustering_data(packer_opts_, clustering_data); + // Since the cluster will no longer be added to beyond this point, + // clean the cluster of any data not strictly necessary for + // creating the clustered netlist. + cluster_legalizer.clean_cluster(legalization_cluster_id); - return num_used_type_instances; + // Cluster has been grown successfully. 
+ return legalization_cluster_id; +} + +LegalizationClusterId GreedyClusterer::start_new_cluster( + t_pack_molecule* seed_mol, + ClusterLegalizer& cluster_legalizer, + bool balance_block_type_utilization, + std::map& num_used_type_instances, + DeviceContext& mutable_device_ctx) { + + /* Allocate a dummy initial cluster and load a atom block as a seed and check if it is legal */ + AtomBlockId root_atom = seed_mol->atom_block_ids[seed_mol->root]; + const std::string& root_atom_name = atom_netlist_.block_name(root_atom); + const t_model* root_model = atom_netlist_.block_model(root_atom); + + auto itr = primitive_candidate_block_types_.find(root_model); + VTR_ASSERT(itr != primitive_candidate_block_types_.end()); + std::vector candidate_types = itr->second; + + if (balance_block_type_utilization) { + //We sort the candidate types in ascending order by their current utilization. + //This means that the packer will prefer to use types with lower utilization. + //This is a naive approach to try balancing utilization when multiple types can + //support the same primitive(s). 
+ std::stable_sort(candidate_types.begin(), candidate_types.end(), + [&](t_logical_block_type_ptr lhs, t_logical_block_type_ptr rhs) { + int lhs_num_instances = 0; + int rhs_num_instances = 0; + // Count number of instances for each type + for (auto type : lhs->equivalent_tiles) + lhs_num_instances += mutable_device_ctx.grid.num_instances(type, -1); + for (auto type : rhs->equivalent_tiles) + rhs_num_instances += mutable_device_ctx.grid.num_instances(type, -1); + + float lhs_util = vtr::safe_ratio(num_used_type_instances[lhs], lhs_num_instances); + float rhs_util = vtr::safe_ratio(num_used_type_instances[rhs], rhs_num_instances); + //Lower util first + return lhs_util < rhs_util; + }); + } + + if (log_verbosity_ > 2) { + VTR_LOG("\tSeed: '%s' (%s)", root_atom_name.c_str(), root_model->name); + VTR_LOGV(seed_mol->pack_pattern, " molecule_type %s molecule_size %zu", + seed_mol->pack_pattern->name, seed_mol->atom_block_ids.size()); + VTR_LOG("\n"); + } + + //Try packing into each candidate type + bool success = false; + t_logical_block_type_ptr block_type; + LegalizationClusterId new_cluster_id; + for (auto type : candidate_types) { + //Try packing into each mode + e_block_pack_status pack_result = e_block_pack_status::BLK_STATUS_UNDEFINED; + for (int j = 0; j < type->pb_graph_head->pb_type->num_modes && !success; j++) { + std::tie(pack_result, new_cluster_id) = cluster_legalizer.start_new_cluster(seed_mol, type, j); + success = (pack_result == e_block_pack_status::BLK_PASSED); + } + + if (success) { + VTR_LOGV(log_verbosity_ > 2, "\tPASSED_SEED: Block Type %s\n", type->name.c_str()); + // If clustering succeeds return the new_cluster_id and type. 
+ block_type = type; + break; + } else { + VTR_LOGV(log_verbosity_ > 2, "\tFAILED_SEED: Block Type %s\n", type->name.c_str()); + } + } + + if (!success) { + //Explored all candidates + if (seed_mol->type == MOLECULE_FORCED_PACK) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "Can not find any logic block that can implement molecule.\n" + "\tPattern %s %s\n", + seed_mol->pack_pattern->name, + root_atom_name.c_str()); + } else { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "Can not find any logic block that can implement molecule.\n" + "\tAtom %s (%s)\n", + root_atom_name.c_str(), root_model->name); + } + } + + VTR_ASSERT(success); + VTR_ASSERT(new_cluster_id.is_valid()); + + VTR_LOGV(log_verbosity_ > 2, + "Complex block %zu: '%s' (%s) ", size_t(new_cluster_id), + cluster_legalizer.get_cluster_pb(new_cluster_id)->name, + cluster_legalizer.get_cluster_type(new_cluster_id)->name.c_str()); + VTR_LOGV(log_verbosity_ > 2, "."); + //Progress dot for seed-block + fflush(stdout); + + // TODO: Below may make more sense in its own method. 
+ + // Successfully created cluster + num_used_type_instances[block_type]++; + + /* Expand FPGA size if needed */ + // Check used type instances against the possible equivalent physical locations + unsigned int num_instances = 0; + for (auto equivalent_tile : block_type->equivalent_tiles) { + num_instances += mutable_device_ctx.grid.num_instances(equivalent_tile, -1); + } + + if (num_used_type_instances[block_type] > num_instances) { + mutable_device_ctx.grid = create_device_grid(packer_opts_.device_layout, + arch_.grid_layouts, + num_used_type_instances, + packer_opts_.target_device_utilization); + } + + return new_cluster_id; +} + +bool GreedyClusterer::try_add_candidate_mol_to_cluster(t_pack_molecule* candidate_mol, + LegalizationClusterId legalization_cluster_id, + ClusterLegalizer& cluster_legalizer) { + VTR_ASSERT(candidate_mol != nullptr); + VTR_ASSERT(!cluster_legalizer.is_mol_clustered(candidate_mol)); + VTR_ASSERT(legalization_cluster_id.is_valid()); + + e_block_pack_status pack_status = cluster_legalizer.add_mol_to_cluster(candidate_mol, + legalization_cluster_id); + + // Print helpful debugging log messages. 
+ if (log_verbosity_ > 2) { + switch (pack_status) { + case e_block_pack_status::BLK_PASSED: + VTR_LOG("\tPassed: "); + break; + case e_block_pack_status::BLK_FAILED_ROUTE: + VTR_LOG("\tNO_ROUTE: "); + break; + case e_block_pack_status::BLK_FAILED_FLOORPLANNING: + VTR_LOG("\tFAILED_FLOORPLANNING_CONSTRAINTS_CHECK: "); + break; + case e_block_pack_status::BLK_FAILED_FEASIBLE: + VTR_LOG("\tFAILED_FEASIBILITY_CHECK: "); + break; + case e_block_pack_status::BLK_FAILED_NOC_GROUP: + VTR_LOG("\tFAILED_NOC_GROUP_CHECK: "); + break; + default: + VPR_FATAL_ERROR(VPR_ERROR_PACK, "Unknown pack status thrown."); + break; + } + // Get the block name and model name + AtomBlockId blk_id = candidate_mol->atom_block_ids[candidate_mol->root]; + VTR_ASSERT(blk_id.is_valid()); + std::string blk_name = atom_netlist_.block_name(blk_id); + const t_model* blk_model = atom_netlist_.block_model(blk_id); + VTR_LOG("'%s' (%s)", blk_name.c_str(), blk_model->name); + VTR_LOGV(candidate_mol->pack_pattern, " molecule %s molecule_size %zu", + candidate_mol->pack_pattern->name, + candidate_mol->atom_block_ids.size()); + VTR_LOG("\n"); + fflush(stdout); + } + + return pack_status == e_block_pack_status::BLK_PASSED; +} + +void GreedyClusterer::report_le_physical_block_usage(const ClusterLegalizer& cluster_legalizer) { + // find the cluster type that has lut primitives + auto logic_block_type = identify_logic_block_type(primitive_candidate_block_types_); + // find a LE pb_type within the found logic_block_type + auto le_pb_type = identify_le_block_type(logic_block_type); + + // If this architecture does not have an LE physical block, cannot report + // its usage. + if (le_pb_type == nullptr) + return; + + // Track the number of Logic Elements (LEs) used. This is populated only for + // architectures which have LEs.
The architecture is assumed to have LEs iff + // it has a logic block that contains LUT primitives and is the first + // pb_block to have more than one instance from the top of the hierarchy + // (All parent pb_block have one instance only and one mode only). + + // The number of LEs that are used for logic (LUTs/adders) only. + int num_logic_le = 0; + // The number of LEs that are used for registers only. + int num_reg_le = 0; + // The number of LEs that are used for both logic (LUTs/adders) and registers. + int num_logic_and_reg_le = 0; + + for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) { + // Update the data structure holding the LE counts + update_le_count(cluster_legalizer.get_cluster_pb(cluster_id), + logic_block_type, + le_pb_type, + num_logic_le, + num_reg_le, + num_logic_and_reg_le); + } + + // if this architecture has LE physical block, report its usage + if (le_pb_type) { + print_le_count(num_logic_le, num_reg_le, num_logic_and_reg_le, le_pb_type); + } } diff --git a/vpr/src/pack/greedy_clusterer.h b/vpr/src/pack/greedy_clusterer.h index 816043c91b..6df695b333 100644 --- a/vpr/src/pack/greedy_clusterer.h +++ b/vpr/src/pack/greedy_clusterer.h @@ -10,17 +10,22 @@ #include #include +#include +#include "cluster_legalizer.h" #include "physical_types.h" +#include "vtr_vector.h" // Forward declarations class AtomNetId; class AtomNetlist; class AttractionInfo; -class ClusterLegalizer; +class DeviceContext; class Prepacker; +class SetupTimingInfo; +class t_pack_high_fanout_thresholds; +class t_pack_molecule; struct t_analysis_opts; struct t_clustering_data; -struct t_pack_high_fanout_thresholds; struct t_packer_opts; /** @@ -75,7 +80,7 @@ class GreedyClusterer { GreedyClusterer(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, const AtomNetlist& atom_netlist, - const t_arch* arch, + const t_arch& arch, const t_pack_high_fanout_thresholds& high_fanout_thresholds, const std::unordered_set& is_clock, const 
std::unordered_set& is_global); @@ -102,13 +107,16 @@ class GreedyClusterer { * have multiple logical block types to which they can cluster, * e.g. multiple sizes of physical RAMs exist on the chip. * @param attraction_groups - * Information on the attraction groups used during the * clustering process. These are groups of primitives that have * extra attraction to each other; currently they are used to * guide the clusterer when it must cluster some parts of a * design densely due to user placement/floorplanning * constraints. They are created if some floorplan regions are * overfilled after a clustering attempt. + * @param mutable_device_ctx + * The mutable device context. The clusterer will modify the + * device context by potentially increasing the size of the + * device to fit the clustering. * * @return num_used_type_instances * The number of used logical blocks of each type by the @@ -120,9 +128,74 @@ class GreedyClusterer { Prepacker& prepacker, bool allow_unrelated_clustering, bool balance_block_type_utilization, - AttractionInfo& attraction_groups); + AttractionInfo& attraction_groups, + DeviceContext& mutable_device_ctx); private: + /** + * @brief Given a seed molecule and a legalization strategy, tries to grow + * a cluster greedily, starting with the provided seed and adding + * whatever other molecules seem beneficial and legal. Will return + * the ID of the cluster created. + * + * If the strategy is set to SKIP_INTRA_LB_ROUTE, the cluster will grow + * without performing intra-lb route every time a molecule is added to the + * cluster. It will perform intra-lb route at the end, after all molecules + * have been added. If this final intra-lb route fails, the cluster will be + * destroyed and an invalid cluster ID will be returned. + * + * If the strategy is set to FULL, the cluster will grow using the full + * legalizer for each molecule added.
This cannot fail (assuming the seed + * can exist in a cluster), so it will always return a valid cluster ID. + */ + LegalizationClusterId try_grow_cluster(t_pack_molecule* seed_mol, + ClusterLegalizationStrategy strategy, + ClusterLegalizer& cluster_legalizer, + Prepacker& prepacker, + bool allow_unrelated_clustering, + bool balance_block_type_utilization, + SetupTimingInfo& timing_info, + vtr::vector>& clb_inter_blk_nets, + t_clustering_data& clustering_data, + AttractionInfo& attraction_groups, + std::map& num_used_type_instances, + DeviceContext& mutable_device_ctx); + + /** + * @brief Given a seed molecule, starts a new cluster by trying to find a + * good logical block type and mode to put it in. This method cannot + * fail (only crash if the seed cannot be clustered), so it should + * always return a valid ID to the cluster created. + * + * When balance_block_type_utilization is set to true, this method will try + * to select less used logical block types if it has the option to in order + * to balance logical block type utilization. + * + * If the device is to be auto-sized, this method will try to grow the + * device grid if it finds that more clusters of specific logical block + * types have been created than the device can support. + */ + LegalizationClusterId start_new_cluster(t_pack_molecule* seed_mol, + ClusterLegalizer& cluster_legalizer, + bool balance_block_type_utilization, + std::map& num_used_type_instances, + DeviceContext& mutable_device_ctx); + + /** + * @brief Try to add the given candidate molecule to the given cluster. + * Returns true if the molecule was clustered successfully, false + * otherwise. + */ + bool try_add_candidate_mol_to_cluster(t_pack_molecule* candidate_mol, + LegalizationClusterId legalization_cluster_id, + ClusterLegalizer& cluster_legalizer); + + /** + * @brief Log the physical block usage of the logic element in the + * architecture (if it has one).
+ */ + void report_le_physical_block_usage(const ClusterLegalizer& cluster_legalizer); + /* * When attraction groups are created, the purpose is to pack more densely by adding more molecules * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are @@ -144,7 +217,7 @@ class GreedyClusterer { const AtomNetlist& atom_netlist_; /// @brief The device architecture to cluster onto. - const t_arch* arch_ = nullptr; + const t_arch& arch_; /// @brief The high-fanout thresholds per logical block type. Used to ignore /// certain nets when calculating the gain for the next candidate @@ -158,6 +231,22 @@ class GreedyClusterer { const std::unordered_set& is_global_; /// @brief Pre-computed logical block types for each model in the architecture. - std::map> primitive_candidate_block_types_; + const std::map> primitive_candidate_block_types_; + + /// @brief The verbosity of log messages produced by the clusterer. + /// + /// Numbers larger than 2 will print info on the status of the packing for + /// each molecule. + const int log_verbosity_; + + /// @brief Does the atom block that drives the output of this atom net also + /// appear as a receiver (input) pin of the atom net? + /// + /// This is used in the gain routines to avoid double counting the + /// connections from the current cluster to other blocks (hence yielding + /// better clusterings). The only time an atom block should connect to the + /// same atom net twice is when one connection is an output and the other + /// is an input, so this should take care of all multiple connections. 
+ const std::unordered_set net_output_feeds_driving_block_input_; }; diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index 7397003213..cb27a23e83 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -28,6 +28,9 @@ bool try_pack(t_packer_opts* packer_opts, std::vector* lb_type_rr_graphs) { const AtomContext& atom_ctx = g_vpr_ctx.atom(); const DeviceContext& device_ctx = g_vpr_ctx.device(); + // The clusterer modifies the device context by increasing the size of the + // device if needed. + DeviceContext& mutable_device_ctx = g_vpr_ctx.mutable_device(); std::unordered_set is_clock, is_global; VTR_LOG("Begin packing '%s'.\n", packer_opts->circuit_file_name.c_str()); @@ -113,7 +116,7 @@ bool try_pack(t_packer_opts* packer_opts, GreedyClusterer clusterer(*packer_opts, *analysis_opts, atom_ctx.nlist, - arch, + *arch, high_fanout_thresholds, is_clock, is_global); @@ -127,7 +130,8 @@ bool try_pack(t_packer_opts* packer_opts, prepacker, allow_unrelated_clustering, balance_block_type_util, - attraction_groups); + attraction_groups, + mutable_device_ctx); //Try to size/find a device bool fits_on_device = try_size_device_grid(*arch, num_used_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout); diff --git a/vpr/src/pack/pack_types.h b/vpr/src/pack/pack_types.h index 3c587bcb46..95a460751b 100644 --- a/vpr/src/pack/pack_types.h +++ b/vpr/src/pack/pack_types.h @@ -14,6 +14,8 @@ #include "atom_netlist_fwd.h" #include "attraction_groups.h" +struct t_pack_molecule; + /************************************************************************** * Packing Algorithm Enumerations ***************************************************************************/ diff --git a/vpr/src/route/DecompNetlistRouter.h b/vpr/src/route/DecompNetlistRouter.h index 1f16105a35..a41d656c24 100644 --- a/vpr/src/route/DecompNetlistRouter.h +++ b/vpr/src/route/DecompNetlistRouter.h @@ -2,7 +2,8 @@ /** @file Parallel and net-decomposing case for 
NetlistRouter. Works like * \see ParallelNetlistRouter, but tries to "decompose" nets and assign them to - * the next level of the partition tree where possible. */ + * the next level of the partition tree where possible. + * See "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */ #include "netlist_routers.h" #include @@ -57,6 +58,8 @@ class DecompNetlistRouter : public NetlistRouter { * \ref route_net for each net, which will handle other global updates. * \return RouteIterResults for this iteration. */ RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack); + /** Inform the PartitionTree of the nets with updated bounding boxes */ + void handle_bb_updated_nets(const std::vector& nets); /** Set RCV enable flag for all routers managed by this netlist router. * Net decomposition does not work with RCV, so calling this fn with x=true is a fatal error. */ void set_rcv_enabled(bool x); @@ -65,10 +68,14 @@ class DecompNetlistRouter : public NetlistRouter { private: /** Should we decompose this net? */ bool should_decompose_net(ParentNetId net_id, const PartitionTreeNode& node); - /** Get a bitset with sinks to route before net decomposition */ + /** Get a bitset of sinks to route before net decomposition. Output bitset is + * [1..num_sinks] where the corresponding index is set to 1 if the sink needs to + * be routed */ vtr::dynamic_bitset<> get_decomposition_mask(ParentNetId net_id, const PartitionTreeNode& node); - /** Get a bitset with sinks to route before virtual net decomposition */ - vtr::dynamic_bitset<> get_vnet_decomposition_mask(const VirtualNet& vnet, const PartitionTreeNode& node); + /** Get a bitset of sinks to route before virtual net decomposition. Output bitset is + * [1..num_sinks] where the corresponding index is set to 1 if the sink needs to + * be routed */ + vtr::dynamic_bitset<> get_decomposition_mask_vnet(const VirtualNet& vnet, const PartitionTreeNode& node); /** Decompose and route a regular net. 
Output the resulting vnets to \p left and \p right. * \return Success status: true if routing is successful and left and right now contain valid virtual nets: false otherwise. */ bool decompose_and_route_net(ParentNetId net_id, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right); @@ -115,6 +122,9 @@ class DecompNetlistRouter : public NetlistRouter { float _pres_fac; float _worst_neg_slack; + /** The partition tree. Holds the groups of nets for each partition */ + vtr::optional _tree; + /** Sinks to be always sampled for decomposition for each net: [0.._net_list.size()-1] * (i.e. when routing fails after decomposition for a sink, sample it on next iteration) */ vtr::vector> _net_known_samples; diff --git a/vpr/src/route/DecompNetlistRouter.tpp b/vpr/src/route/DecompNetlistRouter.tpp index fc1a6685e5..a009132c45 100644 --- a/vpr/src/route/DecompNetlistRouter.tpp +++ b/vpr/src/route/DecompNetlistRouter.tpp @@ -3,6 +3,7 @@ /** @file Impls for DecompNetlistRouter */ #include "DecompNetlistRouter.h" +#include "globals.h" #include "netlist_routers.h" #include "route_net.h" #include "sink_sampling.h" @@ -21,25 +22,44 @@ inline RouteIterResults DecompNetlistRouter::route_netlist(int itry, f _pres_fac = pres_fac; _worst_neg_slack = worst_neg_slack; + vtr::Timer timer; + /* Organize netlist into a PartitionTree. * Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */ - PartitionTree tree(_net_list); + if(!_tree){ + _tree = PartitionTree(_net_list); + PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s"); + } + + /* Remove all virtual nets: we will create them for each iteration. 
+ * This needs to be done because the partition tree can change between iterations + * due to bounding box updates, which invalidates virtual nets */ + _tree->clear_vnets(); /* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. */ - tbb::task_group g; - route_partition_tree_node(g, tree.root()); - g.wait(); + tbb::task_group group; + route_partition_tree_node(group, _tree->root()); + group.wait(); + PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s"); /* Combine results from threads */ RouteIterResults out; for (auto& results : _results_th) { out.stats.combine(results.stats); out.rerouted_nets.insert(out.rerouted_nets.end(), results.rerouted_nets.begin(), results.rerouted_nets.end()); + out.bb_updated_nets.insert(out.bb_updated_nets.end(), results.bb_updated_nets.begin(), results.bb_updated_nets.end()); out.is_routable &= results.is_routable; } + return out; } +template +void DecompNetlistRouter::handle_bb_updated_nets(const std::vector& nets) { + VTR_ASSERT(_tree); + _tree->update_nets(nets); +} + template void DecompNetlistRouter::set_rcv_enabled(bool x) { if (x) @@ -120,6 +140,10 @@ inline bool should_decompose_vnet(const VirtualNet& vnet, const PartitionTreeNod template void DecompNetlistRouter::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) { auto& route_ctx = g_vpr_ctx.mutable_routing(); + vtr::Timer timer; + + /* node.nets is an unordered set, copy into vector to sort */ + std::vector nets(node.nets.begin(), node.nets.end()); /* Sort so that nets with the most sinks are routed first. 
* We want to interleave virtual nets with regular ones, so sort an "index vector" @@ -129,15 +153,14 @@ void DecompNetlistRouter::route_partition_tree_node(tbb::task_group& g std::vector order(node.nets.size() + node.vnets.size()); std::iota(order.begin(), order.end(), 0); std::stable_sort(order.begin(), order.end(), [&](size_t i, size_t j) -> bool { - ParentNetId id1 = i < node.nets.size() ? node.nets[i] : node.vnets[i - node.nets.size()].net_id; - ParentNetId id2 = j < node.nets.size() ? node.nets[j] : node.vnets[j - node.nets.size()].net_id; + ParentNetId id1 = i < node.nets.size() ? nets[i] : node.vnets[i - nets.size()].net_id; + ParentNetId id2 = j < node.nets.size() ? nets[j] : node.vnets[j - nets.size()].net_id; return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size(); }); - vtr::Timer t; for (size_t i : order) { - if (i < node.nets.size()) { /* Regular net (not decomposed) */ - ParentNetId net_id = node.nets[i]; + if (i < nets.size()) { /* Regular net (not decomposed) */ + ParentNetId net_id = nets[i]; if (!should_route_net(_net_list, net_id, _connections_inf, _budgeting_inf, _worst_neg_slack, true)) continue; /* Setup the net (reset or prune) only once here in the flow. Then all calls to route_net turn off auto-setup */ @@ -188,6 +211,7 @@ void DecompNetlistRouter::route_partition_tree_node(tbb::task_group& g if (flags.retry_with_full_bb) { /* ConnectionRouter thinks we should grow the BB. Do that and leave this net unrouted for now */ route_ctx.route_bb[net_id] = full_device_bb(); + _results_th.local().bb_updated_nets.push_back(net_id); /* Disable decomposition for nets like this: they're already problematic */ _is_decomp_disabled[net_id] = true; continue; @@ -206,7 +230,7 @@ void DecompNetlistRouter::route_partition_tree_node(tbb::task_group& g continue; } } - /* Route the full vnet. Again we don't care about the flags, they should be handled by the regular path */ + /* Route the full vnet. 
We don't care about the flags, they should be handled by the regular path */ auto sink_mask = get_vnet_sink_mask(vnet); route_net( _routers_th.local(), @@ -234,7 +258,7 @@ void DecompNetlistRouter::route_partition_tree_node(tbb::task_group& g PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + " nets and " + std::to_string(node.vnets.size()) - + " virtual nets routed in " + std::to_string(t.elapsed_sec()) + + " virtual nets routed in " + std::to_string(timer.elapsed_sec()) + " s"); /* This node is finished: add left & right branches to the task queue */ @@ -277,7 +301,7 @@ inline void make_vnet_pair(ParentNetId net_id, const t_bb& bb, Axis cutline_axis template bool DecompNetlistRouter::decompose_and_route_net(ParentNetId net_id, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right) { - auto& route_ctx = g_vpr_ctx.routing(); + auto& route_ctx = g_vpr_ctx.mutable_routing(); auto& net_bb = route_ctx.route_bb[net_id]; /* Sample enough sinks to provide branch-off points to the virtual nets we create */ @@ -382,7 +406,7 @@ inline std::string describe_vnet(const VirtualNet& vnet) { template bool DecompNetlistRouter::decompose_and_route_vnet(VirtualNet& vnet, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right) { /* Sample enough sinks to provide branch-off points to the virtual nets we create */ - auto sink_mask = get_vnet_decomposition_mask(vnet, node); + auto sink_mask = get_decomposition_mask_vnet(vnet, node); /* Route the *parent* net with the given mask: only the sinks we ask for will be routed */ auto flags = route_net( @@ -499,6 +523,7 @@ inline bool get_reduction_mask(ParentNetId net_id, Axis cutline_axis, int cutlin template vtr::dynamic_bitset<> DecompNetlistRouter::get_decomposition_mask(ParentNetId net_id, const PartitionTreeNode& node) { const auto& route_ctx = g_vpr_ctx.routing(); + const RouteTree& tree = route_ctx.route_trees[net_id].value(); size_t num_sinks = tree.num_sinks(); @@ -512,6 +537,7 @@ 
vtr::dynamic_bitset<> DecompNetlistRouter::get_decomposition_mask(Pare bool is_reduced = get_reduction_mask(net_id, node.cutline_axis, node.cutline_pos, out); bool source_on_cutline = is_close_to_cutline(tree.root().inode, node.cutline_axis, node.cutline_pos, 1); + if (!is_reduced || source_on_cutline) convex_hull_downsample(net_id, route_ctx.route_bb[net_id], out); @@ -638,7 +664,7 @@ inline bool get_reduction_mask_vnet_with_source(const VirtualNet& vnet, Axis cut } template -vtr::dynamic_bitset<> DecompNetlistRouter::get_vnet_decomposition_mask(const VirtualNet& vnet, const PartitionTreeNode& node) { +vtr::dynamic_bitset<> DecompNetlistRouter::get_decomposition_mask_vnet(const VirtualNet& vnet, const PartitionTreeNode& node) { const auto& route_ctx = g_vpr_ctx.routing(); const RouteTree& tree = route_ctx.route_trees[vnet.net_id].value(); int num_sinks = tree.num_sinks(); @@ -652,8 +678,9 @@ vtr::dynamic_bitset<> DecompNetlistRouter::get_vnet_decomposition_mask if (inside_bb(tree.root().inode, vnet.clipped_bb)) { /* We have source, no need to sample after reduction in most cases */ bool is_reduced = get_reduction_mask_vnet_with_source(vnet, node.cutline_axis, node.cutline_pos, out); bool source_on_cutline = is_close_to_cutline(tree.root().inode, node.cutline_axis, node.cutline_pos, 1); - if (!is_reduced || source_on_cutline) + if (!is_reduced || source_on_cutline){ convex_hull_downsample(vnet.net_id, vnet.clipped_bb, out); + } } else { int reduced_sides = get_reduction_mask_vnet_no_source(vnet, node.cutline_axis, node.cutline_pos, out); if (reduced_sides < 2) { @@ -666,9 +693,11 @@ vtr::dynamic_bitset<> DecompNetlistRouter::get_vnet_decomposition_mask /* Sample if a sink is too close to the cutline (and unreached). 
* Those sinks are likely to fail routing */ for (size_t isink : isinks) { + RRNodeId rr_sink = route_ctx.net_rr_terminals[vnet.net_id][isink]; + if (!inside_bb(rr_sink, vnet.clipped_bb)) + continue; if (is_isink_reached.get(isink)) continue; - RRNodeId rr_sink = route_ctx.net_rr_terminals[vnet.net_id][isink]; if (is_close_to_cutline(rr_sink, node.cutline_axis, node.cutline_pos, 1)) { out.set(isink, true); continue; diff --git a/vpr/src/route/ParallelNetlistRouter.h b/vpr/src/route/ParallelNetlistRouter.h index 35a2da2509..e77fdf8344 100644 --- a/vpr/src/route/ParallelNetlistRouter.h +++ b/vpr/src/route/ParallelNetlistRouter.h @@ -8,8 +8,9 @@ * * Note that the parallel router does not support graphical router breakpoints. * - * [0]: F. Koşar, "A net-decomposing parallel FPGA router", MS thesis, UofT ECE, 2023 */ + * [0]: "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */ #include "netlist_routers.h" +#include "vtr_optional.h" #include @@ -52,6 +53,8 @@ class ParallelNetlistRouter : public NetlistRouter { * \ref route_net for each net, which will handle other global updates. * \return RouteIterResults for this iteration. */ RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack); + /** Inform the PartitionTree of the nets with updated bounding boxes */ + void handle_bb_updated_nets(const std::vector& nets); void set_rcv_enabled(bool x); void set_timing_info(std::shared_ptr timing_info); @@ -95,6 +98,9 @@ class ParallelNetlistRouter : public NetlistRouter { int _itry; float _pres_fac; float _worst_neg_slack; + + /** The partition tree.
Holds the groups of nets for each partition */ + vtr::optional _tree; }; #include "ParallelNetlistRouter.tpp" diff --git a/vpr/src/route/ParallelNetlistRouter.tpp b/vpr/src/route/ParallelNetlistRouter.tpp index 9cae0d84db..1268ed6030 100644 --- a/vpr/src/route/ParallelNetlistRouter.tpp +++ b/vpr/src/route/ParallelNetlistRouter.tpp @@ -2,6 +2,7 @@ /** @file Impls for ParallelNetlistRouter */ +#include #include "netlist_routers.h" #include "route_net.h" #include "vtr_time.h" @@ -20,18 +21,24 @@ inline RouteIterResults ParallelNetlistRouter::route_netlist(int itry, /* Organize netlist into a PartitionTree. * Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */ - PartitionTree tree(_net_list); + vtr::Timer timer; + if(!_tree){ + _tree = PartitionTree(_net_list); + PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s"); + } /* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. 
*/ - tbb::task_group g; - route_partition_tree_node(g, tree.root()); - g.wait(); + tbb::task_group group; + route_partition_tree_node(group, _tree->root()); + group.wait(); + PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s"); /* Combine results from threads */ RouteIterResults out; for (auto& results : _results_th) { out.stats.combine(results.stats); out.rerouted_nets.insert(out.rerouted_nets.end(), results.rerouted_nets.begin(), results.rerouted_nets.end()); + out.bb_updated_nets.insert(out.bb_updated_nets.end(), results.bb_updated_nets.begin(), results.bb_updated_nets.end()); out.is_routable &= results.is_routable; } return out; @@ -41,13 +48,16 @@ template void ParallelNetlistRouter::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) { auto& route_ctx = g_vpr_ctx.mutable_routing(); + /* node.nets is an unordered set, copy into vector to sort */ + std::vector nets(node.nets.begin(), node.nets.end()); + /* Sort so net with most sinks is routed first. */ - std::stable_sort(node.nets.begin(), node.nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool { + std::stable_sort(nets.begin(), nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool { return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size(); }); - vtr::Timer t; - for (auto net_id : node.nets) { + vtr::Timer timer; + for (auto net_id : nets) { auto flags = route_net( _routers_th.local(), _net_list, @@ -76,13 +86,18 @@ void ParallelNetlistRouter::route_partition_tree_node(tbb::task_group& if (flags.retry_with_full_bb) { /* ConnectionRouter thinks we should grow the BB. 
Do that and leave this net unrouted for now */ route_ctx.route_bb[net_id] = full_device_bb(); + _results_th.local().bb_updated_nets.push_back(net_id); continue; } if (flags.was_rerouted) { _results_th.local().rerouted_nets.push_back(net_id); } } - PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + " nets routed in " + std::to_string(t.elapsed_sec()) + " s"); + + PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + + " nets and " + std::to_string(node.vnets.size()) + + " virtual nets routed in " + std::to_string(timer.elapsed_sec()) + + " s"); /* This node is finished: add left & right branches to the task queue */ if (node.left && node.right) { @@ -97,6 +112,12 @@ void ParallelNetlistRouter::route_partition_tree_node(tbb::task_group& } } +template +void ParallelNetlistRouter::handle_bb_updated_nets(const std::vector& nets) { + VTR_ASSERT(_tree); + _tree->update_nets(nets); +} + template void ParallelNetlistRouter::set_rcv_enabled(bool x) { for (auto& router : _routers_th) { diff --git a/vpr/src/route/SerialNetlistRouter.h b/vpr/src/route/SerialNetlistRouter.h index 5bb59df199..352de125b6 100644 --- a/vpr/src/route/SerialNetlistRouter.h +++ b/vpr/src/route/SerialNetlistRouter.h @@ -35,6 +35,7 @@ class SerialNetlistRouter : public NetlistRouter { ~SerialNetlistRouter() {} RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack); + void handle_bb_updated_nets(const std::vector& nets); void set_rcv_enabled(bool x); void set_timing_info(std::shared_ptr timing_info); diff --git a/vpr/src/route/SerialNetlistRouter.tpp b/vpr/src/route/SerialNetlistRouter.tpp index 7927d06c4e..63497d7d39 100644 --- a/vpr/src/route/SerialNetlistRouter.tpp +++ b/vpr/src/route/SerialNetlistRouter.tpp @@ -4,12 +4,15 @@ #include "SerialNetlistRouter.h" #include "route_net.h" +#include "vtr_time.h" template inline RouteIterResults SerialNetlistRouter::route_netlist(int itry, float pres_fac, float worst_neg_slack) { auto& route_ctx 
= g_vpr_ctx.mutable_routing(); RouteIterResults out; + vtr::Timer timer; + /* Sort so net with most sinks is routed first */ auto sorted_nets = std::vector(_net_list.nets().begin(), _net_list.nets().end()); std::stable_sort(sorted_nets.begin(), sorted_nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool { @@ -45,7 +48,9 @@ inline RouteIterResults SerialNetlistRouter::route_netlist(int itry, f } if (flags.retry_with_full_bb) { - /* Grow the BB and retry this net right away. */ + /* Grow the BB and retry this net right away. + * We don't populate out.bb_updated_nets for the serial router, since + * there is no partition tree to update. */ route_ctx.route_bb[net_id] = full_device_bb(); inet--; continue; @@ -59,9 +64,14 @@ inline RouteIterResults SerialNetlistRouter::route_netlist(int itry, f } } + PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s"); return out; } +template +void SerialNetlistRouter::handle_bb_updated_nets(const std::vector& /* nets */) { +} + template void SerialNetlistRouter::set_rcv_enabled(bool x) { _router.set_rcv_enabled(x); diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp index 7fd0f0d168..7216820726 100644 --- a/vpr/src/route/connection_router.cpp +++ b/vpr/src/route/connection_router.cpp @@ -4,6 +4,8 @@ #include "rr_graph.h" #include "rr_graph_fwd.h" +/** Used for the flat router. The node isn't relevant to the target if + * it is an intra-block node outside of our target block */ static bool relevant_node_to_target(const RRGraphView* rr_graph, RRNodeId node_to_add, RRNodeId target_node); @@ -972,6 +974,7 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( //Add existing routing starting from the target bin. 
//If the target's bin has insufficient existing routing add from the surrounding bins + constexpr int SINGLE_BIN_MIN_NODES = 2; bool done = false; bool found_node_on_same_layer = false; for (int dx : {0, -1, +1}) { @@ -989,6 +992,7 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( continue; RRNodeId rr_node_to_add = rt_node.inode; + /* Flat router: don't go into clusters other than the target one */ if (is_flat_) { if (!relevant_node_to_target(rr_graph_, rr_node_to_add, target_node)) continue; @@ -1014,7 +1018,6 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( } } - constexpr int SINGLE_BIN_MIN_NODES = 2; if (dx == 0 && dy == 0 && chan_nodes_added > SINGLE_BIN_MIN_NODES && found_node_on_same_layer) { //Target bin contained at least minimum amount of routing // @@ -1028,8 +1031,9 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( } if (done) break; } - - if (chan_nodes_added == 0 || !found_node_on_same_layer) { //If the target bin, and it's surrounding bins were empty, just add the full route tree + /* If we didn't find enough nodes to branch off near the target + * or they are on the wrong grid layer, just add the full route tree */ + if (chan_nodes_added <= SINGLE_BIN_MIN_NODES || !found_node_on_same_layer) { add_route_tree_to_heap(rt_root, target_node, cost_params, net_bounding_box); return net_bounding_box; } else { @@ -1042,15 +1046,9 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( static inline bool relevant_node_to_target(const RRGraphView* rr_graph, RRNodeId node_to_add, RRNodeId target_node) { - VTR_ASSERT(rr_graph->node_type(target_node) == t_rr_type::SINK); + VTR_ASSERT_SAFE(rr_graph->node_type(target_node) == t_rr_type::SINK); auto node_to_add_type = rr_graph->node_type(node_to_add); - if (node_to_add_type == t_rr_type::OPIN || node_to_add_type == t_rr_type::SOURCE || node_to_add_type == t_rr_type::CHANX || node_to_add_type == t_rr_type::CHANY || node_to_add_type == SINK) { - return true; - } else if 
(node_in_same_physical_tile(node_to_add, target_node)) { - VTR_ASSERT(node_to_add_type == IPIN); - return true; - } - return false; + return node_to_add_type != t_rr_type::IPIN || node_in_same_physical_tile(node_to_add, target_node); } static inline void update_router_stats(RouterStats* router_stats, diff --git a/vpr/src/route/netlist_routers.h b/vpr/src/route/netlist_routers.h index d5f5354a39..1524c2ddb3 100644 --- a/vpr/src/route/netlist_routers.h +++ b/vpr/src/route/netlist_routers.h @@ -15,6 +15,7 @@ * NetlistRouter-derived class is still a NetlistRouter, so that is transparent to the user * of this interface. */ +#include #include "NetPinTimingInvalidator.h" #include "clustered_netlist_utils.h" #include "connection_based_routing_fwd.h" @@ -37,6 +38,9 @@ struct RouteIterResults { bool is_routable = true; /** Net IDs with changed routing */ std::vector rerouted_nets; + /** Net IDs with changed bounding box for this iteration. + * Used by the parallel router to update the \ref PartitionTree */ + std::vector bb_updated_nets; /** RouterStats for this iteration */ RouterStats stats; }; @@ -53,6 +57,10 @@ class NetlistRouter { * \return RouteIterResults for this iteration. */ virtual RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack) = 0; + /** Handle net bounding box updates by passing them to the PartitionTree. + * No-op for the serial router */ + virtual void handle_bb_updated_nets(const std::vector& nets) = 0; + /** Enable RCV for each of the ConnectionRouters this NetlistRouter manages.*/ virtual void set_rcv_enabled(bool x) = 0; diff --git a/vpr/src/route/partition_tree.cpp b/vpr/src/route/partition_tree.cpp index b679fb90a1..ac95a9a528 100644 --- a/vpr/src/route/partition_tree.cpp +++ b/vpr/src/route/partition_tree.cpp @@ -1,6 +1,7 @@ #include "partition_tree.h" #include #include +#include /** Minimum number of nets inside a partition to continue further partitioning. * Mostly an arbitrary limit. 
At a certain point, the quality lost due to disturbed net ordering @@ -10,19 +11,32 @@ constexpr size_t MIN_NETS_TO_PARTITION = 256; PartitionTree::PartitionTree(const Netlist<>& netlist) { const auto& device_ctx = g_vpr_ctx.device(); - auto all_nets = std::vector(netlist.nets().begin(), netlist.nets().end()); + auto all_nets = std::unordered_set(netlist.nets().begin(), netlist.nets().end()); _root = build_helper(netlist, all_nets, 0, 0, device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); } -std::unique_ptr PartitionTree::build_helper(const Netlist<>& netlist, const std::vector& nets, int x1, int y1, int x2, int y2) { +/** Build a branch of the PartitionTree given a set of \p nets and a bounding box. + * Calls itself recursively with smaller and smaller bounding boxes until there are less + * nets than \ref MIN_NETS_TO_PARTITION. */ +std::unique_ptr PartitionTree::build_helper(const Netlist<>& netlist, const std::unordered_set& nets, int x1, int y1, int x2, int y2) { if (nets.empty()) return nullptr; const auto& route_ctx = g_vpr_ctx.routing(); + + /* Only build this for 2 dimensions. 
Ignore the layers for now */ + const auto& device_ctx = g_vpr_ctx.device(); + int layer_max = device_ctx.grid.get_num_layers() - 1; + auto out = std::make_unique(); if (nets.size() < MIN_NETS_TO_PARTITION) { + out->bb = {x1, x2, y1, y2, 0, layer_max}; out->nets = nets; + /* Build net to ptree node lookup */ + for(auto net_id: nets){ + _net_to_ptree_node[net_id] = out.get(); + } return out; } @@ -113,22 +127,26 @@ std::unique_ptr PartitionTree::build_helper(const Netlist<>& /* Couldn't find a cutline: all cutlines result in a one-way cut */ if (std::isnan(best_pos)) { - out->nets = nets; /* We hope copy elision is smart enough to optimize this stuff out */ - return out; + out->bb = {x1, x2, y1, y2, 0, layer_max}; + out->nets = nets; + /* Build net to ptree node lookup */ + for(auto net_id: nets){ + _net_to_ptree_node[net_id] = out.get(); + } } /* Populate net IDs on each side and call next level of build_x */ - std::vector left_nets, right_nets, my_nets; + std::unordered_set left_nets, right_nets, my_nets; if (best_axis == Axis::X) { for (auto net_id : nets) { t_bb bb = route_ctx.route_bb[net_id]; if (bb.xmax < best_pos) { - left_nets.push_back(net_id); + left_nets.insert(net_id); } else if (bb.xmin > best_pos) { - right_nets.push_back(net_id); + right_nets.insert(net_id); } else { - my_nets.push_back(net_id); + my_nets.insert(net_id); } } @@ -139,11 +157,11 @@ std::unique_ptr PartitionTree::build_helper(const Netlist<>& for (auto net_id : nets) { t_bb bb = route_ctx.route_bb[net_id]; if (bb.ymax < best_pos) { - left_nets.push_back(net_id); + left_nets.insert(net_id); } else if (bb.ymin > best_pos) { - right_nets.push_back(net_id); + right_nets.insert(net_id); } else { - my_nets.push_back(net_id); + my_nets.insert(net_id); } } @@ -151,8 +169,52 @@ std::unique_ptr PartitionTree::build_helper(const Netlist<>& out->right = build_helper(netlist, right_nets, x1, std::floor(best_pos + 1), x2, y2); } + if(out->left) + out->left->parent = out.get(); + if(out->right) + 
out->right->parent = out.get(); + + out->bb = {x1, x2, y1, y2, 0, 0}; out->nets = my_nets; out->cutline_axis = best_axis; out->cutline_pos = best_pos; + + /* Build net to ptree node lookup */ + for(auto net_id: my_nets){ + _net_to_ptree_node[net_id] = out.get(); + } return out; } + +inline bool net_in_ptree_node(ParentNetId net_id, const PartitionTreeNode* node){ + auto& route_ctx = g_vpr_ctx.routing(); + const t_bb& bb = route_ctx.route_bb[net_id]; + return bb.xmin >= node->bb.xmin && bb.xmax <= node->bb.xmax && bb.ymin >= node->bb.ymin && bb.ymax <= node->bb.ymax; +} + +void PartitionTree::update_nets(const std::vector& nets) { + for(auto net_id: nets){ + PartitionTreeNode* old_ptree_node = _net_to_ptree_node[net_id]; + PartitionTreeNode* new_ptree_node = old_ptree_node; + while(!net_in_ptree_node(net_id, new_ptree_node)) + new_ptree_node = new_ptree_node->parent; + old_ptree_node->nets.erase(net_id); + new_ptree_node->nets.insert(net_id); + _net_to_ptree_node[net_id] = new_ptree_node; + } +} + +/** Delete all vnets from this tree */ +void PartitionTree::clear_vnets(void) { + std::stack stack; + stack.push(_root.get()); + while(!stack.empty()){ + PartitionTreeNode* node = stack.top(); + stack.pop(); + node->vnets.clear(); + if(node->left) + stack.push(node->left.get()); + if(node->right) + stack.push(node->right.get()); + } +} diff --git a/vpr/src/route/partition_tree.h b/vpr/src/route/partition_tree.h index ac15fea4bc..82b75976b8 100644 --- a/vpr/src/route/partition_tree.h +++ b/vpr/src/route/partition_tree.h @@ -1,6 +1,7 @@ #pragma once #include "connection_router.h" +#include "netlist_fwd.h" #include "router_stats.h" #include @@ -53,13 +54,15 @@ class VirtualNet { class PartitionTreeNode { public: /** Nets claimed by this node (intersected by cutline if branch, nets in final region if leaf) */ - std::vector nets; + std::unordered_set nets; /** Virtual nets assigned by the parent of this node (\see DecompNetlistRouter) */ std::vector vnets; /** Left subtree. 
*/ std::unique_ptr left = nullptr; /** Right subtree. */ std::unique_ptr right = nullptr; + /** Parent node. */ + PartitionTreeNode* parent = nullptr; /* Axis of the cutline. */ Axis cutline_axis = Axis::X; /* Position of the cutline. It's a float, because cutlines are considered to be "between" integral coordinates. */ @@ -83,9 +86,20 @@ class PartitionTree { /** Access root. Shouldn't cause a segfault, because PartitionTree constructor always makes a _root */ inline PartitionTreeNode& root(void) { return *_root; } + /** Handle nets which had a bounding box update. + * Bounding boxes can only grow, so we should find a new partition tree node for + * these nets by moving them up until they fit in a node's bounds */ + void update_nets(const std::vector& nets); + + /** Delete all virtual nets in the tree. Used for the net decomposing router. + * Virtual nets are invalidated between iterations due to changing bounding + * boxes. */ + void clear_vnets(void); + private: std::unique_ptr _root; - std::unique_ptr build_helper(const Netlist<>& netlist, const std::vector& nets, int x1, int y1, int x2, int y2); + std::unordered_map _net_to_ptree_node; + std::unique_ptr build_helper(const Netlist<>& netlist, const std::unordered_set& nets, int x1, int y1, int x2, int y2); }; #ifdef VPR_DEBUG_PARTITION_TREE diff --git a/vpr/src/route/route.cpp b/vpr/src/route/route.cpp index 6bbc3449d8..d4dbc2a4d5 100644 --- a/vpr/src/route/route.cpp +++ b/vpr/src/route/route.cpp @@ -308,6 +308,8 @@ bool route(const Netlist<>& net_list, float iter_cumm_time = iteration_timer.elapsed_sec(); float iter_elapsed_time = iter_cumm_time - prev_iter_cumm_time; + PartitionTreeDebug::log("Iteration " + std::to_string(itry) + " took " + std::to_string(iter_elapsed_time) + " s"); + //Output progress print_route_status(itry, iter_elapsed_time, pres_fac, num_net_bounding_boxes_updated, iter_results.stats, overuse_info, wirelength_info, timing_info, est_success_iteration); @@ -424,11 +426,13 @@ bool 
route(const Netlist<>& net_list, /* * Prepare for the next iteration */ - if (router_opts.route_bb_update == e_route_bb_update::DYNAMIC) { - num_net_bounding_boxes_updated = dynamic_update_bounding_boxes(iter_results.rerouted_nets); + dynamic_update_bounding_boxes(iter_results.rerouted_nets, iter_results.bb_updated_nets); } + num_net_bounding_boxes_updated = iter_results.bb_updated_nets.size(); + netlist_router->handle_bb_updated_nets(iter_results.bb_updated_nets); + if (itry >= high_effort_congestion_mode_iteration_threshold) { //We are approaching the maximum number of routing iterations, //and still do not have a legal routing. Switch to a mode which diff --git a/vpr/src/route/route_utils.cpp b/vpr/src/route/route_utils.cpp index 60dec8d18d..b398066769 100644 --- a/vpr/src/route/route_utils.cpp +++ b/vpr/src/route/route_utils.cpp @@ -8,6 +8,7 @@ #include "draw_global.h" #include "draw_types.h" #include "net_delay.h" +#include "netlist_fwd.h" #include "overuse_report.h" #include "place_and_route.h" #include "route_debug.h" @@ -68,7 +69,7 @@ bool check_net_delays(const Netlist<>& net_list, NetPinsMatrix& net_delay // // Typically, only a small minority of nets (typically > 10%) have their BBs updated // each routing iteration. 
-size_t dynamic_update_bounding_boxes(const std::vector& updated_nets) { +void dynamic_update_bounding_boxes(const std::vector& rerouted_nets, std::vector out_bb_updated_nets) { auto& device_ctx = g_vpr_ctx.device(); auto& route_ctx = g_vpr_ctx.mutable_routing(); @@ -87,9 +88,7 @@ size_t dynamic_update_bounding_boxes(const std::vector& updated_net int grid_xmax = grid.width() - 1; int grid_ymax = grid.height() - 1; - size_t num_bb_updated = 0; - - for (ParentNetId net : updated_nets) { + for (ParentNetId net : rerouted_nets) { if (!route_ctx.route_trees[net]) continue; // Skip if no routing if (!route_ctx.net_status.is_routed(net)) @@ -133,13 +132,12 @@ size_t dynamic_update_bounding_boxes(const std::vector& updated_net } if (updated_bb) { - ++num_bb_updated; + out_bb_updated_nets.push_back(net); //VTR_LOG("Expanded net %6zu router BB to (%d,%d)x(%d,%d) based on net RR node BB (%d,%d)x(%d,%d)\n", size_t(net), //router_bb.xmin, router_bb.ymin, router_bb.xmax, router_bb.ymax, //curr_bb.xmin, curr_bb.ymin, curr_bb.xmax, curr_bb.ymax); } } - return num_bb_updated; } bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts, diff --git a/vpr/src/route/route_utils.h b/vpr/src/route/route_utils.h index edf5a3b59f..d129193ee1 100644 --- a/vpr/src/route/route_utils.h +++ b/vpr/src/route/route_utils.h @@ -2,6 +2,7 @@ /** @file Utility functions used in the top-level router (route.cpp). 
*/ +#include "netlist_fwd.h" #include "router_stats.h" #include "timing_info.h" #include "vpr_net_pins_matrix.h" @@ -47,7 +48,7 @@ WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t avail bool check_net_delays(const Netlist<>& net_list, NetPinsMatrix& net_delay); /** Update bounding box for net if existing routing is close to boundary */ -size_t dynamic_update_bounding_boxes(const std::vector& updated_nets); +void dynamic_update_bounding_boxes(const std::vector& rerouted_nets, std::vector out_bb_updated_nets); /** Early exit code for cases where it is obvious that a successful route will not be found * Heuristic: If total wirelength used in first routing iteration is X% of total available wirelength, exit */ diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index cc28561614..0be95dd111 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -23,7 +23,7 @@ * Larger values increase the time to compute the lookahead, but may give * more accurate lookahead estimates during routing. */ -static constexpr int MAX_TRACK_OFFSET = 16; +static constexpr int MAX_TRACK_OFFSET = 1; static void dijkstra_flood_to_wires(int itile, RRNodeId inode, util::t_src_opin_delays& src_opin_delays); diff --git a/vpr/src/route/sink_sampling.h b/vpr/src/route/sink_sampling.h index eb0df7f262..485bff9b3e 100644 --- a/vpr/src/route/sink_sampling.h +++ b/vpr/src/route/sink_sampling.h @@ -116,20 +116,16 @@ inline std::vector quickhull(const std::vector& points) { } // namespace sink_sampling /** Which side of the cutline is this RRNode on? - * Cutlines are always assumed to be at cutline_axis = (cutline_pos + 0.5). 
- * In the context of the parallel router, a RR node is considered to be inside a bounding - * box if its drive point is inside it (xlow, ylow if the node doesn't have a direction) */ + * Cutlines are always assumed to be at cutline_axis = (cutline_pos + 0.5). */ inline Side which_side(RRNodeId inode, Axis cutline_axis, int cutline_pos) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - Direction dir = rr_graph.node_direction(inode); - if (cutline_axis == Axis::X) { - int x = dir == Direction::DEC ? rr_graph.node_xhigh(inode) : rr_graph.node_xlow(inode); + int x = rr_graph.node_xlow(inode); return Side(x > cutline_pos); /* 1 is RIGHT */ } else { - int y = dir == Direction::DEC ? rr_graph.node_yhigh(inode) : rr_graph.node_ylow(inode); + int y = rr_graph.node_ylow(inode); return Side(y > cutline_pos); } } @@ -149,16 +145,22 @@ inline void convex_hull_downsample(ParentNetId net_id, const t_bb& net_bb, vtr:: RRNodeId rr_sink = route_ctx.net_rr_terminals[net_id][i]; if (!inside_bb(rr_sink, net_bb)) continue; - SinkPoint point{rr_graph.node_xlow(rr_sink), rr_graph.node_ylow(rr_sink), int(i)}; + int x = rr_graph.node_xlow(rr_sink); + int y = rr_graph.node_ylow(rr_sink); + SinkPoint point{x, y, int(i)}; sink_points.push_back(point); } auto hull = sink_sampling::quickhull(sink_points); + auto& is_isink_reached = tree.get_is_isink_reached(); + /* Sample if not source */ for (auto& point : hull) { if (point.isink == 0) /* source */ continue; + if(is_isink_reached.get(point.isink)) + continue; out.set(point.isink, true); } } diff --git a/vpr/src/route/spatial_route_tree_lookup.cpp b/vpr/src/route/spatial_route_tree_lookup.cpp index 3d3f7a2546..ddbb066a18 100644 --- a/vpr/src/route/spatial_route_tree_lookup.cpp +++ b/vpr/src/route/spatial_route_tree_lookup.cpp @@ -17,7 +17,11 @@ SpatialRouteTreeLookup build_route_tree_spatial_lookup(const Netlist<>& net_list float bb_area_per_sink = bb_area / fanout; float bin_area = BIN_AREA_PER_SINK_FACTOR 
* bb_area_per_sink; - float bin_dim = std::ceil(std::sqrt(bin_area)); + /* Set a minimum bin dimension so that we don't get minuscule bin sizes + * when flat routing is enabled and every LUT input becomes a sink. + * (P.S. This took some time to debug.) */ + constexpr float MIN_BIN_DIM = 3; + float bin_dim = std::max(MIN_BIN_DIM, std::ceil(std::sqrt(bin_area))); size_t bins_x = std::ceil(device_ctx.grid.width() / bin_dim); size_t bins_y = std::ceil(device_ctx.grid.height() / bin_dim); diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_multiclock_odin/func_multiclock/blanket/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_multiclock_odin/func_multiclock/blanket/config/golden_results.txt index 51984390d5..039f772d88 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_multiclock_odin/func_multiclock/blanket/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_multiclock_odin/func_multiclock/blanket/config/golden_results.txt @@ -1,4 +1,4 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration 
logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops crit_path_total_internal_heap_pushes crit_path_total_internal_heap_pops crit_path_total_external_heap_pushes crit_path_total_external_heap_pops crit_path_total_external_SOURCE_pushes crit_path_total_external_SOURCE_pops crit_path_total_internal_SOURCE_pushes crit_path_total_internal_SOURCE_pops crit_path_total_external_SINK_pushes crit_path_total_external_SINK_pops crit_path_total_internal_SINK_pushes crit_path_total_internal_SINK_pops crit_path_total_external_IPIN_pushes crit_path_total_external_IPIN_pops crit_path_total_internal_IPIN_pushes crit_path_total_internal_IPIN_pops crit_path_total_external_OPIN_pushes crit_path_total_external_OPIN_pops crit_path_total_internal_OPIN_pushes crit_path_total_internal_OPIN_pops crit_path_total_external_CHANX_pushes crit_path_total_external_CHANX_pops crit_path_total_internal_CHANX_pushes crit_path_total_internal_CHANX_pops crit_path_total_external_CHANY_pushes crit_path_total_external_CHANY_pops crit_path_total_internal_CHANY_pushes crit_path_total_internal_CHANY_pops crit_path_rt_node_SOURCE_pushes crit_path_rt_node_SINK_pushes crit_path_rt_node_IPIN_pushes crit_path_rt_node_OPIN_pushes crit_path_rt_node_CHANX_pushes crit_path_rt_node_CHANY_pushes crit_path_adding_all_rt crit_path_adding_high_fanout_rt crit_path_total_number_of_adding_all_rt_from_calling_high_fanout_rt critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile 
router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time k6_frac_N10_mem32K_40nm.xml multiclock_output_and_latch.v common 11.99 vpr 255.45 MiB 0.11 36912 -1 -1 1 0.05 -1 -1 34700 -1 -1 2 6 0 0 success v8.0.0-7653-g7c8f300-dirty release VTR_ASSERT_LEVEL=3 sanitizers GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-21 14:13:39 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 261584 6 1 13 14 2 8 9 4 4 16 clb auto 101.0 MiB 0.11 13 244.4 MiB 0.04 0 0.875884 -3.21653 -0.875884 0.545 0.47 0.000264546 0.000241337 0.00754986 0.00454282 20 15 7 107788 107788 10441.3 652.579 0.66 0.0136677 0.00891098 742 1670 -1 15 14 32 32 476 268 0 0 476 268 32 32 0 0 45 42 0 0 51 45 0 0 32 32 0 0 205 79 0 0 111 38 0 0 32 0 0 0 0 0 32 0 0 1.31811 0.545 -4.12048 -1.31811 0 0 13748.8 859.301 0.01 0.04 0.18 -1 -1 0.01 0.00722654 0.00593545 k6_frac_N10_mem32K_40nm.xml multiclock_reader_writer.v common 12.96 vpr 261.56 MiB 0.15 45980 -1 -1 1 0.06 -1 -1 34932 -1 -1 2 3 0 0 success v8.0.0-7653-g7c8f300-dirty release VTR_ASSERT_LEVEL=3 sanitizers GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-21 14:13:39 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 267836 3 1 25 26 2 8 6 4 4 16 clb auto 106.4 MiB 0.87 17 249.9 MiB 0.03 0 0.571 -8.64803 -0.571 0.557849 0.47 0.000543454 0.000488368 0.00346482 0.00253954 20 19 1 107788 107788 10441.3 652.579 0.67 0.0113116 0.00855232 742 1670 -1 27 1 6 6 63 36 0 0 63 36 6 6 0 0 9 6 0 0 9 9 0 0 6 6 0 0 18 3 0 0 15 6 0 0 6 0 0 0 0 0 6 0 0 0.865 0.557849 -8.82275 -0.865 0 0 13748.8 859.301 0.01 0.04 0.17 -1 -1 0.01 0.00501901 0.00409753 -k6_frac_N10_mem32K_40nm.xml multiclock_separate_and_latch.v common 11.88 vpr 254.22 MiB 0.15 35980 -1 -1 1 0 -1 -1 32420 -1 -1 2 6 0 0 success 
v8.0.0-7653-g7c8f300-dirty release VTR_ASSERT_LEVEL=3 sanitizers GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-21 14:13:39 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 260320 6 2 10 12 2 8 10 4 4 16 clb auto 100.4 MiB 0.06 12 243.6 MiB 0.03 0 0.544641 -1.83465 -0.544641 nan 0.47 0.000504445 0.000240584 0.00477542 0.00228264 20 21 1 107788 107788 10441.3 652.579 0.64 0.00804976 0.00416003 742 1670 -1 19 1 6 6 148 96 0 0 148 96 6 6 0 0 18 16 0 0 18 18 0 0 6 6 0 0 53 27 0 0 47 23 0 0 6 0 0 0 0 0 6 0 0 0.81248 nan -2.54321 -0.81248 0 0 13748.8 859.301 0.01 0.02 0.18 -1 -1 0.01 0.00215701 0.00121245 +k6_frac_N10_mem32K_40nm.xml multiclock_separate_and_latch.v common 11.88 vpr 254.22 MiB 0.15 35980 -1 -1 1 0 -1 -1 32420 -1 -1 2 6 0 0 success v8.0.0-7653-g7c8f300-dirty release VTR_ASSERT_LEVEL=3 sanitizers GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-21 14:13:39 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 260320 6 2 10 12 2 8 10 4 4 16 clb auto 100.4 MiB 0.06 12 243.6 MiB 0.03 0 0.544641 -1.83465 -0.544641 nan 0.47 0.000504445 0.000240584 0.00477542 0.00228264 20 15 1 107788 107788 10441.3 652.579 0.64 0.00804976 0.00416003 742 1670 -1 13 1 6 6 148 96 0 0 148 96 6 6 0 0 18 16 0 0 18 18 0 0 6 6 0 0 53 27 0 0 47 23 0 0 6 0 0 0 0 0 6 0 0 0.81248 nan -2.54321 -0.81248 0 0 13748.8 859.301 0.01 0.02 0.18 -1 -1 0.01 0.00215701 0.00121245 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_multiclock_odin/func_multiclock/once/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_multiclock_odin/func_multiclock/once/config/golden_results.txt index f90ef2264d..b9631c0caf 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_multiclock_odin/func_multiclock/once/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_multiclock_odin/func_multiclock/once/config/golden_results.txt @@ -1,4 +1,4 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem 
error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops crit_path_total_internal_heap_pushes crit_path_total_internal_heap_pops crit_path_total_external_heap_pushes crit_path_total_external_heap_pops crit_path_total_external_SOURCE_pushes crit_path_total_external_SOURCE_pops crit_path_total_internal_SOURCE_pushes crit_path_total_internal_SOURCE_pops crit_path_total_external_SINK_pushes crit_path_total_external_SINK_pops crit_path_total_internal_SINK_pushes crit_path_total_internal_SINK_pops crit_path_total_external_IPIN_pushes crit_path_total_external_IPIN_pops crit_path_total_internal_IPIN_pushes crit_path_total_internal_IPIN_pops 
crit_path_total_external_OPIN_pushes crit_path_total_external_OPIN_pops crit_path_total_internal_OPIN_pushes crit_path_total_internal_OPIN_pops crit_path_total_external_CHANX_pushes crit_path_total_external_CHANX_pops crit_path_total_internal_CHANX_pushes crit_path_total_internal_CHANX_pops crit_path_total_external_CHANY_pushes crit_path_total_external_CHANY_pops crit_path_total_internal_CHANY_pushes crit_path_total_internal_CHANY_pops crit_path_rt_node_SOURCE_pushes crit_path_rt_node_SINK_pushes crit_path_rt_node_IPIN_pushes crit_path_rt_node_OPIN_pushes crit_path_rt_node_CHANX_pushes crit_path_rt_node_CHANY_pushes crit_path_adding_all_rt crit_path_adding_high_fanout_rt crit_path_total_number_of_adding_all_rt_from_calling_high_fanout_rt critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time k6_frac_N10_mem32K_40nm.xml multiclock_output_and_latch.v common 12.22 vpr 254.93 MiB 0.1 37000 -1 -1 1 0.05 -1 -1 34808 -1 -1 2 6 0 0 success v8.0.0-7653-g7c8f300-dirty release VTR_ASSERT_LEVEL=3 sanitizers GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-21 14:13:39 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 261052 6 1 13 14 2 8 9 4 4 16 clb auto 100.7 MiB 0.11 13 244.1 MiB 0.04 0 0.875884 -3.21653 -0.875884 0.545 0.47 0.000263443 0.000240838 0.00748415 0.00450484 20 15 7 107788 107788 10441.3 652.579 0.66 0.0136082 0.00886525 742 1670 -1 15 14 32 32 476 268 0 0 476 268 32 32 0 0 45 42 0 0 51 45 0 0 32 32 0 0 205 79 0 0 111 38 0 0 32 0 0 0 0 0 32 0 0 1.31811 0.545 -4.12048 -1.31811 0 0 13748.8 859.301 0.01 0.04 0.18 -1 -1 0.01 0.00739705 0.00603502 
k6_frac_N10_mem32K_40nm.xml multiclock_reader_writer.v common 14.79 vpr 261.44 MiB 0.12 46076 -1 -1 1 0.05 -1 -1 34892 -1 -1 2 3 0 0 success v8.0.0-7653-g7c8f300-dirty release VTR_ASSERT_LEVEL=3 sanitizers GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-21 14:13:39 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 267712 3 1 25 26 2 8 6 4 4 16 clb auto 106.3 MiB 1 17 250.0 MiB 0.04 0 0.571 -8.64803 -0.571 0.557849 0.53 0.000560438 0.000505834 0.00360459 0.00262507 20 19 1 107788 107788 10441.3 652.579 0.76 0.0119539 0.00893362 742 1670 -1 27 1 6 6 63 36 0 0 63 36 6 6 0 0 9 6 0 0 9 9 0 0 6 6 0 0 18 3 0 0 15 6 0 0 6 0 0 0 0 0 6 0 0 0.865 0.557849 -8.82275 -0.865 0 0 13748.8 859.301 0.01 0.03 0.17 -1 -1 0.01 0.00502268 0.00406987 -k6_frac_N10_mem32K_40nm.xml multiclock_separate_and_latch.v common 12.05 vpr 254.23 MiB 0.11 35864 -1 -1 1 0.01 -1 -1 32648 -1 -1 2 6 0 0 success v8.0.0-7653-g7c8f300-dirty release VTR_ASSERT_LEVEL=3 sanitizers GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-21 14:13:39 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 260328 6 2 10 12 2 8 10 4 4 16 clb auto 100.4 MiB 0.06 12 243.6 MiB 0.03 0 0.544641 -1.83465 -0.544641 nan 0.47 0.000489919 0.000226814 0.00463432 0.00218307 20 21 1 107788 107788 10441.3 652.579 0.64 0.00773058 0.00396899 742 1670 -1 19 1 6 6 148 96 0 0 148 96 6 6 0 0 18 16 0 0 18 18 0 0 6 6 0 0 53 27 0 0 47 23 0 0 6 0 0 0 0 0 6 0 0 0.81248 nan -2.54321 -0.81248 0 0 13748.8 859.301 0.01 0.02 0.18 -1 -1 0.01 0.00224126 0.00124363 +k6_frac_N10_mem32K_40nm.xml multiclock_separate_and_latch.v common 12.05 vpr 254.23 MiB 0.11 35864 -1 -1 1 0.01 -1 -1 32648 -1 -1 2 6 0 0 success v8.0.0-7653-g7c8f300-dirty release VTR_ASSERT_LEVEL=3 sanitizers GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-21 14:13:39 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 260328 6 2 10 12 2 8 10 4 4 16 clb auto 100.4 MiB 0.06 12 243.6 MiB 0.03 0 0.544641 
-1.83465 -0.544641 nan 0.47 0.000489919 0.000226814 0.00463432 0.00218307 20 15 1 107788 107788 10441.3 652.579 0.64 0.00773058 0.00396899 742 1670 -1 13 1 6 6 148 96 0 0 148 96 6 6 0 0 18 16 0 0 18 18 0 0 6 6 0 0 53 27 0 0 47 23 0 0 6 0 0 0 0 0 6 0 0 0.81248 nan -2.54321 -0.81248 0 0 13748.8 859.301 0.01 0.02 0.18 -1 -1 0.01 0.00224126 0.00124363 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_calc_method/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_calc_method/config/golden_results.txt index 63486ef001..9c0fd9e92e 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_calc_method/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_calc_method/config/golden_results.txt @@ -1,5 +1,5 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total 
logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time - stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_--place_delta_delay_matrix_calculation_method_astar 38.42 vpr 976.37 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 999804 10 10 168 178 1 68 30 11 8 88 io auto 953.4 MiB 0.55 370 858 95 697 66 976.4 MiB 0.06 0.00 6.45248 -69.1493 -6.45248 6.45248 3.16 0.00053133 0.000484838 0.0148989 0.0138589 -1 -1 -1 -1 32 693 33 0 0 153433. 1743.56 1.72 0.145798 0.129504 11830 34246 -1 570 10 235 725 56242 26416 6.94346 6.94346 -73.9579 -6.94346 0 0 205860. 
2339.32 0.06 0.06 0.09 -1 -1 0.06 0.0256172 0.0239212 + stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_--place_delta_delay_matrix_calculation_method_astar 38.42 vpr 976.37 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 999804 10 10 168 178 1 68 30 11 8 88 io auto 953.4 MiB 0.55 370 858 95 697 66 976.4 MiB 0.06 0.00 6.45248 -69.1493 -6.45248 6.45248 3.16 0.00053133 0.000484838 0.0148989 0.0138589 -1 -1 -1 -1 32 693 33 0 0 122746. 1394.84 1.72 0.145798 0.129504 11830 34246 -1 570 10 235 725 56242 26416 6.94346 6.94346 -73.9579 -6.94346 0 0 164688. 1871.45 0.06 0.06 0.09 -1 -1 0.06 0.0256172 0.0239212 stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_override_--place_delta_delay_matrix_calculation_method_astar 37.07 vpr 976.44 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 999876 10 10 168 178 1 68 30 11 8 88 io auto 953.4 MiB 0.61 369 812 82 656 74 976.4 MiB 0.07 0.00 6.45248 -69.2479 -6.45248 6.45248 3.14 0.000419744 0.000381717 0.0118947 0.0110676 -1 -1 -1 -1 32 691 29 0 0 153433. 1743.56 1.12 0.12258 0.110164 11830 34246 -1 553 12 224 697 51846 24062 6.94346 6.94346 -73.4811 -6.94346 0 0 205860. 
2339.32 0.05 0.07 0.09 -1 -1 0.05 0.0274519 0.0254462 stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_--place_delta_delay_matrix_calculation_method_dijkstra 41.11 vpr 976.35 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 999784 10 10 168 178 1 68 30 11 8 88 io auto 953.4 MiB 0.79 370 812 89 663 60 976.4 MiB 0.12 0.00 6.52191 -68.7563 -6.52191 6.52191 4.09 0.000672013 0.000608225 0.017378 0.0162156 -1 -1 -1 -1 22 809 21 0 0 110609. 1256.92 2.28 0.128845 0.11551 11258 24748 -1 663 14 329 1173 67735 35710 7.04515 7.04515 -76.4932 -7.04515 0 0 134428. 1527.59 0.03 0.08 0.06 -1 -1 0.03 0.0372941 0.0337267 stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_override_--place_delta_delay_matrix_calculation_method_dijkstra 42.24 vpr 976.56 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 1000000 10 10 168 178 1 68 30 11 8 88 io auto 953.4 MiB 0.76 368 812 95 656 61 976.6 MiB 0.15 0.00 6.34478 -68.8031 -6.34478 6.34478 4.44 0.000492867 0.000449805 0.0183566 0.017188 -1 -1 -1 -1 28 753 22 0 0 134428. 1527.59 1.92 0.132649 0.118834 11590 29630 -1 624 15 260 959 55378 26467 6.64742 6.64742 -72.827 -6.64742 0 0 173354. 
1969.93 0.03 0.07 0.08 -1 -1 0.03 0.0270531 0.0242418 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_model/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_model/config/golden_results.txt index 9b0aec479a..c5f45b0d4e 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_model/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_model/config/golden_results.txt @@ -1,3 +1,3 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration 
crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time - stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta 42.58 vpr 976.46 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 999900 10 10 168 178 1 68 30 11 8 88 io auto 953.3 MiB 0.70 393 628 105 491 32 976.5 MiB 0.15 0.00 6.51193 -69.1178 -6.51193 6.51193 3.23 0.00078609 0.000724519 0.016967 0.01616 -1 -1 -1 -1 20 893 28 0 0 100248. 1139.18 2.16 0.139573 0.124569 11180 23751 -1 831 19 496 1987 121384 60113 6.91414 6.91414 -78.1319 -6.91414 0 0 125464. 1425.72 0.03 0.09 0.06 -1 -1 0.03 0.0325178 0.0296648 + stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta 42.58 vpr 976.46 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 999900 10 10 168 178 1 68 30 11 8 88 io auto 953.3 MiB 0.70 393 628 105 491 32 976.5 MiB 0.15 0.00 6.51193 -69.1178 -6.51193 6.51193 3.23 0.00078609 0.000724519 0.016967 0.01616 -1 -1 -1 -1 20 893 28 0 0 100248. 
1139.18 2.16 0.139573 0.124569 11180 23751 -1 831 19 496 1987 121384 60113 6.91414 6.91414 -78.1319 -6.91414 0 0 150556. 1710.86 0.03 0.09 0.06 -1 -1 0.03 0.0325178 0.0296648 stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_override 41.59 vpr 976.45 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 999880 10 10 168 178 1 68 30 11 8 88 io auto 953.4 MiB 0.80 380 628 91 496 41 976.4 MiB 0.26 0.02 6.52338 -69.1003 -6.52338 6.52338 3.25 0.000777298 0.000711005 0.0177546 0.0168625 -1 -1 -1 -1 30 673 12 0 0 144567. 1642.81 1.66 0.128557 0.114566 11730 32605 -1 585 9 216 698 45031 21119 6.8993 6.8993 -73.7008 -6.8993 0 0 194014. 2204.70 0.05 0.06 0.09 -1 -1 0.05 0.0224124 0.0206008 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_target_pin_util/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_target_pin_util/config/golden_results.txt index 9ce2a117a7..e92c164cb1 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_target_pin_util/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_target_pin_util/config/golden_results.txt @@ -3,7 +3,7 @@ EArch.xml styr.blif common_--target_ext_pin_util_0.7 3.34 vpr 66.22 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 11 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 67808 10 10 168 178 1 73 31 6 6 36 clb auto 26.6 MiB 0.22 396 511 91 400 20 66.2 MiB 0.08 0.00 2.39024 -27.2311 -2.39024 2.39024 0.06 0.00063912 0.000574742 0.0133256 0.0125205 -1 -1 -1 -1 28 809 33 646728 592834 52494.1 1458.17 1.99 0.279091 0.24285 2620 9165 
-1 829 25 747 2300 88210 34485 2.99961 2.99961 -36.9596 -2.99961 0 0 62803.0 1744.53 0.01 0.08 0.01 -1 -1 0.01 0.0417208 0.0376776 EArch.xml styr.blif common_--target_ext_pin_util_0.1,0.5 6.02 vpr 66.53 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 91 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 68128 10 10 168 178 1 162 111 14 14 196 clb auto 26.8 MiB 0.95 1456 5963 865 4880 218 66.5 MiB 0.13 0.00 3.05524 -37.9348 -3.05524 3.05524 0.59 0.000646566 0.000596261 0.02071 0.019112 -1 -1 -1 -1 26 2865 15 9.20055e+06 4.90435e+06 387483. 1976.95 2.42 0.211695 0.185383 18784 74779 -1 2696 13 472 1947 107713 24081 3.50167 3.50167 -42.0838 -3.50167 0 0 467681. 2386.13 0.16 0.11 0.07 -1 -1 0.16 0.0279753 0.0255829 EArch.xml styr.blif common_--target_ext_pin_util_0.5,0.3 2.84 vpr 66.18 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 13 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 67764 10 10 168 178 1 75 33 7 7 49 clb auto 26.6 MiB 0.23 404 813 125 661 27 66.2 MiB 0.10 0.00 2.45517 -27.3027 -2.45517 2.45517 0.09 0.000647339 0.000599538 0.0197487 0.0185137 -1 -1 -1 -1 26 1116 28 1.07788e+06 700622 75813.7 1547.22 1.14 0.148701 0.13165 3816 13734 -1 925 18 487 1699 71725 25249 2.97305 2.97305 -35.2593 -2.97305 0 0 91376.6 1864.83 0.02 0.24 0.01 -1 -1 0.02 0.0460889 0.0423163 - EArch.xml styr.blif common_--target_ext_pin_util_0.0 5.34 vpr 66.64 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 104 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 68236 10 10 
168 178 1 163 124 14 14 196 clb auto 26.9 MiB 1.00 1516 7540 1142 6103 295 66.6 MiB 0.21 0.00 3.06133 -37.7953 -3.06133 3.06133 0.60 0.000630456 0.00057306 0.0281512 0.0261658 -1 -1 -1 -1 20 2911 18 9.20055e+06 5.60498e+06 295730. 1508.82 1.60 0.106074 0.094968 18004 60473 -1 2874 12 603 2265 131794 29163 3.74152 3.74152 -44.1586 -3.74152 0 0 387483. 1976.95 0.14 0.17 0.06 -1 -1 0.14 0.0262615 0.0240198 + EArch.xml styr.blif common_--target_ext_pin_util_0.0 5.34 vpr 66.64 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 104 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 68236 10 10 168 178 1 163 124 14 14 196 clb auto 26.9 MiB 1.00 1516 7540 1142 6103 295 66.6 MiB 0.21 0.00 3.06133 -37.7953 -3.06133 3.06133 0.60 0.000630456 0.00057306 0.0281512 0.0261658 -1 -1 -1 -1 20 2911 18 9.20055e+06 5.60498e+06 354876. 1810.58 1.60 0.106074 0.094968 18004 60473 -1 2874 12 603 2265 131794 29163 3.74152 3.74152 -44.1586 -3.74152 0 0 387483. 
1976.95 0.14 0.17 0.06 -1 -1 0.14 0.0262615 0.0240198 EArch.xml styr.blif common_--target_ext_pin_util_clb_0.7 3.16 vpr 66.17 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 11 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 67760 10 10 168 178 1 73 31 6 6 36 clb auto 26.5 MiB 0.20 396 511 91 400 20 66.2 MiB 0.02 0.00 2.39024 -27.2311 -2.39024 2.39024 0.05 0.000653728 0.000608322 0.012902 0.0121572 -1 -1 -1 -1 28 809 33 646728 592834 52494.1 1458.17 1.78 0.26636 0.230807 2620 9165 -1 829 25 747 2300 88210 34485 2.99961 2.99961 -36.9596 -2.99961 0 0 62803.0 1744.53 0.01 0.17 0.01 -1 -1 0.01 0.0463537 0.0422043 EArch.xml styr.blif common_--target_ext_pin_util_clb_0.7_0.8 3.43 vpr 66.35 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 11 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 67944 10 10 168 178 1 73 31 6 6 36 clb auto 26.8 MiB 0.22 396 511 91 400 20 66.4 MiB 0.02 0.00 2.39024 -27.2311 -2.39024 2.39024 0.04 0.000508161 0.000456687 0.0109247 0.0102847 -1 -1 -1 -1 28 809 33 646728 592834 52494.1 1458.17 1.85 0.27407 0.239119 2620 9165 -1 829 25 747 2300 88210 34485 2.99961 2.99961 -36.9596 -2.99961 0 0 62803.0 1744.53 0.01 0.27 0.01 -1 -1 0.01 0.0493788 0.0449139 EArch.xml styr.blif common_--target_ext_pin_util_clb_0.1_0.8 6.33 vpr 66.61 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 91 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 68212 10 10 168 178 1 162 111 14 14 196 clb auto 26.8 MiB 0.96 1456 5963 865 4880 218 66.6 MiB 0.14 0.00 
3.05524 -37.9348 -3.05524 3.05524 0.62 0.000637905 0.00057522 0.0218552 0.0201449 -1 -1 -1 -1 26 2865 15 9.20055e+06 4.90435e+06 387483. 1976.95 2.71 0.215189 0.189178 18784 74779 -1 2696 13 472 1947 107713 24081 3.50167 3.50167 -42.0838 -3.50167 0 0 467681. 2386.13 0.18 0.11 0.06 -1 -1 0.18 0.0286783 0.0262735 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_place_delay_calc_method/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_place_delay_calc_method/config/golden_results.txt index 5ab315508d..457652c3ee 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_place_delay_calc_method/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_place_delay_calc_method/config/golden_results.txt @@ -1,5 +1,5 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used 
min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time - stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_--place_delta_delay_matrix_calculation_method_astar 38.59 vpr 976.47 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 999904 10 10 168 178 1 68 30 11 8 88 io auto 953.3 MiB 0.63 393 628 105 491 32 976.5 MiB 0.06 0.00 6.51193 -69.1178 -6.51193 6.51193 3.11 0.000480842 0.000431377 0.0112949 0.0105397 -1 -1 -1 -1 20 893 28 0 0 100248. 1139.18 0.66 0.109748 0.0976349 11180 23751 -1 831 19 496 1987 121384 60113 6.91414 6.91414 -78.1319 -6.91414 0 0 125464. 
1425.72 0.02 0.09 0.07 -1 -1 0.02 0.0302143 0.0267735 + stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_--place_delta_delay_matrix_calculation_method_astar 38.59 vpr 976.47 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 999904 10 10 168 178 1 68 30 11 8 88 io auto 953.3 MiB 0.63 393 628 105 491 32 976.5 MiB 0.06 0.00 6.51193 -69.1178 -6.51193 6.51193 3.11 0.000480842 0.000431377 0.0112949 0.0105397 -1 -1 -1 -1 20 893 28 0 0 100248. 1139.18 0.66 0.109748 0.0976349 11180 23751 -1 831 19 496 1987 121384 60113 6.91414 6.91414 -78.1319 -6.91414 0 0 150556. 1710.86 0.02 0.09 0.07 -1 -1 0.02 0.0302143 0.0267735 stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_override_--place_delta_delay_matrix_calculation_method_astar 38.84 vpr 976.57 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 1000004 10 10 168 178 1 68 30 11 8 88 io auto 953.4 MiB 0.62 380 628 91 496 41 976.6 MiB 0.06 0.00 6.52338 -69.1003 -6.52338 6.52338 3.24 0.000527135 0.000462877 0.0115671 0.0108087 -1 -1 -1 -1 30 673 12 0 0 144567. 1642.81 0.58 0.0925956 0.0823255 11730 32605 -1 585 9 216 698 45031 21119 6.8993 6.8993 -73.7008 -6.8993 0 0 194014. 
2204.70 0.03 0.06 0.10 -1 -1 0.03 0.0207007 0.0188861 stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_--place_delta_delay_matrix_calculation_method_dijkstra 40.21 vpr 976.52 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 999952 10 10 168 178 1 68 30 11 8 88 io auto 953.3 MiB 0.62 369 766 101 608 57 976.5 MiB 0.07 0.00 6.29548 -69.1499 -6.29548 6.29548 3.96 0.000642065 0.000578628 0.0144046 0.0134125 -1 -1 -1 -1 20 979 39 0 0 100248. 1139.18 1.54 0.210388 0.179459 11180 23751 -1 730 15 326 1149 70174 36157 6.70251 6.70251 -75.6785 -6.70251 0 0 125464. 1425.72 0.02 0.07 0.07 -1 -1 0.02 0.0262708 0.0234853 stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_override_--place_delta_delay_matrix_calculation_method_dijkstra 39.65 vpr 976.56 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 999996 10 10 168 178 1 68 30 11 8 88 io auto 953.5 MiB 0.62 393 720 70 599 51 976.6 MiB 0.06 0.00 6.29266 -69.3194 -6.29266 6.29266 3.95 0.000492354 0.000450911 0.0121985 0.0113493 -1 -1 -1 -1 30 792 16 0 0 144567. 1642.81 0.52 0.0829003 0.0732049 11730 32605 -1 644 15 284 1326 83485 37502 6.72776 6.72776 -73.9475 -6.72776 0 0 194014. 
2204.70 0.03 0.07 0.10 -1 -1 0.03 0.0265404 0.0237185 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_place_delay_model/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_place_delay_model/config/golden_results.txt index 550a3bb84c..2d257ead4b 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_place_delay_model/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_place_delay_model/config/golden_results.txt @@ -1,3 +1,3 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength 
crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time - stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta 39.27 vpr 976.50 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 999936 10 10 168 178 1 68 30 11 8 88 io auto 953.4 MiB 0.61 393 628 105 491 32 976.5 MiB 0.06 0.00 6.51193 -69.1178 -6.51193 6.51193 3.33 0.000569129 0.000496002 0.012275 0.0114625 -1 -1 -1 -1 20 893 28 0 0 100248. 1139.18 0.66 0.116406 0.101755 11180 23751 -1 831 19 496 1987 121384 60113 6.91414 6.91414 -78.1319 -6.91414 0 0 125464. 1425.72 0.02 0.09 0.07 -1 -1 0.02 0.0312385 0.0276729 + stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta 39.27 vpr 976.50 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 999936 10 10 168 178 1 68 30 11 8 88 io auto 953.4 MiB 0.61 393 628 105 491 32 976.5 MiB 0.06 0.00 6.51193 -69.1178 -6.51193 6.51193 3.33 0.000569129 0.000496002 0.012275 0.0114625 -1 -1 -1 -1 20 893 28 0 0 100248. 
1139.18 0.66 0.116406 0.101755 11180 23751 -1 831 19 496 1987 121384 60113 6.91414 6.91414 -78.1319 -6.91414 0 0 150556. 1710.86 0.02 0.09 0.07 -1 -1 0.02 0.0312385 0.0276729 stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_override 36.90 vpr 976.67 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 1000112 10 10 168 178 1 68 30 11 8 88 io auto 953.7 MiB 0.60 380 628 91 496 41 976.7 MiB 0.06 0.00 6.52338 -69.1003 -6.52338 6.52338 3.10 0.000501046 0.000450601 0.0117982 0.0109963 -1 -1 -1 -1 30 673 12 0 0 144567. 1642.81 0.47 0.0799941 0.0704629 11730 32605 -1 585 9 216 698 45031 21119 6.8993 6.8993 -73.7008 -6.8993 0 0 194014. 2204.70 0.03 0.06 0.10 -1 -1 0.03 0.0206622 0.0188402 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_target_pin_util/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_target_pin_util/config/golden_results.txt index 392197825a..88f3874749 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_target_pin_util/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_target_pin_util/config/golden_results.txt @@ -3,7 +3,7 @@ EArch.xml styr.blif common_--target_ext_pin_util_0.7 1.85 vpr 66.26 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 11 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 67848 10 10 168 178 1 73 31 6 6 36 clb auto 26.7 MiB 0.24 396 511 91 400 20 66.3 MiB 0.02 0.00 2.39024 -27.2311 -2.39024 2.39024 0.05 0.00050814 0.000464386 0.0102811 0.00966555 -1 -1 -1 -1 28 809 33 646728 592834 52494.1 1458.17 0.71 
0.196479 0.168384 2620 9165 -1 829 25 747 2300 88210 34485 2.99961 2.99961 -36.9596 -2.99961 0 0 62803.0 1744.53 0.01 0.07 0.01 -1 -1 0.01 0.0330747 0.0297042 EArch.xml styr.blif common_--target_ext_pin_util_0.1,0.5 5.19 vpr 66.62 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 91 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 68216 10 10 168 178 1 162 111 14 14 196 clb auto 26.9 MiB 0.89 1456 5963 865 4880 218 66.6 MiB 0.06 0.00 3.05524 -37.9348 -3.05524 3.05524 0.65 0.000523477 0.000473123 0.0172523 0.0158836 -1 -1 -1 -1 26 2865 15 9.20055e+06 4.90435e+06 387483. 1976.95 1.88 0.188569 0.163851 18784 74779 -1 2696 13 472 1947 107713 24081 3.50167 3.50167 -42.0838 -3.50167 0 0 467681. 2386.13 0.17 0.07 0.07 -1 -1 0.17 0.0274362 0.0252919 EArch.xml styr.blif common_--target_ext_pin_util_0.5,0.3 1.60 vpr 66.12 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 13 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 67712 10 10 168 178 1 75 33 7 7 49 clb auto 26.6 MiB 0.24 404 813 125 661 27 66.1 MiB 0.04 0.00 2.45517 -27.3027 -2.45517 2.45517 0.08 0.00050798 0.000465116 0.0193336 0.0182759 -1 -1 -1 -1 26 1116 28 1.07788e+06 700622 75813.7 1547.22 0.35 0.112548 0.0995004 3816 13734 -1 925 18 487 1699 71725 25249 2.97305 2.97305 -35.2593 -2.97305 0 0 91376.6 1864.83 0.02 0.09 0.01 -1 -1 0.02 0.036074 0.0329384 - EArch.xml styr.blif common_--target_ext_pin_util_0.0 4.47 vpr 66.50 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 104 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca 
/home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 68092 10 10 168 178 1 163 124 14 14 196 clb auto 26.8 MiB 1.03 1516 7540 1142 6103 295 66.5 MiB 0.06 0.00 3.06133 -37.7953 -3.06133 3.06133 0.57 0.000550538 0.000486087 0.0169228 0.0153939 -1 -1 -1 -1 20 2911 18 9.20055e+06 5.60498e+06 295730. 1508.82 1.28 0.0956201 0.0845563 18004 60473 -1 2874 12 603 2265 131794 29163 3.74152 3.74152 -44.1586 -3.74152 0 0 387483. 1976.95 0.13 0.05 0.05 -1 -1 0.13 0.0197556 0.0180756 + EArch.xml styr.blif common_--target_ext_pin_util_0.0 4.47 vpr 66.50 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 104 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 68092 10 10 168 178 1 163 124 14 14 196 clb auto 26.8 MiB 1.03 1516 7540 1142 6103 295 66.5 MiB 0.06 0.00 3.06133 -37.7953 -3.06133 3.06133 0.57 0.000550538 0.000486087 0.0169228 0.0153939 -1 -1 -1 -1 20 2911 18 9.20055e+06 5.60498e+06 354876. 1810.58 1.28 0.0956201 0.0845563 18004 60473 -1 2874 12 603 2265 131794 29163 3.74152 3.74152 -44.1586 -3.74152 0 0 387483. 
1976.95 0.13 0.05 0.05 -1 -1 0.13 0.0197556 0.0180756 EArch.xml styr.blif common_--target_ext_pin_util_clb_0.7 1.87 vpr 66.14 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 11 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 67724 10 10 168 178 1 73 31 6 6 36 clb auto 26.5 MiB 0.19 396 511 91 400 20 66.1 MiB 0.02 0.00 2.39024 -27.2311 -2.39024 2.39024 0.05 0.00050848 0.000463468 0.0108023 0.0101532 -1 -1 -1 -1 28 809 33 646728 592834 52494.1 1458.17 0.76 0.210723 0.180638 2620 9165 -1 829 25 747 2300 88210 34485 2.99961 2.99961 -36.9596 -2.99961 0 0 62803.0 1744.53 0.01 0.07 0.01 -1 -1 0.01 0.0400523 0.0363482 EArch.xml styr.blif common_--target_ext_pin_util_clb_0.7_0.8 1.86 vpr 66.30 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 11 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 67892 10 10 168 178 1 73 31 6 6 36 clb auto 26.6 MiB 0.24 396 511 91 400 20 66.3 MiB 0.02 0.00 2.39024 -27.2311 -2.39024 2.39024 0.04 0.000515808 0.000471915 0.0113979 0.0107292 -1 -1 -1 -1 28 809 33 646728 592834 52494.1 1458.17 0.69 0.195893 0.168677 2620 9165 -1 829 25 747 2300 88210 34485 2.99961 2.99961 -36.9596 -2.99961 0 0 62803.0 1744.53 0.01 0.07 0.01 -1 -1 0.01 0.0384733 0.0348156 EArch.xml styr.blif common_--target_ext_pin_util_clb_0.1_0.8 5.12 vpr 66.80 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 91 10 0 0 success v8.0.0-11852-g026644d7f-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-11-21T16:04:00 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/temp/temp2/vtr-verilog-to-routing 68400 10 10 168 178 1 162 111 14 14 196 clb auto 27.1 MiB 0.91 1456 5963 865 4880 218 66.8 MiB 0.06 0.00 
3.05524 -37.9348 -3.05524 3.05524 0.57 0.000858666 0.000789693 0.0217968 0.0201256 -1 -1 -1 -1 26 2865 15 9.20055e+06 4.90435e+06 387483. 1976.95 1.84 0.178818 0.155234 18784 74779 -1 2696 13 472 1947 107713 24081 3.50167 3.50167 -42.0838 -3.50167 0 0 467681. 2386.13 0.17 0.06 0.08 -1 -1 0.17 0.0255381 0.023411