From 40814f834d751d0c2762c6f86d77d6810066dbeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fahrican=20Ko=C5=9Far?= Date: Fri, 22 Mar 2024 22:25:35 -0400 Subject: [PATCH] Build PartitionTree incrementally, tune net-decomposing router --- vpr/src/route/DecompNetlistRouter.h | 18 ++++- vpr/src/route/DecompNetlistRouter.tpp | 61 +++++++++++---- vpr/src/route/ParallelNetlistRouter.h | 8 +- vpr/src/route/ParallelNetlistRouter.tpp | 37 +++++++-- vpr/src/route/SerialNetlistRouter.h | 1 + vpr/src/route/SerialNetlistRouter.tpp | 12 ++- vpr/src/route/connection_router.cpp | 20 +++-- vpr/src/route/netlist_routers.h | 8 ++ vpr/src/route/partition_tree.cpp | 84 ++++++++++++++++++--- vpr/src/route/partition_tree.h | 18 ++++- vpr/src/route/route.cpp | 8 +- vpr/src/route/route_utils.cpp | 10 +-- vpr/src/route/route_utils.h | 3 +- vpr/src/route/sink_sampling.h | 18 +++-- vpr/src/route/spatial_route_tree_lookup.cpp | 6 +- 15 files changed, 240 insertions(+), 72 deletions(-) diff --git a/vpr/src/route/DecompNetlistRouter.h b/vpr/src/route/DecompNetlistRouter.h index 1f16105a356..a41d656c240 100644 --- a/vpr/src/route/DecompNetlistRouter.h +++ b/vpr/src/route/DecompNetlistRouter.h @@ -2,7 +2,8 @@ /** @file Parallel and net-decomposing case for NetlistRouter. Works like * \see ParallelNetlistRouter, but tries to "decompose" nets and assign them to - * the next level of the partition tree where possible. */ + * the next level of the partition tree where possible. + * See "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */ #include "netlist_routers.h" #include @@ -57,6 +58,8 @@ class DecompNetlistRouter : public NetlistRouter { * \ref route_net for each net, which will handle other global updates. * \return RouteIterResults for this iteration. 
*/ RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack); + /** Inform the PartitionTree of the nets with updated bounding boxes */ + void handle_bb_updated_nets(const std::vector& nets); /** Set RCV enable flag for all routers managed by this netlist router. * Net decomposition does not work with RCV, so calling this fn with x=true is a fatal error. */ void set_rcv_enabled(bool x); @@ -65,10 +68,14 @@ class DecompNetlistRouter : public NetlistRouter { private: /** Should we decompose this net? */ bool should_decompose_net(ParentNetId net_id, const PartitionTreeNode& node); - /** Get a bitset with sinks to route before net decomposition */ + /** Get a bitset of sinks to route before net decomposition. Output bitset is + * [1..num_sinks] where the corresponding index is set to 1 if the sink needs to + * be routed */ vtr::dynamic_bitset<> get_decomposition_mask(ParentNetId net_id, const PartitionTreeNode& node); - /** Get a bitset with sinks to route before virtual net decomposition */ - vtr::dynamic_bitset<> get_vnet_decomposition_mask(const VirtualNet& vnet, const PartitionTreeNode& node); + /** Get a bitset of sinks to route before virtual net decomposition. Output bitset is + * [1..num_sinks] where the corresponding index is set to 1 if the sink needs to + * be routed */ + vtr::dynamic_bitset<> get_decomposition_mask_vnet(const VirtualNet& vnet, const PartitionTreeNode& node); /** Decompose and route a regular net. Output the resulting vnets to \p left and \p right. * \return Success status: true if routing is successful and left and right now contain valid virtual nets: false otherwise. */ bool decompose_and_route_net(ParentNetId net_id, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right); @@ -115,6 +122,9 @@ class DecompNetlistRouter : public NetlistRouter { float _pres_fac; float _worst_neg_slack; + /** The partition tree. 
Holds the groups of nets for each partition */ + vtr::optional _tree; + /** Sinks to be always sampled for decomposition for each net: [0.._net_list.size()-1] * (i.e. when routing fails after decomposition for a sink, sample it on next iteration) */ vtr::vector> _net_known_samples; diff --git a/vpr/src/route/DecompNetlistRouter.tpp b/vpr/src/route/DecompNetlistRouter.tpp index fc1a6685e56..a009132c45d 100644 --- a/vpr/src/route/DecompNetlistRouter.tpp +++ b/vpr/src/route/DecompNetlistRouter.tpp @@ -3,6 +3,7 @@ /** @file Impls for DecompNetlistRouter */ #include "DecompNetlistRouter.h" +#include "globals.h" #include "netlist_routers.h" #include "route_net.h" #include "sink_sampling.h" @@ -21,25 +22,44 @@ inline RouteIterResults DecompNetlistRouter::route_netlist(int itry, f _pres_fac = pres_fac; _worst_neg_slack = worst_neg_slack; + vtr::Timer timer; + /* Organize netlist into a PartitionTree. * Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */ - PartitionTree tree(_net_list); + if(!_tree){ + _tree = PartitionTree(_net_list); + PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s"); + } + + /* Remove all virtual nets: we will create them for each iteration. + * This needs to be done because the partition tree can change between iterations + * due to bounding box updates, which invalidates virtual nets */ + _tree->clear_vnets(); /* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. 
*/ - tbb::task_group g; - route_partition_tree_node(g, tree.root()); - g.wait(); + tbb::task_group group; + route_partition_tree_node(group, _tree->root()); + group.wait(); + PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s"); /* Combine results from threads */ RouteIterResults out; for (auto& results : _results_th) { out.stats.combine(results.stats); out.rerouted_nets.insert(out.rerouted_nets.end(), results.rerouted_nets.begin(), results.rerouted_nets.end()); + out.bb_updated_nets.insert(out.bb_updated_nets.end(), results.bb_updated_nets.begin(), results.bb_updated_nets.end()); out.is_routable &= results.is_routable; } + return out; } +template +void DecompNetlistRouter::handle_bb_updated_nets(const std::vector& nets) { + VTR_ASSERT(_tree); + _tree->update_nets(nets); +} + template void DecompNetlistRouter::set_rcv_enabled(bool x) { if (x) @@ -120,6 +140,10 @@ inline bool should_decompose_vnet(const VirtualNet& vnet, const PartitionTreeNod template void DecompNetlistRouter::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) { auto& route_ctx = g_vpr_ctx.mutable_routing(); + vtr::Timer timer; + + /* node.nets is an unordered set, copy into vector to sort */ + std::vector nets(node.nets.begin(), node.nets.end()); /* Sort so that nets with the most sinks are routed first. * We want to interleave virtual nets with regular ones, so sort an "index vector" @@ -129,15 +153,14 @@ void DecompNetlistRouter::route_partition_tree_node(tbb::task_group& g std::vector order(node.nets.size() + node.vnets.size()); std::iota(order.begin(), order.end(), 0); std::stable_sort(order.begin(), order.end(), [&](size_t i, size_t j) -> bool { - ParentNetId id1 = i < node.nets.size() ? node.nets[i] : node.vnets[i - node.nets.size()].net_id; - ParentNetId id2 = j < node.nets.size() ? node.nets[j] : node.vnets[j - node.nets.size()].net_id; + ParentNetId id1 = i < node.nets.size() ? 
nets[i] : node.vnets[i - nets.size()].net_id; + ParentNetId id2 = j < node.nets.size() ? nets[j] : node.vnets[j - nets.size()].net_id; return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size(); }); - vtr::Timer t; for (size_t i : order) { - if (i < node.nets.size()) { /* Regular net (not decomposed) */ - ParentNetId net_id = node.nets[i]; + if (i < nets.size()) { /* Regular net (not decomposed) */ + ParentNetId net_id = nets[i]; if (!should_route_net(_net_list, net_id, _connections_inf, _budgeting_inf, _worst_neg_slack, true)) continue; /* Setup the net (reset or prune) only once here in the flow. Then all calls to route_net turn off auto-setup */ @@ -188,6 +211,7 @@ void DecompNetlistRouter::route_partition_tree_node(tbb::task_group& g if (flags.retry_with_full_bb) { /* ConnectionRouter thinks we should grow the BB. Do that and leave this net unrouted for now */ route_ctx.route_bb[net_id] = full_device_bb(); + _results_th.local().bb_updated_nets.push_back(net_id); /* Disable decomposition for nets like this: they're already problematic */ _is_decomp_disabled[net_id] = true; continue; @@ -206,7 +230,7 @@ void DecompNetlistRouter::route_partition_tree_node(tbb::task_group& g continue; } } - /* Route the full vnet. Again we don't care about the flags, they should be handled by the regular path */ + /* Route the full vnet. 
We don't care about the flags, they should be handled by the regular path */ auto sink_mask = get_vnet_sink_mask(vnet); route_net( _routers_th.local(), @@ -234,7 +258,7 @@ void DecompNetlistRouter::route_partition_tree_node(tbb::task_group& g PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + " nets and " + std::to_string(node.vnets.size()) - + " virtual nets routed in " + std::to_string(t.elapsed_sec()) + + " virtual nets routed in " + std::to_string(timer.elapsed_sec()) + " s"); /* This node is finished: add left & right branches to the task queue */ @@ -277,7 +301,7 @@ inline void make_vnet_pair(ParentNetId net_id, const t_bb& bb, Axis cutline_axis template bool DecompNetlistRouter::decompose_and_route_net(ParentNetId net_id, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right) { - auto& route_ctx = g_vpr_ctx.routing(); + auto& route_ctx = g_vpr_ctx.mutable_routing(); auto& net_bb = route_ctx.route_bb[net_id]; /* Sample enough sinks to provide branch-off points to the virtual nets we create */ @@ -382,7 +406,7 @@ inline std::string describe_vnet(const VirtualNet& vnet) { template bool DecompNetlistRouter::decompose_and_route_vnet(VirtualNet& vnet, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right) { /* Sample enough sinks to provide branch-off points to the virtual nets we create */ - auto sink_mask = get_vnet_decomposition_mask(vnet, node); + auto sink_mask = get_decomposition_mask_vnet(vnet, node); /* Route the *parent* net with the given mask: only the sinks we ask for will be routed */ auto flags = route_net( @@ -499,6 +523,7 @@ inline bool get_reduction_mask(ParentNetId net_id, Axis cutline_axis, int cutlin template vtr::dynamic_bitset<> DecompNetlistRouter::get_decomposition_mask(ParentNetId net_id, const PartitionTreeNode& node) { const auto& route_ctx = g_vpr_ctx.routing(); + const RouteTree& tree = route_ctx.route_trees[net_id].value(); size_t num_sinks = tree.num_sinks(); @@ -512,6 +537,7 @@ 
vtr::dynamic_bitset<> DecompNetlistRouter::get_decomposition_mask(Pare bool is_reduced = get_reduction_mask(net_id, node.cutline_axis, node.cutline_pos, out); bool source_on_cutline = is_close_to_cutline(tree.root().inode, node.cutline_axis, node.cutline_pos, 1); + if (!is_reduced || source_on_cutline) convex_hull_downsample(net_id, route_ctx.route_bb[net_id], out); @@ -638,7 +664,7 @@ inline bool get_reduction_mask_vnet_with_source(const VirtualNet& vnet, Axis cut } template -vtr::dynamic_bitset<> DecompNetlistRouter::get_vnet_decomposition_mask(const VirtualNet& vnet, const PartitionTreeNode& node) { +vtr::dynamic_bitset<> DecompNetlistRouter::get_decomposition_mask_vnet(const VirtualNet& vnet, const PartitionTreeNode& node) { const auto& route_ctx = g_vpr_ctx.routing(); const RouteTree& tree = route_ctx.route_trees[vnet.net_id].value(); int num_sinks = tree.num_sinks(); @@ -652,8 +678,9 @@ vtr::dynamic_bitset<> DecompNetlistRouter::get_vnet_decomposition_mask if (inside_bb(tree.root().inode, vnet.clipped_bb)) { /* We have source, no need to sample after reduction in most cases */ bool is_reduced = get_reduction_mask_vnet_with_source(vnet, node.cutline_axis, node.cutline_pos, out); bool source_on_cutline = is_close_to_cutline(tree.root().inode, node.cutline_axis, node.cutline_pos, 1); - if (!is_reduced || source_on_cutline) + if (!is_reduced || source_on_cutline){ convex_hull_downsample(vnet.net_id, vnet.clipped_bb, out); + } } else { int reduced_sides = get_reduction_mask_vnet_no_source(vnet, node.cutline_axis, node.cutline_pos, out); if (reduced_sides < 2) { @@ -666,9 +693,11 @@ vtr::dynamic_bitset<> DecompNetlistRouter::get_vnet_decomposition_mask /* Sample if a sink is too close to the cutline (and unreached). 
* Those sinks are likely to fail routing */ for (size_t isink : isinks) { + RRNodeId rr_sink = route_ctx.net_rr_terminals[vnet.net_id][isink]; + if (!inside_bb(rr_sink, vnet.clipped_bb)) + continue; if (is_isink_reached.get(isink)) continue; - RRNodeId rr_sink = route_ctx.net_rr_terminals[vnet.net_id][isink]; if (is_close_to_cutline(rr_sink, node.cutline_axis, node.cutline_pos, 1)) { out.set(isink, true); continue; diff --git a/vpr/src/route/ParallelNetlistRouter.h b/vpr/src/route/ParallelNetlistRouter.h index 35a2da25098..e77fdf8344e 100644 --- a/vpr/src/route/ParallelNetlistRouter.h +++ b/vpr/src/route/ParallelNetlistRouter.h @@ -8,8 +8,9 @@ * * Note that the parallel router does not support graphical router breakpoints. * - * [0]: F. Koşar, "A net-decomposing parallel FPGA router", MS thesis, UofT ECE, 2023 */ + * [0]: "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */ #include "netlist_routers.h" +#include "vtr_optional.h" #include @@ -52,6 +53,8 @@ class ParallelNetlistRouter : public NetlistRouter { * \ref route_net for each net, which will handle other global updates. * \return RouteIterResults for this iteration. */ RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack); + /** Inform the PartitionTree of the nets with updated bounding boxes */ + void handle_bb_updated_nets(const std::vector& nets); void set_rcv_enabled(bool x); void set_timing_info(std::shared_ptr timing_info); @@ -95,6 +98,9 @@ class ParallelNetlistRouter : public NetlistRouter { int _itry; float _pres_fac; float _worst_neg_slack; + + /** The partition tree. 
Holds the groups of nets for each partition */ + vtr::optional _tree; }; #include "ParallelNetlistRouter.tpp" diff --git a/vpr/src/route/ParallelNetlistRouter.tpp b/vpr/src/route/ParallelNetlistRouter.tpp index 9cae0d84dba..1268ed6030e 100644 --- a/vpr/src/route/ParallelNetlistRouter.tpp +++ b/vpr/src/route/ParallelNetlistRouter.tpp @@ -2,6 +2,7 @@ /** @file Impls for ParallelNetlistRouter */ +#include #include "netlist_routers.h" #include "route_net.h" #include "vtr_time.h" @@ -20,18 +21,24 @@ inline RouteIterResults ParallelNetlistRouter::route_netlist(int itry, /* Organize netlist into a PartitionTree. * Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */ - PartitionTree tree(_net_list); + vtr::Timer timer; + if(!_tree){ + _tree = PartitionTree(_net_list); + PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s"); + } /* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. 
*/ - tbb::task_group g; - route_partition_tree_node(g, tree.root()); - g.wait(); + tbb::task_group group; + route_partition_tree_node(group, _tree->root()); + group.wait(); + PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s"); /* Combine results from threads */ RouteIterResults out; for (auto& results : _results_th) { out.stats.combine(results.stats); out.rerouted_nets.insert(out.rerouted_nets.end(), results.rerouted_nets.begin(), results.rerouted_nets.end()); + out.bb_updated_nets.insert(out.bb_updated_nets.end(), results.bb_updated_nets.begin(), results.bb_updated_nets.end()); out.is_routable &= results.is_routable; } return out; @@ -41,13 +48,16 @@ template void ParallelNetlistRouter::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) { auto& route_ctx = g_vpr_ctx.mutable_routing(); + /* node.nets is an unordered set, copy into vector to sort */ + std::vector nets(node.nets.begin(), node.nets.end()); + /* Sort so net with most sinks is routed first. */ - std::stable_sort(node.nets.begin(), node.nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool { + std::stable_sort(nets.begin(), nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool { return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size(); }); - vtr::Timer t; - for (auto net_id : node.nets) { + vtr::Timer timer; + for (auto net_id : nets) { auto flags = route_net( _routers_th.local(), _net_list, @@ -76,13 +86,18 @@ void ParallelNetlistRouter::route_partition_tree_node(tbb::task_group& if (flags.retry_with_full_bb) { /* ConnectionRouter thinks we should grow the BB. 
Do that and leave this net unrouted for now */ route_ctx.route_bb[net_id] = full_device_bb(); + _results_th.local().bb_updated_nets.push_back(net_id); continue; } if (flags.was_rerouted) { _results_th.local().rerouted_nets.push_back(net_id); } } - PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + " nets routed in " + std::to_string(t.elapsed_sec()) + " s"); + + PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + + " nets and " + std::to_string(node.vnets.size()) + + " virtual nets routed in " + std::to_string(timer.elapsed_sec()) + + " s"); /* This node is finished: add left & right branches to the task queue */ if (node.left && node.right) { @@ -97,6 +112,12 @@ void ParallelNetlistRouter::route_partition_tree_node(tbb::task_group& } } +template +void ParallelNetlistRouter::handle_bb_updated_nets(const std::vector& nets) { + VTR_ASSERT(_tree); + _tree->update_nets(nets); +} + template void ParallelNetlistRouter::set_rcv_enabled(bool x) { for (auto& router : _routers_th) { diff --git a/vpr/src/route/SerialNetlistRouter.h b/vpr/src/route/SerialNetlistRouter.h index 5bb59df1998..352de125b68 100644 --- a/vpr/src/route/SerialNetlistRouter.h +++ b/vpr/src/route/SerialNetlistRouter.h @@ -35,6 +35,7 @@ class SerialNetlistRouter : public NetlistRouter { ~SerialNetlistRouter() {} RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack); + void handle_bb_updated_nets(const std::vector& nets); void set_rcv_enabled(bool x); void set_timing_info(std::shared_ptr timing_info); diff --git a/vpr/src/route/SerialNetlistRouter.tpp b/vpr/src/route/SerialNetlistRouter.tpp index 7927d06c4e2..63497d7d394 100644 --- a/vpr/src/route/SerialNetlistRouter.tpp +++ b/vpr/src/route/SerialNetlistRouter.tpp @@ -4,12 +4,15 @@ #include "SerialNetlistRouter.h" #include "route_net.h" +#include "vtr_time.h" template inline RouteIterResults SerialNetlistRouter::route_netlist(int itry, float pres_fac, float worst_neg_slack) { auto& 
route_ctx = g_vpr_ctx.mutable_routing(); RouteIterResults out; + vtr::Timer timer; + /* Sort so net with most sinks is routed first */ auto sorted_nets = std::vector(_net_list.nets().begin(), _net_list.nets().end()); std::stable_sort(sorted_nets.begin(), sorted_nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool { @@ -45,7 +48,9 @@ inline RouteIterResults SerialNetlistRouter::route_netlist(int itry, f } if (flags.retry_with_full_bb) { - /* Grow the BB and retry this net right away. */ + /* Grow the BB and retry this net right away. + * We don't populate out.bb_updated_nets for the serial router, since + * there is no partition tree to update. */ route_ctx.route_bb[net_id] = full_device_bb(); inet--; continue; @@ -59,9 +64,14 @@ inline RouteIterResults SerialNetlistRouter::route_netlist(int itry, f } } + PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s"); return out; } +template +void SerialNetlistRouter::handle_bb_updated_nets(const std::vector& /* nets */) { +} + template void SerialNetlistRouter::set_rcv_enabled(bool x) { _router.set_rcv_enabled(x); diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp index 7fd0f0d1680..7216820726a 100644 --- a/vpr/src/route/connection_router.cpp +++ b/vpr/src/route/connection_router.cpp @@ -4,6 +4,8 @@ #include "rr_graph.h" #include "rr_graph_fwd.h" +/** Used for the flat router. The node isn't relevant to the target if + * it is an intra-block node outside of our target block */ static bool relevant_node_to_target(const RRGraphView* rr_graph, RRNodeId node_to_add, RRNodeId target_node); @@ -972,6 +974,7 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( //Add existing routing starting from the target bin. 
//If the target's bin has insufficient existing routing add from the surrounding bins + constexpr int SINGLE_BIN_MIN_NODES = 2; bool done = false; bool found_node_on_same_layer = false; for (int dx : {0, -1, +1}) { @@ -989,6 +992,7 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( continue; RRNodeId rr_node_to_add = rt_node.inode; + /* Flat router: don't go into clusters other than the target one */ if (is_flat_) { if (!relevant_node_to_target(rr_graph_, rr_node_to_add, target_node)) continue; @@ -1014,7 +1018,6 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( } } - constexpr int SINGLE_BIN_MIN_NODES = 2; if (dx == 0 && dy == 0 && chan_nodes_added > SINGLE_BIN_MIN_NODES && found_node_on_same_layer) { //Target bin contained at least minimum amount of routing // @@ -1028,8 +1031,9 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( } if (done) break; } - - if (chan_nodes_added == 0 || !found_node_on_same_layer) { //If the target bin, and it's surrounding bins were empty, just add the full route tree + /* If we didn't find enough nodes to branch off near the target + * or they are on the wrong grid layer, just add the full route tree */ + if (chan_nodes_added <= SINGLE_BIN_MIN_NODES || !found_node_on_same_layer) { add_route_tree_to_heap(rt_root, target_node, cost_params, net_bounding_box); return net_bounding_box; } else { @@ -1042,15 +1046,9 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( static inline bool relevant_node_to_target(const RRGraphView* rr_graph, RRNodeId node_to_add, RRNodeId target_node) { - VTR_ASSERT(rr_graph->node_type(target_node) == t_rr_type::SINK); + VTR_ASSERT_SAFE(rr_graph->node_type(target_node) == t_rr_type::SINK); auto node_to_add_type = rr_graph->node_type(node_to_add); - if (node_to_add_type == t_rr_type::OPIN || node_to_add_type == t_rr_type::SOURCE || node_to_add_type == t_rr_type::CHANX || node_to_add_type == t_rr_type::CHANY || node_to_add_type == SINK) { - return true; - } else if 
(node_in_same_physical_tile(node_to_add, target_node)) { - VTR_ASSERT(node_to_add_type == IPIN); - return true; - } - return false; + return node_to_add_type != t_rr_type::IPIN || node_in_same_physical_tile(node_to_add, target_node); } static inline void update_router_stats(RouterStats* router_stats, diff --git a/vpr/src/route/netlist_routers.h b/vpr/src/route/netlist_routers.h index d5f5354a392..1524c2ddb38 100644 --- a/vpr/src/route/netlist_routers.h +++ b/vpr/src/route/netlist_routers.h @@ -15,6 +15,7 @@ * NetlistRouter-derived class is still a NetlistRouter, so that is transparent to the user * of this interface. */ +#include #include "NetPinTimingInvalidator.h" #include "clustered_netlist_utils.h" #include "connection_based_routing_fwd.h" @@ -37,6 +38,9 @@ struct RouteIterResults { bool is_routable = true; /** Net IDs with changed routing */ std::vector rerouted_nets; + /** Net IDs with changed bounding box for this iteration. + * Used by the parallel router to update the \ref PartitionTree */ + std::vector bb_updated_nets; /** RouterStats for this iteration */ RouterStats stats; }; @@ -53,6 +57,10 @@ class NetlistRouter { * \return RouteIterResults for this iteration. */ virtual RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack) = 0; + /** Handle net bounding box updates by passing them to the PartitionTree. + * No-op for the serial router */ + virtual void handle_bb_updated_nets(const std::vector& nets) = 0; + /** Enable RCV for each of the ConnectionRouters this NetlistRouter manages.*/ virtual void set_rcv_enabled(bool x) = 0; diff --git a/vpr/src/route/partition_tree.cpp b/vpr/src/route/partition_tree.cpp index b679fb90a17..ac95a9a5285 100644 --- a/vpr/src/route/partition_tree.cpp +++ b/vpr/src/route/partition_tree.cpp @@ -1,6 +1,7 @@ #include "partition_tree.h" #include #include +#include /** Minimum number of nets inside a partition to continue further partitioning. * Mostly an arbitrary limit. 
At a certain point, the quality lost due to disturbed net ordering @@ -10,19 +11,32 @@ constexpr size_t MIN_NETS_TO_PARTITION = 256; PartitionTree::PartitionTree(const Netlist<>& netlist) { const auto& device_ctx = g_vpr_ctx.device(); - auto all_nets = std::vector(netlist.nets().begin(), netlist.nets().end()); + auto all_nets = std::unordered_set(netlist.nets().begin(), netlist.nets().end()); _root = build_helper(netlist, all_nets, 0, 0, device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); } -std::unique_ptr PartitionTree::build_helper(const Netlist<>& netlist, const std::vector& nets, int x1, int y1, int x2, int y2) { +/** Build a branch of the PartitionTree given a set of \p nets and a bounding box. + * Calls itself recursively with smaller and smaller bounding boxes until there are less + * nets than \ref MIN_NETS_TO_PARTITION. */ +std::unique_ptr PartitionTree::build_helper(const Netlist<>& netlist, const std::unordered_set& nets, int x1, int y1, int x2, int y2) { if (nets.empty()) return nullptr; const auto& route_ctx = g_vpr_ctx.routing(); + + /* Only build this for 2 dimensions. 
Ignore the layers for now */ + const auto& device_ctx = g_vpr_ctx.device(); + int layer_max = device_ctx.grid.get_num_layers() - 1; + auto out = std::make_unique(); if (nets.size() < MIN_NETS_TO_PARTITION) { + out->bb = {x1, x2, y1, y2, 0, layer_max}; out->nets = nets; + /* Build net to ptree node lookup */ + for(auto net_id: nets){ + _net_to_ptree_node[net_id] = out.get(); + } return out; } @@ -113,22 +127,26 @@ std::unique_ptr PartitionTree::build_helper(const Netlist<>& /* Couldn't find a cutline: all cutlines result in a one-way cut */ if (std::isnan(best_pos)) { - out->nets = nets; /* We hope copy elision is smart enough to optimize this stuff out */ - return out; + out->bb = {x1, x2, y1, y2, 0, layer_max}; + out->nets = nets; + /* Build net to ptree node lookup */ + for(auto net_id: nets){ + _net_to_ptree_node[net_id] = out.get(); + } } /* Populate net IDs on each side and call next level of build_x */ - std::vector left_nets, right_nets, my_nets; + std::unordered_set left_nets, right_nets, my_nets; if (best_axis == Axis::X) { for (auto net_id : nets) { t_bb bb = route_ctx.route_bb[net_id]; if (bb.xmax < best_pos) { - left_nets.push_back(net_id); + left_nets.insert(net_id); } else if (bb.xmin > best_pos) { - right_nets.push_back(net_id); + right_nets.insert(net_id); } else { - my_nets.push_back(net_id); + my_nets.insert(net_id); } } @@ -139,11 +157,11 @@ std::unique_ptr PartitionTree::build_helper(const Netlist<>& for (auto net_id : nets) { t_bb bb = route_ctx.route_bb[net_id]; if (bb.ymax < best_pos) { - left_nets.push_back(net_id); + left_nets.insert(net_id); } else if (bb.ymin > best_pos) { - right_nets.push_back(net_id); + right_nets.insert(net_id); } else { - my_nets.push_back(net_id); + my_nets.insert(net_id); } } @@ -151,8 +169,52 @@ std::unique_ptr PartitionTree::build_helper(const Netlist<>& out->right = build_helper(netlist, right_nets, x1, std::floor(best_pos + 1), x2, y2); } + if(out->left) + out->left->parent = out.get(); + if(out->right) + 
out->right->parent = out.get(); + + out->bb = {x1, x2, y1, y2, 0, 0}; out->nets = my_nets; out->cutline_axis = best_axis; out->cutline_pos = best_pos; + + /* Build net to ptree node lookup */ + for(auto net_id: my_nets){ + _net_to_ptree_node[net_id] = out.get(); + } return out; } + +inline bool net_in_ptree_node(ParentNetId net_id, const PartitionTreeNode* node){ + auto& route_ctx = g_vpr_ctx.routing(); + const t_bb& bb = route_ctx.route_bb[net_id]; + return bb.xmin >= node->bb.xmin && bb.xmax <= node->bb.xmax && bb.ymin >= node->bb.ymin && bb.ymax <= node->bb.ymax; +} + +void PartitionTree::update_nets(const std::vector& nets) { + for(auto net_id: nets){ + PartitionTreeNode* old_ptree_node = _net_to_ptree_node[net_id]; + PartitionTreeNode* new_ptree_node = old_ptree_node; + while(!net_in_ptree_node(net_id, new_ptree_node)) + new_ptree_node = new_ptree_node->parent; + old_ptree_node->nets.erase(net_id); + new_ptree_node->nets.insert(net_id); + _net_to_ptree_node[net_id] = new_ptree_node; + } +} + +/** Delete all vnets from this tree */ +void PartitionTree::clear_vnets(void) { + std::stack stack; + stack.push(_root.get()); + while(!stack.empty()){ + PartitionTreeNode* node = stack.top(); + stack.pop(); + node->vnets.clear(); + if(node->left) + stack.push(node->left.get()); + if(node->right) + stack.push(node->right.get()); + } +} diff --git a/vpr/src/route/partition_tree.h b/vpr/src/route/partition_tree.h index ac15fea4bcd..82b75976b83 100644 --- a/vpr/src/route/partition_tree.h +++ b/vpr/src/route/partition_tree.h @@ -1,6 +1,7 @@ #pragma once #include "connection_router.h" +#include "netlist_fwd.h" #include "router_stats.h" #include @@ -53,13 +54,15 @@ class VirtualNet { class PartitionTreeNode { public: /** Nets claimed by this node (intersected by cutline if branch, nets in final region if leaf) */ - std::vector nets; + std::unordered_set nets; /** Virtual nets assigned by the parent of this node (\see DecompNetlistRouter) */ std::vector vnets; /** Left subtree. 
*/ std::unique_ptr left = nullptr; /** Right subtree. */ std::unique_ptr right = nullptr; + /** Parent node. */ + PartitionTreeNode* parent = nullptr; /* Axis of the cutline. */ Axis cutline_axis = Axis::X; /* Position of the cutline. It's a float, because cutlines are considered to be "between" integral coordinates. */ @@ -83,9 +86,20 @@ class PartitionTree { /** Access root. Shouldn't cause a segfault, because PartitionTree constructor always makes a _root */ inline PartitionTreeNode& root(void) { return *_root; } + /** Handle nets which had a bounding box update. + * Bounding boxes can only grow, so we should find a new partition tree node for + * these nets by moving them up until they fit in a node's bounds */ + void update_nets(const std::vector& nets); + + /** Delete all virtual nets in the tree. Used for the net decomposing router. + * Virtual nets are invalidated between iterations due to changing bounding + * boxes. */ + void clear_vnets(void); + private: std::unique_ptr _root; - std::unique_ptr build_helper(const Netlist<>& netlist, const std::vector& nets, int x1, int y1, int x2, int y2); + std::unordered_map _net_to_ptree_node; + std::unique_ptr build_helper(const Netlist<>& netlist, const std::unordered_set& nets, int x1, int y1, int x2, int y2); }; #ifdef VPR_DEBUG_PARTITION_TREE diff --git a/vpr/src/route/route.cpp b/vpr/src/route/route.cpp index 6bbc3449d88..d4dbc2a4d55 100644 --- a/vpr/src/route/route.cpp +++ b/vpr/src/route/route.cpp @@ -308,6 +308,8 @@ bool route(const Netlist<>& net_list, float iter_cumm_time = iteration_timer.elapsed_sec(); float iter_elapsed_time = iter_cumm_time - prev_iter_cumm_time; + PartitionTreeDebug::log("Iteration " + std::to_string(itry) + " took " + std::to_string(iter_elapsed_time) + " s"); + //Output progress print_route_status(itry, iter_elapsed_time, pres_fac, num_net_bounding_boxes_updated, iter_results.stats, overuse_info, wirelength_info, timing_info, est_success_iteration); @@ -424,11 +426,13 @@ bool 
route(const Netlist<>& net_list, /* * Prepare for the next iteration */ - if (router_opts.route_bb_update == e_route_bb_update::DYNAMIC) { - num_net_bounding_boxes_updated = dynamic_update_bounding_boxes(iter_results.rerouted_nets); + dynamic_update_bounding_boxes(iter_results.rerouted_nets, iter_results.bb_updated_nets); } + num_net_bounding_boxes_updated = iter_results.bb_updated_nets.size(); + netlist_router->handle_bb_updated_nets(iter_results.bb_updated_nets); + if (itry >= high_effort_congestion_mode_iteration_threshold) { //We are approaching the maximum number of routing iterations, //and still do not have a legal routing. Switch to a mode which diff --git a/vpr/src/route/route_utils.cpp b/vpr/src/route/route_utils.cpp index 60dec8d18d4..b398066769f 100644 --- a/vpr/src/route/route_utils.cpp +++ b/vpr/src/route/route_utils.cpp @@ -8,6 +8,7 @@ #include "draw_global.h" #include "draw_types.h" #include "net_delay.h" +#include "netlist_fwd.h" #include "overuse_report.h" #include "place_and_route.h" #include "route_debug.h" @@ -68,7 +69,7 @@ bool check_net_delays(const Netlist<>& net_list, NetPinsMatrix& net_delay // // Typically, only a small minority of nets (typically > 10%) have their BBs updated // each routing iteration. 
-size_t dynamic_update_bounding_boxes(const std::vector& updated_nets) { +void dynamic_update_bounding_boxes(const std::vector& rerouted_nets, std::vector& out_bb_updated_nets) { auto& device_ctx = g_vpr_ctx.device(); auto& route_ctx = g_vpr_ctx.mutable_routing(); @@ -87,9 +88,7 @@ size_t dynamic_update_bounding_boxes(const std::vector& updated_net int grid_xmax = grid.width() - 1; int grid_ymax = grid.height() - 1; - size_t num_bb_updated = 0; - - for (ParentNetId net : updated_nets) { + for (ParentNetId net : rerouted_nets) { if (!route_ctx.route_trees[net]) continue; // Skip if no routing if (!route_ctx.net_status.is_routed(net)) @@ -133,13 +132,12 @@ size_t dynamic_update_bounding_boxes(const std::vector& updated_net } if (updated_bb) { - ++num_bb_updated; + out_bb_updated_nets.push_back(net); //VTR_LOG("Expanded net %6zu router BB to (%d,%d)x(%d,%d) based on net RR node BB (%d,%d)x(%d,%d)\n", size_t(net), //router_bb.xmin, router_bb.ymin, router_bb.xmax, router_bb.ymax, //curr_bb.xmin, curr_bb.ymin, curr_bb.xmax, curr_bb.ymax); } } - return num_bb_updated; } bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts, diff --git a/vpr/src/route/route_utils.h b/vpr/src/route/route_utils.h index edf5a3b59f7..d129193ee1e 100644 --- a/vpr/src/route/route_utils.h +++ b/vpr/src/route/route_utils.h @@ -2,6 +2,7 @@ /** @file Utility functions used in the top-level router (route.cpp). 
*/ +#include "netlist_fwd.h" #include "router_stats.h" #include "timing_info.h" #include "vpr_net_pins_matrix.h" @@ -47,7 +48,7 @@ WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t avail bool check_net_delays(const Netlist<>& net_list, NetPinsMatrix& net_delay); /** Update bounding box for net if existing routing is close to boundary */ -size_t dynamic_update_bounding_boxes(const std::vector& updated_nets); +void dynamic_update_bounding_boxes(const std::vector& rerouted_nets, std::vector& out_bb_updated_nets); /** Early exit code for cases where it is obvious that a successful route will not be found * Heuristic: If total wirelength used in first routing iteration is X% of total available wirelength, exit */ diff --git a/vpr/src/route/sink_sampling.h b/vpr/src/route/sink_sampling.h index eb0df7f2620..485bff9b3e5 100644 --- a/vpr/src/route/sink_sampling.h +++ b/vpr/src/route/sink_sampling.h @@ -116,20 +116,16 @@ inline std::vector quickhull(const std::vector& points) { } // namespace sink_sampling /** Which side of the cutline is this RRNode on? - * Cutlines are always assumed to be at cutline_axis = (cutline_pos + 0.5). - * In the context of the parallel router, a RR node is considered to be inside a bounding - * box if its drive point is inside it (xlow, ylow if the node doesn't have a direction) */ + * Cutlines are always assumed to be at cutline_axis = (cutline_pos + 0.5). */ inline Side which_side(RRNodeId inode, Axis cutline_axis, int cutline_pos) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - Direction dir = rr_graph.node_direction(inode); - if (cutline_axis == Axis::X) { - int x = dir == Direction::DEC ? rr_graph.node_xhigh(inode) : rr_graph.node_xlow(inode); + int x = rr_graph.node_xlow(inode); return Side(x > cutline_pos); /* 1 is RIGHT */ } else { - int y = dir == Direction::DEC ? 
rr_graph.node_yhigh(inode) : rr_graph.node_ylow(inode); + int y = rr_graph.node_ylow(inode); return Side(y > cutline_pos); } } @@ -149,16 +145,22 @@ inline void convex_hull_downsample(ParentNetId net_id, const t_bb& net_bb, vtr:: RRNodeId rr_sink = route_ctx.net_rr_terminals[net_id][i]; if (!inside_bb(rr_sink, net_bb)) continue; - SinkPoint point{rr_graph.node_xlow(rr_sink), rr_graph.node_ylow(rr_sink), int(i)}; + int x = rr_graph.node_xlow(rr_sink); + int y = rr_graph.node_ylow(rr_sink); + SinkPoint point{x, y, int(i)}; sink_points.push_back(point); } auto hull = sink_sampling::quickhull(sink_points); + auto& is_isink_reached = tree.get_is_isink_reached(); + /* Sample if not source */ for (auto& point : hull) { if (point.isink == 0) /* source */ continue; + if(is_isink_reached.get(point.isink)) + continue; out.set(point.isink, true); } } diff --git a/vpr/src/route/spatial_route_tree_lookup.cpp b/vpr/src/route/spatial_route_tree_lookup.cpp index 3d3f7a25460..ddbb066a188 100644 --- a/vpr/src/route/spatial_route_tree_lookup.cpp +++ b/vpr/src/route/spatial_route_tree_lookup.cpp @@ -17,7 +17,11 @@ SpatialRouteTreeLookup build_route_tree_spatial_lookup(const Netlist<>& net_list float bb_area_per_sink = bb_area / fanout; float bin_area = BIN_AREA_PER_SINK_FACTOR * bb_area_per_sink; - float bin_dim = std::ceil(std::sqrt(bin_area)); + /* Set a minimum bin dimension so that we don't get minuscule bin sizes + * when flat routing is enabled and every LUT input becomes a sink. + * (P.S. This took some time to debug.) */ + constexpr float MIN_BIN_DIM = 3; + float bin_dim = std::max(MIN_BIN_DIM, std::ceil(std::sqrt(bin_area))); size_t bins_x = std::ceil(device_ctx.grid.width() / bin_dim); size_t bins_y = std::ceil(device_ctx.grid.height() / bin_dim);