From 5f837988ae790815cd845233d80b5f61c77c4c70 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 24 Jan 2020 11:47:01 -0800 Subject: [PATCH 01/11] Move rr node storage behind an object. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/base/vpr_context.h | 3 ++- vpr/src/route/clock_network_builders.cpp | 12 ++++----- vpr/src/route/clock_network_builders.h | 15 ++++++----- vpr/src/route/rr_graph.cpp | 34 ++++++++++++------------ vpr/src/route/rr_graph.h | 4 ++- vpr/src/route/rr_graph_clock.h | 5 ++-- 6 files changed, 39 insertions(+), 34 deletions(-) diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index fc941286fc1..59f75bdd991 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -9,6 +9,7 @@ #include "vtr_vector.h" #include "atom_netlist.h" #include "clustered_netlist.h" +#include "rr_node_storage.h" #include "rr_node.h" #include "tatum/TimingGraph.hpp" #include "tatum/TimingConstraints.hpp" @@ -145,7 +146,7 @@ struct DeviceContext : public Context { t_chan_width chan_width; /* Structures to define the routing architecture of the FPGA. */ - std::vector rr_nodes; /* autogenerated in build_rr_graph */ + t_rr_node_storage rr_nodes; /* autogenerated in build_rr_graph */ std::vector rr_indexed_data; /* [0 .. 
num_rr_indexed_data-1] */ diff --git a/vpr/src/route/clock_network_builders.cpp b/vpr/src/route/clock_network_builders.cpp index 1f1f5cc06a6..2bc425fa37d 100644 --- a/vpr/src/route/clock_network_builders.cpp +++ b/vpr/src/route/clock_network_builders.cpp @@ -61,7 +61,7 @@ void ClockNetwork::set_num_instance(int num_inst) { */ void ClockNetwork::create_rr_nodes_for_clock_network_wires(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) { for (int inst_num = 0; inst_num < get_num_inst(); inst_num++) { @@ -215,7 +215,7 @@ size_t ClockRib::estimate_additional_nodes(const DeviceGrid& grid) { } void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) { // Only chany wires need to know the number of segments inorder @@ -307,7 +307,7 @@ int ClockRib::create_chanx_wire(int x_start, int y, int ptc_num, e_direction direction, - std::vector* rr_nodes) { + t_rr_node_storage* rr_nodes) { rr_nodes->emplace_back(); auto node_index = rr_nodes->size() - 1; auto& node = rr_nodes->back(); @@ -502,7 +502,7 @@ size_t ClockSpine::estimate_additional_nodes(const DeviceGrid& grid) { } void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) { auto& grid = clock_graph.grid(); @@ -596,7 +596,7 @@ int ClockSpine::create_chany_wire(int y_start, int x, int ptc_num, e_direction direction, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, int num_segments) { rr_nodes->emplace_back(); auto node_index = rr_nodes->size() - 1; @@ -662,7 +662,7 @@ size_t ClockHTree::estimate_additional_nodes(const DeviceGrid& /*grid*/) { } void 
ClockHTree::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) { //Remove unused parameter warning diff --git a/vpr/src/route/clock_network_builders.h b/vpr/src/route/clock_network_builders.h index c4caa039b2e..27d720d3f92 100644 --- a/vpr/src/route/clock_network_builders.h +++ b/vpr/src/route/clock_network_builders.h @@ -11,6 +11,7 @@ #include "rr_graph2.h" #include "rr_graph_clock.h" +class t_rr_node_storage; class ClockRRGraphBuilder; enum class ClockType { @@ -102,13 +103,13 @@ class ClockNetwork { /* Creates the RR nodes for the clock network wires and adds them to the reverse lookup * in ClockRRGraphBuilder. The reverse lookup maps the nodes to their switch point locations */ void create_rr_nodes_for_clock_network_wires(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments); virtual void create_segments(std::vector& segment_inf) = 0; virtual void create_rr_nodes_and_internal_edges_for_one_instance( ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) = 0; @@ -163,7 +164,7 @@ class ClockRib : public ClockNetwork { */ void create_segments(std::vector& segment_inf) override; void create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) override; size_t estimate_additional_nodes(const DeviceGrid& grid) override; @@ -172,7 +173,7 @@ class ClockRib : public ClockNetwork { int y, int ptc_num, e_direction direction, - std::vector* rr_nodes); + t_rr_node_storage* rr_nodes); void record_tap_locations(unsigned x_start, unsigned x_end, unsigned y, @@ -222,7 +223,7 @@ class ClockSpine : public 
ClockNetwork { */ void create_segments(std::vector& segment_inf) override; void create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) override; size_t estimate_additional_nodes(const DeviceGrid& grid) override; @@ -231,7 +232,7 @@ class ClockSpine : public ClockNetwork { int x, int ptc_num, e_direction direction, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, int num_segments); void record_tap_locations(unsigned y_start, unsigned y_end, @@ -257,7 +258,7 @@ class ClockHTree : private ClockNetwork { // TODO: Unimplemented member function void create_segments(std::vector& segment_inf) override; void create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) override; size_t estimate_additional_nodes(const DeviceGrid& grid) override; diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 6e6788f5868..8632399ae37 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -105,7 +105,7 @@ static void build_bidir_rr_opins(const int i, const int j, const e_side side, const t_rr_node_indices& L_rr_node_indices, - const std::vector& rr_nodes, + const t_rr_node_storage& rr_nodes, const t_pin_to_track_lookup& opin_to_track_map, const std::vector>& Fc_out, t_rr_edge_info_set& created_rr_edges, @@ -130,7 +130,7 @@ static void build_unidir_rr_opins(const int i, t_rr_edge_info_set& created_rr_edges, bool* Fc_clipped, const t_rr_node_indices& L_rr_node_indices, - const std::vector& rr_nodes, + const t_rr_node_storage& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs, @@ -143,12 +143,12 @@ static int get_opin_direct_connecions(int x, int from_rr_node, t_rr_edge_info_set& rr_edges_to_create, const 
t_rr_node_indices& L_rr_node_indices, - const std::vector& rr_nodes, + const t_rr_node_storage& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs); -static std::function alloc_and_load_rr_graph(std::vector& L_rr_node, +static std::function alloc_and_load_rr_graph(t_rr_node_storage& L_rr_node, const int num_seg_types, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, @@ -209,7 +209,7 @@ static std::vector> alloc_and_load_perturb_ipins(const int L_n static void build_rr_sinks_sources(const int i, const int j, - std::vector& L_rr_node, + t_rr_node_storage& L_rr_node, t_rr_edge_info_set& rr_edges_to_create, const t_rr_node_indices& L_rr_node_indices, const int delayless_switch, @@ -231,13 +231,13 @@ static void build_rr_chan(const int i, const t_chan_details& chan_details_y, const t_rr_node_indices& L_rr_node_indices, t_rr_edge_info_set& created_rr_edges, - std::vector& L_rr_node, + t_rr_node_storage& L_rr_node, const int wire_to_ipin_switch, const enum e_directionality directionality); void uniquify_edges(t_rr_edge_info_set& rr_edges_to_create); -void alloc_and_load_edges(std::vector& L_rr_node, +void alloc_and_load_edges(t_rr_node_storage& L_rr_node, const t_rr_edge_info_set& rr_edges_to_create); static void alloc_and_load_rr_switch_inf(const int num_arch_switches, @@ -275,7 +275,7 @@ static std::vector> alloc_and_load_actual_fc(const std::vector< const enum e_directionality directionality, bool* Fc_clipped); -static int pick_best_direct_connect_target_rr_node(const std::vector& rr_nodes, +static int pick_best_direct_connect_target_rr_node(const t_rr_node_storage& rr_nodes, int from_rr, const std::vector& candidate_rr_nodes); @@ -1187,7 +1187,7 @@ static void free_type_track_to_pin_map(t_track_to_pin_lookup& track_to_pin_map, /* Does the actual work of allocating the rr_graph and filling all the * * appropriate values. Everything up to this was just a prelude! 
*/ -static std::function alloc_and_load_rr_graph(std::vector& L_rr_node, +static std::function alloc_and_load_rr_graph(t_rr_node_storage& L_rr_node, const int num_seg_types, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, @@ -1335,7 +1335,7 @@ static void build_bidir_rr_opins(const int i, const int j, const e_side side, const t_rr_node_indices& L_rr_node_indices, - const std::vector& rr_nodes, + const t_rr_node_storage& rr_nodes, const t_pin_to_track_lookup& opin_to_track_map, const std::vector>& Fc_out, t_rr_edge_info_set& rr_edges_to_create, @@ -1428,7 +1428,7 @@ void free_rr_graph() { static void build_rr_sinks_sources(const int i, const int j, - std::vector& L_rr_node, + t_rr_node_storage& L_rr_node, t_rr_edge_info_set& rr_edges_to_create, const t_rr_node_indices& L_rr_node_indices, const int delayless_switch, @@ -1557,7 +1557,7 @@ static void build_rr_sinks_sources(const int i, //Create the actual edges } -void init_fan_in(std::vector& L_rr_node, const int num_rr_nodes) { +void init_fan_in(t_rr_node_storage& L_rr_node, const int num_rr_nodes) { //Loads fan-ins for all nodes //Reset all fan-ins to zero @@ -1593,7 +1593,7 @@ static void build_rr_chan(const int x_coord, const t_chan_details& chan_details_y, const t_rr_node_indices& L_rr_node_indices, t_rr_edge_info_set& rr_edges_to_create, - std::vector& L_rr_node, + t_rr_node_storage& L_rr_node, const int wire_to_ipin_switch, const enum e_directionality directionality) { /* this function builds both x and y-directed channel segments, so set up our @@ -1756,7 +1756,7 @@ void uniquify_edges(t_rr_edge_info_set& rr_edges_to_create) { rr_edges_to_create.erase(std::unique(rr_edges_to_create.begin(), rr_edges_to_create.end()), rr_edges_to_create.end()); } -void alloc_and_load_edges(std::vector& L_rr_node, +void alloc_and_load_edges(t_rr_node_storage& L_rr_node, const t_rr_edge_info_set& rr_edges_to_create) { /* Sets up all the edge related information for rr_node */ @@ -2592,7 +2592,7 @@ 
std::string describe_rr_node(int inode) { return msg; } -static void build_unidir_rr_opins(const int i, const int j, const e_side side, const DeviceGrid& grid, const std::vector>& Fc_out, const int max_chan_width, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, vtr::NdMatrix& Fc_xofs, vtr::NdMatrix& Fc_yofs, t_rr_edge_info_set& rr_edges_to_create, bool* Fc_clipped, const t_rr_node_indices& L_rr_node_indices, const std::vector& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs, const int num_seg_types) { +static void build_unidir_rr_opins(const int i, const int j, const e_side side, const DeviceGrid& grid, const std::vector>& Fc_out, const int max_chan_width, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, vtr::NdMatrix& Fc_xofs, vtr::NdMatrix& Fc_yofs, t_rr_edge_info_set& rr_edges_to_create, bool* Fc_clipped, const t_rr_node_indices& L_rr_node_indices, const t_rr_node_storage& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs, const int num_seg_types) { /* * This routine adds the edges from opins to channels at the specified * grid location (i,j) and grid tile side @@ -2827,7 +2827,7 @@ static int get_opin_direct_connecions(int x, int from_rr_node, t_rr_edge_info_set& rr_edges_to_create, const t_rr_node_indices& L_rr_node_indices, - const std::vector& rr_nodes, + const t_rr_node_storage& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs) { @@ -3039,7 +3039,7 @@ static std::vector alloc_and_load_perturb_opins(const t_physical_tile_type return perturb_opins; } -static int pick_best_direct_connect_target_rr_node(const std::vector& rr_nodes, +static int pick_best_direct_connect_target_rr_node(const t_rr_node_storage& rr_nodes, int from_rr, const std::vector& candidate_rr_nodes) { //With physically equivalent pins there may be multiple 
candidate rr nodes (which are equivalent) diff --git a/vpr/src/route/rr_graph.h b/vpr/src/route/rr_graph.h index f55a64f7f9f..527b06a4a19 100644 --- a/vpr/src/route/rr_graph.h +++ b/vpr/src/route/rr_graph.h @@ -45,7 +45,9 @@ void free_rr_graph(); //Returns a brief one-line summary of an RR node std::string describe_rr_node(int inode); -void init_fan_in(std::vector& L_rr_node, const int num_rr_nodes); +class t_rr_node_storage; + +void init_fan_in(t_rr_node_storage& L_rr_node, const int num_rr_nodes); // Sets the spec for the rr_switch based on the arch switch void load_rr_switch_from_arch_switch(int arch_switch_idx, diff --git a/vpr/src/route/rr_graph_clock.h b/vpr/src/route/rr_graph_clock.h index 3ee3a18ee00..162ca58e6c8 100644 --- a/vpr/src/route/rr_graph_clock.h +++ b/vpr/src/route/rr_graph_clock.h @@ -15,6 +15,7 @@ class ClockNetwork; class ClockConnection; +class t_rr_node_storage; class SwitchPoint { /* A switch point object: keeps information on the location and and rr_node indices @@ -76,7 +77,7 @@ class ClockRRGraphBuilder { ClockRRGraphBuilder( const t_chan_width& chan_width, const DeviceGrid& grid, - std::vector* rr_nodes) + t_rr_node_storage* rr_nodes) : chan_width_(chan_width) , grid_(grid) , rr_nodes_(rr_nodes) @@ -132,7 +133,7 @@ class ClockRRGraphBuilder { const t_chan_width& chan_width_; const DeviceGrid& grid_; - std::vector* rr_nodes_; + t_rr_node_storage* rr_nodes_; int chanx_ptc_idx_; int chany_ptc_idx_; From 291f0eac7d0be274b1c5382de01df53da0e95b1f Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 24 Jan 2020 14:12:51 -0800 Subject: [PATCH 02/11] Convert t_rr_node to a fly-weight object. This should have a negligible performance impact, but this enables future changes to modify how rr nodes and rr edges are stored.
Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/base/read_route.cpp | 2 +- vpr/src/draw/draw.cpp | 14 +- vpr/src/draw/draw.h | 2 +- vpr/src/power/power.cpp | 94 +++++------ vpr/src/route/clock_network_builders.cpp | 4 +- vpr/src/route/rr_graph.cpp | 2 +- vpr/src/route/rr_node.cpp | 190 +++++++++-------------- vpr/src/route/rr_node.h | 107 ++++--------- vpr/src/route/rr_node_fwd.h | 4 +- vpr/src/route/rr_node_impl.h | 163 +++++++++++++++++++ vpr/src/route/rr_node_storage.h | 133 ++++++++++++++++ 11 files changed, 454 insertions(+), 261 deletions(-) create mode 100644 vpr/src/route/rr_node_impl.h create mode 100644 vpr/src/route/rr_node_storage.h diff --git a/vpr/src/base/read_route.cpp b/vpr/src/base/read_route.cpp index 9ec4069fe2c..31408f21223 100644 --- a/vpr/src/base/read_route.cpp +++ b/vpr/src/base/read_route.cpp @@ -231,7 +231,7 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file } else if (tokens[0] == "Node:") { /*An actual line, go through each node and add it to the route tree*/ inode = atoi(tokens[1].c_str()); - auto& node = device_ctx.rr_nodes[inode]; + auto node = device_ctx.rr_nodes[inode]; /*First node needs to be source. It is isolated to correctly set heap head.*/ if (node_count == 0 && tokens[2] != "SOURCE") { diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp index 7cf406e111e..31f9246c708 100644 --- a/vpr/src/draw/draw.cpp +++ b/vpr/src/draw/draw.cpp @@ -2051,10 +2051,10 @@ static void draw_rr_pin(int inode, const ezgl::color& color, ezgl::renderer* g) * the physical pin is on. 
*/ void draw_get_rr_pin_coords(int inode, float* xcen, float* ycen) { auto& device_ctx = g_vpr_ctx.device(); - draw_get_rr_pin_coords(&device_ctx.rr_nodes[inode], xcen, ycen); + draw_get_rr_pin_coords(device_ctx.rr_nodes[inode], xcen, ycen); } -void draw_get_rr_pin_coords(const t_rr_node* node, float* xcen, float* ycen) { +void draw_get_rr_pin_coords(const t_rr_node node, float* xcen, float* ycen) { t_draw_coords* draw_coords = get_draw_coords_vars(); int i, j, k, ipin, pins_per_sub_tile; @@ -2062,13 +2062,13 @@ void draw_get_rr_pin_coords(const t_rr_node* node, float* xcen, float* ycen) { t_physical_tile_type_ptr type; auto& device_ctx = g_vpr_ctx.device(); - i = node->xlow(); - j = node->ylow(); + i = node.xlow(); + j = node.ylow(); xc = draw_coords->tile_x[i]; yc = draw_coords->tile_y[j]; - ipin = node->ptc_num(); + ipin = node.ptc_num(); type = device_ctx.grid[i][j].type; pins_per_sub_tile = type->num_pins / type->capacity; k = ipin / pins_per_sub_tile; @@ -2080,7 +2080,7 @@ void draw_get_rr_pin_coords(const t_rr_node* node, float* xcen, float* ycen) { step = (float)(draw_coords->get_tile_width()) / (float)(type->num_pins + type->capacity); offset = (ipin + k + 1) * step; - switch (node->side()) { + switch (node.side()) { case LEFT: yc += offset; break; @@ -2101,7 +2101,7 @@ void draw_get_rr_pin_coords(const t_rr_node* node, float* xcen, float* ycen) { default: vpr_throw(VPR_ERROR_OTHER, __FILE__, __LINE__, - "in draw_get_rr_pin_coords: Unexpected side %s.\n", node->side_string()); + "in draw_get_rr_pin_coords: Unexpected side %s.\n", node.side_string()); break; } diff --git a/vpr/src/draw/draw.h b/vpr/src/draw/draw.h index eb8df093044..d1a8ebeba7d 100644 --- a/vpr/src/draw/draw.h +++ b/vpr/src/draw/draw.h @@ -32,7 +32,7 @@ void free_draw_structs(); #ifndef NO_GRAPHICS void draw_get_rr_pin_coords(int inode, float* xcen, float* ycen); -void draw_get_rr_pin_coords(const t_rr_node* node, float* xcen, float* ycen); +void draw_get_rr_pin_coords(const t_rr_node node, 
float* xcen, float* ycen); void draw_triangle_along_line(ezgl::renderer* g, ezgl::point2d start, ezgl::point2d end, float relative_position = 1., float arrow_size = DEFAULT_ARROW_SIZE); void draw_triangle_along_line(ezgl::renderer* g, ezgl::point2d loc, ezgl::point2d start, ezgl::point2d end, float arrow_size = DEFAULT_ARROW_SIZE); diff --git a/vpr/src/power/power.cpp b/vpr/src/power/power.cpp index d4e17c0f852..1a2587c640e 100644 --- a/vpr/src/power/power.cpp +++ b/vpr/src/power/power.cpp @@ -815,19 +815,19 @@ static void power_usage_routing(t_power_usage* power_usage, t_trace* trace; for (trace = route_ctx.trace[net_id].head; trace != nullptr; trace = trace->next) { - auto node = &device_ctx.rr_nodes[trace->index]; + auto node = device_ctx.rr_nodes[trace->index]; t_rr_node_power* node_power = &rr_node_power[trace->index]; if (node_power->visited) { continue; } - for (t_edge_size edge_idx = 0; edge_idx < node->num_edges(); edge_idx++) { - if (node->edge_sink_node(edge_idx) != OPEN) { - auto next_node = &device_ctx.rr_nodes[node->edge_sink_node(edge_idx)]; - t_rr_node_power* next_node_power = &rr_node_power[node->edge_sink_node(edge_idx)]; + for (t_edge_size edge_idx = 0; edge_idx < node.num_edges(); edge_idx++) { + if (node.edge_sink_node(edge_idx) != OPEN) { + auto next_node = device_ctx.rr_nodes[node.edge_sink_node(edge_idx)]; + t_rr_node_power* next_node_power = &rr_node_power[node.edge_sink_node(edge_idx)]; - switch (next_node->type()) { + switch (next_node.type()) { case CHANX: case CHANY: case IPIN: @@ -837,9 +837,9 @@ static void power_usage_routing(t_power_usage* power_usage, next_node_power->in_dens[next_node_power->num_inputs] = clb_net_density(node_power->net_num); next_node_power->in_prob[next_node_power->num_inputs] = clb_net_prob(node_power->net_num); next_node_power->num_inputs++; - if (next_node_power->num_inputs > next_node->fan_in()) { + if (next_node_power->num_inputs > next_node.fan_in()) { VTR_LOG("%d %d\n", next_node_power->num_inputs, - 
next_node->fan_in()); + next_node.fan_in()); fflush(nullptr); VTR_ASSERT(0); } @@ -857,7 +857,7 @@ static void power_usage_routing(t_power_usage* power_usage, /* Calculate power of all routing entities */ for (size_t rr_node_idx = 0; rr_node_idx < device_ctx.rr_nodes.size(); rr_node_idx++) { t_power_usage sub_power_usage; - auto node = &device_ctx.rr_nodes[rr_node_idx]; + auto node = device_ctx.rr_nodes[rr_node_idx]; t_rr_node_power* node_power = &rr_node_power[rr_node_idx]; float C_wire; float buffer_size; @@ -866,7 +866,7 @@ static void power_usage_routing(t_power_usage* power_usage, //float C_per_seg_split; int wire_length; - switch (node->type()) { + switch (node.type()) { case SOURCE: case SINK: case OPIN: @@ -877,13 +877,13 @@ static void power_usage_routing(t_power_usage* power_usage, * - Driver (accounted for at end of CHANX/Y - see below) * - Multiplexor */ - if (node->fan_in()) { + if (node.fan_in()) { VTR_ASSERT(node_power->in_dens); VTR_ASSERT(node_power->in_prob); /* Multiplexor */ power_usage_mux_multilevel(&sub_power_usage, - power_get_mux_arch(node->fan_in(), + power_get_mux_arch(node.fan_in(), power_ctx.arch->mux_transistor_size), node_power->in_prob, node_power->in_dens, node_power->selected_input, true, @@ -904,19 +904,19 @@ static void power_usage_routing(t_power_usage* power_usage, VTR_ASSERT(node_power->in_prob); wire_length = 0; - if (node->type() == CHANX) { - wire_length = node->xhigh() - node->xlow() + 1; - } else if (node->type() == CHANY) { - wire_length = node->yhigh() - node->ylow() + 1; + if (node.type() == CHANX) { + wire_length = node.xhigh() - node.xlow() + 1; + } else if (node.type() == CHANY) { + wire_length = node.yhigh() - node.ylow() + 1; } C_wire = wire_length - * segment_inf[device_ctx.rr_indexed_data[node->cost_index()].seg_index].Cmetal; + * segment_inf[device_ctx.rr_indexed_data[node.cost_index()].seg_index].Cmetal; //(double)power_ctx.commonly_used->tile_length); - VTR_ASSERT(node_power->selected_input < node->fan_in()); 
+ VTR_ASSERT(node_power->selected_input < node.fan_in()); /* Multiplexor */ power_usage_mux_multilevel(&sub_power_usage, - power_get_mux_arch(node->fan_in(), + power_get_mux_arch(node.fan_in(), power_ctx.arch->mux_transistor_size), node_power->in_prob, node_power->in_dens, node_power->selected_input, true, power_ctx.solution_inf.T_crit); @@ -979,10 +979,10 @@ static void power_usage_routing(t_power_usage* power_usage, /* Determine types of switches that this wire drives */ connectionbox_fanout = 0; switchbox_fanout = 0; - for (t_edge_size iedge = 0; iedge < node->num_edges(); iedge++) { - if (node->edge_switch(iedge) == routing_arch->wire_to_rr_ipin_switch) { + for (t_edge_size iedge = 0; iedge < node.num_edges(); iedge++) { + if (node.edge_switch(iedge) == routing_arch->wire_to_rr_ipin_switch) { connectionbox_fanout++; - } else if (node->edge_switch(iedge) == routing_arch->delayless_switch) { + } else if (node.edge_switch(iedge) == routing_arch->delayless_switch) { /* Do nothing */ } else { switchbox_fanout++; @@ -1205,37 +1205,37 @@ void power_routing_init(const t_det_routing_arch* routing_arch) { for (size_t rr_node_idx = 0; rr_node_idx < device_ctx.rr_nodes.size(); rr_node_idx++) { int fanout_to_IPIN = 0; int fanout_to_seg = 0; - auto node = &device_ctx.rr_nodes[rr_node_idx]; + auto node = device_ctx.rr_nodes[rr_node_idx]; t_rr_node_power* node_power = &rr_node_power[rr_node_idx]; - switch (node->type()) { + switch (node.type()) { case IPIN: max_IPIN_fanin = std::max(max_IPIN_fanin, - static_cast(node->fan_in())); - max_fanin = std::max(max_fanin, static_cast(node->fan_in())); + static_cast(node.fan_in())); + max_fanin = std::max(max_fanin, static_cast(node.fan_in())); - node_power->in_dens = (float*)vtr::calloc(node->fan_in(), + node_power->in_dens = (float*)vtr::calloc(node.fan_in(), sizeof(float)); - node_power->in_prob = (float*)vtr::calloc(node->fan_in(), + node_power->in_prob = (float*)vtr::calloc(node.fan_in(), sizeof(float)); break; case CHANX: case 
CHANY: - for (t_edge_size iedge = 0; iedge < node->num_edges(); iedge++) { - if (node->edge_switch(iedge) == routing_arch->wire_to_rr_ipin_switch) { + for (t_edge_size iedge = 0; iedge < node.num_edges(); iedge++) { + if (node.edge_switch(iedge) == routing_arch->wire_to_rr_ipin_switch) { fanout_to_IPIN++; - } else if (node->edge_switch(iedge) != routing_arch->delayless_switch) { + } else if (node.edge_switch(iedge) != routing_arch->delayless_switch) { fanout_to_seg++; } } max_seg_to_IPIN_fanout = std::max(max_seg_to_IPIN_fanout, fanout_to_IPIN); max_seg_to_seg_fanout = std::max(max_seg_to_seg_fanout, fanout_to_seg); - max_fanin = std::max(max_fanin, static_cast(node->fan_in())); + max_fanin = std::max(max_fanin, static_cast(node.fan_in())); - node_power->in_dens = (float*)vtr::calloc(node->fan_in(), + node_power->in_dens = (float*)vtr::calloc(node.fan_in(), sizeof(float)); - node_power->in_prob = (float*)vtr::calloc(node->fan_in(), + node_power->in_prob = (float*)vtr::calloc(node.fan_in(), sizeof(float)); break; default: @@ -1254,14 +1254,14 @@ void power_routing_init(const t_det_routing_arch* routing_arch) { /* Populate driver switch type */ for (size_t rr_node_idx = 0; rr_node_idx < device_ctx.rr_nodes.size(); rr_node_idx++) { - auto node = &device_ctx.rr_nodes[rr_node_idx]; + auto node = device_ctx.rr_nodes[rr_node_idx]; - for (t_edge_size edge_idx = 0; edge_idx < node->num_edges(); edge_idx++) { - if (node->edge_sink_node(edge_idx) != OPEN) { - if (rr_node_power[node->edge_sink_node(edge_idx)].driver_switch_type == OPEN) { - rr_node_power[node->edge_sink_node(edge_idx)].driver_switch_type = node->edge_switch(edge_idx); + for (t_edge_size edge_idx = 0; edge_idx < node.num_edges(); edge_idx++) { + if (node.edge_sink_node(edge_idx) != OPEN) { + if (rr_node_power[node.edge_sink_node(edge_idx)].driver_switch_type == OPEN) { + rr_node_power[node.edge_sink_node(edge_idx)].driver_switch_type = node.edge_switch(edge_idx); } else { - 
VTR_ASSERT(rr_node_power[node->edge_sink_node(edge_idx)].driver_switch_type == node->edge_switch(edge_idx)); + VTR_ASSERT(rr_node_power[node.edge_sink_node(edge_idx)].driver_switch_type == node.edge_switch(edge_idx)); } } } @@ -1270,13 +1270,13 @@ void power_routing_init(const t_det_routing_arch* routing_arch) { /* Find Max Fanout of Routing Buffer */ t_edge_size max_seg_fanout = 0; for (size_t rr_node_idx = 0; rr_node_idx < device_ctx.rr_nodes.size(); rr_node_idx++) { - auto node = &device_ctx.rr_nodes[rr_node_idx]; + auto node = device_ctx.rr_nodes[rr_node_idx]; - switch (node->type()) { + switch (node.type()) { case CHANX: case CHANY: - if (node->num_edges() > max_seg_fanout) { - max_seg_fanout = node->num_edges(); + if (node.num_edges() > max_seg_fanout) { + max_seg_fanout = node.num_edges(); } break; default: @@ -1358,14 +1358,14 @@ bool power_uninit() { bool error = false; for (size_t rr_node_idx = 0; rr_node_idx < device_ctx.rr_nodes.size(); rr_node_idx++) { - auto node = &device_ctx.rr_nodes[rr_node_idx]; + auto node = device_ctx.rr_nodes[rr_node_idx]; t_rr_node_power* node_power = &rr_node_power[rr_node_idx]; - switch (node->type()) { + switch (node.type()) { case CHANX: case CHANY: case IPIN: - if (node->fan_in()) { + if (node.fan_in()) { free(node_power->in_dens); free(node_power->in_prob); } diff --git a/vpr/src/route/clock_network_builders.cpp b/vpr/src/route/clock_network_builders.cpp index 2bc425fa37d..2af4509c013 100644 --- a/vpr/src/route/clock_network_builders.cpp +++ b/vpr/src/route/clock_network_builders.cpp @@ -310,7 +310,7 @@ int ClockRib::create_chanx_wire(int x_start, t_rr_node_storage* rr_nodes) { rr_nodes->emplace_back(); auto node_index = rr_nodes->size() - 1; - auto& node = rr_nodes->back(); + auto node = rr_nodes->back(); node.set_coordinates(x_start, y, x_end, y); node.set_type(CHANX); @@ -600,7 +600,7 @@ int ClockSpine::create_chany_wire(int y_start, int num_segments) { rr_nodes->emplace_back(); auto node_index = rr_nodes->size() - 1; 
- auto& node = rr_nodes->back(); + auto node = rr_nodes->back(); node.set_coordinates(x, y_start, x, y_end); node.set_type(CHANY); diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 8632399ae37..75786b63100 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -933,7 +933,7 @@ static void remap_rr_node_switch_indices(const t_arch_switch_fanin& switch_fanin auto& device_ctx = g_vpr_ctx.mutable_device(); for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) { - auto& from_node = device_ctx.rr_nodes[inode]; + auto from_node = device_ctx.rr_nodes[inode]; int num_edges = from_node.num_edges(); for (int iedge = 0; iedge < num_edges; iedge++) { const t_rr_node& to_node = device_ctx.rr_nodes[from_node.edge_sink_node(iedge)]; diff --git a/vpr/src/route/rr_node.cpp b/vpr/src/route/rr_node.cpp index 97aa653d450..2dd2fa1fd50 100644 --- a/vpr/src/route/rr_node.cpp +++ b/vpr/src/route/rr_node.cpp @@ -1,4 +1,5 @@ #include "rr_node.h" +#include "rr_node_storage.h" #include "globals.h" #include "vpr_error.h" @@ -10,70 +11,6 @@ const char* t_rr_node::type_string() const { return rr_node_typename[type()]; } -short t_rr_node::xlow() const { - return xlow_; -} - -short t_rr_node::ylow() const { - return ylow_; -} - -short t_rr_node::xhigh() const { - return xhigh_; -} - -short t_rr_node::yhigh() const { - return yhigh_; -} - -short t_rr_node::ptc_num() const { - return ptc_.pin_num; //TODO eventually remove -} - -short t_rr_node::pin_num() const { - if (type() != IPIN && type() != OPIN) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'pin_num' for non-IPIN/OPIN type '%s'", type_string()); - } - return ptc_.pin_num; -} - -short t_rr_node::track_num() const { - if (type() != CHANX && type() != CHANY) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'track_num' for non-CHANX/CHANY type '%s'", type_string()); - } - return ptc_.track_num; -} - -short t_rr_node::class_num() const { - if (type() != 
SOURCE && type() != SINK) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'class_num' for non-SOURCE/SINK type '%s'", type_string()); - } - return ptc_.class_num; -} - -short t_rr_node::cost_index() const { - return cost_index_; -} - -short t_rr_node::rc_index() const { - return rc_index_; -} - -short t_rr_node::capacity() const { - return capacity_; -} - -t_edge_size t_rr_node::fan_in() const { - return fan_in_; -} - -e_direction t_rr_node::direction() const { - if (type() != CHANX && type() != CHANY) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'direction' for non-channel type '%s'", type_string()); - } - return dir_side_.direction; -} - const char* t_rr_node::direction_string() const { if (direction() == INC_DIRECTION) { return "INC_DIR"; @@ -87,20 +24,14 @@ const char* t_rr_node::direction_string() const { return "NO_DIR"; } -e_side t_rr_node::side() const { - if (type() != IPIN && type() != OPIN) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'side' for non-IPIN/OPIN type '%s'", type_string()); - } - return dir_side_.side; -} - const char* t_rr_node::side_string() const { return SIDE_STRING[side()]; } //Returns the max 'length' over the x or y direction short t_rr_node::length() const { - return std::max(yhigh_ - ylow_, xhigh_ - xlow_); + const auto& node = storage_->get(id_); + return std::max(node.yhigh_ - node.ylow_, node.xhigh_ - node.xlow_); } bool t_rr_node::edge_is_configurable(t_edge_size iedge) const { @@ -124,8 +55,9 @@ float t_rr_node::C() const { bool t_rr_node::validate() const { //Check internal assumptions about RR node are valid + auto& node = storage_->get(id_); - if (num_edges_ > edges_capacity_) { + if (node.num_edges_ > node.edges_capacity_) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "RR Node number of edges exceeded edge capacity"); } @@ -151,7 +83,8 @@ bool t_rr_node::validate() const { } void t_rr_node::set_type(t_rr_type new_type) { - type_ = new_type; + auto& node = storage_->get(id_); 
+ node.type_ = new_type; } /* @@ -159,163 +92,180 @@ void t_rr_node::set_type(t_rr_type new_type) { * They do not have to be in any particular order. */ void t_rr_node::set_coordinates(short x1, short y1, short x2, short y2) { + auto& node = storage_->get(id_); if (x1 < x2) { - xlow_ = x1; - xhigh_ = x2; + node.xlow_ = x1; + node.xhigh_ = x2; } else { - xlow_ = x2; - xhigh_ = x1; + node.xlow_ = x2; + node.xhigh_ = x1; } if (y1 < y2) { - ylow_ = y1; - yhigh_ = y2; + node.ylow_ = y1; + node.yhigh_ = y2; } else { - ylow_ = y2; - yhigh_ = y1; + node.ylow_ = y2; + node.yhigh_ = y1; } } void t_rr_node::set_ptc_num(short new_ptc_num) { - ptc_.pin_num = new_ptc_num; //TODO: eventually remove + auto& node = storage_->get(id_); + node.ptc_.pin_num = new_ptc_num; //TODO: eventually remove } void t_rr_node::set_pin_num(short new_pin_num) { if (type() != IPIN && type() != OPIN) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'pin_num' for non-IPIN/OPIN type '%s'", type_string()); } - ptc_.pin_num = new_pin_num; + auto& node = storage_->get(id_); + node.ptc_.pin_num = new_pin_num; } void t_rr_node::set_track_num(short new_track_num) { if (type() != CHANX && type() != CHANY) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'track_num' for non-CHANX/CHANY type '%s'", type_string()); } - ptc_.track_num = new_track_num; + auto& node = storage_->get(id_); + node.ptc_.track_num = new_track_num; } void t_rr_node::set_class_num(short new_class_num) { if (type() != SOURCE && type() != SINK) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'class_num' for non-SOURCE/SINK type '%s'", type_string()); } - ptc_.class_num = new_class_num; + auto& node = storage_->get(id_); + node.ptc_.class_num = new_class_num; } void t_rr_node::set_cost_index(size_t new_cost_index) { - if (new_cost_index >= std::numeric_limits::max()) { + auto& node = storage_->get(id_); + if (new_cost_index >= std::numeric_limits::max()) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to 
set cost_index_ %zu above cost_index storage max value.", new_cost_index); } - cost_index_ = new_cost_index; + node.cost_index_ = new_cost_index; } void t_rr_node::set_rc_index(short new_rc_index) { - rc_index_ = new_rc_index; + auto& node = storage_->get(id_); + node.rc_index_ = new_rc_index; } void t_rr_node::set_capacity(short new_capacity) { VTR_ASSERT(new_capacity >= 0); - capacity_ = new_capacity; + auto& node = storage_->get(id_); + node.capacity_ = new_capacity; } void t_rr_node::set_fan_in(t_edge_size new_fan_in) { - fan_in_ = new_fan_in; + auto& node = storage_->get(id_); + node.fan_in_ = new_fan_in; } t_edge_size t_rr_node::add_edge(int sink_node, int iswitch) { - if (edges_capacity_ == num_edges_) { - constexpr size_t MAX_EDGE_COUNT = std::numeric_limits::max(); - if (edges_capacity_ == MAX_EDGE_COUNT) { + auto& node = storage_->get(id_); + if (node.edges_capacity_ == node.num_edges_) { + constexpr size_t MAX_EDGE_COUNT = std::numeric_limits::max(); + if (node.edges_capacity_ == MAX_EDGE_COUNT) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Maximum RR Node out-edge count (%zu) exceeded", MAX_EDGE_COUNT); } //Grow - size_t new_edges_capacity = std::max(1, 2 * edges_capacity_); + size_t new_edges_capacity = std::max(1, 2 * node.edges_capacity_); new_edges_capacity = std::min(new_edges_capacity, MAX_EDGE_COUNT); //Clip to maximum count - auto new_edges = std::make_unique(new_edges_capacity); + auto new_edges = std::make_unique(new_edges_capacity); //Copy - std::copy_n(edges_.get(), num_edges_, new_edges.get()); + std::copy_n(node.edges_.get(), node.num_edges_, new_edges.get()); //Replace - edges_ = std::move(new_edges); - edges_capacity_ = new_edges_capacity; + node.edges_ = std::move(new_edges); + node.edges_capacity_ = new_edges_capacity; } - VTR_ASSERT(num_edges_ < edges_capacity_); + VTR_ASSERT(node.num_edges_ < node.edges_capacity_); - edges_[num_edges_].sink_node = sink_node; - edges_[num_edges_].switch_id = iswitch; + node.edges_[node.num_edges_].sink_node = 
sink_node; + node.edges_[node.num_edges_].switch_id = iswitch; - ++num_edges_; + ++node.num_edges_; - return num_edges_; + return node.num_edges_; } void t_rr_node::shrink_to_fit() { //Shrink - auto new_edges = std::make_unique(num_edges_); + auto& node = storage_->get(id_); + auto new_edges = std::make_unique(node.num_edges_); //Copy - std::copy_n(edges_.get(), num_edges_, new_edges.get()); + std::copy_n(node.edges_.get(), node.num_edges_, new_edges.get()); //Replace - edges_ = std::move(new_edges); - edges_capacity_ = num_edges_; + node.edges_ = std::move(new_edges); + node.edges_capacity_ = node.num_edges_; } void t_rr_node::partition_edges() { auto& device_ctx = g_vpr_ctx.device(); - auto is_configurable = [&](const t_rr_edge& edge) { + auto is_configurable = [&](const t_rr_node_data::t_rr_edge& edge) { auto iswitch = edge.switch_id; return device_ctx.rr_switch_inf[iswitch].configurable(); }; //Partition the edges so the first set of edges are all configurable, and the later are not - auto first_non_config_edge = std::partition(edges_.get(), edges_.get() + num_edges_, is_configurable); + auto& node = storage_->get(id_); + auto first_non_config_edge = std::partition(node.edges_.get(), node.edges_.get() + node.num_edges_, is_configurable); - size_t num_conf_edges = std::distance(edges_.get(), first_non_config_edge); + size_t num_conf_edges = std::distance(node.edges_.get(), first_non_config_edge); size_t num_non_conf_edges = num_edges() - num_conf_edges; //Note we calculate using the size_t to get full range //Check that within allowable range (no overflow when stored as num_non_configurable_edges_ - if (num_non_conf_edges > std::numeric_limits::max()) { + if (num_non_conf_edges > std::numeric_limits::max()) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Exceeded RR node maximum number of non-configurable edges"); } - num_non_configurable_edges_ = num_non_conf_edges; //Narrowing + node.num_non_configurable_edges_ = num_non_conf_edges; //Narrowing } void 
t_rr_node::set_num_edges(size_t new_num_edges) { + auto& node = storage_->get(id_); VTR_ASSERT(new_num_edges <= std::numeric_limits::max()); - num_edges_ = new_num_edges; - edges_capacity_ = new_num_edges; + node.num_edges_ = new_num_edges; + node.edges_capacity_ = new_num_edges; - edges_ = std::make_unique(num_edges_); + node.edges_ = std::make_unique(node.num_edges_); } void t_rr_node::set_direction(e_direction new_direction) { if (type() != CHANX && type() != CHANY) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'direction' for non-channel type '%s'", type_string()); } - dir_side_.direction = new_direction; + auto& node = storage_->get(id_); + node.dir_side_.direction = new_direction; } void t_rr_node::set_side(e_side new_side) { if (type() != IPIN && type() != OPIN) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'side' for non-channel type '%s'", type_string()); } - dir_side_.side = new_side; + auto& node = storage_->get(id_); + node.dir_side_.side = new_side; } void t_rr_node::set_edge_sink_node(t_edge_size iedge, int sink_node) { + auto& node = storage_->get(id_); VTR_ASSERT(iedge < num_edges()); VTR_ASSERT(sink_node >= 0); - edges_[iedge].sink_node = sink_node; + node.edges_[iedge].sink_node = sink_node; } void t_rr_node::set_edge_switch(t_edge_size iedge, short switch_index) { + auto& node = storage_->get(id_); VTR_ASSERT(iedge < num_edges()); VTR_ASSERT(switch_index >= 0); - edges_[iedge].switch_id = switch_index; + node.edges_[iedge].switch_id = switch_index; } t_rr_rc_data::t_rr_rc_data(float Rval, float Cval) noexcept diff --git a/vpr/src/route/rr_node.h b/vpr/src/route/rr_node.h index cc9df7a42f8..af6a26ac411 100644 --- a/vpr/src/route/rr_node.h +++ b/vpr/src/route/rr_node.h @@ -1,5 +1,6 @@ #ifndef RR_NODE_H #define RR_NODE_H + #include "rr_node_fwd.h" #include "vpr_types.h" @@ -7,43 +8,13 @@ #include #include -/* Main structure describing one routing resource node. 
Everything in * - * this structure should describe the graph -- information needed only * - * to store algorithm-specific data should be stored in one of the * - * parallel rr_node_* structures. * - * * - * xlow, xhigh, ylow, yhigh: Integer coordinates (see route.c for * - * coordinate system) of the ends of this routing resource. * - * xlow = xhigh and ylow = yhigh for pins or for segments of * - * length 1. These values are used to decide whether or not this * - * node should be added to the expansion heap, based on things * - * like whether it's outside the net bounding box or is moving * - * further away from the target, etc. * - * type: What is this routing resource? * - * ptc_num: Pin, track or class number, depending on rr_node type. * - * Needed to properly draw. * - * cost_index: An integer index into the table of routing resource indexed * - * data t_rr_index_data (this indirection allows quick dynamic * - * changes of rr base costs, and some memory storage savings for * - * fields that have only a few distinct values). * - * capacity: Capacity of this node (number of routes that can use it). * - * num_edges: Number of edges exiting this node. That is, the number * - * of nodes to which it connects. * - * edges[0..num_edges-1]: Array of indices of the neighbours of this * - * node. * - * switches[0..num_edges-1]: Array of switch indexes for each of the * - * edges leaving this node. * - * * - * direction: if the node represents a track, this field * - * indicates the direction of the track. Otherwise * - * the value contained in the field should be * - * ignored. * - * side: The side of a grid location where an IPIN or OPIN is located. * - * This field is valid only for IPINs and OPINs and should be ignored * - * otherwise. 
*/ class t_rr_node { public: //Types + t_rr_node(t_rr_node_storage* storage, RRNodeId id) + : storage_(storage) + , id_(id) {} + //An iterator that dereferences to an edge index // //Used inconjunction with vtr::Range to return ranges of edge indices @@ -72,25 +43,19 @@ class t_rr_node { typedef vtr::Range edge_idx_range; public: //Accessors - t_rr_type type() const { return type_; } + t_rr_type type() const; const char* type_string() const; /* Retrieve type as a string */ edge_idx_range edges() const { return vtr::make_range(edge_idx_iterator(0), edge_idx_iterator(num_edges())); } edge_idx_range configurable_edges() const { return vtr::make_range(edge_idx_iterator(0), edge_idx_iterator(num_edges() - num_non_configurable_edges())); } edge_idx_range non_configurable_edges() const { return vtr::make_range(edge_idx_iterator(num_edges() - num_non_configurable_edges()), edge_idx_iterator(num_edges())); } - t_edge_size num_edges() const { return num_edges_; } - t_edge_size num_configurable_edges() const { return num_edges() - num_non_configurable_edges(); } - t_edge_size num_non_configurable_edges() const { return num_non_configurable_edges_; } + t_edge_size num_edges() const; + t_edge_size num_configurable_edges() const; + t_edge_size num_non_configurable_edges() const; - int edge_sink_node(t_edge_size iedge) const { - VTR_ASSERT_SAFE(iedge < num_edges()); - return edges_[iedge].sink_node; - } - short edge_switch(t_edge_size iedge) const { - VTR_ASSERT_SAFE(iedge < num_edges()); - return edges_[iedge].switch_id; - } + int edge_sink_node(t_edge_size iedge) const; + short edge_switch(t_edge_size iedge) const; bool edge_is_configurable(t_edge_size iedge) const; t_edge_size fan_in() const; @@ -157,43 +122,21 @@ class t_rr_node { void set_direction(e_direction); void set_side(e_side); - private: //Types - //The edge information is stored in a structure to economize on the number of pointers held - //by t_rr_node (to save memory), and is not exposed externally - struct 
t_rr_edge { - int sink_node = -1; //The ID of the sink RR node associated with this edge - short switch_id = -1; //The ID of the switch type this edge represents - }; + void next_node() { + id_ = RRNodeId((size_t)(id_) + 1); + } + + RRNodeId id() const { + return id_; + } + + void prev_node() { + id_ = RRNodeId((size_t)(id_)-1); + } private: //Data - //Note: we use a plain array and use small types for sizes to save space vs std::vector - // (using std::vector's nearly doubles the size of the class) - std::unique_ptr edges_ = nullptr; - t_edge_size num_edges_ = 0; - t_edge_size edges_capacity_ = 0; - uint8_t num_non_configurable_edges_ = 0; - - int8_t cost_index_ = -1; - int16_t rc_index_ = -1; - - int16_t xlow_ = -1; - int16_t ylow_ = -1; - int16_t xhigh_ = -1; - int16_t yhigh_ = -1; - - t_rr_type type_ = NUM_RR_TYPES; - union { - e_direction direction; //Valid only for CHANX/CHANY - e_side side; //Valid only for IPINs/OPINs - } dir_side_; - - union { - int16_t pin_num; - int16_t track_num; - int16_t class_num; - } ptc_; - t_edge_size fan_in_ = 0; - uint16_t capacity_ = 0; + t_rr_node_storage* storage_; + RRNodeId id_; }; /* Data that is pointed to by the .cost_index member of t_rr_node. 
It's * @@ -261,4 +204,6 @@ struct t_rr_rc_data { */ short find_create_rr_rc_data(const float R, const float C); +#include "rr_node_impl.h" + #endif diff --git a/vpr/src/route/rr_node_fwd.h b/vpr/src/route/rr_node_fwd.h index 1711f80c780..c3c772e24c1 100644 --- a/vpr/src/route/rr_node_fwd.h +++ b/vpr/src/route/rr_node_fwd.h @@ -1,7 +1,7 @@ #ifndef RR_NODE_FWD_H #define RR_NODE_FWD_H + #include "vtr_strong_id.h" -#include "rr_node.h" /* * StrongId's for the t_rr_node class @@ -9,6 +9,8 @@ //Forward declaration class t_rr_node; +class t_rr_node_storage; +class node_idx_iterator; //Type tags for Ids struct rr_node_id_tag; diff --git a/vpr/src/route/rr_node_impl.h b/vpr/src/route/rr_node_impl.h new file mode 100644 index 00000000000..49b7ca32577 --- /dev/null +++ b/vpr/src/route/rr_node_impl.h @@ -0,0 +1,163 @@ +#ifndef _RR_NODE_IMPL_H_ +#define _RR_NODE_IMPL_H_ + +#include "rr_node.h" +#include "rr_node_storage.h" + +#include "vpr_error.h" + +class node_idx_iterator : public std::iterator { + public: + node_idx_iterator(t_rr_node value) + : value_(value) {} + + iterator operator++() { + value_.next_node(); + return *this; + } + iterator operator--() { + value_.prev_node(); + return *this; + } + reference operator*() const { return value_; } + pointer operator->() const { return &value_; } + + friend bool operator==(const node_idx_iterator lhs, const node_idx_iterator rhs) { return lhs.value_.id() == rhs.value_.id(); } + friend bool operator!=(const node_idx_iterator lhs, const node_idx_iterator rhs) { return !(lhs == rhs); } + + private: + t_rr_node value_; +}; + +inline node_idx_iterator t_rr_node_storage::begin() const { + return node_idx_iterator(t_rr_node(const_cast(this), RRNodeId(0))); +} + +inline node_idx_iterator t_rr_node_storage::end() const { + return node_idx_iterator(t_rr_node(const_cast(this), RRNodeId(size()))); +} + +inline const t_rr_node t_rr_node_storage::operator[](size_t idx) const { + return t_rr_node(const_cast(this), RRNodeId(idx)); +} + 
+inline t_rr_node t_rr_node_storage::operator[](size_t idx) { + return t_rr_node(this, RRNodeId(idx)); +} + +inline const t_rr_node t_rr_node_storage::at(size_t idx) const { + VTR_ASSERT(idx < storage_.size()); + return t_rr_node(const_cast(this), RRNodeId(idx)); +} + +inline t_rr_node t_rr_node_storage::at(size_t idx) { + VTR_ASSERT(idx < storage_.size()); + return t_rr_node(this, RRNodeId(idx)); +} + +inline const t_rr_node t_rr_node_storage::front() const { + return t_rr_node(const_cast(this), RRNodeId(0)); +} +inline t_rr_node t_rr_node_storage::front() { + return t_rr_node(this, RRNodeId(0)); +} + +inline const t_rr_node t_rr_node_storage::back() const { + return t_rr_node(const_cast(this), RRNodeId(size() - 1)); +} +inline t_rr_node t_rr_node_storage::back() { + return t_rr_node(this, RRNodeId(size() - 1)); +} + +inline t_rr_type t_rr_node::type() const { + return storage_->get(id_).type_; +} + +inline t_edge_size t_rr_node::num_edges() const { + return storage_->get(id_).num_edges_; +} + +inline t_edge_size t_rr_node::num_non_configurable_edges() const { + return storage_->get(id_).num_non_configurable_edges_; +} + +inline t_edge_size t_rr_node::num_configurable_edges() const { + return num_edges() - num_non_configurable_edges(); +} + +inline int t_rr_node::edge_sink_node(t_edge_size iedge) const { + return storage_->get(id_).edges_.get()[iedge].sink_node; +} +inline short t_rr_node::edge_switch(t_edge_size iedge) const { + return storage_->get(id_).edges_.get()[iedge].switch_id; +} + +inline t_edge_size t_rr_node::fan_in() const { + return storage_->get(id_).fan_in_; +} + +inline short t_rr_node::xlow() const { + return storage_->get(id_).xlow_; +} +inline short t_rr_node::ylow() const { + return storage_->get(id_).ylow_; +} +inline short t_rr_node::xhigh() const { + return storage_->get(id_).xhigh_; +} +inline short t_rr_node::yhigh() const { + return storage_->get(id_).yhigh_; +} + +inline short t_rr_node::capacity() const { + return 
storage_->get(id_).capacity_; +} + +inline short t_rr_node::ptc_num() const { + return storage_->get(id_).ptc_.pin_num; +} + +inline short t_rr_node::pin_num() const { + if (type() != IPIN && type() != OPIN) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'pin_num' for non-IPIN/OPIN type '%s'", type_string()); + } + return storage_->get(id_).ptc_.pin_num; +} + +inline short t_rr_node::track_num() const { + if (type() != CHANX && type() != CHANY) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'track_num' for non-CHANX/CHANY type '%s'", type_string()); + } + return storage_->get(id_).ptc_.track_num; +} + +inline short t_rr_node::class_num() const { + if (type() != SOURCE && type() != SINK) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'class_num' for non-SOURCE/SINK type '%s'", type_string()); + } + return storage_->get(id_).ptc_.class_num; +} + +inline short t_rr_node::cost_index() const { + return storage_->get(id_).cost_index_; +} + +inline short t_rr_node::rc_index() const { + return storage_->get(id_).rc_index_; +} + +inline e_direction t_rr_node::direction() const { + if (type() != CHANX && type() != CHANY) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'direction' for non-channel type '%s'", type_string()); + } + return storage_->get(id_).dir_side_.direction; +} + +inline e_side t_rr_node::side() const { + if (type() != IPIN && type() != OPIN) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'side' for non-IPIN/OPIN type '%s'", type_string()); + } + return storage_->get(id_).dir_side_.side; +} + +#endif /* _RR_NODE_IMPL_H_ */ diff --git a/vpr/src/route/rr_node_storage.h b/vpr/src/route/rr_node_storage.h new file mode 100644 index 00000000000..ed55334ceb1 --- /dev/null +++ b/vpr/src/route/rr_node_storage.h @@ -0,0 +1,133 @@ +#ifndef _RR_NODE_STORAGE_ +#define _RR_NODE_STORAGE_ + +#include "rr_node_fwd.h" + +/* Main structure describing one routing resource node. 
Everything in * + * this structure should describe the graph -- information needed only * + * to store algorithm-specific data should be stored in one of the * + * parallel rr_node_* structures. * + * * + * xlow, xhigh, ylow, yhigh: Integer coordinates (see route.c for * + * coordinate system) of the ends of this routing resource. * + * xlow = xhigh and ylow = yhigh for pins or for segments of * + * length 1. These values are used to decide whether or not this * + * node should be added to the expansion heap, based on things * + * like whether it's outside the net bounding box or is moving * + * further away from the target, etc. * + * type: What is this routing resource? * + * ptc_num: Pin, track or class number, depending on rr_node type. * + * Needed to properly draw. * + * cost_index: An integer index into the table of routing resource indexed * + * data t_rr_index_data (this indirection allows quick dynamic * + * changes of rr base costs, and some memory storage savings for * + * fields that have only a few distinct values). * + * capacity: Capacity of this node (number of routes that can use it). * + * num_edges: Number of edges exiting this node. That is, the number * + * of nodes to which it connects. * + * edges[0..num_edges-1]: Array of indices of the neighbours of this * + * node. * + * switches[0..num_edges-1]: Array of switch indexes for each of the * + * edges leaving this node. * + * * + * direction: if the node represents a track, this field * + * indicates the direction of the track. Otherwise * + * the value contained in the field should be * + * ignored. * + * side: The side of a grid location where an IPIN or OPIN is located. * + * This field is valid only for IPINs and OPINs and should be ignored * + * otherwise. 
*/ +struct t_rr_node_data { + //The edge information is stored in a structure to economize on the number of pointers held + //by t_rr_node (to save memory), and is not exposed externally + struct t_rr_edge { + int sink_node = -1; //The ID of the sink RR node associated with this edge + short switch_id = -1; //The ID of the switch type this edge represents + }; + + //Note: we use a plain array and use small types for sizes to save space vs std::vector + // (using std::vector's nearly doubles the size of the class) + std::unique_ptr edges_ = nullptr; + t_edge_size num_edges_ = 0; + t_edge_size edges_capacity_ = 0; + uint8_t num_non_configurable_edges_ = 0; + + int8_t cost_index_ = -1; + int16_t rc_index_ = -1; + + int16_t xlow_ = -1; + int16_t ylow_ = -1; + int16_t xhigh_ = -1; + int16_t yhigh_ = -1; + + t_rr_type type_ = NUM_RR_TYPES; + union { + e_direction direction; //Valid only for CHANX/CHANY + e_side side; //Valid only for IPINs/OPINs + } dir_side_; + + union { + int16_t pin_num; + int16_t track_num; + int16_t class_num; + } ptc_; + t_edge_size fan_in_ = 0; + uint16_t capacity_ = 0; +}; + +// RR node and edge storage class. 
+class t_rr_node_storage { + public: + void reserve(size_t size) { + storage_.reserve(size); + } + void resize(size_t size) { + storage_.resize(size); + } + size_t size() const { + return storage_.size(); + } + bool empty() const { + return storage_.empty(); + } + + void clear() { + storage_.clear(); + } + + void shrink_to_fit() { + storage_.shrink_to_fit(); + } + + void emplace_back() { + storage_.emplace_back(); + } + + node_idx_iterator begin() const; + + node_idx_iterator end() const; + + const t_rr_node operator[](size_t idx) const; + t_rr_node operator[](size_t idx); + const t_rr_node at(size_t idx) const; + t_rr_node at(size_t idx); + + const t_rr_node front() const; + t_rr_node front(); + const t_rr_node back() const; + t_rr_node back(); + + friend class t_rr_node; + + private: + t_rr_node_data& get(const RRNodeId& id) { + return storage_[id]; + } + const t_rr_node_data& get(const RRNodeId& id) const { + return storage_[id]; + } + + vtr::vector storage_; +}; + +#endif /* _RR_NODE_STORAGE_ */ From 3b14cd90f8630774b19dfbfed98f4f2a6431e255 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Sun, 26 Jan 2020 09:02:23 -0800 Subject: [PATCH 03/11] Refactor edge storage. This changes edge storage from an allocation array of struct per node to struct of array for all edge data. Several algorithms over edges that were previous per node per edge, but were actually just iteration over edges are now part of rr_node_storage. 
Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/base/vpr_types.h | 2 + vpr/src/route/rr_graph.cpp | 139 +-------- vpr/src/route/rr_graph.h | 2 - vpr/src/route/rr_graph2.cpp | 8 - vpr/src/route/rr_graph2.h | 3 - vpr/src/route/rr_node.cpp | 99 ------- vpr/src/route/rr_node.h | 51 +--- vpr/src/route/rr_node_fwd.h | 41 ++- vpr/src/route/rr_node_impl.h | 24 +- vpr/src/route/rr_node_storage.cpp | 457 ++++++++++++++++++++++++++++++ vpr/src/route/rr_node_storage.h | 203 ++++++++++++- 11 files changed, 713 insertions(+), 316 deletions(-) create mode 100644 vpr/src/route/rr_node_storage.cpp diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 33d7d83b70e..78628f45c47 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1318,4 +1318,6 @@ class RouteStatus { typedef vtr::vector>> t_clb_opins_used; //[0..num_blocks-1][0..class-1][0..used_pins-1] +typedef std::vector> t_arch_switch_fanin; + #endif diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 75786b63100..b2faf73fdd7 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -67,8 +67,6 @@ struct t_pin_loc { e_side side; }; -typedef std::vector> t_arch_switch_fanin; - /******************* Variables local to this module. ***********************/ /********************* Subroutines local to this module. 
*******************/ @@ -824,44 +822,11 @@ static void alloc_and_load_rr_switch_inf(const int num_arch_switches, const floa static void alloc_rr_switch_inf(t_arch_switch_fanin& arch_switch_fanins) { auto& device_ctx = g_vpr_ctx.mutable_device(); - int num_rr_switches = 0; - { - //Collect the fan-in per switch type for each node in the graph - // - //Note that since we don't store backward edge info in the RR graph we need to walk - //the whole graph to get the per-switch-type fanin info - std::vector> inward_switch_inf(device_ctx.rr_nodes.size()); //[to_node][arch_switch] -> fanin - for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); ++inode) { - for (auto iedge : device_ctx.rr_nodes[inode].edges()) { - int iswitch = device_ctx.rr_nodes[inode].edge_switch(iedge); - int to_node = device_ctx.rr_nodes[inode].edge_sink_node(iedge); - - if (inward_switch_inf[to_node].count(iswitch) == 0) { - inward_switch_inf[to_node][iswitch] = 0; - } - inward_switch_inf[to_node][iswitch]++; - } - } - - //Record the unique switch type/fanin combinations - for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); ++inode) { - for (auto& switch_fanin : inward_switch_inf[inode]) { - int iswitch, fanin; - std::tie(iswitch, fanin) = switch_fanin; - - if (device_ctx.arch_switch_inf[iswitch].fixed_Tdel()) { - //If delay is independent of fanin drop the unique fanin info - fanin = UNDEFINED; - } - - if (arch_switch_fanins[iswitch].count(fanin) == 0) { //New fanin for this switch - arch_switch_fanins[iswitch][fanin] = num_rr_switches++; //Assign it a unique index - } - } - } - } - /* allocate space for the rr_switch_inf array */ + size_t num_rr_switches = device_ctx.rr_nodes.count_rr_switches( + device_ctx.num_arch_switches, + device_ctx.arch_switch_inf, + arch_switch_fanins); device_ctx.rr_switch_inf.resize(num_rr_switches); } @@ -932,27 +897,7 @@ void load_rr_switch_from_arch_switch(int arch_switch_idx, static void remap_rr_node_switch_indices(const t_arch_switch_fanin& switch_fanin) 
{ auto& device_ctx = g_vpr_ctx.mutable_device(); - for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) { - auto from_node = device_ctx.rr_nodes[inode]; - int num_edges = from_node.num_edges(); - for (int iedge = 0; iedge < num_edges; iedge++) { - const t_rr_node& to_node = device_ctx.rr_nodes[from_node.edge_sink_node(iedge)]; - /* get the switch which this edge uses and its fanin */ - int switch_index = from_node.edge_switch(iedge); - int fanin = to_node.fan_in(); - - if (switch_fanin[switch_index].count(UNDEFINED) == 1) { - fanin = UNDEFINED; - } - - auto itr = switch_fanin[switch_index].find(fanin); - VTR_ASSERT(itr != switch_fanin[switch_index].end()); - - int rr_switch_index = itr->second; - - from_node.set_edge_switch(iedge, rr_switch_index); - } - } + device_ctx.rr_nodes.remap_rr_node_switch_indices(switch_fanin); } static void rr_graph_externals(const std::vector& segment_inf, @@ -1326,7 +1271,7 @@ static std::function alloc_and_load_rr_graph(t_rr_node_stor }; } - init_fan_in(L_rr_node, L_rr_node.size()); + L_rr_node.init_fan_in(); return update_chan_width; } @@ -1483,9 +1428,6 @@ static void build_rr_sinks_sources(const int i, * leads to. If route throughs are allowed, you may want to increase the * * base cost of OPINs and/or SOURCES so they aren't used excessively. 
*/ - /* Initialize to unconnected */ - L_rr_node[inode].set_num_edges(0); - L_rr_node[inode].set_cost_index(SINK_COST_INDEX); L_rr_node[inode].set_type(SINK); } @@ -1557,24 +1499,6 @@ static void build_rr_sinks_sources(const int i, //Create the actual edges } -void init_fan_in(t_rr_node_storage& L_rr_node, const int num_rr_nodes) { - //Loads fan-ins for all nodes - - //Reset all fan-ins to zero - for (int i = 0; i < num_rr_nodes; i++) { - L_rr_node[i].set_fan_in(0); - } - - //Walk the graph and increment fanin on all downstream nodes - for (int i = 0; i < num_rr_nodes; i++) { - for (t_edge_size iedge = 0; iedge < L_rr_node[i].num_edges(); iedge++) { - int to_node = L_rr_node[i].edge_sink_node(iedge); - - L_rr_node[to_node].set_fan_in(L_rr_node[to_node].fan_in() + 1); - } - } -} - /* Allocates/loads edges for nodes belonging to specified channel segment and initializes * node properties such as cost, occupancy and capacity */ static void build_rr_chan(const int x_coord, @@ -1758,54 +1682,7 @@ void uniquify_edges(t_rr_edge_info_set& rr_edges_to_create) { void alloc_and_load_edges(t_rr_node_storage& L_rr_node, const t_rr_edge_info_set& rr_edges_to_create) { - /* Sets up all the edge related information for rr_node */ - - struct compare_from_node { - auto operator()(const t_rr_edge_info& lhs, const int from_node) { - return lhs.from_node < from_node; - } - auto operator()(const int from_node, const t_rr_edge_info& rhs) { - return from_node < rhs.from_node; - } - }; - - std::set from_nodes; - for (auto& edge : rr_edges_to_create) { - from_nodes.insert(edge.from_node); - } - - VTR_ASSERT_SAFE(std::is_sorted(rr_edges_to_create.begin(), rr_edges_to_create.end())); - - for (int inode : from_nodes) { - auto edge_range = std::equal_range(rr_edges_to_create.begin(), rr_edges_to_create.end(), inode, compare_from_node()); - - size_t edge_count = std::distance(edge_range.first, edge_range.second); - - if (L_rr_node[inode].num_edges() == 0) { - //Create initial edges - // - //Note 
that we do this in bulk instead of via add_edge() to reduce - //memory fragmentation - - L_rr_node[inode].set_num_edges(edge_count); - - int iedge = 0; - for (auto itr = edge_range.first; itr != edge_range.second; ++itr) { - VTR_ASSERT(itr->from_node == inode); - - L_rr_node[inode].set_edge_sink_node(iedge, itr->to_node); - L_rr_node[inode].set_edge_switch(iedge, itr->switch_type); - ++iedge; - } - } else { - //Add new edge incrementally - // - //This should occur relatively rarely (e.g. a backward bidir edge) so memory fragmentation shouldn't be a big problem - for (auto itr = edge_range.first; itr != edge_range.second; ++itr) { - L_rr_node[inode].add_edge(itr->to_node, itr->switch_type); - } - } - } + L_rr_node.alloc_and_load_edges(&rr_edges_to_create); } /* allocate pin to track map for each segment type individually and then combine into a single @@ -2547,7 +2424,7 @@ std::string describe_rr_node(int inode) { std::string msg = vtr::string_fmt("RR node: %d", inode); - const auto& rr_node = device_ctx.rr_nodes[inode]; + auto rr_node = device_ctx.rr_nodes[inode]; msg += vtr::string_fmt(" type: %s", rr_node.type_string()); diff --git a/vpr/src/route/rr_graph.h b/vpr/src/route/rr_graph.h index 527b06a4a19..300de8b81bd 100644 --- a/vpr/src/route/rr_graph.h +++ b/vpr/src/route/rr_graph.h @@ -47,8 +47,6 @@ std::string describe_rr_node(int inode); class t_rr_node_storage; -void init_fan_in(t_rr_node_storage& L_rr_node, const int num_rr_nodes); - // Sets the spec for the rr_switch based on the arch switch void load_rr_switch_from_arch_switch(int arch_switch_idx, int rr_switch_idx, diff --git a/vpr/src/route/rr_graph2.cpp b/vpr/src/route/rr_graph2.cpp index 1c8db05b1df..470324bba95 100644 --- a/vpr/src/route/rr_graph2.cpp +++ b/vpr/src/route/rr_graph2.cpp @@ -2565,11 +2565,3 @@ static bool should_apply_switch_override(int switch_override) { } return false; } - -void partition_rr_graph_edges(std::vector* rr_nodes) { - for (auto& node : *rr_nodes) { - 
node.partition_edges(); - - VTR_ASSERT_SAFE(node.validate()); - } -} diff --git a/vpr/src/route/rr_graph2.h b/vpr/src/route/rr_graph2.h index 80e540eed74..4d5458b968b 100644 --- a/vpr/src/route/rr_graph2.h +++ b/vpr/src/route/rr_graph2.h @@ -234,7 +234,4 @@ void dump_sblock_pattern(const t_sblock_pattern& sblock_pattern, const DeviceGrid& grid, const char* fname); -//Partitions RR graph edges to allow fast access to configurable/non-configurabe edge subsets -void partition_rr_graph_edges(std::vector* rr_nodes); - #endif diff --git a/vpr/src/route/rr_node.cpp b/vpr/src/route/rr_node.cpp index 2dd2fa1fd50..495f3bbad9e 100644 --- a/vpr/src/route/rr_node.cpp +++ b/vpr/src/route/rr_node.cpp @@ -55,12 +55,6 @@ float t_rr_node::C() const { bool t_rr_node::validate() const { //Check internal assumptions about RR node are valid - auto& node = storage_->get(id_); - - if (node.num_edges_ > node.edges_capacity_) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "RR Node number of edges exceeded edge capacity"); - } - t_edge_size iedge = 0; for (auto edge : edges()) { if (edge < num_configurable_edges()) { @@ -159,85 +153,6 @@ void t_rr_node::set_capacity(short new_capacity) { node.capacity_ = new_capacity; } -void t_rr_node::set_fan_in(t_edge_size new_fan_in) { - auto& node = storage_->get(id_); - node.fan_in_ = new_fan_in; -} - -t_edge_size t_rr_node::add_edge(int sink_node, int iswitch) { - auto& node = storage_->get(id_); - if (node.edges_capacity_ == node.num_edges_) { - constexpr size_t MAX_EDGE_COUNT = std::numeric_limits::max(); - if (node.edges_capacity_ == MAX_EDGE_COUNT) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Maximum RR Node out-edge count (%zu) exceeded", MAX_EDGE_COUNT); - } - - //Grow - size_t new_edges_capacity = std::max(1, 2 * node.edges_capacity_); - new_edges_capacity = std::min(new_edges_capacity, MAX_EDGE_COUNT); //Clip to maximum count - auto new_edges = std::make_unique(new_edges_capacity); - - //Copy - std::copy_n(node.edges_.get(), node.num_edges_, new_edges.get()); 
- - //Replace - node.edges_ = std::move(new_edges); - node.edges_capacity_ = new_edges_capacity; - } - - VTR_ASSERT(node.num_edges_ < node.edges_capacity_); - - node.edges_[node.num_edges_].sink_node = sink_node; - node.edges_[node.num_edges_].switch_id = iswitch; - - ++node.num_edges_; - - return node.num_edges_; -} - -void t_rr_node::shrink_to_fit() { - //Shrink - auto& node = storage_->get(id_); - auto new_edges = std::make_unique(node.num_edges_); - - //Copy - std::copy_n(node.edges_.get(), node.num_edges_, new_edges.get()); - - //Replace - node.edges_ = std::move(new_edges); - node.edges_capacity_ = node.num_edges_; -} - -void t_rr_node::partition_edges() { - auto& device_ctx = g_vpr_ctx.device(); - auto is_configurable = [&](const t_rr_node_data::t_rr_edge& edge) { - auto iswitch = edge.switch_id; - return device_ctx.rr_switch_inf[iswitch].configurable(); - }; - - //Partition the edges so the first set of edges are all configurable, and the later are not - auto& node = storage_->get(id_); - auto first_non_config_edge = std::partition(node.edges_.get(), node.edges_.get() + node.num_edges_, is_configurable); - - size_t num_conf_edges = std::distance(node.edges_.get(), first_non_config_edge); - size_t num_non_conf_edges = num_edges() - num_conf_edges; //Note we calculate using the size_t to get full range - - //Check that within allowable range (no overflow when stored as num_non_configurable_edges_ - if (num_non_conf_edges > std::numeric_limits::max()) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Exceeded RR node maximum number of non-configurable edges"); - } - node.num_non_configurable_edges_ = num_non_conf_edges; //Narrowing -} - -void t_rr_node::set_num_edges(size_t new_num_edges) { - auto& node = storage_->get(id_); - VTR_ASSERT(new_num_edges <= std::numeric_limits::max()); - node.num_edges_ = new_num_edges; - node.edges_capacity_ = new_num_edges; - - node.edges_ = std::make_unique(node.num_edges_); -} - void t_rr_node::set_direction(e_direction new_direction) { 
if (type() != CHANX && type() != CHANY) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'direction' for non-channel type '%s'", type_string()); @@ -254,20 +169,6 @@ void t_rr_node::set_side(e_side new_side) { node.dir_side_.side = new_side; } -void t_rr_node::set_edge_sink_node(t_edge_size iedge, int sink_node) { - auto& node = storage_->get(id_); - VTR_ASSERT(iedge < num_edges()); - VTR_ASSERT(sink_node >= 0); - node.edges_[iedge].sink_node = sink_node; -} - -void t_rr_node::set_edge_switch(t_edge_size iedge, short switch_index) { - auto& node = storage_->get(id_); - VTR_ASSERT(iedge < num_edges()); - VTR_ASSERT(switch_index >= 0); - node.edges_[iedge].switch_id = switch_index; -} - t_rr_rc_data::t_rr_rc_data(float Rval, float Cval) noexcept : R(Rval) , C(Cval) {} diff --git a/vpr/src/route/rr_node.h b/vpr/src/route/rr_node.h index af6a26ac411..2eb1bbda8fe 100644 --- a/vpr/src/route/rr_node.h +++ b/vpr/src/route/rr_node.h @@ -15,40 +15,13 @@ class t_rr_node { : storage_(storage) , id_(id) {} - //An iterator that dereferences to an edge index - // - //Used inconjunction with vtr::Range to return ranges of edge indices - class edge_idx_iterator : public std::iterator { - public: - edge_idx_iterator(value_type init) - : value_(init) {} - iterator operator++() { - value_ += 1; - return *this; - } - iterator operator--() { - value_ -= 1; - return *this; - } - reference operator*() { return value_; } - pointer operator->() { return &value_; } - - friend bool operator==(const edge_idx_iterator lhs, const edge_idx_iterator rhs) { return lhs.value_ == rhs.value_; } - friend bool operator!=(const edge_idx_iterator lhs, const edge_idx_iterator rhs) { return !(lhs == rhs); } - - private: - value_type value_; - }; - - typedef vtr::Range edge_idx_range; - public: //Accessors t_rr_type type() const; const char* type_string() const; /* Retrieve type as a string */ - edge_idx_range edges() const { return vtr::make_range(edge_idx_iterator(0), 
edge_idx_iterator(num_edges())); } - edge_idx_range configurable_edges() const { return vtr::make_range(edge_idx_iterator(0), edge_idx_iterator(num_edges() - num_non_configurable_edges())); } - edge_idx_range non_configurable_edges() const { return vtr::make_range(edge_idx_iterator(num_edges() - num_non_configurable_edges()), edge_idx_iterator(num_edges())); } + edge_idx_range edges() const; + edge_idx_range configurable_edges() const; + edge_idx_range non_configurable_edges() const; t_edge_size num_edges() const; t_edge_size num_configurable_edges() const; @@ -89,24 +62,6 @@ class t_rr_node { public: //Mutators void set_type(t_rr_type new_type); - t_edge_size add_edge(int sink_node, int iswitch); - - void shrink_to_fit(); - - //Partitions all edges so that configurable and non-configurable edges - //are organized for efficient access. - // - //Must be called before configurable_edges(), non_configurable_edges(), - //num_configurable_edges(), num_non_configurable_edges() to ensure they - //are correct. 
- void partition_edges(); - - void set_num_edges(size_t); //Note will remove any previous edges - void set_edge_sink_node(t_edge_size iedge, int sink_node); - void set_edge_switch(t_edge_size iedge, short switch_index); - - void set_fan_in(t_edge_size); - void set_coordinates(short x1, short y1, short x2, short y2); void set_capacity(short); diff --git a/vpr/src/route/rr_node_fwd.h b/vpr/src/route/rr_node_fwd.h index c3c772e24c1..566c328fe5f 100644 --- a/vpr/src/route/rr_node_fwd.h +++ b/vpr/src/route/rr_node_fwd.h @@ -1,21 +1,54 @@ #ifndef RR_NODE_FWD_H #define RR_NODE_FWD_H +#include +#include "vpr_types.h" #include "vtr_strong_id.h" -/* - * StrongId's for the t_rr_node class - */ - //Forward declaration class t_rr_node; class t_rr_node_storage; class node_idx_iterator; +/* + * StrongId's for the t_rr_node class + */ + //Type tags for Ids struct rr_node_id_tag; +struct rr_edge_id_tag; //A unique identifier for a node in the rr graph typedef vtr::StrongId RRNodeId; +//A unique identifier for an edge in the rr graph +typedef vtr::StrongId RREdgeId; + +//An iterator that dereferences to an edge index +// +//Used inconjunction with vtr::Range to return ranges of edge indices +class edge_idx_iterator : public std::iterator { + public: + edge_idx_iterator(value_type init) + : value_(init) {} + iterator operator++() { + value_ += 1; + return *this; + } + iterator operator--() { + value_ -= 1; + return *this; + } + reference operator*() { return value_; } + pointer operator->() { return &value_; } + + friend bool operator==(const edge_idx_iterator lhs, const edge_idx_iterator rhs) { return lhs.value_ == rhs.value_; } + friend bool operator!=(const edge_idx_iterator lhs, const edge_idx_iterator rhs) { return !(lhs == rhs); } + + private: + value_type value_; +}; + +typedef vtr::Range edge_idx_range; + #endif diff --git a/vpr/src/route/rr_node_impl.h b/vpr/src/route/rr_node_impl.h index 49b7ca32577..2a28d85726d 100644 --- a/vpr/src/route/rr_node_impl.h +++ 
b/vpr/src/route/rr_node_impl.h @@ -74,26 +74,38 @@ inline t_rr_type t_rr_node::type() const { } inline t_edge_size t_rr_node::num_edges() const { - return storage_->get(id_).num_edges_; + return storage_->num_edges(id_); +} + +inline edge_idx_range t_rr_node::edges() const { + return storage_->edges(id_); +} + +inline edge_idx_range t_rr_node::configurable_edges() const { + return storage_->configurable_edges(id_); +} +inline edge_idx_range t_rr_node::non_configurable_edges() const { + return storage_->non_configurable_edges(id_); } inline t_edge_size t_rr_node::num_non_configurable_edges() const { - return storage_->get(id_).num_non_configurable_edges_; + return storage_->num_non_configurable_edges(id_); } inline t_edge_size t_rr_node::num_configurable_edges() const { - return num_edges() - num_non_configurable_edges(); + return storage_->num_configurable_edges(id_); } inline int t_rr_node::edge_sink_node(t_edge_size iedge) const { - return storage_->get(id_).edges_.get()[iedge].sink_node; + size_t inode = (size_t)storage_->edge_sink_node(id_, iedge); + return inode; } inline short t_rr_node::edge_switch(t_edge_size iedge) const { - return storage_->get(id_).edges_.get()[iedge].switch_id; + return storage_->edge_switch(id_, iedge); } inline t_edge_size t_rr_node::fan_in() const { - return storage_->get(id_).fan_in_; + return storage_->fan_in(id_); } inline short t_rr_node::xlow() const { diff --git a/vpr/src/route/rr_node_storage.cpp b/vpr/src/route/rr_node_storage.cpp new file mode 100644 index 00000000000..d24c0e920ad --- /dev/null +++ b/vpr/src/route/rr_node_storage.cpp @@ -0,0 +1,457 @@ +#include "rr_node_storage.h" +#include "globals.h" + +void t_rr_node_storage::reserve_edges(size_t num_edges) { + edge_src_node_.reserve(num_edges); + edge_dest_node_.reserve(num_edges); + edge_switch_.reserve(num_edges); +} + +void t_rr_node_storage::emplace_back_edge(RRNodeId src, RRNodeId dest, short edge_switch) { + // Cannot mutate edges once edges have been read! 
+ VTR_ASSERT(!edges_read_); + edge_src_node_.emplace_back(src); + edge_dest_node_.emplace_back(dest); + edge_switch_.emplace_back(edge_switch); +} + +// Typical node to edge ratio. This allows a preallocation guess for the edges +// to avoid repeated reallocation. +constexpr size_t kEdgeToNodeRatio = 10; + +void t_rr_node_storage::alloc_and_load_edges(const t_rr_edge_info_set* rr_edges_to_create) { + // Cannot mutate edges once edges have been read! + size_t required_size = edge_src_node_.size() + rr_edges_to_create->size(); + if (edge_src_node_.capacity() < required_size) { + size_t new_capacity = std::min(edge_src_node_.capacity(), storage_.size() * kEdgeToNodeRatio); + if (new_capacity < 1) { + new_capacity = 1; + } + while (new_capacity < required_size) { + new_capacity *= 2; + } + + edge_src_node_.reserve(new_capacity); + edge_dest_node_.reserve(new_capacity); + edge_switch_.reserve(new_capacity); + } + + for (const auto& new_edge : *rr_edges_to_create) { + emplace_back_edge( + RRNodeId(new_edge.from_node), + RRNodeId(new_edge.to_node), + new_edge.switch_type); + } +} + +/* edge_swapper / edge_sort_iterator / edge_compare_src_node_and_configurable_first + * are used to sort the edge data arrays + * edge_src_node_ / edge_dest_node_ / edge_switch_. + * + * edge_sort_iterator is a random access iterator for the edge data arrays. + * + * edge_swapper is a reference for the src/dest/switch tuple, and can convert + * to and from t_rr_edge_info, the value_type for edge_sort_iterator. + * + * edge_compare_src_node_and_configurable_first is a comparision operator + * that first partitions the edge data by source rr node, and then by + * configurable switches first. Sorting by this comparision operator means that + * the edge data is directly usable for each node by simply slicing the arrays. 
// Proxy "reference" to one edge, i.e. the (src, dest, switch) tuple stored at
// index idx_ of the three parallel edge arrays owned by storage_.
//
// It is what edge_sort_iterator yields on dereference, so assignments and
// swaps performed by a sort on the proxy are applied to the underlying arrays.
struct edge_swapper {
    edge_swapper(t_rr_node_storage* storage, size_t idx)
        : storage_(storage)
        , idx_(idx) {}
    t_rr_node_storage* storage_; // Storage whose edge arrays are addressed
    size_t idx_;                 // Index of the addressed edge in those arrays

    // Copy construction is disabled; copy *assignment* (below) copies edge
    // data rather than rebinding the proxy.
    edge_swapper(const edge_swapper&) = delete;

    // Overwrites the edge at idx_ with the edge at other.idx_. Both indices
    // are looked up in this->storage_; other.storage_ is not consulted.
    edge_swapper& operator=(const edge_swapper& other) {
        VTR_ASSERT(idx_ < storage_->edge_src_node_.size());
        VTR_ASSERT(other.idx_ < storage_->edge_src_node_.size());

        RREdgeId edge(idx_);
        RREdgeId other_edge(other.idx_);
        storage_->edge_src_node_[edge] = storage_->edge_src_node_[other_edge];
        storage_->edge_dest_node_[edge] = storage_->edge_dest_node_[other_edge];
        storage_->edge_switch_[edge] = storage_->edge_switch_[other_edge];
        return *this;
    }

    // Overwrites the edge at idx_ with a detached edge value.
    edge_swapper& operator=(const t_rr_edge_info& edge) {
        VTR_ASSERT(idx_ < storage_->edge_src_node_.size());

        storage_->edge_src_node_[RREdgeId(idx_)] = RRNodeId(edge.from_node);
        storage_->edge_dest_node_[RREdgeId(idx_)] = RRNodeId(edge.to_node);
        storage_->edge_switch_[RREdgeId(idx_)] = edge.switch_type;
        return *this;
    }

    // Reads the edge at idx_ out of the arrays into a detached value.
    operator t_rr_edge_info() const {
        VTR_ASSERT(idx_ < storage_->edge_src_node_.size());
        t_rr_edge_info info(
            (size_t)storage_->edge_src_node_[RREdgeId(idx_)],
            (size_t)storage_->edge_dest_node_[RREdgeId(idx_)],
            storage_->edge_switch_[RREdgeId(idx_)]);

        return info;
    }

    // NOTE(review): no class named edge_compare is visible in this file; this
    // friend declaration looks like a leftover -- confirm before removing.
    friend class edge_compare;

    // Exchanges the edge tuples at a.idx_ and b.idx_. Only a.storage_ is
    // indexed, so both proxies are assumed to refer to the same storage.
    static void swap(edge_swapper& a, edge_swapper& b) {
        VTR_ASSERT(a.idx_ < a.storage_->edge_src_node_.size());
        VTR_ASSERT(b.idx_ < a.storage_->edge_src_node_.size());
        RREdgeId a_edge(a.idx_);
        RREdgeId b_edge(b.idx_);

        std::swap(a.storage_->edge_src_node_[a_edge], a.storage_->edge_src_node_[b_edge]);
        std::swap(a.storage_->edge_dest_node_[a_edge], a.storage_->edge_dest_node_[b_edge]);
        std::swap(a.storage_->edge_switch_[a_edge], a.storage_->edge_switch_[b_edge]);
    }

    // Found by argument-dependent lookup when swap(a, b) is called unqualified.
    friend void swap(edge_swapper& a, edge_swapper& b) {
        edge_swapper::swap(a, b);
    }
};
storage, size_t idx) + : swapper_(storage, idx) {} + + edge_sort_iterator(const edge_sort_iterator& other) + : swapper_( + other.swapper_.storage_, + other.swapper_.idx_) { + } + + edge_sort_iterator& operator=(const edge_sort_iterator& other) { + swapper_.storage_ = other.swapper_.storage_; + swapper_.idx_ = other.swapper_.idx_; + + return *this; + } + + using iterator_category = std::random_access_iterator_tag; + using value_type = t_rr_edge_info; + using reference = edge_swapper&; + using pointer = edge_swapper*; + using difference_type = ssize_t; + + edge_swapper& operator*() { + return this->swapper_; + } + + edge_swapper* operator->() { + return &this->swapper_; + } + + edge_sort_iterator& operator+=(ssize_t n) { + swapper_.idx_ += n; + return *this; + } + + edge_sort_iterator& operator++() { + ++swapper_.idx_; + return *this; + } + + edge_sort_iterator& operator--() { + --swapper_.idx_; + return *this; + } + + friend edge_sort_iterator operator+(const edge_sort_iterator& lhs, ssize_t n) { + edge_sort_iterator ret = lhs; + ret.swapper_.idx_ += n; + return ret; + } + + friend edge_sort_iterator operator-(const edge_sort_iterator& lhs, ssize_t n) { + edge_sort_iterator ret = lhs; + ret.swapper_.idx_ -= n; + return ret; + } + + friend ssize_t operator-(const edge_sort_iterator& lhs, const edge_sort_iterator& rhs) { + ssize_t diff = lhs.swapper_.idx_; + diff -= rhs.swapper_.idx_; + return diff; + } + + friend bool operator==(const edge_sort_iterator& lhs, const edge_sort_iterator& rhs) { + return lhs.swapper_.idx_ == rhs.swapper_.idx_; + } + + friend bool operator!=(const edge_sort_iterator& lhs, const edge_sort_iterator& rhs) { + return lhs.swapper_.idx_ != rhs.swapper_.idx_; + } + + friend bool operator<(const edge_sort_iterator& lhs, const edge_sort_iterator& rhs) { + return lhs.swapper_.idx_ < rhs.swapper_.idx_; + } + + RREdgeId edge() const { + return RREdgeId(swapper_.idx_); + } + + private: + edge_swapper swapper_; +}; + +class 
edge_compare_src_node_and_configurable_first { + public: + edge_compare_src_node_and_configurable_first(const std::vector& rr_switch_inf) + : rr_switch_inf_(rr_switch_inf) {} + + bool operator()(const t_rr_edge_info& lhs, const edge_swapper& rhs) { + auto lhs_src_node = RRNodeId(lhs.from_node); + auto lhs_is_configurable = rr_switch_inf_[lhs.switch_type].configurable(); + + auto rhs_edge = RREdgeId(rhs.idx_); + auto rhs_src_node = rhs.storage_->edge_src_node_[rhs_edge]; + auto rhs_is_configurable = rr_switch_inf_[rhs.storage_->edge_switch_[rhs_edge]].configurable(); + + return std::make_pair(lhs_src_node, !lhs_is_configurable) < std::make_pair(rhs_src_node, !rhs_is_configurable); + } + + bool operator()(const t_rr_edge_info& lhs, const t_rr_edge_info& rhs) { + auto lhs_src_node = lhs.from_node; + auto lhs_is_configurable = rr_switch_inf_[lhs.switch_type].configurable(); + + auto rhs_src_node = rhs.from_node; + auto rhs_is_configurable = rr_switch_inf_[rhs.switch_type].configurable(); + + return std::make_pair(lhs_src_node, !lhs_is_configurable) < std::make_pair(rhs_src_node, !rhs_is_configurable); + } + bool operator()(const edge_swapper& lhs, const t_rr_edge_info& rhs) { + auto lhs_edge = RREdgeId(lhs.idx_); + auto lhs_src_node = lhs.storage_->edge_src_node_[lhs_edge]; + auto lhs_is_configurable = rr_switch_inf_[lhs.storage_->edge_switch_[lhs_edge]].configurable(); + + auto rhs_src_node = RRNodeId(rhs.from_node); + auto rhs_is_configurable = rr_switch_inf_[rhs.switch_type].configurable(); + + return std::make_pair(lhs_src_node, !lhs_is_configurable) < std::make_pair(rhs_src_node, !rhs_is_configurable); + } + bool operator()(const edge_swapper& lhs, const edge_swapper& rhs) { + auto lhs_edge = RREdgeId(lhs.idx_); + auto lhs_src_node = lhs.storage_->edge_src_node_[lhs_edge]; + auto lhs_is_configurable = rr_switch_inf_[lhs.storage_->edge_switch_[lhs_edge]].configurable(); + + auto rhs_edge = RREdgeId(rhs.idx_); + auto rhs_src_node = 
rhs.storage_->edge_src_node_[rhs_edge]; + auto rhs_is_configurable = rr_switch_inf_[rhs.storage_->edge_switch_[rhs_edge]].configurable(); + + return std::make_pair(lhs_src_node, !lhs_is_configurable) < std::make_pair(rhs_src_node, !rhs_is_configurable); + } + + private: + const std::vector& rr_switch_inf_; +}; + +void t_rr_node_storage::assign_edges() { + VTR_ASSERT(first_edge_.empty()); + + // Last element is a dummy element + first_edge_.resize(storage_.size() + 1); + + VTR_ASSERT(std::is_sorted( + edge_src_node_.begin(), + edge_src_node_.end())); + + size_t node_id = 0; + size_t first_id = 0; + size_t second_id = 0; + size_t num_edges = edge_src_node_.size(); + VTR_ASSERT(edge_dest_node_.size() == num_edges); + VTR_ASSERT(edge_switch_.size() == num_edges); + while (true) { + VTR_ASSERT(first_id < num_edges); + VTR_ASSERT(second_id < num_edges); + size_t current_node_id = (size_t)edge_src_node_[RREdgeId(second_id)]; + if (node_id < current_node_id) { + // All edges belonging to node_id are assigned. + while (node_id < current_node_id) { + // Store any edges belongs to node_id. + first_edge_[RRNodeId(node_id)] = RREdgeId(first_id); + first_id = second_id; + node_id += 1; + VTR_ASSERT(first_edge_.size()); + } + + VTR_ASSERT(node_id == current_node_id); + first_edge_[RRNodeId(node_id)] = RREdgeId(second_id); + } else { + second_id += 1; + if (second_id == num_edges) { + break; + } + } + } + + // All remaining nodes have no edges, set as such. + for (size_t inode = node_id + 1; inode < first_edge_.size(); ++inode) { + first_edge_[RRNodeId(inode)] = RREdgeId(second_id); + } + + VTR_ASSERT_SAFE(verify_first_edges()); +} + +bool t_rr_node_storage::verify_first_edges() const { + size_t num_edges = edge_src_node_.size(); + VTR_ASSERT(first_edge_[RRNodeId(storage_.size())] == RREdgeId(num_edges)); + + // Each edge should belong with the edge range defined by + // [first_edge_[src_node], first_edge_[src_node+1]). 
+ for (size_t iedge = 0; iedge < num_edges; ++iedge) { + RRNodeId src_node = edge_src_node_.at(RREdgeId(iedge)); + RREdgeId first_edge = first_edge_.at(src_node); + RREdgeId second_edge = first_edge_.at(RRNodeId((size_t)src_node + 1)); + VTR_ASSERT(iedge >= (size_t)first_edge); + VTR_ASSERT(iedge < (size_t)second_edge); + } + + return true; +} + +void t_rr_node_storage::init_fan_in() { + //Reset all fan-ins to zero + edges_read_ = true; + fan_in_.resize(storage_.size(), 0); + fan_in_.shrink_to_fit(); + + //Walk the graph and increment fanin on all downstream nodes + for (const auto& dest_node : edge_dest_node_) { + fan_in_[dest_node] += 1; + } +} + +size_t t_rr_node_storage::count_rr_switches( + size_t num_arch_switches, + t_arch_switch_inf* arch_switch_inf, + t_arch_switch_fanin& arch_switch_fanins) const { + VTR_ASSERT(!remapped_edges_); + + edges_read_ = true; + int num_rr_switches = 0; + + //Collect the fan-in per switch type for each node in the graph + // + //Note that since we don't store backward edge info in the RR graph we need to walk + //the whole graph to get the per-switch-type fanin info + vtr::vector> inward_switch_inf(size()); //[to_node][arch_switch] -> fanin + for (size_t iedge = 0; iedge < edge_src_node_.size(); ++iedge) { + RREdgeId edge = RREdgeId(iedge); + + int iswitch = edge_switch_[edge]; + RRNodeId to_node = edge_dest_node_[edge]; + + if (inward_switch_inf[to_node].count(iswitch) == 0) { + inward_switch_inf[to_node][iswitch] = 0; + } + inward_switch_inf[to_node][iswitch]++; + } + + //Record the unique switch type/fanin combinations + for (size_t inode = 0; inode < size(); ++inode) { + RRNodeId node = RRNodeId(inode); + for (auto& switch_fanin : inward_switch_inf[node]) { + int iswitch, fanin; + std::tie(iswitch, fanin) = switch_fanin; + + VTR_ASSERT_SAFE(iswitch < (ssize_t)num_arch_switches); + if (arch_switch_inf[iswitch].fixed_Tdel()) { + //If delay is independent of fanin drop the unique fanin info + fanin = UNDEFINED; + } + + if 
(arch_switch_fanins[iswitch].count(fanin) == 0) { //New fanin for this switch + arch_switch_fanins[iswitch][fanin] = num_rr_switches++; //Assign it a unique index + } + } + } + + return num_rr_switches; +} + +void t_rr_node_storage::remap_rr_node_switch_indices(const t_arch_switch_fanin& switch_fanin) { + edges_read_ = true; + + VTR_ASSERT(!remapped_edges_); + for (size_t i = 0; i < edge_src_node_.size(); ++i) { + RREdgeId edge(i); + + RRNodeId to_node = edge_dest_node_[edge]; + int switch_index = edge_switch_[edge]; + int fanin = fan_in_[to_node]; + + if (switch_fanin[switch_index].count(UNDEFINED) == 1) { + fanin = UNDEFINED; + } + + auto itr = switch_fanin[switch_index].find(fanin); + VTR_ASSERT(itr != switch_fanin[switch_index].end()); + + int rr_switch_index = itr->second; + + edge_switch_[edge] = rr_switch_index; + } + remapped_edges_ = true; +} + +void t_rr_node_storage::mark_edges_as_rr_switch_ids() { + edges_read_ = true; + remapped_edges_ = true; +} + +void t_rr_node_storage::partition_edges() { + if (partitioned_) { + return; + } + + edges_read_ = true; + VTR_ASSERT(remapped_edges_); + + const auto& device_ctx = g_vpr_ctx.device(); + std::stable_sort( + edge_sort_iterator(this, 0), + edge_sort_iterator(this, edge_src_node_.size()), + edge_compare_src_node_and_configurable_first(device_ctx.rr_switch_inf)); + + partitioned_ = true; + + assign_edges(); + + VTR_ASSERT_SAFE(validate()); +} + +t_edge_size t_rr_node_storage::num_configurable_edges(const RRNodeId& id) const { + VTR_ASSERT(!first_edge_.empty() && remapped_edges_); + + const auto& device_ctx = g_vpr_ctx.device(); + auto first_id = (size_t)first_edge_[id]; + auto second_id = (size_t)(&first_edge_[id])[1]; + for (size_t idx = first_id; idx < second_id; ++idx) { + auto switch_idx = edge_switch_[RREdgeId(idx)]; + if (!device_ctx.rr_switch_inf[switch_idx].configurable()) { + return idx - first_id; + } + } + + return second_id - first_id; +} + +t_edge_size 
t_rr_node_storage::num_non_configurable_edges(const RRNodeId& id) const { + return num_edges(id) - num_configurable_edges(id); +} + +bool t_rr_node_storage::validate() const { + bool all_valid = verify_first_edges(); + for (size_t inode = 0; inode < size(); ++inode) { + all_valid = (*this)[inode].validate() || all_valid; + } + return all_valid; +} diff --git a/vpr/src/route/rr_node_storage.h b/vpr/src/route/rr_node_storage.h index ed55334ceb1..e41eafbf983 100644 --- a/vpr/src/route/rr_node_storage.h +++ b/vpr/src/route/rr_node_storage.h @@ -2,6 +2,8 @@ #define _RR_NODE_STORAGE_ #include "rr_node_fwd.h" +#include "rr_graph2.h" +#include "vtr_log.h" /* Main structure describing one routing resource node. Everything in * * this structure should describe the graph -- information needed only * @@ -38,20 +40,6 @@ * This field is valid only for IPINs and OPINs and should be ignored * * otherwise. */ struct t_rr_node_data { - //The edge information is stored in a structure to economize on the number of pointers held - //by t_rr_node (to save memory), and is not exposed externally - struct t_rr_edge { - int sink_node = -1; //The ID of the sink RR node associated with this edge - short switch_id = -1; //The ID of the switch type this edge represents - }; - - //Note: we use a plain array and use small types for sizes to save space vs std::vector - // (using std::vector's nearly doubles the size of the class) - std::unique_ptr edges_ = nullptr; - t_edge_size num_edges_ = 0; - t_edge_size edges_capacity_ = 0; - uint8_t num_non_configurable_edges_ = 0; - int8_t cost_index_ = -1; int16_t rc_index_ = -1; @@ -71,17 +59,25 @@ struct t_rr_node_data { int16_t track_num; int16_t class_num; } ptc_; - t_edge_size fan_in_ = 0; + uint16_t capacity_ = 0; }; // RR node and edge storage class. class t_rr_node_storage { public: + t_rr_node_storage() { + clear(); + } + void reserve(size_t size) { + // No edges can be assigned if mutating the rr node array. 
+ VTR_ASSERT(!edges_read_); storage_.reserve(size); } void resize(size_t size) { + // No edges can be assigned if mutating the rr node array. + VTR_ASSERT(!edges_read_); storage_.resize(size); } size_t size() const { @@ -93,13 +89,28 @@ class t_rr_node_storage { void clear() { storage_.clear(); + first_edge_.clear(); + fan_in_.clear(); + edge_src_node_.clear(); + edge_dest_node_.clear(); + edge_switch_.clear(); + edges_read_ = false; + partitioned_ = false; + remapped_edges_ = false; } void shrink_to_fit() { storage_.shrink_to_fit(); + first_edge_.shrink_to_fit(); + fan_in_.shrink_to_fit(); + edge_src_node_.shrink_to_fit(); + edge_dest_node_.shrink_to_fit(); + edge_switch_.shrink_to_fit(); } void emplace_back() { + // No edges can be assigned if mutating the rr node array. + VTR_ASSERT(!edges_read_); storage_.emplace_back(); } @@ -119,7 +130,152 @@ class t_rr_node_storage { friend class t_rr_node; + /**************** + * Edge methods * + ****************/ + + // Edge initialization ordering: + // 1. Use reserve_edges/emplace_back_edge/alloc_and_load_edges to + // initialize edges. All edges must be added prior to calling any + // methods that read edge data. + // + // Note: Either arch_switch_inf indicies or rr_switch_inf should be + // used with emplace_back_edge and alloc_and_load_edges. Do not mix + // indicies, otherwise things will be break. + // + // 2. The following methods read from the edge data, and lock out the + // edge mutation methods (e.g. emplace_back_edge/alloc_and_load_edges): + // - init_fan_in + // - partition_edges + // - count_rr_switches + // - remap_rr_node_switch_indices + // - mark_edges_as_rr_switch_ids + // + // 3. If edge_switch values are arch_switch_inf indicies, + // remap_rr_node_switch_indices must be called prior to calling + // partition_edges. + // + // If edge_switch values are rr_switch_inf indices, + // mark_edges_as_rr_switch_ids must be called prior to calling + // partition_edges. + // + // 4. 
init_fan_in can be invoked any time after edges have been + // initialized. + // + // 5. The following methods must only be called after partition_edges + // have been invoked: + // - edges + // - configurable_edges + // - non_configurable_edges + // - num_edges + // - num_configurable_edges + // - edge_id + // - edge_sink_node + // - edge_switch + + /* Edge mutators */ + + // Reserve at least num_edges in the edge backing arrays. + void reserve_edges(size_t num_edges); + + // Adds ones edge. This method is efficient if reserve_edges was called with + // the number of edges present in the graph. This method is still + // amortized O(1), like std::vector::emplace_back, but both runtime and + // peak memory usage will be higher if reallocation is required. + void emplace_back_edge(RRNodeId src, RRNodeId dest, short edge_switch); + + // Adds a batch of edges. + void alloc_and_load_edges(const t_rr_edge_info_set* rr_edges_to_create); + + /* Edge finalization methods */ + + // Counts the number of rr switches needed based on fan in. + // + // init_fan_in does not need to be invoked before this method. + size_t count_rr_switches( + size_t num_arch_switches, + t_arch_switch_inf* arch_switch_inf, + t_arch_switch_fanin& arch_switch_fanins) const; + + // Maps arch_switch_inf indicies to rr_switch_inf indicies. + // + // This must be called before partition_edges if edges were created with + // arch_switch_inf indicies. + void remap_rr_node_switch_indices(const t_arch_switch_fanin& switch_fanin); + + // Marks that edge switch values are rr switch indicies. + // + // This must be called before partition_edges if edges were created with + // rr_switch_inf indicies. + void mark_edges_as_rr_switch_ids(); + + // Sorts edge data such that configurable edges appears before + // non-configurable edges. + void partition_edges(); + + // Validate that edge data is partitioned correctly. 
+ bool validate() const; + + /* Edge accessors + * + * Only call these methods after partition_edges has been invoked. */ + edge_idx_range edges(const RRNodeId& id) const { + return vtr::make_range(edge_idx_iterator(0), edge_idx_iterator(num_edges(id))); + } + edge_idx_range configurable_edges(const RRNodeId& id) const { + return vtr::make_range(edge_idx_iterator(0), edge_idx_iterator(num_edges(id) - num_non_configurable_edges(id))); + } + edge_idx_range non_configurable_edges(const RRNodeId& id) const { + return vtr::make_range(edge_idx_iterator(num_edges(id) - num_non_configurable_edges(id)), edge_idx_iterator(num_edges(id))); + } + + t_edge_size num_edges(const RRNodeId& id) const { + RREdgeId first_id = first_edge_[id]; + RREdgeId second_id = (&first_edge_[id])[1]; + return (size_t)second_id - (size_t)first_id; + } + + t_edge_size num_configurable_edges(const RRNodeId& id) const; + t_edge_size num_non_configurable_edges(const RRNodeId& id) const; + + RREdgeId edge_id(const RRNodeId& id, t_edge_size iedge) const { + RREdgeId first_edge = first_edge_[id]; + RREdgeId ret((size_t)first_edge + iedge); + VTR_ASSERT_SAFE(ret < (&first_edge_[id])[1]); + return ret; + } + RRNodeId edge_sink_node(const RREdgeId& edge) const { + return edge_dest_node_[edge]; + } + RRNodeId edge_sink_node(const RRNodeId& id, t_edge_size iedge) const { + return edge_sink_node(edge_id(id, iedge)); + } + short edge_switch(const RREdgeId& edge) const { + return edge_switch_[edge]; + } + short edge_switch(const RRNodeId& id, t_edge_size iedge) const { + return edge_switch(edge_id(id, iedge)); + } + + /****************** + * Fan-in methods * + ******************/ + + /* Init per node fan-in data. Should only be called after all edges have + * been allocated */ + void init_fan_in(); + + /* Retrieve fan_in for RRNodeId, init_fan_in must have been called first. 
*/ + t_edge_size fan_in(RRNodeId id) { + return fan_in_[id]; + } + private: + friend struct edge_swapper; + friend class edge_sort_iterator; + friend class edge_compare_src_node; + friend class edge_compare_src_node_and_configurable_first; + t_rr_node_data& get(const RRNodeId& id) { return storage_[id]; } @@ -127,7 +283,24 @@ class t_rr_node_storage { return storage_[id]; } + // Take allocated edges in edge_src_node_/ edge_dest_node_ / edge_switch_ + // sort, and assign the first edge for each + void assign_edges(); + + // Verify that first_edge_ array correctly partitions rr edge data. + bool verify_first_edges() const; + vtr::vector storage_; + vtr::vector first_edge_; + vtr::vector fan_in_; + + vtr::vector edge_src_node_; + vtr::vector edge_dest_node_; + vtr::vector edge_switch_; + + mutable bool edges_read_; + bool remapped_edges_; + bool partitioned_; }; #endif /* _RR_NODE_STORAGE_ */ From 0b15b11cccf11a198996afd5799ad24783c0cef6 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Mon, 3 Feb 2020 13:58:21 -0800 Subject: [PATCH 04/11] Add support for custom allocator to vtr::vector. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- libs/libvtrutil/src/vtr_vector.h | 108 ++++++++++++++++--------------- 1 file changed, 55 insertions(+), 53 deletions(-) diff --git a/libs/libvtrutil/src/vtr_vector.h b/libs/libvtrutil/src/vtr_vector.h index 073f84ec3b4..4886b256481 100644 --- a/libs/libvtrutil/src/vtr_vector.h +++ b/libs/libvtrutil/src/vtr_vector.h @@ -15,8 +15,10 @@ namespace vtr { // //If you need more std::map-like (instead of std::vector-like) behaviour see //vtr::vector_map. 
-template -class vector : private std::vector { +template> +class vector : private std::vector { + using storage = std::vector; + public: typedef K key_type; @@ -25,71 +27,71 @@ class vector : private std::vector { public: //Pass through std::vector's types - using typename std::vector::value_type; - using typename std::vector::allocator_type; - using typename std::vector::reference; - using typename std::vector::const_reference; - using typename std::vector::pointer; - using typename std::vector::const_pointer; - using typename std::vector::iterator; - using typename std::vector::const_iterator; - using typename std::vector::reverse_iterator; - using typename std::vector::const_reverse_iterator; - using typename std::vector::difference_type; - using typename std::vector::size_type; - - //Pass through std::vector's methods - using std::vector::vector; - - using std::vector::begin; - using std::vector::end; - using std::vector::rbegin; - using std::vector::rend; - using std::vector::cbegin; - using std::vector::cend; - using std::vector::crbegin; - using std::vector::crend; - - using std::vector::size; - using std::vector::max_size; - using std::vector::resize; - using std::vector::capacity; - using std::vector::empty; - using std::vector::reserve; - using std::vector::shrink_to_fit; - - using std::vector::front; - using std::vector::back; - using std::vector::data; - - using std::vector::assign; - using std::vector::push_back; - using std::vector::pop_back; - using std::vector::insert; - using std::vector::erase; - using std::vector::swap; - using std::vector::clear; - using std::vector::emplace; - using std::vector::emplace_back; - using std::vector::get_allocator; + using typename storage::allocator_type; + using typename storage::const_iterator; + using typename storage::const_pointer; + using typename storage::const_reference; + using typename storage::const_reverse_iterator; + using typename storage::difference_type; + using typename storage::iterator; + using 
typename storage::pointer; + using typename storage::reference; + using typename storage::reverse_iterator; + using typename storage::size_type; + using typename storage::value_type; + + //Pass through storagemethods + using std::vector::vector; + + using storage::begin; + using storage::cbegin; + using storage::cend; + using storage::crbegin; + using storage::crend; + using storage::end; + using storage::rbegin; + using storage::rend; + + using storage::capacity; + using storage::empty; + using storage::max_size; + using storage::reserve; + using storage::resize; + using storage::shrink_to_fit; + using storage::size; + + using storage::back; + using storage::data; + using storage::front; + + using storage::assign; + using storage::clear; + using storage::emplace; + using storage::emplace_back; + using storage::erase; + using storage::get_allocator; + using storage::insert; + using storage::pop_back; + using storage::push_back; + using storage::swap; //Don't include operator[] and at() from std::vector, //since we redine them to take key_type instead of size_t reference operator[](const key_type id) { auto i = size_t(id); - return std::vector::operator[](i); + return storage::operator[](i); } const_reference operator[](const key_type id) const { auto i = size_t(id); - return std::vector::operator[](i); + return storage::operator[](i); } reference at(const key_type id) { auto i = size_t(id); - return std::vector::at(i); + return storage::at(i); } const_reference at(const key_type id) const { auto i = size_t(id); - return std::vector::at(i); + return storage::at(i); } //Returns a range containing the keys From 359f1423018077723fe7f98b9118294ab9f63d78 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Mon, 3 Feb 2020 14:00:09 -0800 Subject: [PATCH 05/11] Split node ptc data away from core storage. This enables 16-byte alignment (4 nodes per cache line). 
Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/rr_node.cpp | 21 ++--------- vpr/src/route/rr_node_impl.h | 17 ++------- vpr/src/route/rr_node_storage.cpp | 53 ++++++++++++++++++++++++++ vpr/src/route/rr_node_storage.h | 62 +++++++++++++++++++++++++++++-- 4 files changed, 120 insertions(+), 33 deletions(-) diff --git a/vpr/src/route/rr_node.cpp b/vpr/src/route/rr_node.cpp index 495f3bbad9e..d6434a7391f 100644 --- a/vpr/src/route/rr_node.cpp +++ b/vpr/src/route/rr_node.cpp @@ -105,32 +105,19 @@ void t_rr_node::set_coordinates(short x1, short y1, short x2, short y2) { } void t_rr_node::set_ptc_num(short new_ptc_num) { - auto& node = storage_->get(id_); - node.ptc_.pin_num = new_ptc_num; //TODO: eventually remove + storage_->set_node_ptc_num(id_, new_ptc_num); } void t_rr_node::set_pin_num(short new_pin_num) { - if (type() != IPIN && type() != OPIN) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'pin_num' for non-IPIN/OPIN type '%s'", type_string()); - } - auto& node = storage_->get(id_); - node.ptc_.pin_num = new_pin_num; + storage_->set_node_pin_num(id_, new_pin_num); } void t_rr_node::set_track_num(short new_track_num) { - if (type() != CHANX && type() != CHANY) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'track_num' for non-CHANX/CHANY type '%s'", type_string()); - } - auto& node = storage_->get(id_); - node.ptc_.track_num = new_track_num; + storage_->set_node_track_num(id_, new_track_num); } void t_rr_node::set_class_num(short new_class_num) { - if (type() != SOURCE && type() != SINK) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'class_num' for non-SOURCE/SINK type '%s'", type_string()); - } - auto& node = storage_->get(id_); - node.ptc_.class_num = new_class_num; + storage_->set_node_class_num(id_, new_class_num); } void t_rr_node::set_cost_index(size_t new_cost_index) { diff --git a/vpr/src/route/rr_node_impl.h b/vpr/src/route/rr_node_impl.h index 
2a28d85726d..6b058fadd0a 100644 --- a/vpr/src/route/rr_node_impl.h +++ b/vpr/src/route/rr_node_impl.h @@ -126,28 +126,19 @@ inline short t_rr_node::capacity() const { } inline short t_rr_node::ptc_num() const { - return storage_->get(id_).ptc_.pin_num; + return storage_->node_ptc_num(id_); } inline short t_rr_node::pin_num() const { - if (type() != IPIN && type() != OPIN) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'pin_num' for non-IPIN/OPIN type '%s'", type_string()); - } - return storage_->get(id_).ptc_.pin_num; + return storage_->node_pin_num(id_); } inline short t_rr_node::track_num() const { - if (type() != CHANX && type() != CHANY) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'track_num' for non-CHANX/CHANY type '%s'", type_string()); - } - return storage_->get(id_).ptc_.track_num; + return storage_->node_track_num(id_); } inline short t_rr_node::class_num() const { - if (type() != SOURCE && type() != SINK) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'class_num' for non-SOURCE/SINK type '%s'", type_string()); - } - return storage_->get(id_).ptc_.class_num; + return storage_->node_class_num(id_); } inline short t_rr_node::cost_index() const { diff --git a/vpr/src/route/rr_node_storage.cpp b/vpr/src/route/rr_node_storage.cpp index d24c0e920ad..c733bae33b0 100644 --- a/vpr/src/route/rr_node_storage.cpp +++ b/vpr/src/route/rr_node_storage.cpp @@ -455,3 +455,56 @@ bool t_rr_node_storage::validate() const { } return all_valid; } + +const char* t_rr_node_storage::node_type_string(RRNodeId id) const { + return rr_node_typename[node_type(id)]; +} +t_rr_type t_rr_node_storage::node_type(RRNodeId id) const { + return storage_[id].type_; +} + +void t_rr_node_storage::set_node_ptc_num(RRNodeId id, short new_ptc_num) { + ptc_[id].ptc_.pin_num = new_ptc_num; //TODO: eventually remove +} +void t_rr_node_storage::set_node_pin_num(RRNodeId id, short new_pin_num) { + if (node_type(id) != IPIN && node_type(id) != 
OPIN) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'pin_num' for non-IPIN/OPIN type '%s'", node_type_string(id)); + } + ptc_[id].ptc_.pin_num = new_pin_num; +} + +void t_rr_node_storage::set_node_track_num(RRNodeId id, short new_track_num) { + if (node_type(id) != CHANX && node_type(id) != CHANY) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'track_num' for non-CHANX/CHANY type '%s'", node_type_string(id)); + } + ptc_[id].ptc_.track_num = new_track_num; +} + +void t_rr_node_storage::set_node_class_num(RRNodeId id, short new_class_num) { + if (node_type(id) != SOURCE && node_type(id) != SINK) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'class_num' for non-SOURCE/SINK type '%s'", node_type_string(id)); + } + ptc_[id].ptc_.class_num = new_class_num; +} + +short t_rr_node_storage::node_ptc_num(RRNodeId id) const { + return ptc_[id].ptc_.pin_num; +} +short t_rr_node_storage::node_pin_num(RRNodeId id) const { + if (node_type(id) != IPIN && node_type(id) != OPIN) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'pin_num' for non-IPIN/OPIN type '%s'", node_type_string(id)); + } + return ptc_[id].ptc_.pin_num; +} +short t_rr_node_storage::node_track_num(RRNodeId id) const { + if (node_type(id) != CHANX && node_type(id) != CHANY) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'track_num' for non-CHANX/CHANY type '%s'", node_type_string(id)); + } + return ptc_[id].ptc_.track_num; +} +short t_rr_node_storage::node_class_num(RRNodeId id) const { + if (node_type(id) != SOURCE && node_type(id) != SINK) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'class_num' for non-SOURCE/SINK type '%s'", node_type_string(id)); + } + return ptc_[id].ptc_.class_num; +} diff --git a/vpr/src/route/rr_node_storage.h b/vpr/src/route/rr_node_storage.h index e41eafbf983..cd8a495d6e8 100644 --- a/vpr/src/route/rr_node_storage.h +++ b/vpr/src/route/rr_node_storage.h @@ -1,6 +1,8 @@ #ifndef 
_RR_NODE_STORAGE_ #define _RR_NODE_STORAGE_ +#include + #include "rr_node_fwd.h" #include "rr_graph2.h" #include "vtr_log.h" @@ -39,7 +41,7 @@ * side: The side of a grid location where an IPIN or OPIN is located. * * This field is valid only for IPINs and OPINs and should be ignored * * otherwise. */ -struct t_rr_node_data { +struct alignas(16) t_rr_node_data { int8_t cost_index_ = -1; int16_t rc_index_ = -1; @@ -54,13 +56,42 @@ struct t_rr_node_data { e_side side; //Valid only for IPINs/OPINs } dir_side_; + uint16_t capacity_ = 0; +}; + +struct t_rr_node_ptc_data { union { int16_t pin_num; int16_t track_num; int16_t class_num; } ptc_; +}; - uint16_t capacity_ = 0; +static_assert(sizeof(t_rr_node_data) == 16, "Check t_rr_node_data size"); +static_assert(alignof(t_rr_node_data) == 16, "Check t_rr_node_data size"); + +template +struct aligned_allocator { + using value_type = T; + using pointer = T*; + using const_pointer = const T*; + using reference = T&; + using const_reference = const T&; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + pointer allocate(size_type n, const void* /*hint*/ = 0) { + void* data; + int ret = posix_memalign(&data, alignof(T), sizeof(T) * n); + if (ret != 0) { + throw std::bad_alloc(); + } + return static_cast(data); + } + + void deallocate(T* p, size_type /*n*/) { + free(p); + } }; // RR node and edge storage class. @@ -74,11 +105,13 @@ class t_rr_node_storage { // No edges can be assigned if mutating the rr node array. VTR_ASSERT(!edges_read_); storage_.reserve(size); + ptc_.reserve(size); } void resize(size_t size) { // No edges can be assigned if mutating the rr node array. 
VTR_ASSERT(!edges_read_); storage_.resize(size); + ptc_.resize(size); } size_t size() const { return storage_.size(); @@ -89,6 +122,7 @@ class t_rr_node_storage { void clear() { storage_.clear(); + ptc_.clear(); first_edge_.clear(); fan_in_.clear(); edge_src_node_.clear(); @@ -101,6 +135,7 @@ class t_rr_node_storage { void shrink_to_fit() { storage_.shrink_to_fit(); + ptc_.shrink_to_fit(); first_edge_.shrink_to_fit(); fan_in_.shrink_to_fit(); edge_src_node_.shrink_to_fit(); @@ -112,6 +147,7 @@ class t_rr_node_storage { // No edges can be assigned if mutating the rr node array. VTR_ASSERT(!edges_read_); storage_.emplace_back(); + ptc_.emplace_back(); } node_idx_iterator begin() const; @@ -130,6 +166,25 @@ class t_rr_node_storage { friend class t_rr_node; + /**************** + * Node methods * + ****************/ + + const char* node_type_string(RRNodeId id) const; + t_rr_type node_type(RRNodeId id) const; + + /* PTC set methods */ + void set_node_ptc_num(RRNodeId id, short); + void set_node_pin_num(RRNodeId id, short); //Same as set_ptc_num() by checks type() is consistent + void set_node_track_num(RRNodeId id, short); //Same as set_ptc_num() by checks type() is consistent + void set_node_class_num(RRNodeId id, short); //Same as set_ptc_num() by checks type() is consistent + + /* PTC get methods */ + short node_ptc_num(RRNodeId id) const; + short node_pin_num(RRNodeId id) const; //Same as ptc_num() but checks that type() is consistent + short node_track_num(RRNodeId id) const; //Same as ptc_num() but checks that type() is consistent + short node_class_num(RRNodeId id) const; //Same as ptc_num() but checks that type() is consistent + /**************** * Edge methods * ****************/ @@ -290,7 +345,8 @@ class t_rr_node_storage { // Verify that first_edge_ array correctly partitions rr edge data. 
bool verify_first_edges() const; - vtr::vector storage_; + vtr::vector> storage_; + vtr::vector ptc_; vtr::vector first_edge_; vtr::vector fan_in_; From 5c1b3316130012af0de378b8f5e92146dff394c2 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Mon, 3 Feb 2020 14:22:31 -0800 Subject: [PATCH 06/11] Rename t_rr_node_storage to t_rr_graph_storage. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/base/vpr_context.h | 4 +- vpr/src/route/clock_network_builders.cpp | 12 ++--- vpr/src/route/clock_network_builders.h | 16 +++--- vpr/src/route/rr_graph.cpp | 32 +++++------ vpr/src/route/rr_graph.h | 2 - vpr/src/route/rr_graph_clock.h | 6 +-- ..._node_storage.cpp => rr_graph_storage.cpp} | 54 +++++++++---------- .../{rr_node_storage.h => rr_graph_storage.h} | 4 +- vpr/src/route/rr_node.cpp | 2 +- vpr/src/route/rr_node.h | 4 +- vpr/src/route/rr_node_fwd.h | 2 +- vpr/src/route/rr_node_impl.h | 34 ++++++------ 12 files changed, 85 insertions(+), 87 deletions(-) rename vpr/src/route/{rr_node_storage.cpp => rr_graph_storage.cpp} (90%) rename vpr/src/route/{rr_node_storage.h => rr_graph_storage.h} (99%) diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 59f75bdd991..0a369c0d8e3 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -9,7 +9,7 @@ #include "vtr_vector.h" #include "atom_netlist.h" #include "clustered_netlist.h" -#include "rr_node_storage.h" +#include "rr_graph_storage.h" #include "rr_node.h" #include "tatum/TimingGraph.hpp" #include "tatum/TimingConstraints.hpp" @@ -146,7 +146,7 @@ struct DeviceContext : public Context { t_chan_width chan_width; /* Structures to define the routing architecture of the FPGA. */ - t_rr_node_storage rr_nodes; /* autogenerated in build_rr_graph */ + t_rr_graph_storage rr_nodes; /* autogenerated in build_rr_graph */ std::vector rr_indexed_data; /* [0 .. 
num_rr_indexed_data-1] */ diff --git a/vpr/src/route/clock_network_builders.cpp b/vpr/src/route/clock_network_builders.cpp index 2af4509c013..bdfcb4900bc 100644 --- a/vpr/src/route/clock_network_builders.cpp +++ b/vpr/src/route/clock_network_builders.cpp @@ -61,7 +61,7 @@ void ClockNetwork::set_num_instance(int num_inst) { */ void ClockNetwork::create_rr_nodes_for_clock_network_wires(ClockRRGraphBuilder& clock_graph, - t_rr_node_storage* rr_nodes, + t_rr_graph_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) { for (int inst_num = 0; inst_num < get_num_inst(); inst_num++) { @@ -215,7 +215,7 @@ size_t ClockRib::estimate_additional_nodes(const DeviceGrid& grid) { } void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - t_rr_node_storage* rr_nodes, + t_rr_graph_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) { // Only chany wires need to know the number of segments inorder @@ -307,7 +307,7 @@ int ClockRib::create_chanx_wire(int x_start, int y, int ptc_num, e_direction direction, - t_rr_node_storage* rr_nodes) { + t_rr_graph_storage* rr_nodes) { rr_nodes->emplace_back(); auto node_index = rr_nodes->size() - 1; auto node = rr_nodes->back(); @@ -502,7 +502,7 @@ size_t ClockSpine::estimate_additional_nodes(const DeviceGrid& grid) { } void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - t_rr_node_storage* rr_nodes, + t_rr_graph_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) { auto& grid = clock_graph.grid(); @@ -596,7 +596,7 @@ int ClockSpine::create_chany_wire(int y_start, int x, int ptc_num, e_direction direction, - t_rr_node_storage* rr_nodes, + t_rr_graph_storage* rr_nodes, int num_segments) { rr_nodes->emplace_back(); auto node_index = rr_nodes->size() - 1; @@ -662,7 +662,7 @@ size_t ClockHTree::estimate_additional_nodes(const DeviceGrid& /*grid*/) { } void 
ClockHTree::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - t_rr_node_storage* rr_nodes, + t_rr_graph_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) { //Remove unused parameter warning diff --git a/vpr/src/route/clock_network_builders.h b/vpr/src/route/clock_network_builders.h index 27d720d3f92..7fa2829a7d6 100644 --- a/vpr/src/route/clock_network_builders.h +++ b/vpr/src/route/clock_network_builders.h @@ -11,7 +11,7 @@ #include "rr_graph2.h" #include "rr_graph_clock.h" -class t_rr_node_storage; +class t_rr_graph_storage; class ClockRRGraphBuilder; enum class ClockType { @@ -103,13 +103,13 @@ class ClockNetwork { /* Creates the RR nodes for the clock network wires and adds them to the reverse lookup * in ClockRRGraphBuilder. The reverse lookup maps the nodes to their switch point locations */ void create_rr_nodes_for_clock_network_wires(ClockRRGraphBuilder& clock_graph, - t_rr_node_storage* rr_nodes, + t_rr_graph_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments); virtual void create_segments(std::vector& segment_inf) = 0; virtual void create_rr_nodes_and_internal_edges_for_one_instance( ClockRRGraphBuilder& clock_graph, - t_rr_node_storage* rr_nodes, + t_rr_graph_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) = 0; @@ -164,7 +164,7 @@ class ClockRib : public ClockNetwork { */ void create_segments(std::vector& segment_inf) override; void create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - t_rr_node_storage* rr_nodes, + t_rr_graph_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) override; size_t estimate_additional_nodes(const DeviceGrid& grid) override; @@ -173,7 +173,7 @@ class ClockRib : public ClockNetwork { int y, int ptc_num, e_direction direction, - t_rr_node_storage* rr_nodes); + t_rr_graph_storage* rr_nodes); void record_tap_locations(unsigned x_start, unsigned x_end, 
unsigned y, @@ -223,7 +223,7 @@ class ClockSpine : public ClockNetwork { */ void create_segments(std::vector& segment_inf) override; void create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - t_rr_node_storage* rr_nodes, + t_rr_graph_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) override; size_t estimate_additional_nodes(const DeviceGrid& grid) override; @@ -232,7 +232,7 @@ class ClockSpine : public ClockNetwork { int x, int ptc_num, e_direction direction, - t_rr_node_storage* rr_nodes, + t_rr_graph_storage* rr_nodes, int num_segments); void record_tap_locations(unsigned y_start, unsigned y_end, @@ -258,7 +258,7 @@ class ClockHTree : private ClockNetwork { // TODO: Unimplemented member function void create_segments(std::vector& segment_inf) override; void create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - t_rr_node_storage* rr_nodes, + t_rr_graph_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) override; size_t estimate_additional_nodes(const DeviceGrid& grid) override; diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index b2faf73fdd7..49bac28b9d8 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -103,7 +103,7 @@ static void build_bidir_rr_opins(const int i, const int j, const e_side side, const t_rr_node_indices& L_rr_node_indices, - const t_rr_node_storage& rr_nodes, + const t_rr_graph_storage& rr_nodes, const t_pin_to_track_lookup& opin_to_track_map, const std::vector>& Fc_out, t_rr_edge_info_set& created_rr_edges, @@ -128,7 +128,7 @@ static void build_unidir_rr_opins(const int i, t_rr_edge_info_set& created_rr_edges, bool* Fc_clipped, const t_rr_node_indices& L_rr_node_indices, - const t_rr_node_storage& rr_nodes, + const t_rr_graph_storage& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs, @@ -141,12 +141,12 @@ static int 
get_opin_direct_connecions(int x, int from_rr_node, t_rr_edge_info_set& rr_edges_to_create, const t_rr_node_indices& L_rr_node_indices, - const t_rr_node_storage& rr_nodes, + const t_rr_graph_storage& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs); -static std::function alloc_and_load_rr_graph(t_rr_node_storage& L_rr_node, +static std::function alloc_and_load_rr_graph(t_rr_graph_storage& L_rr_node, const int num_seg_types, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, @@ -207,7 +207,7 @@ static std::vector> alloc_and_load_perturb_ipins(const int L_n static void build_rr_sinks_sources(const int i, const int j, - t_rr_node_storage& L_rr_node, + t_rr_graph_storage& L_rr_node, t_rr_edge_info_set& rr_edges_to_create, const t_rr_node_indices& L_rr_node_indices, const int delayless_switch, @@ -229,13 +229,13 @@ static void build_rr_chan(const int i, const t_chan_details& chan_details_y, const t_rr_node_indices& L_rr_node_indices, t_rr_edge_info_set& created_rr_edges, - t_rr_node_storage& L_rr_node, + t_rr_graph_storage& L_rr_node, const int wire_to_ipin_switch, const enum e_directionality directionality); void uniquify_edges(t_rr_edge_info_set& rr_edges_to_create); -void alloc_and_load_edges(t_rr_node_storage& L_rr_node, +void alloc_and_load_edges(t_rr_graph_storage& L_rr_node, const t_rr_edge_info_set& rr_edges_to_create); static void alloc_and_load_rr_switch_inf(const int num_arch_switches, @@ -273,7 +273,7 @@ static std::vector> alloc_and_load_actual_fc(const std::vector< const enum e_directionality directionality, bool* Fc_clipped); -static int pick_best_direct_connect_target_rr_node(const t_rr_node_storage& rr_nodes, +static int pick_best_direct_connect_target_rr_node(const t_rr_graph_storage& rr_nodes, int from_rr, const std::vector& candidate_rr_nodes); @@ -1132,7 +1132,7 @@ static void free_type_track_to_pin_map(t_track_to_pin_lookup& track_to_pin_map, /* Does the actual 
work of allocating the rr_graph and filling all the * * appropriate values. Everything up to this was just a prelude! */ -static std::function alloc_and_load_rr_graph(t_rr_node_storage& L_rr_node, +static std::function alloc_and_load_rr_graph(t_rr_graph_storage& L_rr_node, const int num_seg_types, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, @@ -1280,7 +1280,7 @@ static void build_bidir_rr_opins(const int i, const int j, const e_side side, const t_rr_node_indices& L_rr_node_indices, - const t_rr_node_storage& rr_nodes, + const t_rr_graph_storage& rr_nodes, const t_pin_to_track_lookup& opin_to_track_map, const std::vector>& Fc_out, t_rr_edge_info_set& rr_edges_to_create, @@ -1373,7 +1373,7 @@ void free_rr_graph() { static void build_rr_sinks_sources(const int i, const int j, - t_rr_node_storage& L_rr_node, + t_rr_graph_storage& L_rr_node, t_rr_edge_info_set& rr_edges_to_create, const t_rr_node_indices& L_rr_node_indices, const int delayless_switch, @@ -1517,7 +1517,7 @@ static void build_rr_chan(const int x_coord, const t_chan_details& chan_details_y, const t_rr_node_indices& L_rr_node_indices, t_rr_edge_info_set& rr_edges_to_create, - t_rr_node_storage& L_rr_node, + t_rr_graph_storage& L_rr_node, const int wire_to_ipin_switch, const enum e_directionality directionality) { /* this function builds both x and y-directed channel segments, so set up our @@ -1680,7 +1680,7 @@ void uniquify_edges(t_rr_edge_info_set& rr_edges_to_create) { rr_edges_to_create.erase(std::unique(rr_edges_to_create.begin(), rr_edges_to_create.end()), rr_edges_to_create.end()); } -void alloc_and_load_edges(t_rr_node_storage& L_rr_node, +void alloc_and_load_edges(t_rr_graph_storage& L_rr_node, const t_rr_edge_info_set& rr_edges_to_create) { L_rr_node.alloc_and_load_edges(&rr_edges_to_create); } @@ -2469,7 +2469,7 @@ std::string describe_rr_node(int inode) { return msg; } -static void build_unidir_rr_opins(const int i, const int j, const e_side side, const 
DeviceGrid& grid, const std::vector>& Fc_out, const int max_chan_width, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, vtr::NdMatrix& Fc_xofs, vtr::NdMatrix& Fc_yofs, t_rr_edge_info_set& rr_edges_to_create, bool* Fc_clipped, const t_rr_node_indices& L_rr_node_indices, const t_rr_node_storage& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs, const int num_seg_types) { +static void build_unidir_rr_opins(const int i, const int j, const e_side side, const DeviceGrid& grid, const std::vector>& Fc_out, const int max_chan_width, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, vtr::NdMatrix& Fc_xofs, vtr::NdMatrix& Fc_yofs, t_rr_edge_info_set& rr_edges_to_create, bool* Fc_clipped, const t_rr_node_indices& L_rr_node_indices, const t_rr_graph_storage& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs, const int num_seg_types) { /* * This routine adds the edges from opins to channels at the specified * grid location (i,j) and grid tile side @@ -2704,7 +2704,7 @@ static int get_opin_direct_connecions(int x, int from_rr_node, t_rr_edge_info_set& rr_edges_to_create, const t_rr_node_indices& L_rr_node_indices, - const t_rr_node_storage& rr_nodes, + const t_rr_graph_storage& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs) { @@ -2916,7 +2916,7 @@ static std::vector alloc_and_load_perturb_opins(const t_physical_tile_type return perturb_opins; } -static int pick_best_direct_connect_target_rr_node(const t_rr_node_storage& rr_nodes, +static int pick_best_direct_connect_target_rr_node(const t_rr_graph_storage& rr_nodes, int from_rr, const std::vector& candidate_rr_nodes) { //With physically equivalent pins there may be multiple candidate rr nodes (which are equivalent) diff --git a/vpr/src/route/rr_graph.h b/vpr/src/route/rr_graph.h index 
300de8b81bd..e00fbbb4c06 100644 --- a/vpr/src/route/rr_graph.h +++ b/vpr/src/route/rr_graph.h @@ -45,8 +45,6 @@ void free_rr_graph(); //Returns a brief one-line summary of an RR node std::string describe_rr_node(int inode); -class t_rr_node_storage; - // Sets the spec for the rr_switch based on the arch switch void load_rr_switch_from_arch_switch(int arch_switch_idx, int rr_switch_idx, diff --git a/vpr/src/route/rr_graph_clock.h b/vpr/src/route/rr_graph_clock.h index 162ca58e6c8..a377f3777de 100644 --- a/vpr/src/route/rr_graph_clock.h +++ b/vpr/src/route/rr_graph_clock.h @@ -15,7 +15,7 @@ class ClockNetwork; class ClockConnection; -class t_rr_node_storage; +class t_rr_graph_storage; class SwitchPoint { /* A switch point object: keeps information on the location and and rr_node indices @@ -77,7 +77,7 @@ class ClockRRGraphBuilder { ClockRRGraphBuilder( const t_chan_width& chan_width, const DeviceGrid& grid, - t_rr_node_storage* rr_nodes) + t_rr_graph_storage* rr_nodes) : chan_width_(chan_width) , grid_(grid) , rr_nodes_(rr_nodes) @@ -133,7 +133,7 @@ class ClockRRGraphBuilder { const t_chan_width& chan_width_; const DeviceGrid& grid_; - t_rr_node_storage* rr_nodes_; + t_rr_graph_storage* rr_nodes_; int chanx_ptc_idx_; int chany_ptc_idx_; diff --git a/vpr/src/route/rr_node_storage.cpp b/vpr/src/route/rr_graph_storage.cpp similarity index 90% rename from vpr/src/route/rr_node_storage.cpp rename to vpr/src/route/rr_graph_storage.cpp index c733bae33b0..3a8acaaa5db 100644 --- a/vpr/src/route/rr_node_storage.cpp +++ b/vpr/src/route/rr_graph_storage.cpp @@ -1,13 +1,13 @@ -#include "rr_node_storage.h" +#include "rr_graph_storage.h" #include "globals.h" -void t_rr_node_storage::reserve_edges(size_t num_edges) { +void t_rr_graph_storage::reserve_edges(size_t num_edges) { edge_src_node_.reserve(num_edges); edge_dest_node_.reserve(num_edges); edge_switch_.reserve(num_edges); } -void t_rr_node_storage::emplace_back_edge(RRNodeId src, RRNodeId dest, short edge_switch) { +void 
t_rr_graph_storage::emplace_back_edge(RRNodeId src, RRNodeId dest, short edge_switch) { // Cannot mutate edges once edges have been read! VTR_ASSERT(!edges_read_); edge_src_node_.emplace_back(src); @@ -19,7 +19,7 @@ void t_rr_node_storage::emplace_back_edge(RRNodeId src, RRNodeId dest, short edg // to avoid repeated reallocation. constexpr size_t kEdgeToNodeRatio = 10; -void t_rr_node_storage::alloc_and_load_edges(const t_rr_edge_info_set* rr_edges_to_create) { +void t_rr_graph_storage::alloc_and_load_edges(const t_rr_edge_info_set* rr_edges_to_create) { // Cannot mutate edges once edges have been read! size_t required_size = edge_src_node_.size() + rr_edges_to_create->size(); if (edge_src_node_.capacity() < required_size) { @@ -60,10 +60,10 @@ void t_rr_node_storage::alloc_and_load_edges(const t_rr_edge_info_set* rr_edges_ * * */ struct edge_swapper { - edge_swapper(t_rr_node_storage* storage, size_t idx) + edge_swapper(t_rr_graph_storage* storage, size_t idx) : storage_(storage) , idx_(idx) {} - t_rr_node_storage* storage_; + t_rr_graph_storage* storage_; size_t idx_; edge_swapper(const edge_swapper&) = delete; @@ -118,7 +118,7 @@ struct edge_swapper { class edge_sort_iterator { public: - edge_sort_iterator(t_rr_node_storage* storage, size_t idx) + edge_sort_iterator(t_rr_graph_storage* storage, size_t idx) : swapper_(storage, idx) {} edge_sort_iterator(const edge_sort_iterator& other) @@ -252,7 +252,7 @@ class edge_compare_src_node_and_configurable_first { const std::vector& rr_switch_inf_; }; -void t_rr_node_storage::assign_edges() { +void t_rr_graph_storage::assign_edges() { VTR_ASSERT(first_edge_.empty()); // Last element is a dummy element @@ -300,7 +300,7 @@ void t_rr_node_storage::assign_edges() { VTR_ASSERT_SAFE(verify_first_edges()); } -bool t_rr_node_storage::verify_first_edges() const { +bool t_rr_graph_storage::verify_first_edges() const { size_t num_edges = edge_src_node_.size(); VTR_ASSERT(first_edge_[RRNodeId(storage_.size())] == 
RREdgeId(num_edges)); @@ -317,7 +317,7 @@ bool t_rr_node_storage::verify_first_edges() const { return true; } -void t_rr_node_storage::init_fan_in() { +void t_rr_graph_storage::init_fan_in() { //Reset all fan-ins to zero edges_read_ = true; fan_in_.resize(storage_.size(), 0); @@ -329,7 +329,7 @@ void t_rr_node_storage::init_fan_in() { } } -size_t t_rr_node_storage::count_rr_switches( +size_t t_rr_graph_storage::count_rr_switches( size_t num_arch_switches, t_arch_switch_inf* arch_switch_inf, t_arch_switch_fanin& arch_switch_fanins) const { @@ -377,7 +377,7 @@ size_t t_rr_node_storage::count_rr_switches( return num_rr_switches; } -void t_rr_node_storage::remap_rr_node_switch_indices(const t_arch_switch_fanin& switch_fanin) { +void t_rr_graph_storage::remap_rr_node_switch_indices(const t_arch_switch_fanin& switch_fanin) { edges_read_ = true; VTR_ASSERT(!remapped_edges_); @@ -402,12 +402,12 @@ void t_rr_node_storage::remap_rr_node_switch_indices(const t_arch_switch_fanin& remapped_edges_ = true; } -void t_rr_node_storage::mark_edges_as_rr_switch_ids() { +void t_rr_graph_storage::mark_edges_as_rr_switch_ids() { edges_read_ = true; remapped_edges_ = true; } -void t_rr_node_storage::partition_edges() { +void t_rr_graph_storage::partition_edges() { if (partitioned_) { return; } @@ -428,7 +428,7 @@ void t_rr_node_storage::partition_edges() { VTR_ASSERT_SAFE(validate()); } -t_edge_size t_rr_node_storage::num_configurable_edges(const RRNodeId& id) const { +t_edge_size t_rr_graph_storage::num_configurable_edges(const RRNodeId& id) const { VTR_ASSERT(!first_edge_.empty() && remapped_edges_); const auto& device_ctx = g_vpr_ctx.device(); @@ -444,11 +444,11 @@ t_edge_size t_rr_node_storage::num_configurable_edges(const RRNodeId& id) const return second_id - first_id; } -t_edge_size t_rr_node_storage::num_non_configurable_edges(const RRNodeId& id) const { +t_edge_size t_rr_graph_storage::num_non_configurable_edges(const RRNodeId& id) const { return num_edges(id) - 
num_configurable_edges(id); } -bool t_rr_node_storage::validate() const { +bool t_rr_graph_storage::validate() const { bool all_valid = verify_first_edges(); for (size_t inode = 0; inode < size(); ++inode) { all_valid = (*this)[inode].validate() || all_valid; @@ -456,53 +456,53 @@ bool t_rr_node_storage::validate() const { return all_valid; } -const char* t_rr_node_storage::node_type_string(RRNodeId id) const { +const char* t_rr_graph_storage::node_type_string(RRNodeId id) const { return rr_node_typename[node_type(id)]; } -t_rr_type t_rr_node_storage::node_type(RRNodeId id) const { +t_rr_type t_rr_graph_storage::node_type(RRNodeId id) const { return storage_[id].type_; } -void t_rr_node_storage::set_node_ptc_num(RRNodeId id, short new_ptc_num) { +void t_rr_graph_storage::set_node_ptc_num(RRNodeId id, short new_ptc_num) { ptc_[id].ptc_.pin_num = new_ptc_num; //TODO: eventually remove } -void t_rr_node_storage::set_node_pin_num(RRNodeId id, short new_pin_num) { +void t_rr_graph_storage::set_node_pin_num(RRNodeId id, short new_pin_num) { if (node_type(id) != IPIN && node_type(id) != OPIN) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'pin_num' for non-IPIN/OPIN type '%s'", node_type_string(id)); } ptc_[id].ptc_.pin_num = new_pin_num; } -void t_rr_node_storage::set_node_track_num(RRNodeId id, short new_track_num) { +void t_rr_graph_storage::set_node_track_num(RRNodeId id, short new_track_num) { if (node_type(id) != CHANX && node_type(id) != CHANY) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'track_num' for non-CHANX/CHANY type '%s'", node_type_string(id)); } ptc_[id].ptc_.track_num = new_track_num; } -void t_rr_node_storage::set_node_class_num(RRNodeId id, short new_class_num) { +void t_rr_graph_storage::set_node_class_num(RRNodeId id, short new_class_num) { if (node_type(id) != SOURCE && node_type(id) != SINK) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'class_num' for non-SOURCE/SINK type '%s'", node_type_string(id)); 
} ptc_[id].ptc_.class_num = new_class_num; } -short t_rr_node_storage::node_ptc_num(RRNodeId id) const { +short t_rr_graph_storage::node_ptc_num(RRNodeId id) const { return ptc_[id].ptc_.pin_num; } -short t_rr_node_storage::node_pin_num(RRNodeId id) const { +short t_rr_graph_storage::node_pin_num(RRNodeId id) const { if (node_type(id) != IPIN && node_type(id) != OPIN) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'pin_num' for non-IPIN/OPIN type '%s'", node_type_string(id)); } return ptc_[id].ptc_.pin_num; } -short t_rr_node_storage::node_track_num(RRNodeId id) const { +short t_rr_graph_storage::node_track_num(RRNodeId id) const { if (node_type(id) != CHANX && node_type(id) != CHANY) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'track_num' for non-CHANX/CHANY type '%s'", node_type_string(id)); } return ptc_[id].ptc_.track_num; } -short t_rr_node_storage::node_class_num(RRNodeId id) const { +short t_rr_graph_storage::node_class_num(RRNodeId id) const { if (node_type(id) != SOURCE && node_type(id) != SINK) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'class_num' for non-SOURCE/SINK type '%s'", node_type_string(id)); } diff --git a/vpr/src/route/rr_node_storage.h b/vpr/src/route/rr_graph_storage.h similarity index 99% rename from vpr/src/route/rr_node_storage.h rename to vpr/src/route/rr_graph_storage.h index cd8a495d6e8..c6e1855c89f 100644 --- a/vpr/src/route/rr_node_storage.h +++ b/vpr/src/route/rr_graph_storage.h @@ -95,9 +95,9 @@ struct aligned_allocator { }; // RR node and edge storage class. 
-class t_rr_node_storage { +class t_rr_graph_storage { public: - t_rr_node_storage() { + t_rr_graph_storage() { clear(); } diff --git a/vpr/src/route/rr_node.cpp b/vpr/src/route/rr_node.cpp index d6434a7391f..74e1a0e2f93 100644 --- a/vpr/src/route/rr_node.cpp +++ b/vpr/src/route/rr_node.cpp @@ -1,5 +1,5 @@ #include "rr_node.h" -#include "rr_node_storage.h" +#include "rr_graph_storage.h" #include "globals.h" #include "vpr_error.h" diff --git a/vpr/src/route/rr_node.h b/vpr/src/route/rr_node.h index 2eb1bbda8fe..5ce75ee6d79 100644 --- a/vpr/src/route/rr_node.h +++ b/vpr/src/route/rr_node.h @@ -11,7 +11,7 @@ class t_rr_node { public: //Types - t_rr_node(t_rr_node_storage* storage, RRNodeId id) + t_rr_node(t_rr_graph_storage* storage, RRNodeId id) : storage_(storage) , id_(id) {} @@ -90,7 +90,7 @@ class t_rr_node { } private: //Data - t_rr_node_storage* storage_; + t_rr_graph_storage* storage_; RRNodeId id_; }; diff --git a/vpr/src/route/rr_node_fwd.h b/vpr/src/route/rr_node_fwd.h index 566c328fe5f..6d44f259bae 100644 --- a/vpr/src/route/rr_node_fwd.h +++ b/vpr/src/route/rr_node_fwd.h @@ -7,7 +7,7 @@ //Forward declaration class t_rr_node; -class t_rr_node_storage; +class t_rr_graph_storage; class node_idx_iterator; /* diff --git a/vpr/src/route/rr_node_impl.h b/vpr/src/route/rr_node_impl.h index 6b058fadd0a..a3001ef85eb 100644 --- a/vpr/src/route/rr_node_impl.h +++ b/vpr/src/route/rr_node_impl.h @@ -2,7 +2,7 @@ #define _RR_NODE_IMPL_H_ #include "rr_node.h" -#include "rr_node_storage.h" +#include "rr_graph_storage.h" #include "vpr_error.h" @@ -29,43 +29,43 @@ class node_idx_iterator : public std::iterator(this), RRNodeId(0))); +inline node_idx_iterator t_rr_graph_storage::begin() const { + return node_idx_iterator(t_rr_node(const_cast(this), RRNodeId(0))); } -inline node_idx_iterator t_rr_node_storage::end() const { - return node_idx_iterator(t_rr_node(const_cast(this), RRNodeId(size()))); +inline node_idx_iterator t_rr_graph_storage::end() const { + return 
node_idx_iterator(t_rr_node(const_cast(this), RRNodeId(size()))); } -inline const t_rr_node t_rr_node_storage::operator[](size_t idx) const { - return t_rr_node(const_cast(this), RRNodeId(idx)); +inline const t_rr_node t_rr_graph_storage::operator[](size_t idx) const { + return t_rr_node(const_cast(this), RRNodeId(idx)); } -inline t_rr_node t_rr_node_storage::operator[](size_t idx) { +inline t_rr_node t_rr_graph_storage::operator[](size_t idx) { return t_rr_node(this, RRNodeId(idx)); } -inline const t_rr_node t_rr_node_storage::at(size_t idx) const { +inline const t_rr_node t_rr_graph_storage::at(size_t idx) const { VTR_ASSERT(idx < storage_.size()); - return t_rr_node(const_cast(this), RRNodeId(idx)); + return t_rr_node(const_cast(this), RRNodeId(idx)); } -inline t_rr_node t_rr_node_storage::at(size_t idx) { +inline t_rr_node t_rr_graph_storage::at(size_t idx) { VTR_ASSERT(idx < storage_.size()); return t_rr_node(this, RRNodeId(idx)); } -inline const t_rr_node t_rr_node_storage::front() const { - return t_rr_node(const_cast(this), RRNodeId(0)); +inline const t_rr_node t_rr_graph_storage::front() const { + return t_rr_node(const_cast(this), RRNodeId(0)); } -inline t_rr_node t_rr_node_storage::front() { +inline t_rr_node t_rr_graph_storage::front() { return t_rr_node(this, RRNodeId(0)); } -inline const t_rr_node t_rr_node_storage::back() const { - return t_rr_node(const_cast(this), RRNodeId(size() - 1)); +inline const t_rr_node t_rr_graph_storage::back() const { + return t_rr_node(const_cast(this), RRNodeId(size() - 1)); } -inline t_rr_node t_rr_node_storage::back() { +inline t_rr_node t_rr_graph_storage::back() { return t_rr_node(this, RRNodeId(size() - 1)); } From db9eed4cc3dba1bf8df7e73c23a2153440f2a95d Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Mon, 3 Feb 2020 14:25:40 -0800 Subject: [PATCH 07/11] Add comment around state flags. 
Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/rr_graph_storage.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/vpr/src/route/rr_graph_storage.h b/vpr/src/route/rr_graph_storage.h index c6e1855c89f..8a1e8908426 100644 --- a/vpr/src/route/rr_graph_storage.h +++ b/vpr/src/route/rr_graph_storage.h @@ -354,8 +354,25 @@ class t_rr_graph_storage { vtr::vector edge_dest_node_; vtr::vector edge_switch_; + // Have any edges been read? + // + // Any method that mutates edge storage will be locked out after this + // variable is set. + // + // Reading any of the following members should set this flag: + // - edge_src_node_ + // - edge_dest_node_ + // - edge_switch_ mutable bool edges_read_; + + // Set after either remap_rr_node_switch_indices or mark_edges_as_rr_switch_ids + // has been called. + // + // remap_rr_node_switch_indices converts indices to arch_switch_inf into + // indices to rr_switch_inf. bool remapped_edges_; + + // Set after partition_edges has been called. bool partitioned_; }; From eb13bd0a682a614f0ce8fee607def621b24f515b Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Mon, 3 Feb 2020 14:27:01 -0800 Subject: [PATCH 08/11] Add missing flag check. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/rr_graph_storage.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vpr/src/route/rr_graph_storage.cpp b/vpr/src/route/rr_graph_storage.cpp index 3a8acaaa5db..6f2f90a46a2 100644 --- a/vpr/src/route/rr_graph_storage.cpp +++ b/vpr/src/route/rr_graph_storage.cpp @@ -21,6 +21,8 @@ constexpr size_t kEdgeToNodeRatio = 10; void t_rr_graph_storage::alloc_and_load_edges(const t_rr_edge_info_set* rr_edges_to_create) { // Cannot mutate edges once edges have been read! 
+ VTR_ASSERT(!edges_read_); + size_t required_size = edge_src_node_.size() + rr_edges_to_create->size(); if (edge_src_node_.capacity() < required_size) { size_t new_capacity = std::min(edge_src_node_.capacity(), storage_.size() * kEdgeToNodeRatio); From 058192ece174ecfad6ed7c2eee7f77c199ded59b Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Mon, 3 Feb 2020 14:29:25 -0800 Subject: [PATCH 09/11] Add comments around edge sorting. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/rr_graph_storage.cpp | 9 +++++++-- vpr/src/route/rr_graph_storage.h | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/vpr/src/route/rr_graph_storage.cpp b/vpr/src/route/rr_graph_storage.cpp index 6f2f90a46a2..1ed6f53a942 100644 --- a/vpr/src/route/rr_graph_storage.cpp +++ b/vpr/src/route/rr_graph_storage.cpp @@ -254,7 +254,7 @@ class edge_compare_src_node_and_configurable_first { const std::vector& rr_switch_inf_; }; -void t_rr_graph_storage::assign_edges() { +void t_rr_graph_storage::assign_first_edges() { VTR_ASSERT(first_edge_.empty()); // Last element is a dummy element @@ -418,6 +418,11 @@ void t_rr_graph_storage::partition_edges() { VTR_ASSERT(remapped_edges_); const auto& device_ctx = g_vpr_ctx.device(); + // This sort ensures two things: + // - Edges are stored in ascending source node order. This is required + // by assign_first_edges() + // - Edges within a source node have the configurable edges before the + // non-configurable edges. 
std::stable_sort( edge_sort_iterator(this, 0), edge_sort_iterator(this, edge_src_node_.size()), @@ -425,7 +430,7 @@ void t_rr_graph_storage::partition_edges() { partitioned_ = true; - assign_edges(); + assign_first_edges(); VTR_ASSERT_SAFE(validate()); } diff --git a/vpr/src/route/rr_graph_storage.h b/vpr/src/route/rr_graph_storage.h index 8a1e8908426..57db619627b 100644 --- a/vpr/src/route/rr_graph_storage.h +++ b/vpr/src/route/rr_graph_storage.h @@ -340,7 +340,7 @@ class t_rr_graph_storage { // Take allocated edges in edge_src_node_/ edge_dest_node_ / edge_switch_ // sort, and assign the first edge for each - void assign_edges(); + void assign_first_edges(); // Verify that first_edge_ array correctly partitions rr edge data. bool verify_first_edges() const; From 98a6d3e2f92c05b9e9d6aba662060072b273422d Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Mon, 3 Feb 2020 14:33:37 -0800 Subject: [PATCH 10/11] Used function form of size_t(). Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/rr_graph_storage.cpp | 20 ++++++++++---------- vpr/src/route/rr_graph_storage.h | 6 +++--- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/vpr/src/route/rr_graph_storage.cpp b/vpr/src/route/rr_graph_storage.cpp index 1ed6f53a942..b163cb5fe15 100644 --- a/vpr/src/route/rr_graph_storage.cpp +++ b/vpr/src/route/rr_graph_storage.cpp @@ -93,8 +93,8 @@ struct edge_swapper { operator t_rr_edge_info() const { VTR_ASSERT(idx_ < storage_->edge_src_node_.size()); t_rr_edge_info info( - (size_t)storage_->edge_src_node_[RREdgeId(idx_)], - (size_t)storage_->edge_dest_node_[RREdgeId(idx_)], + size_t(storage_->edge_src_node_[RREdgeId(idx_)]), + size_t(storage_->edge_dest_node_[RREdgeId(idx_)]), storage_->edge_switch_[RREdgeId(idx_)]); return info; @@ -273,7 +273,7 @@ void t_rr_graph_storage::assign_first_edges() { while (true) { VTR_ASSERT(first_id < num_edges); VTR_ASSERT(second_id < num_edges); - 
size_t current_node_id = (size_t)edge_src_node_[RREdgeId(second_id)]; + size_t current_node_id = size_t(edge_src_node_[RREdgeId(second_id)]); if (node_id < current_node_id) { // All edges belonging to node_id are assigned. while (node_id < current_node_id) { @@ -311,9 +311,9 @@ bool t_rr_graph_storage::verify_first_edges() const { for (size_t iedge = 0; iedge < num_edges; ++iedge) { RRNodeId src_node = edge_src_node_.at(RREdgeId(iedge)); RREdgeId first_edge = first_edge_.at(src_node); - RREdgeId second_edge = first_edge_.at(RRNodeId((size_t)src_node + 1)); - VTR_ASSERT(iedge >= (size_t)first_edge); - VTR_ASSERT(iedge < (size_t)second_edge); + RREdgeId second_edge = first_edge_.at(RRNodeId(size_t(src_node) + 1)); + VTR_ASSERT(iedge >= size_t(first_edge)); + VTR_ASSERT(iedge < size_t(second_edge)); } return true; @@ -439,16 +439,16 @@ t_edge_size t_rr_graph_storage::num_configurable_edges(const RRNodeId& id) const VTR_ASSERT(!first_edge_.empty() && remapped_edges_); const auto& device_ctx = g_vpr_ctx.device(); - auto first_id = (size_t)first_edge_[id]; - auto second_id = (size_t)(&first_edge_[id])[1]; - for (size_t idx = first_id; idx < second_id; ++idx) { + auto first_id = size_t(first_edge_[id]); + auto last_id = size_t((&first_edge_[id])[1]); + for (size_t idx = first_id; idx < last_id; ++idx) { auto switch_idx = edge_switch_[RREdgeId(idx)]; if (!device_ctx.rr_switch_inf[switch_idx].configurable()) { return idx - first_id; } } - return second_id - first_id; + return last_id - first_id; } t_edge_size t_rr_graph_storage::num_non_configurable_edges(const RRNodeId& id) const { diff --git a/vpr/src/route/rr_graph_storage.h b/vpr/src/route/rr_graph_storage.h index 57db619627b..4e83e11e606 100644 --- a/vpr/src/route/rr_graph_storage.h +++ b/vpr/src/route/rr_graph_storage.h @@ -286,8 +286,8 @@ class t_rr_graph_storage { t_edge_size num_edges(const RRNodeId& id) const { RREdgeId first_id = first_edge_[id]; - RREdgeId second_id = (&first_edge_[id])[1]; - return 
(size_t)second_id - (size_t)first_id; + RREdgeId last_id = (&first_edge_[id])[1]; + return size_t(last_id) - size_t(first_id); } t_edge_size num_configurable_edges(const RRNodeId& id) const; @@ -295,7 +295,7 @@ class t_rr_graph_storage { RREdgeId edge_id(const RRNodeId& id, t_edge_size iedge) const { RREdgeId first_edge = first_edge_[id]; - RREdgeId ret((size_t)first_edge + iedge); + RREdgeId ret(size_t(first_edge) + iedge); VTR_ASSERT_SAFE(ret < (&first_edge_[id])[1]); return ret; } From e96a3ba5cda28c490390470cf3168110f2773286 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Wed, 5 Feb 2020 11:47:41 -0800 Subject: [PATCH 11/11] Integrate schema based reader with edge refactoring. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/rr_graph.cpp | 2 +- vpr/src/route/rr_graph_storage.h | 12 ++ vpr/src/route/rr_graph_uxsdcxx_serializer.h | 228 ++++++++------------ vpr/src/util/vpr_utils.h | 23 ++ 4 files changed, 127 insertions(+), 138 deletions(-) diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 49bac28b9d8..dc68cc117e6 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -725,7 +725,7 @@ static void build_rr_graph(const t_graph_type graph_type, //Partition the rr graph edges for efficient access to configurable/non-configurable //edge subsets. Must be done after RR switches have been allocated - partition_rr_graph_edges(&device_ctx.rr_nodes); + device_ctx.rr_nodes.partition_edges(); //Save the channel widths for the newly constructed graph device_ctx.chan_width = nodes_per_chan; diff --git a/vpr/src/route/rr_graph_storage.h b/vpr/src/route/rr_graph_storage.h index 4e83e11e606..1b73a5712f6 100644 --- a/vpr/src/route/rr_graph_storage.h +++ b/vpr/src/route/rr_graph_storage.h @@ -6,6 +6,7 @@ #include "rr_node_fwd.h" #include "rr_graph2.h" #include "vtr_log.h" +#include "vpr_utils.h" /* Main structure describing one routing resource node. 
Everything in * * this structure should describe the graph -- information needed only * @@ -101,6 +102,17 @@ class t_rr_graph_storage { clear(); } + // Makes room in storage for RRNodeId in amoritized O(1) fashion. + // + // This results in an allocation pattern similiar to what would happen + // if push_back(x) / emplace_back() were used if underlying storage + // was not preallocated. + void make_room_for_node(RRNodeId elem_position) { + make_room_in_vector(&storage_, size_t(elem_position)); + ptc_.reserve(storage_.capacity()); + ptc_.resize(storage_.size()); + } + void reserve(size_t size) { // No edges can be assigned if mutating the rr node array. VTR_ASSERT(!edges_read_); diff --git a/vpr/src/route/rr_graph_uxsdcxx_serializer.h b/vpr/src/route/rr_graph_uxsdcxx_serializer.h index 5295ecbd5fd..6f07ac045d4 100644 --- a/vpr/src/route/rr_graph_uxsdcxx_serializer.h +++ b/vpr/src/route/rr_graph_uxsdcxx_serializer.h @@ -17,24 +17,6 @@ #include "rr_graph2.h" #include "rr_graph_indexed_data.h" -template -void make_room_in_vector(T* vec, size_t elem_position) { - if (elem_position < vec->size()) { - return; - } - - size_t capacity = std::max(vec->capacity(), size_t(16)); - while (elem_position >= capacity) { - capacity *= 2; - } - - if (capacity >= vec->capacity()) { - vec->reserve(capacity); - } - - vec->resize(elem_position + 1); -} - class MetadataBind { public: MetadataBind() @@ -164,7 +146,7 @@ class t_metadata_dict_iterator { class EdgeWalker { public: - void initialize(const std::vector* nodes) { + void initialize(const t_rr_graph_storage* nodes) { nodes_ = nodes; num_edges_ = 0; current_src_inode_ = 0; @@ -222,7 +204,7 @@ class EdgeWalker { } private: - const std::vector* nodes_; + const t_rr_graph_storage* nodes_; size_t num_edges_; size_t current_src_inode_; size_t current_edge_; @@ -241,12 +223,12 @@ struct RrGraphContextTypes : public uxsd::DefaultRrGraphContextTypes { using PinClassReadContext = const std::pair; using BlockTypeReadContext = const 
t_physical_tile_type*; using GridLocReadContext = const t_grid_tile*; - using NodeLocReadContext = const t_rr_node*; - using NodeTimingReadContext = const t_rr_node*; - using NodeSegmentReadContext = const t_rr_node*; + using NodeLocReadContext = const t_rr_node; + using NodeTimingReadContext = const t_rr_node; + using NodeSegmentReadContext = const t_rr_node; using MetaReadContext = const t_metadata_dict::value_type*; using MetadataReadContext = t_metadata_dict_iterator; - using NodeReadContext = const t_rr_node*; + using NodeReadContext = const t_rr_node; using EdgeReadContext = const EdgeWalker*; using RrEdgesReadContext = EdgeWalker; using TimingWriteContext = t_rr_switch_inf*; @@ -277,7 +259,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { std::string* read_rr_graph_filename, bool read_edge_metadata, t_chan_width* chan_width, - std::vector* rr_nodes, + t_rr_graph_storage* rr_nodes, std::vector* rr_switch_inf, std::vector* rr_indexed_data, t_rr_node_indices* rr_node_indices, @@ -558,35 +540,35 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { */ inline int init_node_loc(int& inode, int ptc, int xhigh, int xlow, int yhigh, int ylow) final { - auto& node = (*rr_nodes_)[inode]; + auto node = (*rr_nodes_)[inode]; node.set_coordinates(xlow, ylow, xhigh, yhigh); node.set_ptc_num(ptc); return inode; } inline void finish_node_loc(int& /*inode*/) final {} - inline const t_rr_node* get_node_loc(const t_rr_node*& node) final { + inline const t_rr_node get_node_loc(const t_rr_node& node) final { return node; } - inline int get_node_loc_ptc(const t_rr_node*& node) final { - return node->ptc_num(); + inline int get_node_loc_ptc(const t_rr_node& node) final { + return node.ptc_num(); } - inline int get_node_loc_xhigh(const t_rr_node*& node) final { - return node->xhigh(); + inline int get_node_loc_xhigh(const t_rr_node& node) final { + return node.xhigh(); } - inline int get_node_loc_xlow(const t_rr_node*& node) final { - return node->xlow(); + 
inline int get_node_loc_xlow(const t_rr_node& node) final { + return node.xlow(); } - inline int get_node_loc_yhigh(const t_rr_node*& node) final { - return node->yhigh(); + inline int get_node_loc_yhigh(const t_rr_node& node) final { + return node.yhigh(); } - inline int get_node_loc_ylow(const t_rr_node*& node) final { - return node->ylow(); + inline int get_node_loc_ylow(const t_rr_node& node) final { + return node.ylow(); } inline void set_node_loc_side(uxsd::enum_loc_side side, int& inode) final { - auto& node = (*rr_nodes_)[inode]; + auto node = (*rr_nodes_)[inode]; if (uxsd::enum_loc_side::UXSD_INVALID == side) { // node_loc.side is only expected on IPIN/OPIN. @@ -599,9 +581,9 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { node.set_side(from_uxsd_loc_side(side)); } } - inline uxsd::enum_loc_side get_node_loc_side(const t_rr_node*& node) final { - if (node->type() == IPIN || node->type() == OPIN) { - return to_uxsd_loc_side(node->side()); + inline uxsd::enum_loc_side get_node_loc_side(const t_rr_node& node) final { + if (node.type() == IPIN || node.type() == OPIN) { + return to_uxsd_loc_side(node.side()); } else { return uxsd::enum_loc_side::UXSD_INVALID; } @@ -614,23 +596,23 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { * */ inline int init_node_timing(int& inode, float C, float R) final { - auto& node = (*rr_nodes_)[inode]; + auto node = (*rr_nodes_)[inode]; node.set_rc_index(find_create_rr_rc_data(R, C)); return inode; } inline void finish_node_timing(int& /*inode*/) final {} - inline const t_rr_node* get_node_timing(const t_rr_node*& node) final { + inline const t_rr_node get_node_timing(const t_rr_node& node) final { return node; } - inline bool has_node_timing(const t_rr_node*& /*node*/) final { + inline bool has_node_timing(const t_rr_node& /*node*/) final { return true; } - inline float get_node_timing_C(const t_rr_node*& node) final { - return node->C(); + inline float get_node_timing_C(const t_rr_node& node) final { + 
return node.C(); } - inline float get_node_timing_R(const t_rr_node*& node) final { - return node->R(); + inline float get_node_timing_R(const t_rr_node& node) final { + return node.R(); } /** Generated for complex type "node_segment": @@ -645,7 +627,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { segment_inf_.size()); } - auto& node = (*rr_nodes_)[inode]; + auto node = (*rr_nodes_)[inode]; if (GRAPH_GLOBAL == graph_type_) { node.set_cost_index(0); } else if (node.type() == CHANX) { @@ -658,15 +640,15 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { return inode; } inline void finish_node_segment(int& /*inode*/) final {} - inline int get_node_segment_segment_id(const t_rr_node*& node) final { - return (*rr_indexed_data_)[node->cost_index()].seg_index; + inline int get_node_segment_segment_id(const t_rr_node& node) final { + return (*rr_indexed_data_)[node.cost_index()].seg_index; } - inline const t_rr_node* get_node_segment(const t_rr_node*& node) final { + inline const t_rr_node get_node_segment(const t_rr_node& node) final { return node; } - inline bool has_node_segment(const t_rr_node*& node) final { - return (*rr_indexed_data_)[node->cost_index()].seg_index != -1; + inline bool has_node_segment(const t_rr_node& node) final { + return (*rr_indexed_data_)[node.cost_index()].seg_index != -1; } inline MetadataBind init_node_metadata(int& inode) final { @@ -677,11 +659,11 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { inline void finish_node_metadata(MetadataBind& bind) final { bind.finish(); } - inline t_metadata_dict_iterator get_node_metadata(const t_rr_node*& node) final { + inline t_metadata_dict_iterator get_node_metadata(const t_rr_node& node) final { const auto itr = rr_node_metadata_->find(get_node_id(node)); return t_metadata_dict_iterator(&itr->second, report_error_); } - inline bool has_node_metadata(const t_rr_node*& node) final { + inline bool has_node_metadata(const t_rr_node& node) final { const auto itr = 
rr_node_metadata_->find(get_node_id(node)); return itr != rr_node_metadata_->end(); } @@ -701,8 +683,8 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { // was invoked, but on formats that lack size on read, // make_room_in_vector will use an allocation pattern that is // amoritized O(1). - make_room_in_vector(rr_nodes_, id); - auto& node = (*rr_nodes_)[id]; + rr_nodes_->make_room_for_node(RRNodeId(id)); + auto node = (*rr_nodes_)[id]; node.set_capacity(capacity); node.set_type(from_uxsd_node_type(type)); @@ -734,29 +716,27 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { return id; } - inline void finish_rr_nodes_node(int& inode) final { - auto& node = (*rr_nodes_)[inode]; - node.set_num_edges(0); + inline void finish_rr_nodes_node(int& /*inode*/) final { } inline size_t num_rr_nodes_node(void*& /*ctx*/) final { return rr_nodes_->size(); } - inline const t_rr_node* get_rr_nodes_node(int n, void*& /*ctx*/) final { - return &(*rr_nodes_)[n]; + inline const t_rr_node get_rr_nodes_node(int n, void*& /*ctx*/) final { + return (*rr_nodes_)[n]; } - inline unsigned int get_node_capacity(const t_rr_node*& node) final { - return node->capacity(); + inline unsigned int get_node_capacity(const t_rr_node& node) final { + return node.capacity(); } - inline unsigned int get_node_id(const t_rr_node*& node) final { - return node - &(*rr_nodes_)[0]; + inline unsigned int get_node_id(const t_rr_node& node) final { + return size_t(node.id()); } - inline uxsd::enum_node_type get_node_type(const t_rr_node*& node) final { - return to_uxsd_node_type(node->type()); + inline uxsd::enum_node_type get_node_type(const t_rr_node& node) final { + return to_uxsd_node_type(node.type()); } inline void set_node_direction(uxsd::enum_node_direction direction, int& inode) final { - auto& node = (*rr_nodes_)[inode]; + auto node = (*rr_nodes_)[inode]; if (direction == uxsd::enum_node_direction::UXSD_INVALID) { if (node.type() == CHANX || node.type() == CHANY) { report_error( @@ 
-767,9 +747,9 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { node.set_direction(from_uxsd_node_direction(direction)); } } - inline uxsd::enum_node_direction get_node_direction(const t_rr_node*& node) final { - if (node->type() == CHANX || node->type() == CHANY) { - return to_uxsd_node_direction(node->direction()); + inline uxsd::enum_node_direction get_node_direction(const t_rr_node& node) final { + if (node.type() == CHANX || node.type() == CHANY) { + return to_uxsd_node_direction(node.direction()); } else { return uxsd::enum_node_direction::UXSD_INVALID; } @@ -804,7 +784,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { * */ inline void preallocate_rr_edges_edge(void*& /*ctx*/, size_t size) final { - edges_.reserve(size); + rr_nodes_->reserve_edges(size); } inline MetadataBind add_rr_edges_edge(void*& /*ctx*/, unsigned int sink_node, unsigned int src_node, unsigned int switch_id) final { if (src_node >= rr_nodes_->size()) { @@ -820,9 +800,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { bind.set_ignore(); } - // If preallocate_rr_edges_edge is not invoked, this push_back is - // still amoritized O(1) as guarenteed by std::vector. - edges_.push_back(std::make_tuple(src_node, sink_node, switch_id)); + rr_nodes_->emplace_back_edge(RRNodeId(src_node), RRNodeId(sink_node), switch_id); return bind; } inline void finish_rr_edges_edge(MetadataBind& bind) final { @@ -877,23 +855,6 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { return nullptr; } inline void finish_rr_graph_rr_edges(void*& /*ctx*/) final { - // edges_ now contains the full edge list, copy edges out to the - // relevant nodes. 
- std::vector num_edges_for_node(rr_nodes_->size()); - for (const auto& edge : edges_) { - num_edges_for_node[std::get<0>(edge)]++; - } - - for (size_t inode = 0; inode < rr_nodes_->size(); inode++) { - if (num_edges_for_node[inode] > std::numeric_limits::max()) { - report_error( - "source node %d edge count %d is too high", - inode, num_edges_for_node[inode]); - } - (*rr_nodes_)[inode].set_num_edges(num_edges_for_node[inode]); - num_edges_for_node[inode] = 0; - } - /*initialize a vector that keeps track of the number of wire to ipin switches * There should be only one wire to ipin switch. In case there are more, make sure to * store the most frequent switch */ @@ -902,45 +863,44 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { //first is index, second is count std::pair most_frequent_switch(-1, 0); - for (const auto& edge : edges_) { - auto source_node = std::get<0>(edge); - auto sink_node = std::get<1>(edge); - auto switch_id = std::get<2>(edge); - - if (sink_node >= rr_nodes_->size()) { - report_error( - "sink_node %u is larger than rr_nodes.size() %zu", - sink_node, rr_nodes_->size()); - } - - if (switch_id >= rr_switch_inf_->size()) { - report_error( - "switch_id %u is larger than num_rr_switches %zu", - switch_id, rr_switch_inf_->size()); - } - - auto& node = (*rr_nodes_)[source_node]; + // Partition the rr graph edges for efficient access to + // configurable/non-configurable edge subsets. Must be done after RR + // switches have been allocated. 
+ rr_nodes_->mark_edges_as_rr_switch_ids(); + rr_nodes_->partition_edges(); + + for (int source_node = 0; source_node < (ssize_t)rr_nodes_->size(); ++source_node) { + int num_edges = rr_nodes_->num_edges(RRNodeId(source_node)); + for (int iconn = 0; iconn < num_edges; ++iconn) { + size_t sink_node = size_t(rr_nodes_->edge_sink_node(RRNodeId(source_node), iconn)); + size_t switch_id = rr_nodes_->edge_switch(RRNodeId(source_node), iconn); + if (sink_node >= rr_nodes_->size()) { + report_error( + "sink_node %zu is larger than rr_nodes.size() %zu", + sink_node, rr_nodes_->size()); + } - /*Keeps track of the number of the specific type of switch that connects a wire to an ipin - * use the pair data structure to keep the maximum*/ - if (node.type() == CHANX || node.type() == CHANY) { - if ((*rr_nodes_)[sink_node].type() == IPIN) { - count_for_wire_to_ipin_switches[switch_id]++; - if (count_for_wire_to_ipin_switches[switch_id] > most_frequent_switch.second) { - most_frequent_switch.first = switch_id; - most_frequent_switch.second = count_for_wire_to_ipin_switches[switch_id]; + if (switch_id >= rr_switch_inf_->size()) { + report_error( + "switch_id %zu is larger than num_rr_switches %zu", + switch_id, rr_switch_inf_->size()); + } + auto node = (*rr_nodes_)[source_node]; + + /*Keeps track of the number of the specific type of switch that connects a wire to an ipin + * use the pair data structure to keep the maximum*/ + if (node.type() == CHANX || node.type() == CHANY) { + if ((*rr_nodes_)[sink_node].type() == IPIN) { + count_for_wire_to_ipin_switches[switch_id]++; + if (count_for_wire_to_ipin_switches[switch_id] > most_frequent_switch.second) { + most_frequent_switch.first = switch_id; + most_frequent_switch.second = count_for_wire_to_ipin_switches[switch_id]; + } } } } - - //set edge in correct rr_node data structure - node.set_edge_sink_node(num_edges_for_node[source_node], sink_node); - node.set_edge_switch(num_edges_for_node[source_node], switch_id); - 
num_edges_for_node[source_node]++; } - edges_.clear(); - edges_.shrink_to_fit(); VTR_ASSERT(wire_to_rr_ipin_switch_ != nullptr); *wire_to_rr_ipin_switch_ = most_frequent_switch.first; } @@ -1495,14 +1455,9 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { } void finish_load() final { - // Partition the rr graph edges for efficient access to - // configurable/non-configurable edge subsets. Must be done after RR - // switches have been allocated. - partition_rr_graph_edges(rr_nodes_); - process_rr_node_indices(); - init_fan_in(*rr_nodes_, rr_nodes_->size()); + rr_nodes_->init_fan_in(); bool is_global_graph = GRAPH_GLOBAL == graph_type_; int max_chan_width = (is_global_graph ? 1 : chan_width_->max); @@ -1565,7 +1520,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { * Note that CHANX and CHANY 's x and y are swapped due to the chan and seg convention. */ for (size_t inode = 0; inode < rr_nodes_->size(); inode++) { - auto& node = (*rr_nodes_)[inode]; + auto node = (*rr_nodes_)[inode]; if (node.type() == SOURCE || node.type() == SINK) { for (int ix = node.xlow(); ix <= node.xhigh(); ix++) { for (int iy = node.ylow(); iy <= node.yhigh(); iy++) { @@ -1627,7 +1582,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { int count; /* CHANX and CHANY need to reevaluated with its ptc num as the correct index*/ for (size_t inode = 0; inode < rr_nodes_->size(); inode++) { - auto& node = (*rr_nodes_)[inode]; + auto node = (*rr_nodes_)[inode]; if (node.type() == CHANX) { for (int iy = node.ylow(); iy <= node.yhigh(); iy++) { for (int ix = node.xlow(); ix <= node.xhigh(); ix++) { @@ -1878,14 +1833,13 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { } // Temporary storage - std::vector> edges_; std::vector seg_index_; std::string temp_string_; // Output for loads, and constant data for writes. 
int* wire_to_rr_ipin_switch_; t_chan_width* chan_width_; - std::vector* rr_nodes_; + t_rr_graph_storage* rr_nodes_; std::vector* rr_switch_inf_; std::vector* rr_indexed_data_; t_rr_node_indices* rr_node_indices_; diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index e43cc91dacd..cee7379c9ae 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -182,4 +182,27 @@ int max_pins_per_grid_tile(); void pretty_print_uint(const char* prefix, size_t value, int num_digits, int scientific_precision); void pretty_print_float(const char* prefix, double value, int num_digits, int scientific_precision); + +// Make room in a vector, with amortized O(1) time by using a pow2 growth pattern. +// +// This enables potentially random insertion into a vector with amortized O(1) +// time. +template +void make_room_in_vector(T* vec, size_t elem_position) { + if (elem_position < vec->size()) { + return; + } + + size_t capacity = std::max(vec->capacity(), size_t(16)); + while (elem_position >= capacity) { + capacity *= 2; + } + + if (capacity >= vec->capacity()) { + vec->reserve(capacity); + } + + vec->resize(elem_position + 1); +} + #endif