diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst index b3d482e048..fe1b46f83b 100644 --- a/doc/src/vpr/command_line_usage.rst +++ b/doc/src/vpr/command_line_usage.rst @@ -907,6 +907,16 @@ If any of init_t, exit_t or alpha_t is specified, the user schedule, with a fixe **Default:** ``auto_bb`` +.. option:: --place_frequency {once | always} + + Specifies how often placement is performed during the minimum channel width search. + + ``once``: Placement is run only once at the beginning of the channel width search. This reduces runtime but may not benefit from congestion-aware optimizations. + + ``always``: Placement is rerun for each channel width trial. This might improve routability at the cost of increased runtime. + + **Default:** ``once`` + .. option:: --place_chan_width <int> Tells VPR how many tracks a channel of relative width 1 is expected to need to complete routing of this circuit. @@ -1869,6 +1879,7 @@ The following options are only valid when the router is in timing-driven mode (t **Default:** ``0.5`` .. option:: --router_initial_acc_cost_chan_congestion_weight <float> + Weight applied to the excess channel utilization (above threshold) when computing the initial accumulated cost (acc_cost)of routing resources. Higher values make the router more sensitive to early congestion. @@ -1907,7 +1918,7 @@ The following options are only valid when the router is in timing-driven mode (t .. option:: --router_first_iter_timing_report <file> - Name of the timing report file to generate after the first routing iteration completes (not generated if unspecfied). + Name of the timing report file to generate after the first routing iteration completes (not generated if unspecified). .. option:: --router_debug_net <int> diff --git a/libs/libvtrutil/src/vtr_prefix_sum.h b/libs/libvtrutil/src/vtr_prefix_sum.h index a9472fcd07..489dd487fe 100644 --- a/libs/libvtrutil/src/vtr_prefix_sum.h +++ b/libs/libvtrutil/src/vtr_prefix_sum.h @@ -92,7 +92,7 @@ class PrefixSum1D { /** * @brief Construct the 1D prefix sum from a vector. */ - PrefixSum1D(std::vector<T> vals, T zero = T()) + PrefixSum1D(const std::vector<T>& vals, T zero = T()) : PrefixSum1D( vals.size(), [&](size_t x) noexcept { diff --git a/vpr/src/analytical_place/ap_flow_enums.h b/vpr/src/analytical_place/ap_flow_enums.h index da47927d5d..707a842ae8 100644 --- a/vpr/src/analytical_place/ap_flow_enums.h +++ b/vpr/src/analytical_place/ap_flow_enums.h @@ -27,7 +27,7 @@ enum class e_ap_analytical_solver { */ enum class e_ap_partial_legalizer { BiPartitioning, ///< Partial Legalizer which forms minimum windows around dense regions and uses bipartitioning to spread blocks over windows. - FlowBased ///> Partial Legalizer which flows blocks from overfilled bins to underfilled bins. + FlowBased ///< Partial Legalizer which flows blocks from overfilled bins to underfilled bins. }; /** diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index 2e8d36e5f2..858bd3198c 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -495,20 +495,16 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) { static void ShowPlacerOpts(const t_placer_opts& PlacerOpts) { VTR_LOG("PlacerOpts.place_freq: "); switch (PlacerOpts.place_freq) { - case PLACE_ONCE: - VTR_LOG("PLACE_ONCE\n"); + case e_place_freq::ONCE: + VTR_LOG("ONCE\n"); break; - case PLACE_ALWAYS: - VTR_LOG("PLACE_ALWAYS\n"); - break; - case PLACE_NEVER: - VTR_LOG("PLACE_NEVER\n"); + case e_place_freq::ALWAYS: + VTR_LOG("ALWAYS\n"); break; default: VTR_LOG_ERROR("Unknown Place Freq\n"); } - if ((PLACE_ONCE == PlacerOpts.place_freq) - || (PLACE_ALWAYS == PlacerOpts.place_freq)) { + if (PlacerOpts.place_freq == e_place_freq::ONCE || PlacerOpts.place_freq == e_place_freq::ALWAYS) { VTR_LOG("PlacerOpts.place_algorithm: "); switch (PlacerOpts.place_algorithm.get()) { case e_place_algorithm::BOUNDING_BOX_PLACE: diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp index 138d411539..6381eb39dc 100644 --- a/vpr/src/base/place_and_route.cpp +++ b/vpr/src/base/place_and_route.cpp @@ -167,7 +167,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, break; } - if (placer_opts.place_freq == PLACE_ALWAYS) { + if (placer_opts.place_freq == e_place_freq::ALWAYS) { placer_opts.place_chan_width = current; try_place(placement_net_list, placer_opts, @@ -312,7 +312,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, fflush(stdout); if (current < 1) break; - if (placer_opts.place_freq == PLACE_ALWAYS) { + if (placer_opts.place_freq == e_place_freq::ALWAYS) { placer_opts.place_chan_width = current; try_place(placement_net_list, placer_opts, router_opts, analysis_opts, noc_opts, arch->Chans, det_routing_arch, segment_inf, @@ -341,7 +341,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, route_ctx.clb_opins_used_locally, saved_clb_opins_used_locally); - if (placer_opts.place_freq == PLACE_ALWAYS) { + if (placer_opts.place_freq == e_place_freq::ALWAYS) { auto& cluster_ctx = g_vpr_ctx.clustering(); // Cluster-based net_list is used for placement std::string placement_id = print_place(filename_opts.NetFile.c_str(), cluster_ctx.clb_nlist.netlist_id().c_str(), @@ -417,7 +417,7 @@ t_chan_width setup_chan_width(const t_router_opts& router_opts, if (router_opts.fixed_channel_width == NO_FIXED_CHANNEL_WIDTH) { auto& device_ctx = g_vpr_ctx.device(); - auto type = find_most_common_tile_type(device_ctx.grid); + t_physical_tile_type_ptr type = find_most_common_tile_type(device_ctx.grid); width_fac = 4 * type->num_pins; // this is 2x the value that binary search starts diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 57c449b6d2..e13c7768b1 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -93,6 +93,7 @@ struct ParseArchFormat { return {"vtr", "fpga-interchange"}; } }; + struct ParseCircuitFormat { ConvertedValue<e_circuit_format> from_str(const std::string& str) { ConvertedValue<e_circuit_format> conv_value; @@ -618,6 +619,37 @@ struct ParsePlaceBoundingBox { } }; +struct ParsePlacementFreq { + ConvertedValue<e_place_freq> from_str(const std::string& str) { + ConvertedValue<e_place_freq> conv_value; + if (str == "once") { + conv_value.set_value(e_place_freq::ONCE); + } else if (str == "always") { + conv_value.set_value(e_place_freq::ALWAYS); + } else { + std::stringstream msg; + msg << "Invalid conversion from '" << str << "' to e_place_freq (expected one of: " << argparse::join(default_choices(), ", ") << ")"; + conv_value.set_error(msg.str()); + } + return conv_value; + } + + ConvertedValue<std::string> to_str(e_place_freq val) { + ConvertedValue<std::string> conv_value; + if (val == e_place_freq::ONCE) { + conv_value.set_value("once"); + } else { + VTR_ASSERT(val == e_place_freq::ALWAYS); + conv_value.set_value("always"); + } + return conv_value; + } + + std::vector<std::string> default_choices() { + return {"once", "always"}; + } +}; + struct ParsePlaceAgentAlgorithm { ConvertedValue<e_agent_algorithm> from_str(const std::string& str) { ConvertedValue<e_agent_algorithm> conv_value; @@ -2177,7 +2209,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio auto& place_grp = parser.add_argument_group("placement options"); - place_grp.add_argument(args.Seed, "--seed") + place_grp.add_argument(args.seed, "--seed") .help("Placement random number generator seed") .default_value("1") .show_in(argparse::ShowIn::HELP_ONLY); @@ -2195,7 +2227,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("astar") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceInnerNum, "--inner_num") + place_grp.add_argument(args.place_inner_num, "--inner_num") .help("Controls number of moves per temperature: inner_num * num_blocks ^ (4/3)") .default_value("0.5") .show_in(argparse::ShowIn::HELP_ONLY); @@ -2226,17 +2258,17 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("1.0") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceInitT, "--init_t") + place_grp.add_argument(args.place_init_t, "--init_t") .help("Initial temperature for manual annealing schedule") .default_value("100.0") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceExitT, "--exit_t") + place_grp.add_argument(args.place_exit_t, "--exit_t") .help("Temperature at which annealing which terminate for manual annealing schedule") .default_value("0.01") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceAlphaT, "--alpha_t") + place_grp.add_argument(args.place_alpha_t, "--alpha_t") .help( "Temperature scaling factor for manual annealing schedule." " Old temperature is multiplied by alpha_t") @@ -2259,7 +2291,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument<e_place_algorithm, ParsePlaceAlgorithm>(args.PlaceAlgorithm, "--place_algorithm") + place_grp.add_argument<e_place_algorithm, ParsePlaceAlgorithm>(args.place_algorithm, "--place_algorithm") .help( "Controls which placement algorithm is used. Valid options:\n" " * bounding_box: Focuses purely on minimizing the bounding box wirelength of the circuit. Turns off timing analysis if specified.\n" @@ -2269,7 +2301,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .choices({"bounding_box", "criticality_timing", "slack_timing"}) .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument<e_place_algorithm, ParsePlaceAlgorithm>(args.PlaceQuenchAlgorithm, "--place_quench_algorithm") + place_grp.add_argument<e_place_algorithm, ParsePlaceAlgorithm>(args.place_quench_algorithm, "--place_quench_algorithm") .help( "Controls which placement algorithm is used during placement quench.\n" "If specified, it overrides the option --place_algorithm during placement quench.\n" @@ -2281,7 +2313,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .choices({"bounding_box", "criticality_timing", "slack_timing"}) .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceChanWidth, "--place_chan_width") + place_grp.add_argument(args.place_chan_width, "--place_chan_width") .help( "Sets the assumed channel width during placement. " "If --place_chan_width is unspecified, but --route_chan_width is specified the " @@ -2334,15 +2366,21 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio "Specifies the type of bounding box to be used in 3D architectures.\n" "\n" "MODE options:\n" - " auto_bb : Automatically determine the appropriate bounding box based on the connections between layers.\n" - " cube_bb : Use 3D bounding boxes.\n" - " per_layer_bb : Use per-layer bounding boxes.\n" + " auto_bb : Automatically determine the appropriate bounding box based on the connections between layers.\n" + " cube_bb : Use 3D bounding boxes.\n" + " per_layer_bb : Use per-layer bounding boxes.\n" "\n" "Choose one of the available modes to define the behavior of bounding boxes in your 3D architecture. The default mode is 'automatic'.") .default_value("auto_bb") .choices({"auto_bb", "cube_bb", "per_layer_bb"}) .show_in(argparse::ShowIn::HELP_ONLY); + place_grp.add_argument<e_place_freq, ParsePlacementFreq>(args.place_placement_freq, "--place_frequency") + .help("Run placement every time or only once during channel width search.") + .default_value("once") + .choices({"once", "always"}) + .show_in(argparse::ShowIn::HELP_ONLY); + place_grp.add_argument<bool, ParseOnOff>(args.RL_agent_placement, "--RL_agent_placement") .help( "Uses a Reinforcement Learning (RL) agent in choosing the appropriate move type in placement." @@ -2483,14 +2521,30 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio auto& place_timing_grp = parser.add_argument_group("timing-driven placement options"); - place_timing_grp.add_argument(args.PlaceTimingTradeoff, "--timing_tradeoff") + place_timing_grp.add_argument(args.place_timing_tradeoff, "--timing_tradeoff") .help( "Trade-off control between delay and wirelength during placement." " 0.0 focuses completely on wirelength, 1.0 completely on timing") .default_value("0.5") .show_in(argparse::ShowIn::HELP_ONLY); - place_timing_grp.add_argument(args.RecomputeCritIter, "--recompute_crit_iter") + place_timing_grp.add_argument(args.place_congestion_factor, "--congestion_factor") + .help("Weighting factor for congestion cost during placement. " + "Higher values prioritize congestion avoidance over bounding box and timing costs. " + "When set to zero, congestion modeling and optimization is disabled in the placement stage.") + .default_value("0.0") + .show_in(argparse::ShowIn::HELP_ONLY); + + place_timing_grp.add_argument(args.place_congestion_rlim_trigger_ratio, "--congestion_rlim_trigger_ratio") + .help("Enables congestion modeling when the ratio of the current range limit to the initial range limit falls below this threshold, " + "provided the congestion weighting factor is non-zero.") + .default_value("1.0") + .show_in(argparse::ShowIn::HELP_ONLY); + + place_timing_grp.add_argument(args.place_congestion_chan_util_threshold, "--congestion_chan_util_threshold") + .help("Penalizes nets in placement whose average routing channel utilization within their bounding boxes exceeds this threshold."); + + place_timing_grp.add_argument(args.recompute_crit_iter, "--recompute_crit_iter") .help("Controls how many temperature updates occur between timing analysis during placement") .default_value("1") .show_in(argparse::ShowIn::HELP_ONLY); @@ -3449,11 +3503,11 @@ void set_conditional_defaults(t_options& args) { */ //Which placement algorithm to use? - if (args.PlaceAlgorithm.provenance() != Provenance::SPECIFIED) { + if (args.place_algorithm.provenance() != Provenance::SPECIFIED) { if (args.timing_analysis) { - args.PlaceAlgorithm.set(e_place_algorithm::CRITICALITY_TIMING_PLACE, Provenance::INFERRED); + args.place_algorithm.set(e_place_algorithm::CRITICALITY_TIMING_PLACE, Provenance::INFERRED); } else { - args.PlaceAlgorithm.set(e_place_algorithm::BOUNDING_BOX_PLACE, Provenance::INFERRED); + args.place_algorithm.set(e_place_algorithm::BOUNDING_BOX_PLACE, Provenance::INFERRED); } } @@ -3467,7 +3521,7 @@ void set_conditional_defaults(t_options& args) { // Check for correct options combinations // If you are running WLdriven placement, the RL reward function should be // either basic or nonPenalizing basic - if (args.RL_agent_placement && (args.PlaceAlgorithm == e_place_algorithm::BOUNDING_BOX_PLACE || !args.timing_analysis)) { + if (args.RL_agent_placement && (args.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE || !args.timing_analysis)) { if (args.place_reward_fun.value() != "basic" && args.place_reward_fun.value() != "nonPenalizing_basic") { VTR_LOG_WARN( "To use RLPlace for WLdriven placements, the reward function should be basic or nonPenalizing_basic.\n" @@ -3478,18 +3532,18 @@ void set_conditional_defaults(t_options& args) { } //Which placement algorithm to use during placement quench? - if (args.PlaceQuenchAlgorithm.provenance() != Provenance::SPECIFIED) { - args.PlaceQuenchAlgorithm.set(args.PlaceAlgorithm, Provenance::INFERRED); + if (args.place_quench_algorithm.provenance() != Provenance::SPECIFIED) { + args.place_quench_algorithm.set(args.place_algorithm, Provenance::INFERRED); } //Place chan width follows Route chan width if unspecified - if (args.PlaceChanWidth.provenance() != Provenance::SPECIFIED && args.RouteChanWidth.provenance() == Provenance::SPECIFIED) { - args.PlaceChanWidth.set(args.RouteChanWidth.value(), Provenance::INFERRED); + if (args.place_chan_width.provenance() != Provenance::SPECIFIED && args.RouteChanWidth.provenance() == Provenance::SPECIFIED) { + args.place_chan_width.set(args.RouteChanWidth.value(), Provenance::INFERRED); } //Do we calculate timing info during placement? - if (args.ShowPlaceTiming.provenance() != Provenance::SPECIFIED) { - args.ShowPlaceTiming.set(args.timing_analysis, Provenance::INFERRED); + if (args.show_place_timing.provenance() != Provenance::SPECIFIED) { + args.show_place_timing.set(args.timing_analysis, Provenance::INFERRED); } //Slave quench recompute divider of inner loop recompute divider unless specified @@ -3498,9 +3552,9 @@ void set_conditional_defaults(t_options& args) { } //Which schedule? - if (args.PlaceInitT.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule - || args.PlaceExitT.provenance() == Provenance::SPECIFIED - || args.PlaceAlphaT.provenance() == Provenance::SPECIFIED) { + if (args.place_init_t.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule + || args.place_exit_t.provenance() == Provenance::SPECIFIED + || args.place_alpha_t.provenance() == Provenance::SPECIFIED) { args.anneal_sched_type.set(e_sched_type::USER_SCHED, Provenance::INFERRED); } else { args.anneal_sched_type.set(e_sched_type::AUTO_SCHED, Provenance::INFERRED); // Otherwise use the automatic schedule diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index f846867af7..2c309253b4 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -9,7 +9,7 @@ #include "argparse.hpp" struct t_options { - /* File names */ + // File names argparse::ArgValue<std::string> ArchFile; argparse::ArgValue<std::string> CircuitName; argparse::ArgValue<std::string> NetFile; @@ -49,7 +49,7 @@ struct t_options { argparse::ArgValue<std::string> write_block_usage; - /* Stage Options */ + // Stage Options argparse::ArgValue<bool> do_packing; argparse::ArgValue<bool> do_legalize; argparse::ArgValue<bool> do_placement; @@ -58,13 +58,13 @@ struct t_options { argparse::ArgValue<bool> do_analysis; argparse::ArgValue<bool> do_power; - /* Graphics Options */ + // Graphics Options argparse::ArgValue<bool> show_graphics; ///<Enable argparse::ArgValue<int>eractive graphics? argparse::ArgValue<int> GraphPause; argparse::ArgValue<bool> save_graphics; argparse::ArgValue<std::string> graphics_commands; - /* General options */ + // General options argparse::ArgValue<bool> show_help; argparse::ArgValue<bool> show_version; argparse::ArgValue<bool> show_arch_resources; @@ -86,11 +86,11 @@ struct t_options { argparse::ArgValue<bool> allow_dangling_combinational_nodes; argparse::ArgValue<bool> terminate_if_timing_fails; - /* Server options */ + // Server options argparse::ArgValue<bool> is_server_mode_enabled; argparse::ArgValue<int> server_port_num; - /* Atom netlist options */ + // Atom netlist options argparse::ArgValue<bool> absorb_buffer_luts; argparse::ArgValue<e_const_gen_inference> const_gen_inference; argparse::ArgValue<bool> sweep_dangling_primary_ios; @@ -99,7 +99,7 @@ struct t_options { argparse::ArgValue<bool> sweep_constant_primary_outputs; argparse::ArgValue<int> netlist_verbosity; - /* Analytical Placement options */ + // Analytical Placement options argparse::ArgValue<e_ap_analytical_solver> ap_analytical_solver; argparse::ArgValue<e_ap_partial_legalizer> ap_partial_legalizer; argparse::ArgValue<e_ap_full_legalizer> ap_full_legalizer; @@ -111,7 +111,7 @@ struct t_options { argparse::ArgValue<int> ap_high_fanout_threshold; argparse::ArgValue<bool> ap_generate_mass_report; - /* Clustering options */ + // Clustering options argparse::ArgValue<bool> connection_driven_clustering; argparse::ArgValue<e_unrelated_clustering> allow_unrelated_clustering; argparse::ArgValue<float> timing_gain_weight; @@ -126,19 +126,20 @@ struct t_options { argparse::ArgValue<int> pack_feasible_block_array_size; argparse::ArgValue<std::vector<std::string>> pack_high_fanout_threshold; argparse::ArgValue<int> pack_verbosity; - /* Placement options */ - argparse::ArgValue<int> Seed; - argparse::ArgValue<bool> ShowPlaceTiming; - argparse::ArgValue<float> PlaceInnerNum; + + // Placement options + argparse::ArgValue<int> seed; + argparse::ArgValue<bool> show_place_timing; + argparse::ArgValue<float> place_inner_num; argparse::ArgValue<float> place_auto_init_t_scale; - argparse::ArgValue<float> PlaceInitT; - argparse::ArgValue<float> PlaceExitT; - argparse::ArgValue<float> PlaceAlphaT; + argparse::ArgValue<float> place_init_t; + argparse::ArgValue<float> place_exit_t; + argparse::ArgValue<float> place_alpha_t; argparse::ArgValue<e_sched_type> anneal_sched_type; - argparse::ArgValue<e_place_algorithm> PlaceAlgorithm; - argparse::ArgValue<e_place_algorithm> PlaceQuenchAlgorithm; + argparse::ArgValue<e_place_algorithm> place_algorithm; + argparse::ArgValue<e_place_algorithm> place_quench_algorithm; argparse::ArgValue<e_pad_loc_type> pad_loc_type; - argparse::ArgValue<int> PlaceChanWidth; + argparse::ArgValue<int> place_chan_width; argparse::ArgValue<float> place_rlim_escape_fraction; argparse::ArgValue<std::string> place_move_stats_file; argparse::ArgValue<int> placement_saves_per_temperature; @@ -147,6 +148,7 @@ struct t_options { argparse::ArgValue<std::vector<float>> place_static_move_prob; argparse::ArgValue<int> place_high_fanout_net; argparse::ArgValue<e_place_bounding_box_mode> place_bounding_box_mode; + argparse::ArgValue<e_place_freq> place_placement_freq; argparse::ArgValue<bool> RL_agent_placement; argparse::ArgValue<bool> place_agent_multistate; @@ -167,7 +169,7 @@ struct t_options { argparse::ArgValue<int> placer_debug_block; argparse::ArgValue<int> placer_debug_net; - /*NoC Options*/ + // NoC Options argparse::ArgValue<bool> noc; argparse::ArgValue<std::string> noc_flows_file; argparse::ArgValue<std::string> noc_routing_algorithm; @@ -185,9 +187,12 @@ struct t_options { argparse::ArgValue<bool> noc_sat_routing_log_search_progress; argparse::ArgValue<std::string> noc_placement_file_name; - /* Timing-driven placement options only */ - argparse::ArgValue<float> PlaceTimingTradeoff; - argparse::ArgValue<int> RecomputeCritIter; + // Timing-driven placement options only + argparse::ArgValue<float> place_congestion_factor; + argparse::ArgValue<float> place_congestion_rlim_trigger_ratio; + argparse::ArgValue<float> place_congestion_chan_util_threshold; + argparse::ArgValue<float> place_timing_tradeoff; + argparse::ArgValue<int> recompute_crit_iter; argparse::ArgValue<int> inner_loop_recompute_divider; argparse::ArgValue<int> quench_recompute_divider; argparse::ArgValue<float> place_exp_first; @@ -202,7 +207,7 @@ struct t_options { argparse::ArgValue<e_reducer> place_delay_model_reducer; argparse::ArgValue<std::string> allowed_tiles_for_delay_model; - /* Router Options */ + // Router Options argparse::ArgValue<bool> check_rr_graph; argparse::ArgValue<int> max_router_iterations; argparse::ArgValue<float> first_iter_pres_fac; @@ -232,7 +237,7 @@ struct t_options { argparse::ArgValue<int> route_verbosity; argparse::ArgValue<int> custom_3d_sb_fanin_fanout; - /* Timing-driven router options only */ + // Timing-driven router options only argparse::ArgValue<float> astar_fac; argparse::ArgValue<float> astar_offset; argparse::ArgValue<float> router_profiler_astar_fac; @@ -267,7 +272,7 @@ struct t_options { argparse::ArgValue<e_router_initial_timing> router_initial_timing; argparse::ArgValue<e_heap_type> router_heap; - /* Analysis options */ + // Analysis options argparse::ArgValue<bool> full_stats; argparse::ArgValue<bool> Generate_Post_Synthesis_Netlist; argparse::ArgValue<bool> Generate_Post_Implementation_Merged_Netlist; diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp index 4f2cc967bc..cb0bd5b2b7 100644 --- a/vpr/src/base/setup_vpr.cpp +++ b/vpr/src/base/setup_vpr.cpp @@ -27,23 +27,61 @@ #include "setup_vib_utils.h" static void setup_netlist_opts(const t_options& Options, t_netlist_opts& NetlistOpts); + +/** + * @brief Sets up the t_ap_opts structure based on users inputs and + * on the architecture specified. + * + * Error checking, such as checking for conflicting params is assumed + * to be done beforehand + */ static void setup_ap_opts(const t_options& options, t_ap_opts& apOpts); + +/** + * @brief Sets up the t_packer_opts structure based on users inputs and + * on the architecture specified. + * + * Error checking, such as checking for conflicting params is assumed + * to be done beforehand + */ static void setup_packer_opts(const t_options& Options, t_packer_opts* PackerOpts); + +/** + * @brief Sets up the s_placer_opts structure based on users input. + * + * Error checking, such as checking for conflicting params + * is assumed to be done beforehand + */ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpts); static void setup_anneal_sched(const t_options& Options, t_annealing_sched* AnnealSched); static void setup_router_opts(const t_options& Options, t_router_opts* RouterOpts); + +/** + * Go through all the NoC options supplied by the user and store them internally. + */ static void setup_noc_opts(const t_options& Options, t_noc_opts* NocOpts); + static void setup_server_opts(const t_options& Options, t_server_opts* ServerOpts); +/** + * @brief Sets up routing structures. + * + * Since checks are already done, this just copies values across + */ static void setup_routing_arch(const t_arch& Arch, t_det_routing_arch& RoutingArch); static void setup_timing(const t_options& Options, const bool TimingEnabled, t_timing_inf* Timing); + +/** + * @brief This loads up VPR's arch_switch_inf data by combining the switches + * from the arch file with the special switches that VPR needs. + */ static void setup_switches(const t_arch& Arch, t_det_routing_arch& RoutingArch, const std::vector<t_arch_switch_inf>& arch_switches); @@ -72,22 +110,12 @@ static void add_intra_tile_switches(); /** * Identify the pins that can directly reach class_inf - * @param physical_tile - * @param logical_block - * @param class_inf - * @param physical_class_num */ static void do_reachability_analysis(t_physical_tile_type* physical_tile, t_logical_block_type* logical_block, t_class* class_inf, int physical_class_num); -/** - * @brief Sets VPR parameters and defaults. - * - * Does not do any error checking as this should have been done by - * the various input checkers - */ void SetupVPR(const t_options* options, const bool timingenabled, const bool readArchFile, @@ -306,7 +334,7 @@ void SetupVPR(const t_options* options, ShowSetup(*vpr_setup); - /* init global variables */ + // init global variables vtr::out_file_prefix = options->out_file_prefix; { @@ -348,7 +376,7 @@ void SetupVPR(const t_options* options, } static void setup_timing(const t_options& Options, const bool TimingEnabled, t_timing_inf* Timing) { - /* Don't do anything if they don't want timing */ + // Don't do anything if they don't want timing if (!TimingEnabled) { Timing->timing_analysis_enabled = false; return; @@ -358,10 +386,6 @@ static void setup_timing(const t_options& Options, const bool TimingEnabled, t_t Timing->SDCFile = Options.SDCFile; } -/** - * @brief This loads up VPR's arch_switch_inf data by combining the switches - * from the arch file with the special switches that VPR needs. - */ static void setup_switches(const t_arch& Arch, t_det_routing_arch& RoutingArch, const std::vector<t_arch_switch_inf>& arch_switches) { @@ -372,10 +396,10 @@ static void setup_switches(const t_arch& Arch, find_ipin_cblock_switch_index(Arch, RoutingArch.wire_to_arch_ipin_switch, RoutingArch.wire_to_arch_ipin_switch_between_dice); - /* Depends on device_ctx.num_arch_switches */ + // Depends on device_ctx.num_arch_switches RoutingArch.delayless_switch = num_arch_switches++; - /* Alloc the list now that we know the final num_arch_switches value */ + // Alloc the list now that we know the final num_arch_switches value device_ctx.arch_switch_inf.resize(num_arch_switches); for (int iswitch = 0; iswitch < switches_to_copy; iswitch++) { device_ctx.arch_switch_inf[iswitch] = arch_switches[iswitch]; @@ -384,7 +408,7 @@ static void setup_switches(const t_arch& Arch, device_ctx.all_sw_inf[iswitch] = arch_switches[iswitch]; } - /* Delayless switch for connecting sinks and sources with their pins. */ + // Delayless switch for connecting sinks and sources with their pins. device_ctx.arch_switch_inf[RoutingArch.delayless_switch].set_type(SwitchType::MUX); device_ctx.arch_switch_inf[RoutingArch.delayless_switch].name = std::string(VPR_DELAYLESS_SWITCH_NAME); device_ctx.arch_switch_inf[RoutingArch.delayless_switch].R = 0.; @@ -404,21 +428,15 @@ static void setup_switches(const t_arch& Arch, device_ctx.delayless_switch_idx = RoutingArch.delayless_switch; - //Warn about non-zero Cout values for the ipin switch, since these values have no effect. - //VPR do not model the R/C's of block internal routing connection. - // - //Note that we don't warn about the R value as it may be used to size the buffer (if buf_size_type is AUTO) + // Warn about non-zero Cout values for the ipin switch, since these values have no effect. + // VPR do not model the R/C's of block internal routing connection + // Note that we don't warn about the R value as it may be used to size the buffer (if buf_size_type is AUTO) if (device_ctx.arch_switch_inf[RoutingArch.wire_to_arch_ipin_switch].Cout != 0.) { VTR_LOG_WARN("Non-zero switch output capacitance (%g) has no effect when switch '%s' is used for connection block inputs\n", device_ctx.arch_switch_inf[RoutingArch.wire_to_arch_ipin_switch].Cout, Arch.ipin_cblock_switch_name[0].c_str()); } } -/** - * @brief Sets up routing structures. - * - * Since checks are already done, this just copies values across - */ static void setup_routing_arch(const t_arch& Arch, t_det_routing_arch& RoutingArch) { RoutingArch.switch_block_type = Arch.sb_type; @@ -432,10 +450,10 @@ static void setup_routing_arch(const t_arch& Arch, RoutingArch.directionality = Arch.Segments[0].directionality; } - /* copy over the switch block information */ + // copy over the switch block information RoutingArch.switchblocks = Arch.switchblocks; - /* Copy the tileable routing setting */ + // Copy the tileable routing setting RoutingArch.tileable = Arch.tileable; RoutingArch.perimeter_cb = Arch.perimeter_cb; RoutingArch.shrink_boundary = Arch.shrink_boundary; @@ -542,17 +560,17 @@ static void setup_router_opts(const t_options& Options, t_router_opts* RouterOpt static void setup_anneal_sched(const t_options& Options, t_annealing_sched* AnnealSched) { - AnnealSched->alpha_t = Options.PlaceAlphaT; + AnnealSched->alpha_t = Options.place_alpha_t; if (AnnealSched->alpha_t >= 1 || AnnealSched->alpha_t <= 0) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_t must be between 0 and 1 exclusive.\n"); } - AnnealSched->exit_t = Options.PlaceExitT; + AnnealSched->exit_t = Options.place_exit_t; if (AnnealSched->exit_t <= 0) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "exit_t must be greater than 0.\n"); } - AnnealSched->init_t = Options.PlaceInitT; + AnnealSched->init_t = Options.place_init_t; if (AnnealSched->init_t <= 0) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "init_t must be greater than 0.\n"); } @@ -561,7 +579,7 @@ static void setup_anneal_sched(const t_options& Options, VPR_FATAL_ERROR(VPR_ERROR_OTHER, "init_t must be greater or equal to than exit_t.\n"); } - AnnealSched->inner_num = Options.PlaceInnerNum; + AnnealSched->inner_num = Options.place_inner_num; if (AnnealSched->inner_num <= 0) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "inner_num must be greater than 0.\n"); } @@ -569,15 +587,8 @@ static void setup_anneal_sched(const t_options& Options, AnnealSched->type = Options.anneal_sched_type; } -/** - * @brief Sets up the t_ap_opts structure based on users inputs and - * on the architecture specified. - * - * Error checking, such as checking for conflicting params is assumed - * to be done beforehand - */ -void setup_ap_opts(const t_options& options, - t_ap_opts& apOpts) { +static void setup_ap_opts(const t_options& options, + t_ap_opts& apOpts) { apOpts.analytical_solver_type = options.ap_analytical_solver.value(); apOpts.partial_legalizer_type = options.ap_partial_legalizer.value(); apOpts.full_legalizer_type = options.ap_full_legalizer.value(); @@ -591,13 +602,6 @@ void setup_ap_opts(const t_options& options, apOpts.generate_mass_report = options.ap_generate_mass_report.value(); } -/** - * @brief Sets up the t_packer_opts structure based on users inputs and - * on the architecture specified. - * - * Error checking, such as checking for conflicting params is assumed - * to be done beforehand - */ void setup_packer_opts(const t_options& Options, t_packer_opts* PackerOpts) { PackerOpts->output_file = Options.NetFile; @@ -639,12 +643,6 @@ static void setup_netlist_opts(const t_options& Options, t_netlist_opts& Netlist NetlistOpts.netlist_verbosity = Options.netlist_verbosity; } -/** - * @brief Sets up the s_placer_opts structure based on users input. - * - * Error checking, such as checking for conflicting params - * is assumed to be done beforehand - */ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpts) { if (Options.do_placement) { PlacerOpts->doPlacement = e_stage_action::DO; @@ -657,8 +655,8 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->td_place_exp_last = Options.place_exp_last; - PlacerOpts->place_algorithm = Options.PlaceAlgorithm; - PlacerOpts->place_quench_algorithm = Options.PlaceQuenchAlgorithm; + PlacerOpts->place_algorithm = Options.place_algorithm; + PlacerOpts->place_quench_algorithm = Options.place_quench_algorithm; PlacerOpts->constraints_file = Options.constraints_file; @@ -668,13 +666,16 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->pad_loc_type = Options.pad_loc_type; - PlacerOpts->place_chan_width = Options.PlaceChanWidth; + PlacerOpts->place_chan_width = Options.place_chan_width; - PlacerOpts->recompute_crit_iter = Options.RecomputeCritIter; + PlacerOpts->recompute_crit_iter = Options.recompute_crit_iter; - PlacerOpts->timing_tradeoff = Options.PlaceTimingTradeoff; + PlacerOpts->timing_tradeoff = Options.place_timing_tradeoff; + PlacerOpts->congestion_factor = Options.place_congestion_factor; + PlacerOpts->congestion_rlim_trigger_ratio = Options.place_congestion_rlim_trigger_ratio; + PlacerOpts->congestion_chan_util_threshold = Options.place_congestion_chan_util_threshold; - /* Depends on PlacerOpts->place_algorithm */ + // Depends on PlacerOpts->place_algorithm PlacerOpts->delay_offset = Options.place_delay_offset; PlacerOpts->delay_ramp_delta_threshold = Options.place_delay_ramp_delta_threshold; PlacerOpts->delay_ramp_slope = Options.place_delay_ramp_slope; @@ -683,7 +684,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->delay_model_type = Options.place_delay_model; PlacerOpts->delay_model_reducer = Options.place_delay_model_reducer; - PlacerOpts->place_freq = PLACE_ONCE; /* DEFAULT */ + PlacerOpts->place_freq = Options.place_placement_freq; PlacerOpts->post_place_timing_report_file = Options.post_place_timing_report_file; @@ -721,7 +722,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->floorplan_num_vertical_partitions = Options.floorplan_num_vertical_partitions; PlacerOpts->place_quench_only = Options.place_quench_only; - PlacerOpts->seed = Options.Seed; + PlacerOpts->seed = Options.seed; PlacerOpts->placer_debug_block = Options.placer_debug_block; PlacerOpts->placer_debug_net = Options.placer_debug_net; @@ -775,9 +776,6 @@ static void setup_power_opts(const t_options& Options, t_power_opts* power_opts, } } -/* - * Go through all the NoC options supplied by the user and store them internally. - */ static void setup_noc_opts(const t_options& Options, t_noc_opts* NocOpts) { // assign the noc specific options from the command line NocOpts->noc = Options.noc; diff --git a/vpr/src/base/setup_vpr.h b/vpr/src/base/setup_vpr.h index f72bb231bd..7364b8bb05 100644 --- a/vpr/src/base/setup_vpr.h +++ b/vpr/src/base/setup_vpr.h @@ -5,6 +5,12 @@ #include "physical_types.h" #include "vpr_types.h" +/** + * @brief Sets VPR parameters and defaults. + * + * Does not do any error checking as this should have been done by + * the various input checkers + */ void SetupVPR(const t_options* Options, const bool TimingEnabled, const bool readArchFile, diff --git a/vpr/src/base/stats.cpp b/vpr/src/base/stats.cpp index 6718d67678..e1757f791f 100644 --- a/vpr/src/base/stats.cpp +++ b/vpr/src/base/stats.cpp @@ -33,23 +33,6 @@ static void load_channel_occupancies(const Netlist<>& net_list, vtr::Matrix<int>& chanx_occ, vtr::Matrix<int>& chany_occ); -/** - * @brief Writes channel occupancy data to a file. - * - * Each row contains: - * - (x, y) coordinate - * - Occupancy count - * - Occupancy percentage (occupancy / capacity) - * - Channel capacity - * - * @param filename Output file path. - * @param occupancy Matrix of occupancy counts. - * @param capacity_list List of channel capacities (per y for chanx, per x for chany). - */ -static void write_channel_occupancy_table(const std::string_view filename, - const vtr::Matrix<int>& occupancy, - const std::vector<int>& capacity_list); - /** * @brief Figures out maximum, minimum and average number of bends * and net length in the routing. @@ -175,7 +158,7 @@ void length_and_bends_stats(const Netlist<>& net_list, bool is_flat) { int num_clb_opins_reserved = 0; int num_absorbed_nets = 0; - for (auto net_id : net_list.nets()) { + for (ParentNetId net_id : net_list.nets()) { if (!net_list.net_is_ignored(net_id) && net_list.net_sinks(net_id).size() != 0) { /* Globals don't count. */ int bends, length, segments; bool is_absorbed; @@ -283,9 +266,9 @@ static void get_channel_occupancy_stats(const Netlist<>& net_list, bool /***/) { VTR_LOG("\n"); } -static void write_channel_occupancy_table(const std::string_view filename, - const vtr::Matrix<int>& occupancy, - const std::vector<int>& capacity_list) { +void write_channel_occupancy_table(const std::string_view filename, + const vtr::Matrix<int>& occupancy, + const std::vector<int>& capacity_list) { constexpr int w_coord = 6; constexpr int w_value = 12; constexpr int w_percent = 12; diff --git a/vpr/src/base/stats.h b/vpr/src/base/stats.h index 08b9e4d1d1..1d4f118721 100644 --- a/vpr/src/base/stats.h +++ b/vpr/src/base/stats.h @@ -2,6 +2,8 @@ #include <map> #include <vector> +#include <string_view> + #include "netlist.h" #include "rr_graph_type.h" @@ -26,7 +28,7 @@ void routing_stats(const Netlist<>& net_list, /** * @brief Calculates the routing channel width at each grid location. * - * Iterates through all RR nodes and counts how many wires pass through each (x, y) location + * Iterates through all RR nodes and counts how many wires pass through each (layer, x, y) location * for both horizontal (CHANX) and vertical (CHANY) channels. * * @return A pair of 3D matrices: @@ -61,3 +63,22 @@ void print_resource_usage(); * @param target_device_utilization The target device utilization set by the user */ void print_device_utilization(const float target_device_utilization); + +/** + * @brief Writes channel occupancy data to a file. + * + * Each row contains: + * - (x, y) coordinate + * - Occupancy count + * - Occupancy percentage (occupancy / capacity) + * - Channel capacity + * + * TODO: extend to 3D + * + * @param filename Output file path. + * @param occupancy Matrix of occupancy counts. + * @param capacity_list List of channel capacities (per y for chanx, per x for chany). + */ +void write_channel_occupancy_table(const std::string_view filename, + const vtr::Matrix<int>& occupancy, + const std::vector<int>& capacity_list); diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index dcd0d2394c..17c6df6832 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -726,7 +726,7 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) { auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); const AtomContext& atom_ctx = g_vpr_ctx.atom(); - /* Ensure we have a clean start with void net remapping information */ + // Ensure we have a clean start with void net remapping information cluster_ctx.post_routing_clb_pin_nets.clear(); cluster_ctx.pre_routing_net_pin_mapping.clear(); @@ -735,7 +735,7 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) { vpr_setup.FileNameOpts.verify_file_digests, vpr_setup.PackerOpts.pack_verbosity); - /* Load the mapping between clusters and their atoms */ + // Load the mapping between clusters and their atoms init_clb_atoms_lookup(cluster_ctx.atoms_lookup, atom_ctx, cluster_ctx.clb_nlist); process_constant_nets(g_vpr_ctx.mutable_atom().mutable_netlist(), @@ -749,14 +749,14 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) { report_packing_pin_usage(ofs, g_vpr_ctx); } - // Ater the clustered netlist has been loaded, update the floorplanning + // After the clustered netlist has been loaded, update the floorplanning // constraints with the new information. g_vpr_ctx.mutable_floorplanning().update_floorplanning_context_post_pack(); - /* Sanity check the resulting netlist */ + // Sanity check the resulting netlist check_netlist(vpr_setup.PackerOpts.pack_verbosity); - // Independently verify the clusterings to ensure the clustering can be + // Independently verify the clustering to ensure the clustering can be // used for the rest of the VPR flow. unsigned num_errors = verify_clustering(g_vpr_ctx); if (num_errors == 0) { @@ -768,7 +768,7 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) { num_errors); } - /* Output the netlist stats to console and optionally to file. */ + // Output the netlist stats to console and optionally to file. writeClusteredNetlistStats(vpr_setup.FileNameOpts.write_block_usage); // print the total number of used physical blocks for each diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 41da9a6d08..7547348cad 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -375,23 +375,23 @@ constexpr int NUM_PL_MOVE_TYPES = 7; constexpr int NUM_PL_NONTIMING_MOVE_TYPES = 3; /* Timing data structures end */ + +// Annealing schedule enum class e_sched_type { AUTO_SCHED, USER_SCHED }; -/* Annealing schedule */ +// What's on screen? enum pic_type { NO_PICTURE, PLACEMENT, ROUTING }; -/* What's on screen? */ -enum pfreq { - PLACE_NEVER, - PLACE_ONCE, - PLACE_ALWAYS +enum class e_place_freq { + ONCE, + ALWAYS }; ///@brief Power data for t_netlist structure @@ -968,85 +968,82 @@ enum class e_move_type; /** * @brief Various options for the placer. - * - * @param place_algorithm - * Controls which placement algorithm is used. - * @param place_quench_algorithm - * Controls which placement algorithm is used - * during placement quench. - * @param timing_tradeoff - * When in CRITICALITY_TIMING_PLACE mode, what is the - * tradeoff between timing and wiring costs. - * @param place_chan_width - * The channel width assumed if only one placement is performed. - * @param pad_loc_type - * Are pins FREE or fixed randomly. - * @param constraints_file - * File that specifies locations of locked down (constrained) - * blocks for placement. Empty string means no constraints file. - * @param write_initial_place_file - * Write the initial placement into this file. Empty string means - * the initial placement is not written. - * @param pad_loc_file - * File to read pad locations from if pad_loc_type is USER. - * @param place_freq - * Should the placement be skipped, done once, or done - * for each channel width in the binary search. (Default: ONCE) - * @param recompute_crit_iter - * How many temperature stages pass before we recompute - * criticalities based on the current placement and its - * estimated point-to-point delays. - * @param inner_loop_crit_divider - * (move_lim/inner_loop_crit_divider) determines how - * many inner_loop iterations pass before a recompute - * of criticalities is done. - * @param td_place_exp_first - * Exponent that is used in the CRITICALITY_TIMING_PLACE - * mode to specify the initial value of `crit_exponent`. - * After we map the slacks to criticalities, this value - * is used to `sharpen` the criticalities, making connections - * with worse slacks more critical. - * @param td_place_exp_last - * Value that the crit_exponent will be at the end. - * @param doPlacement - * True if placement is supposed to be done in the CAD flow. - * False if otherwise. - * @param place_constraint_expand - * Integer value that specifies how far to expand the floorplan - * region when printing out floorplan constraints based on - * current placement. - * @param place_constraint_subtile - * True if subtiles should be specified when printing floorplan - * constraints. False if not. - * @param place_auto_init_t_scale - * When the annealer is using the automatic schedule, this option - * scales the initial temperature selected. */ struct t_placer_opts { + /// Controls which placement algorithm is used. t_place_algorithm place_algorithm; + + /// Controls which placement algorithm is used during placement quench. t_place_algorithm place_quench_algorithm; - t_annealing_sched anneal_sched; ///<Placement option annealing schedule + + /// Placement option annealing schedule + t_annealing_sched anneal_sched; + + /// When in CRITICALITY_TIMING_PLACE mode, what is the tradeoff between timing and wiring costs. float timing_tradeoff; + + /// Weight for how much congestion affects placement cost. + /// Higher means congestion is more important. + float congestion_factor; + /// Start using congestion cost when (current rlim / initial rlim) drops below this value. + float congestion_rlim_trigger_ratio; + /// Nets with average channel usage (withing their bounding box) above this threshold + /// are predicted to face some congestion in the routing stage. + float congestion_chan_util_threshold; + + /// The channel width assumed if only one placement is performed. int place_chan_width; + + /// Are pins FREE or fixed randomly. enum e_pad_loc_type pad_loc_type; + + /// File that specifies locations of locked down (constrained) blocks for placement. Empty string means no constraints file. std::string constraints_file; + + /// Write the initial placement into this file. Empty string means the initial placement is not written. std::string write_initial_place_file; + std::string read_initial_place_file; - enum pfreq place_freq; + + /// Should the placement be skipped, done once, or done for each channel width in the binary search. (Default: ONCE) + e_place_freq place_freq; + + /// How many temperature stages pass before we recompute criticalities + /// based on the current placement and its estimated point-to-point delays. int recompute_crit_iter; + + /// (move_lim/inner_loop_crit_divider) determines how many inner_loop iterations pass before a recompute of criticalities is done. int inner_loop_recompute_divider; + int quench_recompute_divider; + + /** + * Exponent that is used in the CRITICALITY_TIMING_PLACE mode to specify the initial value of `crit_exponent`. + * After we map the slacks to criticalities, this value is used to `sharpen` the criticalities, making + * connections with worse slacks more critical. + */ float td_place_exp_first; + int seed; + + /// Value that the crit_exponent will be at the end. float td_place_exp_last; + + /// True if placement is supposed to be done in the CAD flow. False if otherwise. e_stage_action doPlacement; + float rlim_escape_fraction; + std::string move_stats_file; + int placement_saves_per_temperature; + e_place_effort_scaling effort_scaling; + e_timing_update_type timing_update_type; PlaceDelayModelType delay_model_type; + e_reducer delay_model_reducer; float delay_offset; @@ -1061,23 +1058,39 @@ struct t_placer_opts { std::string write_placement_delay_lookup; std::string read_placement_delay_lookup; + vtr::vector<e_move_type, float> place_static_move_prob; + bool RL_agent_placement; bool place_agent_multistate; bool place_checkpointing; + int place_high_fanout_net; + e_place_bounding_box_mode place_bounding_box_mode; + e_agent_algorithm place_agent_algorithm; + float place_agent_epsilon; float place_agent_gamma; float place_dm_rlim; + e_agent_space place_agent_space; + std::string place_reward_fun; + float place_crit_limit; + + /// Integer value that specifies how far to expand the floorplan region when + /// printing out floorplan constraints based on current placement. int place_constraint_expand; + + /// True if subtiles should be specified when printing floorplan constraints. False if not. bool place_constraint_subtile; + int floorplan_num_horizontal_partitions; int floorplan_num_vertical_partitions; + bool place_quench_only; int placer_debug_block; @@ -1093,6 +1106,7 @@ struct t_placer_opts { e_place_delta_delay_algorithm place_delta_delay_matrix_calculation_method; + /// When the annealer is using the automatic schedule, this option scales the initial temperature selected. float place_auto_init_t_scale; }; @@ -1102,63 +1116,42 @@ struct t_placer_opts { /** * @brief Various options for the Analytical Placer. - * - * @param doAnalyticalPlacement - * True if analytical placement is supposed to be done in the CAD - * flow. False if otherwise. - * @param analytical_solver_type - * The type of analytical solver the Global Placer in the AP flow - * will use. - * @param partial_legalizer_type - * The type of partial legalizer the Global Placer in the AP flow - * will use. - * @param full_legalizer_type - * The type of full legalizer the AP flow will use. - * @param detailed_placer_type - * The type of detailed placter the AP flow will use. - * @param ap_timing_tradeoff - * A trade-off parameter used to decide how focused the AP flow - * should be on optimizing timing over wirelength. - * @param ap_high_fanout_threshold; - * The threshold to ignore nets with higher fanout than that - * value while constructing the solver. - * @param ap_partial_legalizer_target_density - * Vector of strings passed by the user to configure the target - * density of different physical tiles on the device. - * @param appack_max_dist_th - * Array of string passed by the user to configure the max candidate - * distance thresholds. - * @param num_threads - * The number of threads the AP flow can use. - * @param log_verbosity - * The verbosity level of log messages in the AP flow, with higher - * values leading to more verbose messages. - * @param generate_mass_report - * Whether to generate a mass report during global placement or not. */ struct t_ap_opts { + /// True if analytical placement is supposed to be done in the CAD flow. False if otherwise. e_stage_action doAP; + /// The type of analytical solver the Global Placer in the AP flow will use. e_ap_analytical_solver analytical_solver_type; + /// The type of partial legalizer the Global Placer in the AP flow will use. e_ap_partial_legalizer partial_legalizer_type; + /// The type of full legalizer the AP flow will use. e_ap_full_legalizer full_legalizer_type; + /// The type of detailed placer the AP flow will use. e_ap_detailed_placer detailed_placer_type; + /// A trade-off parameter used to decide how focused the AP flow should be on optimizing timing over wirelength. float ap_timing_tradeoff; + /// The threshold to ignore nets with higher fanout than that value while constructing the solver. int ap_high_fanout_threshold; + /// Vector of strings passed by the user to configure the target density of different physical tiles on the device. std::vector<std::string> ap_partial_legalizer_target_density; + /// Array of string passed by the user to configure the max candidate distance thresholds. std::vector<std::string> appack_max_dist_th; + /// The number of threads the AP flow can use. unsigned num_threads; + /// The verbosity level of log messages in the AP flow, with higher values leading to more verbose messages. int log_verbosity; + /// Whether to generate a mass report during global placement or not. bool generate_mass_report; }; @@ -1166,56 +1159,6 @@ struct t_ap_opts { * Router data types *******************************************************************/ -/* All the parameters controlling the router's operation are in this * - * structure. * - * first_iter_pres_fac: Present sharing penalty factor used for the * - * very first (congestion mapping) Pathfinder iteration. * - * initial_pres_fac: Initial present sharing penalty factor for * - * Pathfinder; used to set pres_fac on 2nd iteration. * - * pres_fac_mult: Amount by which pres_fac is multiplied each * - * routing iteration. * - * acc_fac: Historical congestion cost multiplier. Used unchanged * - * for all iterations. * - * bend_cost: Cost of a bend (usually non-zero only for global routing). * - * max_router_iterations: Maximum number of iterations before giving * - * up. * - * min_incremental_reroute_fanout: Minimum fanout a net needs to have * - * for incremental reroute to be applied to it through route * - * tree pruning. Larger circuits should get larger thresholds * - * bb_factor: Linear distance a route can go outside the net bounding * - * box. * - * route_type: GLOBAL or DETAILED. * - * fixed_channel_width: Only attempt to route the design once, with the * - * channel width given. If this variable is * - * == NO_FIXED_CHANNEL_WIDTH, do a binary search * - * on channel width. * - * router_algorithm: TIMING_DRIVEN or PARALLEL. Selects the desired * - * routing algorithm. * - * base_cost_type: Specifies how to compute the base cost of each type of * - * rr_node. DELAY_NORMALIZED -> base_cost = "demand" * - * x average delay to route past 1 CLB. DEMAND_ONLY -> * - * expected demand of this node (old breadth-first costs). * - * * - * The following parameters are used only by the timing-driven router. * - * * - * astar_fac: Factor (alpha) used to weight expected future costs to * - * target in the timing_driven router. astar_fac = 0 leads to * - * an essentially breadth-first search, astar_fac = 1 is near * - * the usual astar algorithm and astar_fac > 1 are more * - * aggressive. * - * astar_offset: Offset that is subtracted from the lookahead (expected * - * future costs) in the timing-driven router. * - * max_criticality: The maximum criticality factor (from 0 to 1) any sink * - * will ever have (i.e. clip criticality to this number). * - * criticality_exp: Set criticality to (path_length(sink) / longest_path) ^ * - * criticality_exp (then clip to max_criticality). * - * doRouting: true if routing is supposed to be done, false otherwise * - * routing_failure_predictor: sets the configuration to be used by the * - * routing failure predictor, how aggressive the threshold used to judge * - * and abort routings deemed unroutable * - * write_rr_graph_name: stores the file name of the output rr graph * - * read_rr_graph_name: stores the file name of the rr graph to be read by vpr */ - enum e_router_algorithm { NESTED, PARALLEL, @@ -1275,25 +1218,54 @@ enum class e_incr_reroute_delay_ripup { constexpr int NO_FIXED_CHANNEL_WIDTH = -1; +/** + * @brief Parameters controlling the router's operation. + */ struct t_router_opts { bool read_rr_edge_metadata = false; bool do_check_rr_graph = true; + + /// Present sharing penalty factor used for the very first (congestion mapping) Pathfinder iteration. float first_iter_pres_fac; + /// Initial present sharing penalty factor for Pathfinder; used to set pres_fac on 2nd iteration. float initial_pres_fac; + /// Amount by which pres_fac is multiplied each routing iteration. float pres_fac_mult; float max_pres_fac; + + /// Historical congestion cost multiplier. Used unchanged for all iterations. float acc_fac; + /// Cost of a bend (usually non-zero only for global routing). float bend_cost; + /// Maximum number of iterations before giving up. int max_router_iterations; + /// Minimum fanout a net needs to have for incremental reroute to be applied to it through route tree pruning. + /// Larger circuits should get larger thresholds int min_incremental_reroute_fanout; e_incr_reroute_delay_ripup incr_reroute_delay_ripup; + /// Linear distance a route can go outside the net bounding box. int bb_factor; + /// GLOBAL or DETAILED. enum e_route_type route_type; + /// Only attempt to route the design once, with the channel width given. + /// If this variable is == NO_FIXED_CHANNEL_WIDTH, do a binary search on channel width. int fixed_channel_width; - int min_channel_width_hint; ///<Hint to binary search of what the minimum channel width is + /// Hint to binary search of what the minimum channel width is + int min_channel_width_hint; + /// TIMING_DRIVEN or PARALLEL. Selects the desired routing algorithm. enum e_router_algorithm router_algorithm; + + /// Specifies how to compute the base cost of each type of rr_node. + /// DELAY_NORMALIZED -> base_cost = "demand" x average delay to route past 1 CLB. + /// DEMAND_ONLY -> expected demand of this node (old breadth-first costs). enum e_base_cost_type base_cost_type; + + /// Factor (alpha) used to weight expected future costs to target in the timing_driven router. + /// astar_fac = 0 leads to an essentially breadth-first search, + /// astar_fac = 1 is near the usual astar algorithm and astar_fac > 1 are more aggressive. float astar_fac; + + /// Offset that is subtracted from the lookahead (expected future costs) in the timing-driven router. float astar_offset; float router_profiler_astar_fac; bool enable_parallel_connection_router; @@ -1302,7 +1274,9 @@ struct t_router_opts { int multi_queue_num_threads; int multi_queue_num_queues; bool multi_queue_direct_draining; + /// The maximum criticality factor (from 0 to 1) any sink will ever have (i.e. clip criticality to this number). float max_criticality; + /// Set criticality to (path_length(sink) / longest_path) ^ criticality_exp (then clip to max_criticality). float criticality_exp; float init_wirelength_abort_threshold; bool verify_binary_search; @@ -1310,7 +1284,10 @@ struct t_router_opts { bool congestion_analysis; bool fanout_analysis; bool switch_usage_analysis; + /// true if routing is supposed to be done, false otherwise e_stage_action doRouting; + /// the configuration to be used by the routing failure predictor, + /// how aggressive the threshold used to judge and abort routings deemed unroutable enum e_routing_failure_predictor routing_failure_predictor; enum e_routing_budgets_algorithm routing_budgets_algorithm; bool save_routing_per_iteration; diff --git a/vpr/src/pack/verify_clustering.cpp b/vpr/src/pack/verify_clustering.cpp index ec08e10a40..93f925ef68 100644 --- a/vpr/src/pack/verify_clustering.cpp +++ b/vpr/src/pack/verify_clustering.cpp @@ -406,7 +406,7 @@ unsigned verify_clustering(const ClusteredNetlist& clb_nlist, // Return here since this error can cause serious issues below. return num_errors; } - // Check conssitency between which clusters the atom's think thet are in and + // Check consistency between which clusters the atom's think thet are in and // which atoms the clusters think they have. num_errors += check_clustering_atom_consistency(clb_nlist, atom_nlist, diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 3422850312..ece401daa8 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -118,6 +118,7 @@ t_annealing_state::t_annealing_state(float first_t, } bool t_annealing_state::outer_loop_update(float success_rate, + bool congestion_modeling_enabled, const t_placer_costs& costs, const t_placer_opts& placer_opts) { #ifndef NO_GRAPHICS @@ -139,8 +140,11 @@ bool t_annealing_state::outer_loop_update(float success_rate, } // Automatically determine exit temperature. - auto& cluster_ctx = g_vpr_ctx.clustering(); + const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering(); float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size(); + if (congestion_modeling_enabled) { + t_exit *= (1. + placer_opts.congestion_factor); + } VTR_ASSERT_SAFE(placer_opts.anneal_sched.type == e_sched_type::AUTO_SCHED); // Automatically adjust alpha according to success rate. @@ -228,7 +232,8 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, , move_stats_file_(nullptr, vtr::fclose) , outer_crit_iter_count_(1) , blocks_affected_(placer_state.block_locs().size()) - , quench_started_(false) { + , quench_started_(false) + , congestion_modeling_started_(false) { const auto& device_ctx = g_vpr_ctx.device(); float first_crit_exponent; @@ -373,14 +378,13 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, MoveOutcomeStats move_outcome_stats; - /* I'm using negative values of proposed_net_cost as a flag, - * so DO NOT use cost functions that can go negative. */ - double delta_c = 0; //Change in cost due to this swap. - double bb_delta_c = 0; //Change in the bounding box (wiring) cost. - double timing_delta_c = 0; //Change in the timing cost (delay * criticality). + double delta_c = 0.; // Change in cost due to this swap. + double bb_delta_c = 0.; // Change in the bounding box (wiring) cost. + double timing_delta_c = 0.; // Change in the timing cost (delay * criticality). + double congestion_delta_c = 0.; // Change in the congestion cost - /* Allow some fraction of moves to not be restricted by rlim, - * in the hopes of better escaping local minima. */ + // Allow some fraction of moves to not be restricted by rlim, + // in the hopes of better escaping local minima. float rlim; if (placer_opts_.rlim_escape_fraction > 0. && rng_.frand() < placer_opts_.rlim_escape_fraction) { rlim = std::numeric_limits<float>::infinity(); @@ -396,19 +400,19 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, router_block_move = check_for_router_swap(noc_opts_.noc_swap_percentage, rng_); } - //When manual move toggle button is active, the manual move window asks the user for input. + // When manual move toggle button is active, the manual move window asks the user for input. if (manual_move_enabled) { #ifndef NO_GRAPHICS create_move_outcome = manual_move_display_and_propose(manual_move_generator_, blocks_affected_, proposed_action.move_type, rlim, placer_opts_, criticalities_); -#endif //NO_GRAPHICS +#endif // NO_GRAPHICS } else if (router_block_move) { // generate a move where two random router blocks are swapped create_move_outcome = propose_router_swap(blocks_affected_, rlim, blk_loc_registry, place_macros_, rng_); proposed_action.move_type = e_move_type::UNIFORM; } else { - //Generate a new move (perturbation) used to explore the space of possible placements + // Generate a new move (perturbation) used to explore the space of possible placements create_move_outcome = move_generator.propose_move(blocks_affected_, proposed_action, rlim, placer_opts_, criticalities_); } @@ -455,7 +459,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, * delays and timing costs and store them in proposed_* data structures. */ net_cost_handler_.find_affected_nets_and_update_costs(delay_model_, criticalities_, blocks_affected_, - bb_delta_c, timing_delta_c); + bb_delta_c, timing_delta_c, congestion_delta_c); if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) { /* Take delta_c as a combination of timing and wiring cost. In @@ -472,7 +476,8 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, timing_delta_c, costs_.timing_cost_norm); delta_c = (1 - placer_opts_.timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm - + placer_opts_.timing_tradeoff * timing_delta_c * costs_.timing_cost_norm; + + placer_opts_.timing_tradeoff * timing_delta_c * costs_.timing_cost_norm + + placer_opts_.congestion_factor * congestion_delta_c * costs_.congestion_cost_norm; } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { /* For setup slack analysis, we first do a timing analysis to get the newest * slack values resulted from the proposed block moves. If the move turns out @@ -539,6 +544,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, if (move_outcome == e_move_result::ACCEPTED) { costs_.cost += delta_c; costs_.bb_cost += bb_delta_c; + costs_.congestion_cost += congestion_delta_c; if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) { costs_.timing_cost += timing_delta_c; @@ -673,6 +679,26 @@ void PlacementAnnealer::outer_loop_update_timing_info() { outer_crit_iter_count_++; } + // Congestion modeling is enabled when the ratio of the current range limit to the initial range limit + // drops below a user-specified threshold, and the congestion cost weighting factor is non-zero. + // Once enabled, congestion modeling continues even if the range limit increases and the ratio + // rises above the threshold. + // + // This logic is motivated by the observation that enabling congestion modeling too early in the + // anneal increases computational overhead and introduces noise into the placement cost function, + // as early placements are typically highly congested and unstable. So, we delay congestion modeling + // until the placement is more settled and wirelength has been reasonably optimized. + if ((annealing_state_.rlim / MoveGenerator::first_rlim < placer_opts_.congestion_rlim_trigger_ratio + && placer_opts_.congestion_factor != 0.) + || congestion_modeling_started_) { + costs_.congestion_cost = net_cost_handler_.estimate_routing_chan_util(); + + if (!congestion_modeling_started_) { + VTR_LOG("Congestion modeling started.\n"); + congestion_modeling_started_ = true; + } + } + // Update the cost normalization factors costs_.update_norm_factors(); @@ -783,7 +809,7 @@ const t_annealing_state& PlacementAnnealer::get_annealing_state() const { } bool PlacementAnnealer::outer_loop_update_state() { - return annealing_state_.outer_loop_update(placer_stats_.success_rate, costs_, placer_opts_); + return annealing_state_.outer_loop_update(placer_stats_.success_rate, congestion_modeling_started_, costs_, placer_opts_); } void PlacementAnnealer::start_quench() { diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index 0d098db728..e406581956 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -105,6 +105,7 @@ class t_annealing_state { * @return True->continues the annealing. False->exits the annealing. */ bool outer_loop_update(float success_rate, + bool congestion_modeling_enabled, const t_placer_costs& costs, const t_placer_opts& placer_opts); @@ -328,6 +329,8 @@ class PlacementAnnealer { int tot_iter_; /// Indicates whether the annealer has entered into the quench stage bool quench_started_; + /// Indicates whether routing congestion modeling has been started + bool congestion_modeling_started_; void LOG_MOVE_STATS_HEADER(); void LOG_MOVE_STATS_PROPOSED(); diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index 9f6a4316eb..0b8336e08f 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -372,11 +372,11 @@ static bool is_loc_legal(const t_pl_loc& loc, return legal; } -bool find_subtile_in_location(t_pl_loc& centroid, - t_logical_block_type_ptr block_type, - const BlkLocRegistry& blk_loc_registry, - const PartitionRegion& pr, - vtr::RngContainer& rng) { +static bool find_subtile_in_location(t_pl_loc& centroid, + t_logical_block_type_ptr block_type, + const BlkLocRegistry& blk_loc_registry, + const PartitionRegion& pr, + vtr::RngContainer& rng) { //check if the location is on chip and legal, if yes try to update subtile if (is_loc_on_chip({centroid.x, centroid.y, centroid.layer}) && is_loc_legal(centroid, pr, block_type)) { //find the compatible subtiles diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 6282a3e872..77247f1510 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -2,11 +2,11 @@ * @file net_cost_handler.cpp * @brief This file contains the implementation of functions used to update placement cost when a new move is proposed/committed. * - * VPR placement cost consists of three terms which represent wirelength, timing, and NoC cost. + * VPR placement cost consists of multiple terms which represent wirelength, timing, congestion, and NoC cost. * - * To get an estimation of the wirelength of each net, the Half Perimeter Wire Length (HPWL) approach is used. In this approach, + * To get an estimation of the wirelength of each net, the Half Perimeter Wire Length (HPWL) metric is used. In this approach, * half of the perimeter of the bounding box which contains all terminals of the net is multiplied by a correction factor, - * and the resulting number is considered as an estimation of the bounding box. + * and the resulting number is considered as an estimation of the wirelength needed to route this net. * * Currently, we have two types of bounding boxes: 3D bounding box (or Cube BB) and per-layer bounding box. * If the FPGA grid is a 2D structure, a Cube bounding box is used, which will always have the z direction equal to 1. For 3D architectures, @@ -20,6 +20,16 @@ * To get a delay estimation of a connection (from a source to a sink), first, dx and dy between these two points should be calculated, * and these two numbers are the indices to access this 2D array. By default, the placement delay model is created by iterating over the router lookahead * to get the minimum cost for each dx and dy. + * + * For congestion modeling, we periodically estimate routing channel usage by distributing the estimated + * wirelength (WL) of each net across all routing channels within its bounding box. The wirelength is divided + * between CHANX and CHANY in proportion to the bounding box's width and height, respectively. However, all + * routing channels of the same type (CHANX or CHANY) within the box receive an equal share of that net's WL. + * + * We compute a congestion cost for each net by averaging the estimated utilization over all CHANX and CHANY + * channels in its bounding box. These average utilizations are then compared to a user-specified threshold. + * If a net’s average utilization exceeds the threshold, the excess is penalized by adding a cost proportional + * to the amount of the exceedance. */ #include "net_cost_handler.h" @@ -88,10 +98,13 @@ static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc, NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, PlacerState& placer_state, bool cube_bb) - : cube_bb_(cube_bb) + : congestion_modeling_started_(false) + , cube_bb_(cube_bb) , placer_state_(placer_state) , placer_opts_(placer_opts) { - const int num_layers = g_vpr_ctx.device().grid.get_num_layers(); + const auto& device_ctx = g_vpr_ctx.device(); + + const size_t num_layers = device_ctx.grid.get_num_layers(); const size_t num_nets = g_vpr_ctx.clustering().clb_nlist.nets().size(); is_multi_layer_ = num_layers > 1; @@ -100,9 +113,11 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, if (cube_bb_) { ts_bb_edge_new_.resize(num_nets, t_bb()); ts_bb_coord_new_.resize(num_nets, t_bb()); + bb_coords_.resize(num_nets, t_bb()); + bb_num_on_edges_.resize(num_nets, t_bb()); - comp_bb_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cost_, this, std::placeholders::_1); + comp_bb_cong_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cong_cost_, this, std::placeholders::_1); update_bb_functor_ = std::bind(&NetCostHandler::update_bb_, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4); get_net_bb_cost_functor_ = std::bind(&NetCostHandler::get_net_cube_bb_cost_, this, std::placeholders::_1, /*use_ts=*/true); @@ -112,14 +127,14 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, layer_ts_bb_coord_new_.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb())); layer_bb_num_on_edges_.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb())); layer_bb_coords_.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb())); - comp_bb_cost_functor_ = std::bind(&NetCostHandler::comp_per_layer_bb_cost_, this, std::placeholders::_1); + comp_bb_cong_cost_functor_ = std::bind(&NetCostHandler::comp_per_layer_bb_cost_, this, std::placeholders::_1); update_bb_functor_ = std::bind(&NetCostHandler::update_layer_bb_, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4); get_net_bb_cost_functor_ = std::bind(&NetCostHandler::get_net_per_layer_bb_cost_, this, std::placeholders::_1, /*use_ts=*/true); get_non_updatable_bb_functor_ = std::bind(&NetCostHandler::get_non_updatable_per_layer_bb_, this, std::placeholders::_1, /*use_ts=*/true); } - /* This initializes the whole matrix to OPEN which is an invalid value*/ + // This initializes the whole matrix to OPEN which is an invalid value ts_layer_sink_pin_count_.resize({num_nets, size_t(num_layers)}, OPEN); num_sink_pin_layer_.resize({num_nets, size_t(num_layers)}, OPEN); @@ -129,12 +144,21 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, net_cost_.resize(num_nets, -1.); proposed_net_cost_.resize(num_nets, -1.); - /* Used to store costs for moves not yet made and to indicate when a net's - * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't - * been recomputed. */ + // Used to store costs for moves not yet made and to indicate when a net's + // cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't + // been recomputed. bb_update_status_.resize(num_nets, NetUpdateState::NOT_UPDATED_YET); alloc_and_load_chan_w_factors_for_place_cost_(); + + // Congestion-related data members are not allocated until congestion modeling is enabled + // by calling estimate_routing_chan_util(). + VTR_ASSERT(!congestion_modeling_started_); + VTR_ASSERT(chan_util_.x.empty() && chan_util_.y.empty()); + VTR_ASSERT(acc_chan_util_.x.empty() && acc_chan_util_.y.empty()); + VTR_ASSERT(ts_avg_chan_util_new_.empty()); + VTR_ASSERT(avg_chan_util_.empty()); + VTR_ASSERT(net_cong_cost_.empty() && proposed_net_cong_cost_.empty()); } void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() { @@ -152,24 +176,26 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() { * This returns the total number of tracks between channels 'low' and 'high', * including tracks in these channels. */ - acc_chanx_width_ = vtr::PrefixSum1D<int>(grid_height, [&](size_t y) noexcept { + acc_chan_width_.x = vtr::PrefixSum1D<int>(grid_height, [&](size_t y) noexcept { int chan_x_width = device_ctx.chan_width.x_list[y]; - /* If the number of tracks in a channel is zero, two consecutive elements take the same - * value. This can lead to a division by zero in get_chanxy_cost_fac_(). To avoid this - * potential issue, we assume that the channel width is at least 1. - */ - if (chan_x_width == 0) + // If the number of tracks in a channel is zero, two consecutive elements take the same + // value. This can lead to a division by zero in get_chanxy_cost_fac_(). To avoid this + // potential issue, we assume that the channel width is at least 1. + if (chan_x_width == 0) { return 1; + } return chan_x_width; }); - acc_chany_width_ = vtr::PrefixSum1D<int>(grid_width, [&](size_t x) noexcept { + + acc_chan_width_.y = vtr::PrefixSum1D<int>(grid_width, [&](size_t x) noexcept { int chan_y_width = device_ctx.chan_width.y_list[x]; // to avoid a division by zero - if (chan_y_width == 0) + if (chan_y_width == 0) { return 1; + } return chan_y_width; }); @@ -237,20 +263,20 @@ void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_() { }); } -std::pair<double, double> NetCostHandler::comp_bb_cost(e_cost_methods method) { - return comp_bb_cost_functor_(method); +std::tuple<double, double, double> NetCostHandler::comp_bb_cong_cost(e_cost_methods method) { + return comp_bb_cong_cost_functor_(method); } -std::pair<double, double> NetCostHandler::comp_cube_bb_cost_(e_cost_methods method) { +std::tuple<double, double, double> NetCostHandler::comp_cube_bb_cong_cost_(e_cost_methods method) { const auto& cluster_ctx = g_vpr_ctx.clustering(); - double cost = 0; - double expected_wirelength = 0.0; + double bb_cost = 0.; + double expected_wirelength = 0.; - for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */ - if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */ - /* Small nets don't use incremental updating on their bounding boxes, * - * so they can use a fast bounding box calculator. */ + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + // Small nets don't use incremental updating on their bounding boxes, + // so they can use a fast bounding box calculator. if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET && method == e_cost_methods::NORMAL) { get_bb_from_scratch_(net_id, /*use_ts=*/false); } else { @@ -258,26 +284,40 @@ std::pair<double, double> NetCostHandler::comp_cube_bb_cost_(e_cost_methods meth } net_cost_[net_id] = get_net_cube_bb_cost_(net_id, /*use_ts=*/false); - cost += net_cost_[net_id]; + bb_cost += net_cost_[net_id]; if (method == e_cost_methods::CHECK) { expected_wirelength += get_net_wirelength_estimate_(net_id); } } } - return {cost, expected_wirelength}; + double cong_cost = 0.; + // Compute congestion cost using recomputed bounding boxes and channel utilization map + if (congestion_modeling_started_) { + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/false); + cong_cost += net_cong_cost_[net_id]; + } + } + } + + return {bb_cost, expected_wirelength, cong_cost}; } -std::pair<double, double> NetCostHandler::comp_per_layer_bb_cost_(e_cost_methods method) { +std::tuple<double, double, double> NetCostHandler::comp_per_layer_bb_cost_(e_cost_methods method) { const auto& cluster_ctx = g_vpr_ctx.clustering(); - double cost = 0; - double expected_wirelength = 0.0; + double cost = 0.; + double expected_wirelength = 0.; + // TODO: compute congestion cost + // Congestion modeling is not supported for per-layer mode, so 0 is returned. + constexpr double cong_cost = 0.; - for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */ - if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */ - /* Small nets don't use incremental updating on their bounding boxes, * - * so they can use a fast bounding box calculator. */ + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + // Small nets don't use incremental updating on their bounding boxes, + //so they can use a fast bounding box calculator. if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET && method == e_cost_methods::NORMAL) { get_layer_bb_from_scratch_(net_id, layer_bb_num_on_edges_[net_id], @@ -295,7 +335,7 @@ std::pair<double, double> NetCostHandler::comp_per_layer_bb_cost_(e_cost_methods } } - return {cost, expected_wirelength}; + return {cost, expected_wirelength, cong_cost}; } void NetCostHandler::update_net_bb_(const ClusterNetId net, @@ -420,7 +460,6 @@ void NetCostHandler::update_td_delta_costs_(const PlaceDelayModel* delay_model, } } -///@brief Record effected nets. void NetCostHandler::record_affected_net_(const ClusterNetId net) { /* Record effected nets. */ if (proposed_net_cost_[net] < 0.) { @@ -521,6 +560,14 @@ void NetCostHandler::get_non_updatable_cube_bb_(ClusterNetId net_id, bool use_ts num_sink_pin_layer[pin_loc.layer_num]++; } + + // Update average CHANX and CHANY usage for this net within its bounding box if congestion modeling is enabled + if (congestion_modeling_started_) { + auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chan_util_new_[net_id] : avg_chan_util_[net_id]; + const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); + x_chan_util = acc_chan_util_.x.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + y_chan_util = acc_chan_util_.y.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + } } void NetCostHandler::get_non_updatable_per_layer_bb_(ClusterNetId net_id, bool use_ts) { @@ -567,8 +614,6 @@ void NetCostHandler::update_bb_(ClusterNetId net_id, t_physical_tile_loc pin_new_loc, bool src_pin) { //TODO: account for multiple physical pin instances per logical pin - const t_bb *curr_bb_edge, *curr_bb_coord; - const auto& device_ctx = g_vpr_ctx.device(); const int num_layers = device_ctx.grid.get_num_layers(); @@ -588,6 +633,7 @@ void NetCostHandler::update_bb_(ClusterNetId net_id, vtr::NdMatrixProxy<int, 1> curr_num_sink_pin_layer = (bb_update_status_[net_id] == NetUpdateState::NOT_UPDATED_YET) ? num_sink_pin_layer_[size_t(net_id)] : num_sink_pin_layer_new; + const t_bb *curr_bb_edge, *curr_bb_coord; if (bb_update_status_[net_id] == NetUpdateState::NOT_UPDATED_YET) { /* The net had NOT been updated before, could use the old values */ curr_bb_edge = &bb_num_on_edges_[net_id]; @@ -828,6 +874,14 @@ void NetCostHandler::update_bb_(ClusterNetId net_id, if (bb_update_status_[net_id] == NetUpdateState::NOT_UPDATED_YET) { bb_update_status_[net_id] = NetUpdateState::UPDATED_ONCE; } + + // Update average CHANX and CHANY usage for this net within its bounding box if congestion modeling is enabled + if (congestion_modeling_started_) { + auto& [x_chan_util, y_chan_util] = ts_avg_chan_util_new_[net_id]; + const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); + x_chan_util = acc_chan_util_.x.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + y_chan_util = acc_chan_util_.y.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + } } void NetCostHandler::update_layer_bb_(ClusterNetId net_id, @@ -1262,6 +1316,14 @@ void NetCostHandler::get_bb_from_scratch_(ClusterNetId net_id, bool use_ts) { num_on_edges.ymax = ymax_edge; num_on_edges.layer_min = layer_min_edge; num_on_edges.layer_max = layer_max_edge; + + // Update average CHANX and CHANY usage for this net within its bounding box if congestion modeling is enabled + if (congestion_modeling_started_) { + auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chan_util_new_[net_id] : avg_chan_util_[net_id]; + const int total_channels = (coords.xmax - coords.xmin + 1) * (coords.ymax - coords.ymin + 1); + x_chan_util = acc_chan_util_.x.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels; + y_chan_util = acc_chan_util_.y.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels; + } } void NetCostHandler::get_layer_bb_from_scratch_(ClusterNetId net_id, @@ -1324,7 +1386,7 @@ void NetCostHandler::get_layer_bb_from_scratch_(ClusterNetId net_id, double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) { // Finds the cost due to one net by looking at its coordinate bounding box. - auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : bb_coords_[net_id]; @@ -1356,6 +1418,18 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) { return ncost; } +double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) { + VTR_ASSERT_SAFE(congestion_modeling_started_); + const auto [x_chan_util, y_chan_util] = use_ts ? ts_avg_chan_util_new_[net_id] : avg_chan_util_[net_id]; + + const float threshold = placer_opts_.congestion_chan_util_threshold; + + float x_chan_cong = (x_chan_util < threshold) ? 0.0f : x_chan_util - threshold; + float y_chan_cong = (y_chan_util < threshold) ? 0.0f : y_chan_util - threshold; + + return x_chan_cong + y_chan_cong; +} + double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id, bool use_ts) { // Per-layer bounding box of the net const std::vector<t_2D_bb>& bb = use_ts ? layer_ts_bb_coord_new_[net_id] : layer_bb_coords_[net_id]; @@ -1466,25 +1540,29 @@ float NetCostHandler::get_chanz_cost_factor_(const t_bb& bb) { return z_cost_factor; } -double NetCostHandler::recompute_bb_cost_() { - double cost = 0; +std::pair<double, double> NetCostHandler::recompute_bb_cong_cost_() { + const auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& cluster_ctx = g_vpr_ctx.clustering(); + double bb_cost = 0.; + double cong_cost = 0.; - for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */ - if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */ - /* Bounding boxes don't have to be recomputed; they're correct. */ - cost += net_cost_[net_id]; + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + // Bounding boxes don't have to be recomputed; they're correct. + bb_cost += net_cost_[net_id]; + + if (congestion_modeling_started_) { + cong_cost += net_cong_cost_[net_id]; + } } } - return cost; + return {bb_cost, cong_cost}; } double wirelength_crossing_count(size_t fanout) { - /* Get the expected "crossing count" of a net, based on its number * - * of pins. Extrapolate for very large nets. */ - + // Get the expected "crossing count" of a net, based on its number of pins. + // Extrapolate for very large nets. if (fanout > MAX_FANOUT_CROSSING_COUNT) { return 2.7933 + 0.02616 * (fanout - MAX_FANOUT_CROSSING_COUNT); } else { @@ -1492,13 +1570,17 @@ double wirelength_crossing_count(size_t fanout) { } } -void NetCostHandler::set_bb_delta_cost_(double& bb_delta_c) { +void NetCostHandler::set_bb_delta_cost_(double& bb_delta_c, double& congestion_delta_c) { for (const ClusterNetId ts_net : ts_nets_to_update_) { ClusterNetId net_id = ts_net; proposed_net_cost_[net_id] = get_net_bb_cost_functor_(net_id); - bb_delta_c += proposed_net_cost_[net_id] - net_cost_[net_id]; + + if (congestion_modeling_started_) { + proposed_net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/true); + congestion_delta_c += proposed_net_cong_cost_[net_id] - net_cong_cost_[net_id]; + } } } @@ -1506,19 +1588,21 @@ void NetCostHandler::find_affected_nets_and_update_costs(const PlaceDelayModel* const PlacerCriticalities* criticalities, t_pl_blocks_to_be_moved& blocks_affected, double& bb_delta_c, - double& timing_delta_c) { - VTR_ASSERT_SAFE(bb_delta_c == 0.); - VTR_ASSERT_SAFE(timing_delta_c == 0.); - auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + double& timing_delta_c, + double& congestion_delta_c) { + VTR_ASSERT_DEBUG(bb_delta_c == 0.); + VTR_ASSERT_DEBUG(timing_delta_c == 0.); + VTR_ASSERT_DEBUG(congestion_delta_c == 0.); + const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; ts_nets_to_update_.resize(0); - /* Go through all the blocks moved. */ + // Go through all the blocks moved. for (const t_pl_moved_block& moving_block : blocks_affected.moved_blocks) { auto& affected_pins = blocks_affected.affected_pins; ClusterBlockId blk_id = moving_block.block_num; - /* Go through all the pins in the moved block. */ + // Go through all the pins in the moved block. for (ClusterPinId blk_pin : clb_nlist.block_pins(blk_id)) { bool is_src_moving = false; if (clb_nlist.pin_type(blk_pin) == PinType::SINK) { @@ -1535,14 +1619,14 @@ void NetCostHandler::find_affected_nets_and_update_costs(const PlaceDelayModel* } } - /* Now update the bounding box costs (since the net bounding * - * boxes are up-to-date). The cost is only updated once per net. */ - set_bb_delta_cost_(bb_delta_c); + // Now update the bounding box costs (since the net bounding + // boxes are up-to-date). The cost is only updated once per net. + set_bb_delta_cost_(bb_delta_c, congestion_delta_c); } void NetCostHandler::update_move_nets() { - /* update net cost functions and reset flags. */ - auto& cluster_ctx = g_vpr_ctx.clustering(); + // update net cost functions and reset flags. + const auto& cluster_ctx = g_vpr_ctx.clustering(); for (const ClusterNetId ts_net : ts_nets_to_update_) { ClusterNetId net_id = ts_net; @@ -1558,18 +1642,27 @@ void NetCostHandler::update_move_nets() { } net_cost_[net_id] = proposed_net_cost_[net_id]; - - /* negative proposed_net_cost value is acting as a flag to mean not computed yet. */ + // negative proposed_net_cost value is acting as a flag to mean not computed yet. proposed_net_cost_[net_id] = -1; + + if (congestion_modeling_started_) { + net_cong_cost_[net_id] = proposed_net_cong_cost_[net_id]; + proposed_net_cong_cost_[net_id] = -1; + } + bb_update_status_[net_id] = NetUpdateState::NOT_UPDATED_YET; } } void NetCostHandler::reset_move_nets() { - /* Reset the net cost function flags first. */ - for (const ClusterNetId ts_net : ts_nets_to_update_) { - ClusterNetId net_id = ts_net; + // Reset the net cost function flags first. + for (const ClusterNetId net_id : ts_nets_to_update_) { proposed_net_cost_[net_id] = -1; + + if (congestion_modeling_started_) { + proposed_net_cong_cost_[net_id] = -1; + } + bb_update_status_[net_id] = NetUpdateState::NOT_UPDATED_YET; } } @@ -1588,10 +1681,17 @@ void NetCostHandler::recompute_costs_from_scratch(const PlaceDelayModel* delay_m } }; - double new_bb_cost = recompute_bb_cost_(); + auto [new_bb_cost, new_cong_cost] = recompute_bb_cong_cost_(); check_and_print_cost(new_bb_cost, costs.bb_cost, "bb_cost"); costs.bb_cost = new_bb_cost; + if (congestion_modeling_started_) { + check_and_print_cost(new_cong_cost, costs.congestion_cost, "cong_cost"); + costs.congestion_cost = new_cong_cost; + } else { + costs.congestion_cost = 0.; + } + if (placer_opts_.place_algorithm.is_timing_driven()) { double new_timing_cost = 0.; comp_td_costs(delay_model, *criticalities, placer_state_, &new_timing_cost); @@ -1607,8 +1707,8 @@ double NetCostHandler::get_total_wirelength_estimate() const { const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; double estimated_wirelength = 0.0; - for (ClusterNetId net_id : clb_nlist.nets()) { /* for each net ... */ - if (!clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */ + for (ClusterNetId net_id : clb_nlist.nets()) { + if (!clb_nlist.net_is_ignored(net_id)) { if (cube_bb_) { estimated_wirelength += get_net_wirelength_estimate_(net_id); } else { @@ -1620,9 +1720,182 @@ double NetCostHandler::get_total_wirelength_estimate() const { return estimated_wirelength; } +double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /*=true*/) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const DeviceContext& device_ctx = g_vpr_ctx.device(); + + const size_t grid_width = device_ctx.grid.width(); + const size_t grid_height = device_ctx.grid.height(); + const size_t num_layers = device_ctx.grid.get_num_layers(); + const size_t num_nets = g_vpr_ctx.clustering().clb_nlist.nets().size(); + + // Congestion-related data members are allocated the first time this method is called + // to enable congestion modeling. This lazy allocation helps save memory when congestion + // modeling is not used. + if (!congestion_modeling_started_) { + congestion_modeling_started_ = true; + + chan_util_.x = vtr::NdMatrix<double, 3>({{num_layers, grid_width, grid_height}}, 0); + chan_util_.y = vtr::NdMatrix<double, 3>({{num_layers, grid_width, grid_height}}, 0); + + acc_chan_util_.x = vtr::PrefixSum2D<double>(grid_width, + grid_height, + [&](size_t x, size_t y) { + return chan_util_.x[0][x][y]; + }); + + acc_chan_util_.y = vtr::PrefixSum2D<double>(grid_width, + grid_height, + [&](size_t x, size_t y) { + return chan_util_.y[0][x][y]; + }); + + ts_avg_chan_util_new_.resize(num_nets, {0., 0.}); + avg_chan_util_.resize(num_nets, {0., 0.}); + net_cong_cost_.resize(num_nets, -1.); + proposed_net_cong_cost_.resize(num_nets, -1.); + } + + chan_util_.x.fill(0.); + chan_util_.y.fill(0.); + + // For each net, this function estimates routing channel utilization by distributing + // the net's expected wirelength across its bounding box. The expected wirelength + // for each dimension (x, y) is computed proportionally based on the bounding box size + // in each direction. The wirelength in each dimension is then **evenly spread** across + // all grid locations within the bounding box, and the demand is accumulated in + // the channel utilization matrices. + + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + + if (cube_bb_) { + const t_bb& bb = bb_coords_[net_id]; + double expected_wirelength = get_net_wirelength_estimate_(net_id); + + int distance_x = bb.xmax - bb.xmin + 1; + int distance_y = bb.ymax - bb.ymin + 1; + int distance_z = bb.layer_max - bb.layer_min + 1; + + double expected_x_wl = (double)distance_x / (distance_x + distance_y) * expected_wirelength; + double expected_y_wl = expected_wirelength - expected_x_wl; + + int total_channel_segments = distance_x * distance_y * distance_z; + double expected_per_x_segment_wl = expected_x_wl / total_channel_segments; + double expected_per_y_segment_wl = expected_y_wl / total_channel_segments; + + for (int layer = bb.layer_min; layer <= bb.layer_max; layer++) { + for (int x = bb.xmin; x <= bb.xmax; x++) { + for (int y = bb.ymin; y <= bb.ymax; y++) { + chan_util_.x[layer][x][y] += expected_per_x_segment_wl; + chan_util_.y[layer][x][y] += expected_per_y_segment_wl; + } + } + } + } else { + const std::vector<t_2D_bb>& bb = layer_bb_coords_[net_id]; + const vtr::NdMatrixProxy<int, 1> net_layer_pin_sink_count = num_sink_pin_layer_[size_t(net_id)]; + + for (size_t layer = 0; layer < bb.size(); layer++) { + if (net_layer_pin_sink_count[layer] == 0) { + continue; + } + + double crossing = wirelength_crossing_count(net_layer_pin_sink_count[layer] + 1); + double expected_wirelength = ((bb[layer].xmax - bb[layer].xmin + 1) + (bb[layer].ymax - bb[layer].ymin + 1)) * crossing; + + int distance_x = bb[layer].xmax - bb[layer].xmin + 1; + int distance_y = bb[layer].ymax - bb[layer].ymin + 1; + + double expected_x_wl = (double)distance_x / (distance_x + distance_y) * expected_wirelength; + double expected_y_wl = expected_wirelength - expected_x_wl; + + int total_channel_segments = distance_x * distance_y; + double expected_per_x_segment_wl = expected_x_wl / total_channel_segments; + double expected_per_y_segment_wl = expected_y_wl / total_channel_segments; + + for (int x = bb[layer].xmin; x <= bb[layer].xmax; x++) { + for (int y = bb[layer].ymin; y <= bb[layer].ymax; y++) { + chan_util_.x[layer][x][y] += expected_per_x_segment_wl; + chan_util_.y[layer][x][y] += expected_per_y_segment_wl; + } + } + } + } + } + } + + // Channel width is computed only once and reused in later calls. + if (chan_width_.x.empty()) { + VTR_ASSERT(chan_width_.y.empty()); + std::tie(chan_width_.x, chan_width_.y) = calculate_channel_width(); + } + + VTR_ASSERT(chan_util_.x.size() == chan_util_.y.size()); + VTR_ASSERT(chan_util_.x.size() == chan_width_.x.size()); + VTR_ASSERT(chan_util_.y.size() == chan_width_.y.size()); + + // Normalize channel utilizations by dividing by the corresponding channel widths. + // If a channel does not exist (i.e., its width is zero), we set its utilization to 1 + // to avoid division by zero. + for (size_t layer = 0; layer < num_layers; ++layer) { + for (size_t x = 0; x < grid_width; ++x) { + for (size_t y = 0; y < grid_height; ++y) { + if (chan_width_.x[layer][x][y] > 0) { + chan_util_.x[layer][x][y] /= chan_width_.x[layer][x][y]; + } else { + VTR_ASSERT_SAFE(chan_width_.x[layer][x][y] == 0); + chan_util_.x[layer][x][y] = 1.; + } + + if (chan_width_.y[layer][x][y] > 0) { + chan_util_.y[layer][x][y] /= chan_width_.y[layer][x][y]; + } else { + VTR_ASSERT_SAFE(chan_width_.y[layer][x][y] == 0); + chan_util_.y[layer][x][y] = 1.; + } + } + } + } + + // For now, congestion modeling in the placement stage is limited to a single die + // TODO: extend it to multiple dice + acc_chan_util_.x = vtr::PrefixSum2D<double>(grid_width, + grid_height, + [&](size_t x, size_t y) { + return chan_util_.x[0][x][y]; + }); + + acc_chan_util_.y = vtr::PrefixSum2D<double>(grid_width, + grid_height, + [&](size_t x, size_t y) { + return chan_util_.y[0][x][y]; + }); + + double cong_cost = 0.; + // Compute congestion cost using computed bounding boxes and channel utilization map + if (compute_congestion_cost) { + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/false); + cong_cost += net_cong_cost_[net_id]; + } + } + } + + return cong_cost; +} + +const ChannelData<vtr::NdMatrix<double, 3>>& NetCostHandler::get_chan_util() const { + return chan_util_; +} + void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) { if (cube_bb_) { bb_coords_[net_id] = ts_bb_coord_new_[net_id]; + if (congestion_modeling_started_) { + avg_chan_util_[net_id] = ts_avg_chan_util_new_[net_id]; + } } else { layer_bb_coords_[net_id] = layer_ts_bb_coord_new_[net_id]; } @@ -1746,115 +2019,3 @@ std::pair<t_bb, t_bb> NetCostHandler::union_2d_bb_incr(ClusterNetId net_id) cons return std::make_pair(merged_num_edge, merged_bb); } - -std::pair<vtr::NdMatrix<double, 3>, vtr::NdMatrix<double, 3>> NetCostHandler::estimate_routing_chan_util() const { - const auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& device_ctx = g_vpr_ctx.device(); - - auto chanx_util = vtr::NdMatrix<double, 3>({{(size_t)device_ctx.grid.get_num_layers(), - device_ctx.grid.width(), - device_ctx.grid.height()}}, - 0); - - auto chany_util = vtr::NdMatrix<double, 3>({{(size_t)device_ctx.grid.get_num_layers(), - device_ctx.grid.width(), - device_ctx.grid.height()}}, - 0); - - // For each net, this function estimates routing channel utilization by distributing - // the net's expected wirelength across its bounding box. The expected wirelength - // for each dimension (x, y) is computed proportionally based on the bounding box size - // in each direction. The wirelength in each dimension is then **evenly spread** across - // all grid locations within the bounding box, and the demand is accumulated in - // the channel utilization matrices. - - for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { - if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { - - if (cube_bb_) { - const t_bb& bb = bb_coords_[net_id]; - double expected_wirelength = get_net_wirelength_estimate_(net_id); - - int distance_x = bb.xmax - bb.xmin + 1; - int distance_y = bb.ymax - bb.ymin + 1; - int distance_z = bb.layer_max - bb.layer_min + 1; - - double expected_x_wl = (double)distance_x / (distance_x + distance_y) * expected_wirelength; - double expected_y_wl = expected_wirelength - expected_x_wl; - - int total_channel_segments = distance_x * distance_y * distance_z; - double expected_per_x_segment_wl = expected_x_wl / total_channel_segments; - double expected_per_y_segment_wl = expected_y_wl / total_channel_segments; - - for (int layer = bb.layer_min; layer <= bb.layer_max; layer++) { - for (int x = bb.xmin; x <= bb.xmax; x++) { - for (int y = bb.ymin; y <= bb.ymax; y++) { - chanx_util[layer][x][y] += expected_per_x_segment_wl; - chany_util[layer][x][y] += expected_per_y_segment_wl; - } - } - } - } else { - const std::vector<t_2D_bb>& bb = layer_bb_coords_[net_id]; - const vtr::NdMatrixProxy<int, 1> net_layer_pin_sink_count = num_sink_pin_layer_[size_t(net_id)]; - - for (size_t layer = 0; layer < bb.size(); layer++) { - if (net_layer_pin_sink_count[layer] == 0) { - continue; - } - - double crossing = wirelength_crossing_count(net_layer_pin_sink_count[layer] + 1); - double expected_wirelength = ((bb[layer].xmax - bb[layer].xmin + 1) + (bb[layer].ymax - bb[layer].ymin + 1)) * crossing; - - int distance_x = bb[layer].xmax - bb[layer].xmin + 1; - int distance_y = bb[layer].ymax - bb[layer].ymin + 1; - - double expected_x_wl = (double)distance_x / (distance_x + distance_y) * expected_wirelength; - double expected_y_wl = expected_wirelength - expected_x_wl; - - int total_channel_segments = distance_x * distance_y; - double expected_per_x_segment_wl = expected_x_wl / total_channel_segments; - double expected_per_y_segment_wl = expected_y_wl / total_channel_segments; - - for (int x = bb[layer].xmin; x <= bb[layer].xmax; x++) { - for (int y = bb[layer].ymin; y <= bb[layer].ymax; y++) { - chanx_util[layer][x][y] += expected_per_x_segment_wl; - chany_util[layer][x][y] += expected_per_y_segment_wl; - } - } - } - } - } - } - - const auto [chanx_width, chany_width] = calculate_channel_width(); - - VTR_ASSERT(chanx_util.size() == chany_util.size()); - VTR_ASSERT(chanx_util.ndims() == chany_util.ndims()); - VTR_ASSERT(chanx_util.size() == chanx_width.size()); - VTR_ASSERT(chanx_util.ndims() == chanx_width.ndims()); - VTR_ASSERT(chany_util.size() == chany_width.size()); - VTR_ASSERT(chany_util.ndims() == chany_width.ndims()); - - for (size_t layer = 0; layer < chanx_util.dim_size(0); ++layer) { - for (size_t x = 0; x < chanx_util.dim_size(1); ++x) { - for (size_t y = 0; y < chanx_util.dim_size(2); ++y) { - if (chanx_width[layer][x][y] > 0) { - chanx_util[layer][x][y] /= chanx_width[layer][x][y]; - } else { - VTR_ASSERT_SAFE(chanx_width[layer][x][y] == 0); - chanx_util[layer][x][y] = 1.; - } - - if (chany_width[layer][x][y] > 0) { - chany_util[layer][x][y] /= chany_width[layer][x][y]; - } else { - VTR_ASSERT_SAFE(chany_width[layer][x][y] == 0); - chany_util[layer][x][y] = 1.; - } - } - } - } - - return {chanx_util, chany_util}; -} diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index b36fe9fc6e..8f9d4511e1 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -35,6 +35,13 @@ enum class e_cost_methods { CHECK }; +template<typename T> +struct ChannelData { + T x; + T y; + // TODO: add Z dimension +}; + class NetCostHandler { public: NetCostHandler() = delete; @@ -55,20 +62,20 @@ class NetCostHandler { NetCostHandler(const t_placer_opts& placer_opts, PlacerState& placer_state, bool cube_bb); /** - * @brief Finds the bb cost from scratch. - * Done only when the placement has been radically changed - * (i.e. after initial placement). Otherwise find the cost + * @brief Finds the bb cost and congestion cost from scratch. + * @details Done only when the placement has been radically changed + * (i.e. after initial placement). Otherwise, find the cost * change incrementally. If method check is NORMAL, we find * bounding boxes that are updatable for the larger nets. * If method is CHECK, all bounding boxes are found via the * non_updateable_bb routine, to provide a cost which can be * used to check the correctness of the other routine. * @param method The method used to calculate placement cost. - * @return (bounding box cost of the placement, estimated wirelength) + * @return (bounding box cost of the placement, estimated wirelength, congestion cost) * * @note The returned estimated wirelength is valid only when method == CHECK */ - std::pair<double, double> comp_bb_cost(e_cost_methods method); + std::tuple<double, double, double> comp_bb_cong_cost(e_cost_methods method); /** * @brief Find all the nets and pins affected by this swap and update costs. @@ -97,7 +104,8 @@ class NetCostHandler { const PlacerCriticalities* criticalities, t_pl_blocks_to_be_moved& blocks_affected, double& bb_delta_c, - double& timing_delta_c); + double& timing_delta_c, + double& congestion_delta_c); /** * @brief Reset the net cost function flags (proposed_net_cost and bb_updated_before) @@ -107,7 +115,6 @@ class NetCostHandler { /** * @brief Update net cost data structures (in placer context and net_cost in .cpp file) * and reset flags (proposed_net_cost and bb_updated_before). - * @param num_nets_affected The number of nets affected by the move. * It is used to determine the index up to which elements in ts_nets_to_update are valid. */ void update_move_nets(); @@ -117,7 +124,6 @@ class NetCostHandler { * and update "costs" accordingly. It is important to note that in this function bounding box * and connection delays are not calculated from scratch. However, it iterates over all nets * and connections and updates their costs by a complete summation, rather than incrementally. - * @param noc_opts Contains NoC cost weighting factors. * @param delay_model Placement delay model. Used to compute timing cost. * @param criticalities Contains the clustered netlist connection criticalities. * Used to computed timing cost . @@ -133,33 +139,44 @@ class NetCostHandler { double get_total_wirelength_estimate() const; /** - * @brief Estimates routing channel utilization. + * @brief Estimates routing channel utilization and computes the congestion cost + * for each net. + * @param compute_congestion_cost Indicates whether computing congestion cost is needed. * * For each net, distributes estimated wirelength across its bounding box * and accumulates demand for different routing channels. Normalizes by channel widths * (e.g. a value of 0.5 means 50% of the wiring in a channel is expected to be used). * - * @return Pair of matrices with relative CHANX and CHANY utilization. - * The dimension order for each matrix is [layer][x][y]. + * @note This method assumes that net bounding boxes are already computed. + * + * @return Total congestion cost if requested. */ - std::pair<vtr::NdMatrix<double, 3>, vtr::NdMatrix<double, 3>> estimate_routing_chan_util() const; + double estimate_routing_chan_util(bool compute_congestion_cost = true); + + /** + * @brief Returns the estimated routing channel usage for each location in the grid. + * The channel usage estimates are computed in estimate_routing_chan_util(). + */ + const ChannelData<vtr::NdMatrix<double, 3>>& get_chan_util() const; private: - ///@brief Specifies whether the bounding box is computed using cube method or per-layer method. + /// Indicates whether congestion cost modeling is enabled. + bool congestion_modeling_started_; + /// Specifies whether the bounding box is computed using cube method or per-layer method. bool cube_bb_; - ///@brief Determines whether the FPGA has multiple dies (layers) + /// Determines whether the FPGA has multiple dies (layers) bool is_multi_layer_; - ///@brief A reference to the placer's state to be updated by this object. + /// A reference to the placer's state to be updated by this object. PlacerState& placer_state_; - ///@brief Contains some parameter that determine how the placement cost is computed. + /// Contains some parameter that determine how the placement cost is computed. const t_placer_opts& placer_opts_; - ///@brief Points to the proper method for computing the bounding box cost from scratch. - std::function<std::pair<double, double>(e_cost_methods method)> comp_bb_cost_functor_; - ///@brief Points to the proper method for updating the bounding box of a net. + /// Points to the proper method for computing the bounding box cost, estimated wirelength and congestion cost from scratch. + std::function<std::tuple<double, double, double>(e_cost_methods method)> comp_bb_cong_cost_functor_; + /// Points to the proper method for updating the bounding box of a net. std::function<void(ClusterNetId net_id, t_physical_tile_loc pin_old_loc, t_physical_tile_loc pin_new_loc, bool is_driver)> update_bb_functor_; - ///@brief Points to the proper method for getting the bounding box cost of a net + /// Points to the proper method for getting the bounding box cost of a net std::function<double(ClusterNetId)> get_net_bb_cost_functor_; - ///@brief Points to the proper method for getting the non-updatable bounding box of a net + /// Points to the proper method for getting the non-updatable bounding box of a net std::function<void(const ClusterNetId net)> get_non_updatable_bb_functor_; /** @@ -195,19 +212,28 @@ class NetCostHandler { /* [0...num_affected_nets] -> net_id of the affected nets */ std::vector<ClusterNetId> ts_nets_to_update_; - // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates) + vtr::vector<ClusterNetId, std::pair<float, float>> ts_avg_chan_util_new_; + + /// Store the number of blocks on each of a net's bounding box (to allow efficient updates) + /// [0..cluster_ctx.clb_nlist.nets().size()-1] vtr::vector<ClusterNetId, t_bb> bb_num_on_edges_; - // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the bounding box coordinates of a net's bounding box + /// Store the bounding box coordinates of a net's bounding box + /// [0..cluster_ctx.clb_nlist.nets().size()-1] vtr::vector<ClusterNetId, t_bb> bb_coords_; - // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates) + vtr::vector<ClusterNetId, std::pair<float, float>> avg_chan_util_; + + /// Store the number of blocks on each of a net's bounding box (to allow efficient updates) + /// [0..cluster_ctx.clb_nlist.nets().size()-1] vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_bb_num_on_edges_; - // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the bounding box coordinates of a net's bounding box + /// Store the bounding box coordinates of a net's bounding box + /// [0..cluster_ctx.clb_nlist.nets().size()-1] vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_bb_coords_; - // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each layer () + /// Store the number of blocks on each layer () + /// [0..cluster_ctx.clb_nlist.nets().size()-1] vtr::Matrix<int> num_sink_pin_layer_; /** @@ -230,6 +256,19 @@ class NetCostHandler { */ vtr::vector<ClusterNetId, double> net_cost_; vtr::vector<ClusterNetId, double> proposed_net_cost_; + + /** + * @brief The congestion cost for each net is based on the extent to which its + * average routing channel utilization exceeds a predefined threshold. + * This is computed by measuring the average utilization within the net's + * bounding box and subtracting the congestion threshold. + * Only the excess portion contributes to the net's congestion cost. + * The valid range is [0...cluster_ctx.clb_nlist.nets().size()-1] when + * congestion modeling is enabled. Otherwise, this vector would be empty. + */ + vtr::vector<ClusterNetId, double> net_cong_cost_; + vtr::vector<ClusterNetId, double> proposed_net_cong_cost_; + vtr::vector<ClusterNetId, NetUpdateState> bb_update_status_; /** @@ -241,14 +280,30 @@ class NetCostHandler { * number of tracks in that direction; for other cost functions they * will never be used. */ - vtr::PrefixSum1D<int> acc_chanx_width_; // [0..device_ctx.grid.width()-1] - vtr::PrefixSum1D<int> acc_chany_width_; // [0..device_ctx.grid.height()-1] + ChannelData<vtr::PrefixSum1D<int>> acc_chan_width_; + + /** + * @brief Estimated routing usage per channel segment, + * indexed by [layer][x][y]. Values represent normalized wire demand + * contribution from all nets distributed over their bounding boxes. + */ + ChannelData<vtr::NdMatrix<double, 3>> chan_util_; + + /** + * @brief Accumulated (prefix sum) channel utilization in each direction (x/y), + * on the base layer. Enables fast computation of average utilization + * over a net’s bounding box during congestion cost estimation. + */ + ChannelData<vtr::PrefixSum2D<double>> acc_chan_util_; + + /// Available channel width per grid location, indexed by [layer][x][y]. + ChannelData<vtr::NdMatrix<int, 3>> chan_width_; /** * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in * the cross-die-layer direction over a 2D (x,y) region. We don't assume the inter-die connectivity is the same at all (x,y) locations, so we * can't compute the full chanz_place_cost_fac for all possible (xlow,ylow)(xhigh,yhigh) without a 4D array, which would - * be too big: O(n^2) in circuit size. Instead we compute a prefix sum that stores the number of inter-die connections per layer from + * be too big: O(n^2) in circuit size. Instead, we compute a prefix sum that stores the number of inter-die connections per layer from * (x=0,y=0) to (x,y). Given this, we can compute the average number of inter-die connections over a (xlow,ylow) to (xhigh,yhigh) * region in O(1) (by adding and subtracting 4 entries) */ @@ -287,9 +342,10 @@ class NetCostHandler { /** * @brief Calculates and returns the total bb (wirelength) cost change that would result from moving the blocks * indicated in the blocks_affected data structure. - * @param bb_delta_c Cost difference after and before moving the block + * @param bb_delta_c Bounding box cost difference after and before moving the block. + * @param congestion_delta_c Congestion cost difference after and before moving the block. */ - void set_bb_delta_cost_(double& bb_delta_c); + void set_bb_delta_cost_(double& bb_delta_c, double& congestion_delta_c); /** * @brief Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac arrays with the inverse of @@ -304,7 +360,7 @@ class NetCostHandler { /** * @brief Allocates and loads acc_tile_num_inter_die_conn_ which contains the accumulative number of inter-die - * conntections. + * connections. * * @details This is only useful for multi-die FPGAs. */ @@ -377,10 +433,10 @@ class NetCostHandler { * @details This routine finds the bounding box of each net from scratch when the bounding box is of type per-layer (i.e. from * only the block location information). It updates the coordinate, number of pins on each edge information, and the * number of sinks on each layer. It should only be called when the bounding box information is not valid. - * @param net_id ID of the net which the moving pin belongs to - * @param coords Bounding box coordinates of the net. It is calculated in this function - * @param num_on_edges Net's number of blocks on the edges of the bounding box. It is calculated in this function. - * @param num_sink_pin_layer Net's number of sinks on each layer, calculated in this function. + * @param net_id ID of the net which the moving pin belongs to + * @param coords Bounding box coordinates of the net. It is calculated in this function + * @param num_on_edges Net's number of blocks on the edges of the bounding box. It is calculated in this function. + * @param layer_pin_sink_count Net's number of sinks on each layer, calculated in this function. */ void get_layer_bb_from_scratch_(ClusterNetId net_id, std::vector<t_2D_bb>& num_on_edges, @@ -415,11 +471,9 @@ class NetCostHandler { * Currently assumes channels on both sides of the CLBs forming the edges of the bounding box can be used. * Essentially, I am assuming the pins always lie on the outside of the bounding box. The x and y coordinates * are the pin's x and y coordinates. IO blocks are considered to be one cell in for simplicity. - * @param bb_edge_new Number of blocks on the edges of the bounding box - * @param bb_coord_new Coordinates of the bounding box - * @param num_sink_pin_layer_new Number of sinks of the given net on each layer - * @param pin_old_loc The old location of the moving pin - * @param pin_new_loc The new location of the moving pin + * @param net_id Net whose bounding box is to be updated. + * @param pin_old_loc The old location of the moving pin + * @param pin_new_loc The new location of the moving pin * @param is_output_pin Is the moving pin of the type output */ void update_layer_bb_(ClusterNetId net_id, @@ -495,22 +549,23 @@ class NetCostHandler { /** * @brief Computes the bounding box from scratch using 2D bounding boxes (per-layer mode) * @param method The method used to calculate placement cost. Specifies whether the cost is - * computed from scratch or incrementally. - * @return (bounding box cost of the placement, estimated wirelength) - * + * computed from scratch or incrementally. + * @return (bounding box cost of the placement, estimated wirelength, congestion cost) + * @note Congestion modeling is not supported for per-layer mode, so 0 is returned. * @note The returned estimated wirelength is valid only when method == CHECK */ - std::pair<double, double> comp_per_layer_bb_cost_(e_cost_methods method); + std::tuple<double, double, double> comp_per_layer_bb_cost_(e_cost_methods method); /** * @brief Computes the bounding box from scratch using 3D bounding boxes (cube mode) + * and calculates BB cost, estimated wirelength, and congestion cost (if enabled). * @param method The method used to calculate placement cost. Specifies whether the cost is - * computed from scratch or incrementally. - * @return (bounding box cost of the placement, estimated wirelength) + * computed from scratch or incrementally. + * @return (bounding box cost of the placement, estimated wirelength, congestion cost) * * @note The returned estimated wirelength is valid only when method == CHECK */ - std::pair<double, double> comp_cube_bb_cost_(e_cost_methods method); + std::tuple<double, double, double> comp_cube_bb_cong_cost_(e_cost_methods method); /** * @brief if "net" is not already stored as an affected net, add it in ts_nets_to_update. @@ -520,20 +575,31 @@ class NetCostHandler { /** * @brief To mitigate round-off errors, every once in a while, the costs of nets are summed up from scratch. - * This functions is called to do that for bb cost. It doesn't calculate the BBs from scratch, it would only add the costs again. - * @return Total bb (wirelength) cost for the placement + * This function is called to do that for bb and congestion cost. + * It doesn't calculate the BBs or channel usage estimate from scratch, + * it would only add the costs again. + * @return (total bb cost, total congestion cost) */ - double recompute_bb_cost_(); + std::pair<double, double> recompute_bb_cong_cost_(); /** * @brief Given the 3D BB, calculate the wire-length cost of the net - * @param net_id ID of the net which cost is requested. + * @param net_id ID of the net whose cost is requested. * @param use_ts Specifies if the bounding box is retrieved from ts data structures - * or move context. + * or permanent data structures. * @return Wirelength cost of the net */ double get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts); + /** + * @brief Calculate the congestion cost of net using its 3D bounding box. + * @param net_id ID of the net whose cost is requested. + * @param use_ts Specifies if the bounding box is retrieved from ts data structures + * or move context. + * @return Congestion cost of the net + */ + double get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts); + /** * @brief Given the per-layer BB, calculate the wire-length cost of the net on each layer * and return the sum of the costs @@ -556,10 +622,10 @@ class NetCostHandler { */ template<typename BBT> std::pair<double, double> get_chanxy_cost_fac_(const BBT& bb) { - const int total_chanx_width = acc_chanx_width_.get_sum(bb.ymin, bb.ymax); + const int total_chanx_width = acc_chan_width_.x.get_sum(bb.ymin, bb.ymax); const double inverse_average_chanx_width = (bb.ymax - bb.ymin + 1.0) / total_chanx_width; - const int total_chany_width = acc_chany_width_.get_sum(bb.xmin, bb.xmax); + const int total_chany_width = acc_chan_width_.y.get_sum(bb.xmin, bb.xmax); const double inverse_average_chany_width = (bb.xmax - bb.xmin + 1.0) / total_chany_width; return {inverse_average_chanx_width, inverse_average_chany_width}; @@ -580,16 +646,14 @@ class NetCostHandler { /** * @brief Given the 3D BB, calculate the wire-length estimate of the net * @param net_id ID of the net which wirelength estimate is requested - * @param bb Bounding box of the net * @return Wirelength estimate of the net */ double get_net_wirelength_estimate_(ClusterNetId net_id) const; /** * @brief Given the per-layer BB, calculate the wire-length estimate of the net on each layer - * and return the sum of the lengths - * @param bb Per-layer BB of the net - * @param net_layer_pin_sink_count Number of sink pins on each layer for the net + * and return the sum of the lengths + * @param net_id Net whose weirelength is to be estimated. * @return Wirelength estimate of the net */ double get_net_wirelength_from_layer_bb_(ClusterNetId net_id) const; diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index c5d46b5af3..9e57362338 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -38,10 +38,9 @@ void try_place(const Netlist<>& net_list, const FlatPlacementInfo& flat_placement_info, bool is_flat) { - /* Currently, the functions that require is_flat as their parameter and are called during placement should - * receive is_flat as false. For example, if the RR graph of router lookahead is built here, it should be as - * if is_flat is false, even if is_flat is set to true from the command line. - */ + // Currently, the functions that require is_flat as their parameter and are called during placement should + // receive is_flat as false. For example, if the RR graph of router lookahead is built here, it should be as + // if is_flat is false, even if is_flat is set to true from the command line VTR_ASSERT(!is_flat); const auto& device_ctx = g_vpr_ctx.device(); const auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -52,6 +51,10 @@ void try_place(const Netlist<>& net_list, // Initialize the variables in the placement context. mutable_placement.init_placement_context(placer_opts, directs); + if (mutable_floorplanning.cluster_constraints.empty()) { + mutable_floorplanning.update_floorplanning_context_post_pack(); + } + // Update the floorplanning constraints with the macro information from the // placement context. mutable_floorplanning.update_floorplanning_context_pre_place(*mutable_placement.place_macros); @@ -60,23 +63,22 @@ void try_place(const Netlist<>& net_list, VTR_LOG("Bounding box mode is %s\n", (mutable_placement.cube_bb ? "Cube" : "Per-layer")); VTR_LOG("\n"); - /* To make sure the importance of NoC-related cost terms compared to - * BB and timing cost is determine only through NoC placement weighting factor, - * we normalize NoC-related cost weighting factors so that they add up to 1. - * With this normalization, NoC-related cost weighting factors only determine - * the relative importance of NoC cost terms with respect to each other, while - * the importance of total NoC cost to conventional placement cost is determined - * by NoC placement weighting factor. - * FIXME: This should not be modifying the NoC Opts here, this normalization - * should occur when these Opts are loaded in. - */ + // To make sure the importance of NoC-related cost terms compared to + // BB and timing cost is determine only through NoC placement weighting factor, + // we normalize NoC-related cost weighting factors so that they add up to 1. + // With this normalization, NoC-related cost weighting factors only determine + // the relative importance of NoC cost terms with respect to each other, while + // the importance of total NoC cost to conventional placement cost is determined + // by NoC placement weighting factor. + // FIXME: This should not be modifying the NoC Opts here, this normalization + // should occur when these Opts are loaded in. if (noc_opts.noc) { normalize_noc_cost_weighting_factor(const_cast<t_noc_opts&>(noc_opts)); } - /* Placement delay model is independent of the placement and can be shared across - * multiple placers if we are performing parallel annealing. - * So, it is created and initialized once. */ + // Placement delay model is independent of the placement and can be shared across + // multiple placers if we are performing parallel annealing. + // So, it is created and initialized once. */ std::shared_ptr<PlaceDelayModel> place_delay_model; if (placer_opts.place_algorithm.is_timing_driven()) { @@ -95,15 +97,13 @@ void try_place(const Netlist<>& net_list, } } - /* Make the global instance of BlkLocRegistry inaccessible through the getter methods of the - * placement context. This is done to make sure that the placement stage only accesses its - * own local instances of BlkLocRegistry. - */ + // Make the global instance of BlkLocRegistry inaccessible through the getter methods of the + // placement context. This is done to make sure that the placement stage only accesses its + // own local instances of BlkLocRegistry. mutable_placement.lock_loc_vars(); - /* Start measuring placement time. The measured execution time will be printed - * when this object goes out of scope at the end of this function. - */ + // Start measuring placement time. The measured execution time will be printed + // when this object goes out of scope at the end of this function. vtr::ScopedStartFinishTimer placement_timer("Placement"); // Enables fast look-up pb graph pins from block pin indices @@ -117,10 +117,9 @@ void try_place(const Netlist<>& net_list, placer.place(); - /* The placer object has its own copy of block locations and doesn't update - * the global context directly. We need to copy its internal data structures - * to the global placement context before it goes out of scope. - */ + // The placer object has its own copy of block locations and doesn't update + // the global context directly. We need to copy its internal data structures + // to the global placement context before it goes out of scope. placer.update_global_state(); // Clean the variables in the placement context. This will deallocate memory @@ -154,7 +153,7 @@ static void update_screen_debug(); //Performs a major (i.e. interactive) placement screen update. //This function with no arguments is useful for calling from a debugger to -//look at the intermediate implemetnation state. +//look at the intermediate implementation state. static void update_screen_debug() { update_screen(ScreenUpdatePriority::MAJOR, "DEBUG", PLACEMENT, nullptr); } diff --git a/vpr/src/place/place_constraints.cpp b/vpr/src/place/place_constraints.cpp index ef867ce5b1..fdad4813cb 100644 --- a/vpr/src/place/place_constraints.cpp +++ b/vpr/src/place/place_constraints.cpp @@ -209,7 +209,7 @@ void load_cluster_constraints() { floorplanning_ctx.cluster_constraints.resize(cluster_ctx.clb_nlist.blocks().size()); - for (auto cluster_id : cluster_ctx.clb_nlist.blocks()) { + for (ClusterBlockId cluster_id : cluster_ctx.clb_nlist.blocks()) { const std::unordered_set<AtomBlockId>& atoms = cluster_ctx.atoms_lookup[cluster_id]; PartitionRegion empty_pr; floorplanning_ctx.cluster_constraints[cluster_id] = empty_pr; diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index 7fdf3383a0..e206037bb3 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -11,13 +11,22 @@ #include "noc_place_utils.h" void t_placer_costs::update_norm_factors() { + const ClusteredNetlist& clustered_nlist = g_vpr_ctx.clustering().clb_nlist; + + bb_cost_norm = 1 / bb_cost; + + if (congestion_cost > 0.) { + congestion_cost_norm = 1 / congestion_cost; + } else { + congestion_cost_norm = 1. / (double)clustered_nlist.nets().size(); + } + if (place_algorithm.is_timing_driven()) { - bb_cost_norm = 1 / bb_cost; - //Prevent the norm factor from going to infinity + // Prevent the norm factor from going to infinity timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST); } else { - VTR_ASSERT_SAFE(place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); - bb_cost_norm = 1 / bb_cost; //Updating the normalization factor in bounding box mode since the cost in this mode is determined after normalizing the wirelength cost + // Timing normalization factor is not used + timing_cost_norm = std::numeric_limits<double>::quiet_NaN(); } if (noc_enabled) { @@ -36,6 +45,8 @@ double t_placer_costs::get_total_cost(const t_placer_opts& placer_opts, const t_ total_cost = (1 - placer_opts.timing_tradeoff) * (bb_cost * bb_cost_norm) + (placer_opts.timing_tradeoff) * (timing_cost * timing_cost_norm); } + total_cost += placer_opts.congestion_factor * congestion_cost * congestion_cost_norm; + if (noc_opts.noc) { // in noc mode we include noc aggregate bandwidth, noc latency, and noc congestion total_cost += calculate_noc_cost(noc_cost_terms, noc_cost_norm_factors, noc_opts); @@ -65,7 +76,7 @@ int get_place_inner_loop_num_move(const t_placer_opts& placer_opts, const t_anne move_lim = int(annealing_sched.inner_num * pow(device_size, 2. / 3.) * pow(num_blocks, 2. / 3.)); } - /* Avoid having a non-positive move_lim */ + // Avoid having a non-positive move_lim move_lim = std::max(move_lim, 1); return move_lim; @@ -76,6 +87,7 @@ void t_placer_statistics::reset() { av_cost = 0.; av_bb_cost = 0.; av_timing_cost = 0.; + av_cong_cost = 0.; sum_of_squares = 0.; success_sum = 0; success_rate = 0.; @@ -88,6 +100,7 @@ void t_placer_statistics::single_swap_update(const t_placer_costs& costs) { av_cost += costs.cost; av_bb_cost += costs.bb_cost; av_timing_cost += costs.timing_cost; + av_cong_cost += costs.congestion_cost; sum_of_squares += (costs.cost) * (costs.cost); } @@ -97,10 +110,12 @@ void t_placer_statistics::calc_iteration_stats(const t_placer_costs& costs, int av_cost = costs.cost; av_bb_cost = costs.bb_cost; av_timing_cost = costs.timing_cost; + av_cong_cost = costs.congestion_cost; } else { av_cost /= success_sum; av_bb_cost /= success_sum; av_timing_cost /= success_sum; + av_cong_cost /= success_sum; } success_rate = success_sum / float(move_lim); std_dev = get_std_dev(success_sum, sum_of_squares, av_cost); diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index f9925cbcc7..0caa10b8d5 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -63,37 +63,33 @@ struct NocCostTerms { * values of the previous iteration. However, the divisions are expensive, * so we store their multiplicative inverses when they are updated in * the outer loop routines to speed up the normalization process. - * - * @param cost The weighted average of the wiring cost and the timing cost. - * @param bb_cost The bounding box cost, aka the wiring cost. - * @param timing_cost The timing cost, which is connection delay * criticality. - * - * @param bb_cost_norm The normalization factor for the wiring cost. - * @param timing_cost_norm The normalization factor for the timing cost, which - * is upper-bounded by the value of MAX_INV_TIMING_COST. - * - * @param noc_cost_terms NoC-related cost terms - * @param noc_cost_norm_factors Normalization factors for NoC-related cost terms. - * - * @param MAX_INV_TIMING_COST Stops inverse timing cost from going to infinity - * with very lax timing constraints, which avoids multiplying by a - * gigantic timing_cost_norm when auto-normalizing. The exact value - * of this cost has relatively little impact, but should be large - * enough to not affect the timing costs computation for normal - * constraints. - * - * @param place_algorithm Determines how the member values are updated upon - * each temperature change during the placer annealing process. */ class t_placer_costs { public: //members + /// The weighted average of the wiring cost, the timing cost, and the congestion cost (if enabled) double cost = 0.; + + /// The bounding box cost, aka the wiring cost. double bb_cost = 0.; + + /// The timing cost, which is connection delay * criticality. double timing_cost = 0.; + + /// The congestion cost, which estimates how much routing channels are over-utilized. + double congestion_cost = 0.; + + /// The normalization factor for the wiring cost. double bb_cost_norm = 0.; + + /// The normalization factor for the timing cost, which is upper-bounded by the value of MAX_INV_TIMING_COST. double timing_cost_norm = 0.; + /// The normalization factor for the congestion cost. + double congestion_cost_norm = 0.; + + /// NoC-related cost terms. NocCostTerms noc_cost_terms; + /// Normalization factors for NoC-related cost terms. NocCostTerms noc_cost_norm_factors; public: //Constructor @@ -131,7 +127,18 @@ class t_placer_costs { t_placer_costs& operator+=(const NocCostTerms& noc_delta_cost); private: + /** + * @brief Stops inverse timing cost from going to infinity + * with very lax timing constraints, which avoids multiplying by a + * gigantic timing_cost_norm when auto-normalizing. The exact value + * of this cost has relatively little impact, but should be large + * enough to not affect the timing costs computation for normal + * constraints. + */ static constexpr double MAX_INV_TIMING_COST = 1.e12; + + /// Determines how the member values are updated upon + /// each temperature change during the placer annealing process. t_place_algorithm place_algorithm; bool noc_enabled; }; @@ -148,38 +155,30 @@ class t_placer_costs { * In terms of calculating statistics for total cost, we mean that we * operate upon the set of placer cost values gathered after every * accepted block move. - * - * @param av_cost - * Average total cost. Cost formulation depends on - * the place algorithm currently being used. - * @param av_bb_cost - * Average bounding box (wiring) cost. - * @param av_timing_cost - * Average timing cost (delay * criticality). - * @param sum_of_squares - * Sum of squares of the total cost. - * @param success_num - * Number of accepted block swaps for the current iteration. - * @param success_rate - * num_accepted / total_trials for the current iteration. - * @param std_dev - * Standard deviation of the total cost. - * */ class t_placer_statistics { public: + /// Average total cost. Cost formulation depends on the place algorithm currently being used. double av_cost; + /// Average bounding box (wiring) cost. double av_bb_cost; + /// Average timing cost (delay * criticality). double av_timing_cost; + /// Average congestion cost. + double av_cong_cost; + /// Sum of squares of the total cost. double sum_of_squares; + /// Number of accepted block swaps for the current iteration. int success_sum; + /// num_accepted / total_trials for the current iteration. float success_rate; + /// Standard deviation of the total cost. double std_dev; - public: //Constructor + public: // Constructor t_placer_statistics() { reset(); } - public: //Mutator + public: // Mutator ///@brief Clear all data fields. void reset(); diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index c2980ce984..f264576c62 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -117,7 +117,7 @@ Placer::Placer(const Netlist<>& net_list, } // Gets initial cost and loads bounding boxes. - costs_.bb_cost = net_cost_handler_.comp_bb_cost(e_cost_methods::NORMAL).first; + std::tie(costs_.bb_cost, std::ignore, costs_.congestion_cost) = net_cost_handler_.comp_bb_cong_cost(e_cost_methods::NORMAL); if (placer_opts.place_algorithm.is_timing_driven()) { alloc_and_init_timing_objects_(net_list, analysis_opts); @@ -254,9 +254,8 @@ void Placer::check_place_() { int Placer::check_placement_costs_() { int error = 0; - double timing_cost_check; - const auto [bb_cost_check, expected_wirelength] = net_cost_handler_.comp_bb_cost(e_cost_methods::CHECK); + const auto [bb_cost_check, expected_wirelength, _] = net_cost_handler_.comp_bb_cong_cost(e_cost_methods::CHECK); if (fabs(bb_cost_check - costs_.bb_cost) > costs_.bb_cost * PL_INCREMENTAL_COST_TOLERANCE) { VTR_LOG_ERROR( @@ -266,6 +265,7 @@ int Placer::check_placement_costs_() { } if (placer_opts_.place_algorithm.is_timing_driven()) { + double timing_cost_check; comp_td_costs(place_delay_model_.get(), *placer_criticalities_, placer_state_, &timing_cost_check); if (fabs(timing_cost_check - costs_.timing_cost) > costs_.timing_cost * PL_INCREMENTAL_COST_TOLERANCE) { VTR_LOG_ERROR( diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index 6ae41b5bb5..b5785cc5fc 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -80,15 +80,12 @@ static bool classes_in_same_block(ParentBlockId blk_id, int first_class_ptc_num, * @param route_opts Contains channel utilization threshold and weighting factor * used to increase initial 'acc_cost' for nodes going through * congested channels. - * @param chanx_util Post-placement estimate of CHANX routing utilization per (layer, x, y) location. - * @param chany_util Post-placement estimate of CHANY routing utilization per (layer, x, y) location. + * @param chan_util Post-placement estimate of routing channel utilization per (layer, x, y) location. * @return Initial `acc_cost` for the given RR node. */ - static float comp_initial_acc_cost(RRNodeId node_id, const t_router_opts& route_opts, - const vtr::NdMatrix<double, 3>& chanx_util, - const vtr::NdMatrix<double, 3>& chany_util); + const ChannelData<vtr::NdMatrix<double, 3>>& chan_util); /************************** Subroutine definitions ***************************/ @@ -436,8 +433,7 @@ void alloc_and_load_rr_node_route_structs(const t_router_opts& router_opts) { static float comp_initial_acc_cost(RRNodeId node_id, const t_router_opts& route_opts, - const vtr::NdMatrix<double, 3>& chanx_util, - const vtr::NdMatrix<double, 3>& chany_util) { + const ChannelData<vtr::NdMatrix<double, 3>>& chan_util) { const auto& rr_graph = g_vpr_ctx.device().rr_graph; // The default acc_cost is 1 for all rr_nodes. For routing wires, if they pass through a channel @@ -457,7 +453,7 @@ static float comp_initial_acc_cost(RRNodeId node_id, int y = rr_graph.node_ylow(node_id); int layer = rr_graph.node_layer(node_id); for (int x = rr_graph.node_xlow(node_id); x <= rr_graph.node_xhigh(node_id); x++) { - max_util = std::max(max_util, chanx_util[layer][x][y]); + max_util = std::max(max_util, chan_util.x[layer][x][y]); } } else { @@ -465,7 +461,7 @@ static float comp_initial_acc_cost(RRNodeId node_id, int x = rr_graph.node_xlow(node_id); int layer = rr_graph.node_layer(node_id); for (int y = rr_graph.node_ylow(node_id); y <= rr_graph.node_yhigh(node_id); y++) { - max_util = std::max(max_util, chany_util[layer][x][y]); + max_util = std::max(max_util, chan_util.y[layer][x][y]); } } @@ -481,18 +477,18 @@ void reset_rr_node_route_structs(const t_router_opts& route_opts) { auto& route_ctx = g_vpr_ctx.mutable_routing(); const auto& device_ctx = g_vpr_ctx.device(); const auto& blk_loc_registry = g_vpr_ctx.placement().blk_loc_registry(); - const bool cube_bb = g_vpr_ctx.placement().cube_bb; VTR_ASSERT(route_ctx.rr_node_route_inf.size() == size_t(device_ctx.rr_graph.num_nodes())); - RoutingChanUtilEstimator routing_chan_util_estimator(blk_loc_registry, cube_bb); - const auto [chanx_util, chany_util] = routing_chan_util_estimator.estimate_routing_chan_util(); + // RoutingChanUtilEstimator assumes cube bounding box + RoutingChanUtilEstimator routing_chan_util_estimator(blk_loc_registry); + const ChannelData<vtr::NdMatrix<double, 3>> chan_util = routing_chan_util_estimator.estimate_routing_chan_util(); for (const RRNodeId rr_id : device_ctx.rr_graph.nodes()) { t_rr_node_route_inf& node_inf = route_ctx.rr_node_route_inf[rr_id]; node_inf.prev_edge = RREdgeId::INVALID(); - node_inf.acc_cost = comp_initial_acc_cost(rr_id, route_opts, chanx_util, chany_util); + node_inf.acc_cost = comp_initial_acc_cost(rr_id, route_opts, chan_util); node_inf.path_cost = std::numeric_limits<float>::infinity(); node_inf.backward_path_cost = std::numeric_limits<float>::infinity(); node_inf.set_occ(0); diff --git a/vpr/src/route/route_utilization.cpp b/vpr/src/route/route_utilization.cpp index c01d0a62c8..b8cb5b4c5d 100644 --- a/vpr/src/route/route_utilization.cpp +++ b/vpr/src/route/route_utilization.cpp @@ -5,15 +5,16 @@ #include "vpr_utils.h" #include "route_common.h" -RoutingChanUtilEstimator::RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc_registry, bool cube_bb) { +RoutingChanUtilEstimator::RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc_registry) { placer_state_ = std::make_unique<PlacerState>(/*placement_is_timing_driven=*/false); placer_state_->mutable_blk_loc_registry() = blk_loc_registry; placer_opts_.place_algorithm = e_place_algorithm::BOUNDING_BOX_PLACE; - net_cost_handler_ = std::make_unique<NetCostHandler>(placer_opts_, *placer_state_, cube_bb); + /// RoutingChanUtilEstimator uses cube bounding box + net_cost_handler_ = std::make_unique<NetCostHandler>(placer_opts_, *placer_state_, /*cube_bb=*/true); } -std::pair<vtr::NdMatrix<double, 3>, vtr::NdMatrix<double, 3>> RoutingChanUtilEstimator::estimate_routing_chan_util() { +ChannelData<vtr::NdMatrix<double, 3>> RoutingChanUtilEstimator::estimate_routing_chan_util() { const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; const auto& block_locs = placer_state_->block_locs(); @@ -21,24 +22,28 @@ std::pair<vtr::NdMatrix<double, 3>, vtr::NdMatrix<double, 3>> RoutingChanUtilEst if (placement_is_done) { // Compute net bounding boxes - net_cost_handler_->comp_bb_cost(e_cost_methods::NORMAL); + net_cost_handler_->comp_bb_cong_cost(e_cost_methods::NORMAL); - // Estimate routing channel utilization using - return net_cost_handler_->estimate_routing_chan_util(); + // Estimate routing channel usage + net_cost_handler_->estimate_routing_chan_util(/*compute_congestion_cost=*/false); + + return net_cost_handler_->get_chan_util(); } else { const auto& device_ctx = g_vpr_ctx.device(); - auto chanx_util = vtr::NdMatrix<double, 3>({{(size_t)device_ctx.grid.get_num_layers(), - device_ctx.grid.width(), - device_ctx.grid.height()}}, - 0); + ChannelData<vtr::NdMatrix<double, 3>> chan_util; + + chan_util.x = vtr::NdMatrix<double, 3>({{(size_t)device_ctx.grid.get_num_layers(), + device_ctx.grid.width(), + device_ctx.grid.height()}}, + 0); - auto chany_util = vtr::NdMatrix<double, 3>({{(size_t)device_ctx.grid.get_num_layers(), - device_ctx.grid.width(), - device_ctx.grid.height()}}, - 0); + chan_util.y = vtr::NdMatrix<double, 3>({{(size_t)device_ctx.grid.get_num_layers(), + device_ctx.grid.width(), + device_ctx.grid.height()}}, + 0); - return {chanx_util, chany_util}; + return chan_util; } } diff --git a/vpr/src/route/route_utilization.h b/vpr/src/route/route_utilization.h index 8e71e73375..5a89247eaf 100644 --- a/vpr/src/route/route_utilization.h +++ b/vpr/src/route/route_utilization.h @@ -8,15 +8,15 @@ /** * @class RoutingChanUtilEstimator - * @brief This class computes the net bounding boxes and estimates the routing channel utilization + * @brief This class computes the net bounding boxes (cube mode) and estimates the routing channel utilization * for each CHANX/CHANY channel by smearing the estimated wirelength for each net across all channels * within its bounding box. */ class RoutingChanUtilEstimator { public: - RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc_registry, bool cube_bb); + RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc_registry); - std::pair<vtr::NdMatrix<double, 3>, vtr::NdMatrix<double, 3>> estimate_routing_chan_util(); + ChannelData<vtr::NdMatrix<double, 3>> estimate_routing_chan_util(); private: std::unique_ptr<PlacerState> placer_state_;