From 695d205dd782504134ddf53474cd31a82dc30980 Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Tue, 24 Jun 2025 15:58:38 -0400 Subject: [PATCH] [APPack] Iterative Re-Packing Being based off the packer, APPack also uses iterative re-packing when a dense enough clustering cannot be found. APPack has some special options that it can use to increase the density of clustering further without hurting quality as much as the default flow. Updated the iterative re-packing algorithm to use these options if needed. Having this safer fall-back has allowed me to tune some numbers that I knew would improve the quality of most circuits but were causing a few circuits to fail packing. These few that were failing should now hit this fall-back path, which resolves the issue. --- .../flat_placement_density_manager.cpp | 16 +++- vpr/src/base/flat_placement_utils.h | 31 ++++++ vpr/src/pack/appack_context.h | 25 ++++- vpr/src/pack/appack_max_dist_th_manager.cpp | 23 +++-- vpr/src/pack/appack_max_dist_th_manager.h | 41 ++++++-- vpr/src/pack/greedy_candidate_selector.cpp | 94 ++++++++++++++----- vpr/src/pack/pack.cpp | 85 +++++++++++++++-- vpr/src/place/initial_placement.cpp | 37 ++------ .../config/golden_results.txt | 8 +- 9 files changed, 268 insertions(+), 92 deletions(-) diff --git a/vpr/src/analytical_place/flat_placement_density_manager.cpp b/vpr/src/analytical_place/flat_placement_density_manager.cpp index 93f7021c22..ec22283621 100644 --- a/vpr/src/analytical_place/flat_placement_density_manager.cpp +++ b/vpr/src/analytical_place/flat_placement_density_manager.cpp @@ -59,15 +59,20 @@ static PrimitiveVector calc_bin_underfill(const PrimitiveVector& bin_utilization * The command-line arguments provided by the user. * @param physical_tile_types * A vector of all physical tile types in the architecture. + * @param device_grid + * The current physical device grid of the FPGA. 
*/ static std::vector get_physical_type_target_densities(const std::vector& target_density_arg_strs, - const std::vector& physical_tile_types) { + const std::vector& physical_tile_types, + const DeviceGrid& device_grid) { // Get the target densisty of each physical block type. - // TODO: Create auto feature to automatically select target densities based - // on properties of the architecture. Need to sweep to find reasonable - // values. std::vector phy_ty_target_density(physical_tile_types.size(), 1.0f); + // By default (auto), make the CLB target density 80%, leaving the other + // blocks at 100%. + t_logical_block_type_ptr logic_block_type = infer_logic_block_type(device_grid); + phy_ty_target_density[logic_block_type->index] = 0.8f; + // Set to auto if no user args are provided. if (target_density_arg_strs.size() == 0) return phy_ty_target_density; @@ -123,7 +128,8 @@ FlatPlacementDensityManager::FlatPlacementDensityManager(const APNetlist& ap_net // Get the target densisty of each physical block type. std::vector phy_ty_target_densities = get_physical_type_target_densities(target_density_arg_strs, - physical_tile_types); + physical_tile_types, + device_grid); VTR_LOG("Partial legalizer is using target densities:"); for (const t_physical_tile_type& phy_ty : physical_tile_types) { VTR_LOG(" %s:%.1f", phy_ty.name.c_str(), phy_ty_target_densities[phy_ty.index]); diff --git a/vpr/src/base/flat_placement_utils.h b/vpr/src/base/flat_placement_utils.h index 16a4641a01..1772585e92 100644 --- a/vpr/src/base/flat_placement_utils.h +++ b/vpr/src/base/flat_placement_utils.h @@ -6,8 +6,11 @@ * @brief Utility methods for working with flat placements. 
*/ +#include #include +#include "device_grid.h" #include "flat_placement_types.h" +#include "physical_types.h" /** * @brief Returns the manhattan distance (L1 distance) between two flat @@ -17,3 +20,31 @@ inline float get_manhattan_distance(const t_flat_pl_loc& loc_a, const t_flat_pl_loc& loc_b) { return std::abs(loc_a.x - loc_b.x) + std::abs(loc_a.y - loc_b.y) + std::abs(loc_a.layer - loc_b.layer); } + +/** + * @brief Returns the L1 distance something at the given flat location would + * need to move to be within the bounds of a tile at the given tile loc. + */ +inline float get_manhattan_distance_to_tile(const t_flat_pl_loc& src_flat_loc, + const t_physical_tile_loc& tile_loc, + const DeviceGrid& device_grid) { + // Get the bounds of the tile. + // Note: The get_tile_bb function will not work in this case since it + // subtracts 1 from the width and height. + auto tile_type = device_grid.get_physical_type(tile_loc); + float tile_xmin = tile_loc.x - device_grid.get_width_offset(tile_loc); + float tile_xmax = tile_xmin + tile_type->width; + float tile_ymin = tile_loc.y - device_grid.get_height_offset(tile_loc); + float tile_ymax = tile_ymin + tile_type->height; + + // Get the closest point in the bounding box (including the edges) to + // the src_flat_loc. To do this, we project the point in L1 space. + float proj_x = std::clamp(src_flat_loc.x, tile_xmin, tile_xmax); + float proj_y = std::clamp(src_flat_loc.y, tile_ymin, tile_ymax); + + // Then compute the L1 distance from the src_flat_loc to the projected + // position. This will be the minimum distance this point needs to move. 
+ float dx = std::abs(proj_x - src_flat_loc.x); + float dy = std::abs(proj_y - src_flat_loc.y); + return dx + dy; +} diff --git a/vpr/src/pack/appack_context.h b/vpr/src/pack/appack_context.h index 4cc7e84fd8..8b4434ea59 100644 --- a/vpr/src/pack/appack_context.h +++ b/vpr/src/pack/appack_context.h @@ -56,9 +56,9 @@ struct t_appack_options { // Distance threshold which decides when to use quadratic decay or inverted // sqrt decay. If the distance is less than this threshold, quadratic decay // is used. Inverted sqrt is used otherwise. - static constexpr float dist_th = 1.75f; + static constexpr float dist_th = 2.0f; // Attenuation value at the threshold. - static constexpr float attenuation_th = 0.35f; + static constexpr float attenuation_th = 0.25f; // Using the distance threshold and the attenuation value at that point, we // can compute the other two terms. This is to keep the attenuation function @@ -82,7 +82,9 @@ struct t_appack_options { // search within the cluster's tile. Setting this to a higher number would // allow APPack to search farther away; but may bring in molecules which // do not "want" to be in the cluster. - static constexpr float max_unrelated_tile_distance = 5.0f; + // + // [block_type_index] -> unrelated_tile_distance + std::vector max_unrelated_tile_distance; // Unrelated clustering occurs after all other candidate selection methods // have failed. This parameter sets how many time we will attempt unrelated @@ -93,7 +95,13 @@ struct t_appack_options { // NOTE: A similar option exists in the candidate selector class. This was // duplicated since it is very likely that APPack would need a // different value for this option than the non-APPack flow. - static constexpr int max_unrelated_clustering_attempts = 10; + // + // [block_type_index] -> max_unrelated_attempts + std::vector max_unrelated_clustering_attempts; + // By default, we perform 10 unrelated clustering attempts. 
This is used + // to aggressively resolve density while adhering to the GP solution as much + // as possible. + static constexpr int default_max_unrelated_clustering_attempts = 10; // TODO: Investigate adding flat placement info to seed selection. }; @@ -122,6 +130,15 @@ struct APPackContext : public Context { logical_block_types, device_grid); } + + // By default, when unrelated clustering is on, search for unrelated molecules + // that are within 1 tile from the centroid of the cluster. + // NOTE: Molecules within the same tile as the centroid are considered to have + // 0 distance. The distance is computed relative to the bounds of the + // tile containing the centroid. + appack_options.max_unrelated_tile_distance.resize(logical_block_types.size(), 1.0); + appack_options.max_unrelated_clustering_attempts.resize(logical_block_types.size(), + appack_options.default_max_unrelated_clustering_attempts); } /** diff --git a/vpr/src/pack/appack_max_dist_th_manager.cpp b/vpr/src/pack/appack_max_dist_th_manager.cpp index 4d602bed00..1c224cd55b 100644 --- a/vpr/src/pack/appack_max_dist_th_manager.cpp +++ b/vpr/src/pack/appack_max_dist_th_manager.cpp @@ -29,6 +29,10 @@ static bool has_memory_pbs(const t_pb_type* pb_type); void APPackMaxDistThManager::init(const std::vector& max_dist_ths, const std::vector& logical_block_types, const DeviceGrid& device_grid) { + // Compute the max device distance based on the width and height of the + // device. This is the L1 (manhattan) distance. + max_distance_on_device_ = device_grid.width() + device_grid.height(); + // Automatically set the max distance thresholds. auto_set_max_distance_thresholds(logical_block_types, device_grid); @@ -36,7 +40,7 @@ void APPackMaxDistThManager::init(const std::vector& max_dist_ths, // auto), set the max distance thresholds based on the user-provided strings. 
VTR_ASSERT(!max_dist_ths.empty()); if (max_dist_ths.size() != 1 || max_dist_ths[0] != "auto") { - set_max_distance_thresholds_from_strings(max_dist_ths, logical_block_types, device_grid); + set_max_distance_thresholds_from_strings(max_dist_ths, logical_block_types); } // Set the initilized flag to true. @@ -57,18 +61,15 @@ void APPackMaxDistThManager::init(const std::vector& max_dist_ths, void APPackMaxDistThManager::auto_set_max_distance_thresholds(const std::vector& logical_block_types, const DeviceGrid& device_grid) { - // Compute the max device distance based on the width and height of the - // device. This is the L1 (manhattan) distance. - float max_device_distance = device_grid.width() + device_grid.height(); // Compute the max distance thresholds of the different logical block types. - float default_max_distance_th = std::max(default_max_dist_th_scale_ * max_device_distance, + float default_max_distance_th = std::max(default_max_dist_th_scale_ * max_distance_on_device_, default_max_dist_th_offset_); - float logic_block_max_distance_th = std::max(logic_block_max_dist_th_scale_ * max_device_distance, + float logic_block_max_distance_th = std::max(logic_block_max_dist_th_scale_ * max_distance_on_device_, logic_block_max_dist_th_offset_); - float memory_max_distance_th = std::max(memory_max_dist_th_scale_ * max_device_distance, + float memory_max_distance_th = std::max(memory_max_dist_th_scale_ * max_distance_on_device_, memory_max_dist_th_offset_); - float io_block_max_distance_th = std::max(io_max_dist_th_scale_ * max_device_distance, + float io_block_max_distance_th = std::max(io_max_dist_th_scale_ * max_distance_on_device_, io_max_dist_th_offset_); // Set all logical block types to have the default max distance threshold. 
@@ -138,8 +139,7 @@ static bool has_memory_pbs(const t_pb_type* pb_type) { void APPackMaxDistThManager::set_max_distance_thresholds_from_strings( const std::vector& max_dist_ths, - const std::vector& logical_block_types, - const DeviceGrid& device_grid) { + const std::vector& logical_block_types) { std::vector lb_type_names; std::unordered_map lb_type_name_to_index; @@ -167,8 +167,7 @@ void APPackMaxDistThManager::set_max_distance_thresholds_from_strings( } // Compute the max distance threshold the user selected. - float max_device_distance = device_grid.width() + device_grid.height(); - float logical_block_max_dist_th = std::max(max_device_distance * logical_block_max_dist_th_scale, + float logical_block_max_dist_th = std::max(max_distance_on_device_ * logical_block_max_dist_th_scale, logical_block_max_dist_th_offset); int lb_ty_index = lb_type_name_to_index[lb_name]; diff --git a/vpr/src/pack/appack_max_dist_th_manager.h b/vpr/src/pack/appack_max_dist_th_manager.h index 5dc461b2a6..976356724e 100644 --- a/vpr/src/pack/appack_max_dist_th_manager.h +++ b/vpr/src/pack/appack_max_dist_th_manager.h @@ -39,12 +39,12 @@ class APPackMaxDistThManager { // This is the default scale and offset. Logical blocks that we do not // recognize as being of the special categories will have this threshold. - static constexpr float default_max_dist_th_scale_ = 0.35f; - static constexpr float default_max_dist_th_offset_ = 15.0f; + static constexpr float default_max_dist_th_scale_ = 0.1f; + static constexpr float default_max_dist_th_offset_ = 10.0f; // Logic blocks (such as CLBs and LABs) tend to have more resources on the // device, thus they have tighter thresholds. This was found to work well. - static constexpr float logic_block_max_dist_th_scale_ = 0.1f; + static constexpr float logic_block_max_dist_th_scale_ = 0.06f; static constexpr float logic_block_max_dist_th_offset_ = 15.0f; // Memory blocks (i.e. 
blocks that contain pb_types of the memory class) @@ -80,7 +80,7 @@ class APPackMaxDistThManager { const DeviceGrid& device_grid); /** - * @brief Get the max distance threshold of the given lobical block type. + * @brief Get the max distance threshold of the given logical block type. */ inline float get_max_dist_threshold(const t_logical_block_type& logical_block_ty) const { VTR_ASSERT_SAFE_MSG(is_initialized_, @@ -91,6 +91,31 @@ class APPackMaxDistThManager { return logical_block_dist_thresholds_[logical_block_ty.index]; } + /** + * @brief Get the maximum distance possible on the device. This is the + * manhattan distance from the bottom-left corner of the device to + * the top-right. + */ + inline float get_max_device_distance() const { + VTR_ASSERT_SAFE_MSG(is_initialized_, + "APPackMaxDistThManager has not been initialized, cannot call this method"); + + return max_distance_on_device_; + } + + /** + * @brief Set the max distance threshold of the given logical block type. + */ + inline void set_max_dist_threshold(const t_logical_block_type& logical_block_ty, + float new_threshold) { + VTR_ASSERT_SAFE_MSG(is_initialized_, + "APPackMaxDistThManager has not been initialized, cannot call this method"); + VTR_ASSERT_SAFE_MSG((size_t)logical_block_ty.index < logical_block_dist_thresholds_.size(), + "Logical block type does not have a max distance threshold"); + + logical_block_dist_thresholds_[logical_block_ty.index] = new_threshold; + } + private: /** * @brief Helper method that initializes the thresholds of all logical @@ -105,8 +130,7 @@ class APPackMaxDistThManager { * strings. */ void set_max_distance_thresholds_from_strings(const std::vector& max_dist_ths, - const std::vector& logical_block_types, - const DeviceGrid& device_grid); + const std::vector& logical_block_types); /// @brief A flag which shows if the thesholds have been computed or not. 
bool is_initialized_ = false; @@ -114,4 +138,9 @@ class APPackMaxDistThManager { /// @brief The max distance thresholds of all logical blocks in the architecture. /// This is initialized in the constructor and accessed during packing. std::vector logical_block_dist_thresholds_; + + /// @brief This is the maximum manhattan distance possible on the device. This + /// is the distance of traveling from the bottom-left corner of the device + /// to the top right. + float max_distance_on_device_; }; diff --git a/vpr/src/pack/greedy_candidate_selector.cpp b/vpr/src/pack/greedy_candidate_selector.cpp index 60f169c049..5f1b63998d 100644 --- a/vpr/src/pack/greedy_candidate_selector.cpp +++ b/vpr/src/pack/greedy_candidate_selector.cpp @@ -8,6 +8,7 @@ #include "greedy_candidate_selector.h" #include #include +#include #include #include #include "PreClusterTimingManager.h" @@ -18,15 +19,16 @@ #include "attraction_groups.h" #include "cluster_legalizer.h" #include "cluster_placement.h" +#include "globals.h" #include "greedy_clusterer.h" #include "logic_types.h" +#include "physical_types.h" #include "prepack.h" #include "timing_info.h" #include "vpr_types.h" #include "vtr_assert.h" #include "vtr_ndmatrix.h" #include "vtr_vector.h" -#include "vtr_vector_map.h" /* * @brief Get gain of packing molecule into current cluster. 
@@ -755,7 +757,9 @@ PackMoleculeId GreedyCandidateSelector::get_next_candidate_for_cluster( if (allow_unrelated_clustering_ && best_molecule == PackMoleculeId::INVALID()) { const t_appack_options& appack_options = appack_ctx_.appack_options; if (appack_options.use_appack) { - if (num_unrelated_clustering_attempts_ < appack_options.max_unrelated_clustering_attempts) { + t_logical_block_type_ptr cluster_type = cluster_legalizer.get_cluster_type(cluster_id); + int cluster_max_attempts = appack_options.max_unrelated_clustering_attempts[cluster_type->index]; + if (num_unrelated_clustering_attempts_ < cluster_max_attempts) { best_molecule = get_unrelated_candidate_for_cluster_appack(cluster_gain_stats, cluster_id, cluster_legalizer); @@ -1101,8 +1105,23 @@ static float get_molecule_gain(PackMoleculeId molecule_id, // Get the position of the molecule t_flat_pl_loc target_loc = get_molecule_pos(molecule_id, prepacker, appack_ctx); + // Get the physical tile location of the flat cluster position. + // TODO: This should really be the closest compatible tile to the cluster + // centroid. To do this would require using information from the + // placer which we do not have yet. + t_physical_tile_loc cluster_tile_loc(cluster_gain_stats.flat_cluster_position.x, + cluster_gain_stats.flat_cluster_position.y, + cluster_gain_stats.flat_cluster_position.layer); + // Compute the gain attenuatation term. - float dist = get_manhattan_distance(cluster_gain_stats.flat_cluster_position, target_loc); + + // Here we compute the distance we would need to move the molecule from + // its GP solution to go into the tile we think the cluster will go into. + // This returns a distance of 0 if the molecule is already in the same + // tile as the rest of the molecules in the cluster. 
+ float dist = get_manhattan_distance_to_tile(target_loc, + cluster_tile_loc, + g_vpr_ctx.device().grid); float gain_mult = 1.0f; if (dist < appack_options.dist_th) { gain_mult = 1.0f - (appack_options.quad_fac_sqr * dist * dist); @@ -1245,21 +1264,34 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appa } // Create a queue of locations to search and a map of visited grid locations. - std::queue search_queue; + std::queue search_queue; vtr::NdMatrix visited({appack_unrelated_clustering_data_.dim_size(0), appack_unrelated_clustering_data_.dim_size(1)}, false); // Push the position of the cluster to the queue. - search_queue.push(cluster_gain_stats.flat_cluster_position); + t_physical_tile_loc cluster_tile_loc(cluster_gain_stats.flat_cluster_position.x, + cluster_gain_stats.flat_cluster_position.y, + cluster_gain_stats.flat_cluster_position.layer); + search_queue.push(cluster_tile_loc); + + // Get the max unrelated tile distance for the block type of this cluster. + t_logical_block_type_ptr cluster_type = cluster_legalizer.get_cluster_type(cluster_id); + float max_dist = appack_ctx_.appack_options.max_unrelated_tile_distance[cluster_type->index]; + + // Keep track of the closest compatible molecule and its distance. + float best_distance = std::numeric_limits::max(); + PackMoleculeId closest_compatible_molecule = PackMoleculeId::INVALID(); while (!search_queue.empty()) { // Pop a position to search from the queue. - const t_flat_pl_loc& node_loc = search_queue.front(); - VTR_ASSERT_SAFE(node_loc.layer == 0); + const t_physical_tile_loc& node_loc = search_queue.front(); + VTR_ASSERT_SAFE(node_loc.layer_num == 0); + + // Get the distance from the cluster to the current tile in tiles. + float dist = std::abs(node_loc.x - cluster_tile_loc.x) + std::abs(node_loc.y - cluster_tile_loc.y); // If this position is too far from the source, skip it. 
- float dist = get_manhattan_distance(node_loc, cluster_gain_stats.flat_cluster_position); - if (dist > 1) { + if (dist > max_dist) { search_queue.pop(); continue; } @@ -1272,6 +1304,10 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appa visited[node_loc.x][node_loc.y] = true; // Explore this position from highest number of inputs available to lowest. + // Here, we are trying to find the closest compatible molecule, where we + // break ties based on whoever has more external inputs. + PackMoleculeId best_candidate = PackMoleculeId::INVALID(); + float best_candidate_distance = std::numeric_limits::max(); const auto& uc_data = appack_unrelated_clustering_data_[node_loc.x][node_loc.y]; VTR_ASSERT_SAFE(inputs_avail < uc_data.size()); for (int ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) { @@ -1289,30 +1325,46 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appa // skip it. if (!cluster_legalizer.is_molecule_compatible(mol_id, cluster_id)) continue; - // Return this molecule as the unrelated candidate. - return mol_id; + + // If this is the best candidate we have seen so far, hold onto it. + // Here, we get the distance needed to move the molecule from its + // GP placement to the current cluster's tile. + t_flat_pl_loc mol_pos = get_molecule_pos(mol_id, prepacker_, appack_ctx_); + float mol_dist = get_manhattan_distance_to_tile(mol_pos, + cluster_tile_loc, + g_vpr_ctx.device().grid); + if (mol_dist < best_candidate_distance && mol_dist < best_distance) { + best_candidate = mol_id; + best_candidate_distance = mol_dist; + } } } + // If a candidate could be found, add it as the best found so far. + if (best_candidate.is_valid()) { + closest_compatible_molecule = best_candidate; + best_distance = best_candidate_distance; + } + // Push the neighbors of the position to the queue. // Note: Here, we are using the manhattan distance, so we do not push // the diagonals. 
We also want to try the direct neighbors first // since they should be closer. - if (node_loc.x >= 1.0f) - search_queue.push({node_loc.x - 1, node_loc.y, node_loc.layer}); - if (node_loc.x <= visited.dim_size(0) - 2) - search_queue.push({node_loc.x + 1, node_loc.y, node_loc.layer}); - if (node_loc.y >= 1.0f) - search_queue.push({node_loc.x, node_loc.y - 1, node_loc.layer}); - if (node_loc.y <= visited.dim_size(1) - 2) - search_queue.push({node_loc.x, node_loc.y + 1, node_loc.layer}); + if (node_loc.x >= 1) + search_queue.push({node_loc.x - 1, node_loc.y, node_loc.layer_num}); + if (node_loc.x <= (int)visited.dim_size(0) - 2) + search_queue.push({node_loc.x + 1, node_loc.y, node_loc.layer_num}); + if (node_loc.y >= 1) + search_queue.push({node_loc.x, node_loc.y - 1, node_loc.layer_num}); + if (node_loc.y <= (int)visited.dim_size(1) - 2) + search_queue.push({node_loc.x, node_loc.y + 1, node_loc.layer_num}); // Pop the position off the queue. search_queue.pop(); } - // No molecule could be found. Return an invalid ID. - return PackMoleculeId::INVALID(); + // Return the closest compatible molecule to the cluster. + return closest_compatible_molecule; } void GreedyCandidateSelector::update_candidate_selector_finalize_cluster( diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index 24936c767a..886735e56f 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -54,6 +54,8 @@ enum class e_packer_state { /// @brief Region constraints: Turns on more attraction groups for all regions /// and increases the pull on these groups. CREATE_ATTRACTION_GROUPS_FOR_ALL_REGIONS_AND_INCREASE_PULL, + /// @brief APPack: Increase the max displacement threshold for overused block types. + AP_INCREASE_MAX_DISPLACEMENT, /// @brief The failure state. FAILURE }; @@ -90,6 +92,8 @@ static bool try_size_device_grid(const t_arch& arch, * The current external pin utilization targets. * @param packer_opts * The options passed into the packer. 
+ * @param appack_ctx + * The APPack context used when AP is turned on. */ static e_packer_state get_next_packer_state(e_packer_state current_packer_state, bool fits_on_device, @@ -98,7 +102,8 @@ static e_packer_state get_next_packer_state(e_packer_state current_packer_state, bool using_balanced_block_type_util, const std::map& block_type_utils, const t_ext_pin_util_targets& external_pin_util_targets, - const t_packer_opts& packer_opts) { + const t_packer_opts& packer_opts, + const APPackContext& appack_ctx) { if (fits_on_device && !floorplan_regions_overfull) { // If everything fits on the device and the floorplan regions are // not overfilled, the next state is success. @@ -111,13 +116,12 @@ static e_packer_state get_next_packer_state(e_packer_state current_packer_state, // density of the block types available. // Check if we can turn on unrelated cluster and/or balanced block type - // utilization. - if (packer_opts.allow_unrelated_clustering == e_unrelated_clustering::AUTO && packer_opts.balance_block_type_utilization == e_balance_block_type_util::AUTO) { - - // Check if they are not already on. If not, set the next state to turn them on. - if (!using_unrelated_clustering || !using_balanced_block_type_util) { - return e_packer_state::SET_UNRELATED_AND_BALANCED; - } + // utilization and they have not been turned on already. + if (packer_opts.allow_unrelated_clustering == e_unrelated_clustering::AUTO && !using_unrelated_clustering) { + return e_packer_state::SET_UNRELATED_AND_BALANCED; + } + if (packer_opts.balance_block_type_utilization == e_balance_block_type_util::AUTO && !using_balanced_block_type_util) { + return e_packer_state::SET_UNRELATED_AND_BALANCED; } } @@ -152,10 +156,27 @@ static e_packer_state get_next_packer_state(e_packer_state current_packer_state, } } + // If APPack is used, we can increase the max distance threshold to create + // a denser clustering. This will cause the packer to not adhere as well to + // the global placement. 
+ if (appack_ctx.appack_options.use_appack) { + for (const auto& p : block_type_utils) { + if (p.second <= 1.0f) + continue; + + // Check if we can increase the max distance threshold for any of the + // overused block types. + float max_device_distance = appack_ctx.max_distance_threshold_manager.get_max_device_distance(); + float max_distance_th = appack_ctx.max_distance_threshold_manager.get_max_dist_threshold(*p.first); + if (max_distance_th < max_device_distance) + return e_packer_state::AP_INCREASE_MAX_DISPLACEMENT; + } + } + // Check if we can increase the target density of the overused block types. // This is a last resort since increasing the target pin density can have // bad affects on quality and routability. - for (auto& p : block_type_utils) { + for (const auto& p : block_type_utils) { const t_ext_pin_util& target_pin_util = external_pin_util_targets.get_pin_util(p.first->name); if (p.second > 1.0f && (target_pin_util.input_pin_util < 1.0f || target_pin_util.output_pin_util < 1.0f)) return e_packer_state::INCREASE_OVERUSED_TARGET_PIN_UTILIZATION; @@ -323,7 +344,8 @@ bool try_pack(const t_packer_opts& packer_opts, balance_block_type_util, block_type_utils, cluster_legalizer.get_target_external_pin_util(), - packer_opts); + packer_opts, + appack_ctx); // Set up for the options used for the next packer state. // NOTE: This must be done here (and not at the start of the next packer @@ -342,6 +364,20 @@ bool try_pack(const t_packer_opts& packer_opts, VTR_ASSERT(balance_block_type_util == false); balance_block_type_util = true; } + if (appack_ctx.appack_options.use_appack) { + // Only do unrelated clustering on the overused type instances. + for (const auto& p : block_type_utils) { + // Any overutilized block types will use the default options. + if (p.second > 1.0f) + continue; + + // Any underutilized block types should not do unrelated clustering. + // We can turn this off by just setting the max attempts to 0. 
+ // TODO: These may become over-utilized in the future. Should + // investigate turning these on if needed. + appack_ctx.appack_options.max_unrelated_clustering_attempts[p.first->index] = 0; + } + } VTR_LOG("Packing failed to fit on device. Re-packing with: unrelated_logic_clustering=%s balance_block_type_util=%s\n", (allow_unrelated_clustering ? "true" : "false"), (balance_block_type_util ? "true" : "false")); @@ -402,6 +438,35 @@ bool try_pack(const t_packer_opts& packer_opts, attraction_groups.set_att_group_pulls(4); break; } + case e_packer_state::AP_INCREASE_MAX_DISPLACEMENT: { + VTR_ASSERT(appack_ctx.appack_options.use_appack); + std::vector block_types_to_increase; + for (const auto& p : block_type_utils) { + if (p.second <= 1.0f) + continue; + + float max_device_distance = appack_ctx.max_distance_threshold_manager.get_max_device_distance(); + float max_distance_th = appack_ctx.max_distance_threshold_manager.get_max_dist_threshold(*p.first); + if (max_distance_th < max_device_distance) + block_types_to_increase.push_back(p.first); + } + + // TODO: Instead of setting to max distance, set to the current threshold, + // multiplied by the overuse. Or maybe just double it. + VTR_LOG("Packing failed to fit on device. 
Increasing the APPack max distance thresholds of block types: "); + for (size_t i = 0; i < block_types_to_increase.size(); i++) { + t_logical_block_type_ptr block_type_ptr = block_types_to_increase[i]; + + float max_device_distance = appack_ctx.max_distance_threshold_manager.get_max_device_distance(); + appack_ctx.max_distance_threshold_manager.set_max_dist_threshold(*block_type_ptr, max_device_distance); + + VTR_LOG("%s", block_type_ptr->name.c_str()); + if (i < block_types_to_increase.size() - 1) + VTR_LOG(", "); + } + VTR_LOG("\n"); + break; + } case e_packer_state::DEFAULT: case e_packer_state::SUCCESS: case e_packer_state::FAILURE: diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index c4cbeb5645..9f6a4316eb 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -1,6 +1,7 @@ #include "clustered_netlist.h" #include "flat_placement_types.h" #include "atom_netlist_fwd.h" +#include "flat_placement_utils.h" #include "physical_types_util.h" #include "place_macro.h" #include "vtr_assert.h" @@ -638,34 +639,6 @@ static t_flat_pl_loc find_centroid_loc_from_flat_placement(const t_pl_macro& pl_ return centroid; } -/** - * @brief Returns the L1 distance a cluster at the given flat location would - * need to move to be within the bounds of a tile at the given tile loc. - */ -static inline float get_dist_to_tile(const t_flat_pl_loc& src_flat_loc, - const t_physical_tile_loc& tile_loc, - const DeviceGrid& device_grid) { - // Get the bounds of the tile. - // Note: The get_tile_bb function will not work in this case since it - // subtracts 1 from the width and height. 
- auto tile_type = device_grid.get_physical_type(tile_loc); - float tile_xmin = tile_loc.x - device_grid.get_width_offset(tile_loc); - float tile_xmax = tile_xmin + tile_type->width; - float tile_ymin = tile_loc.y - device_grid.get_height_offset(tile_loc); - float tile_ymax = tile_ymin + tile_type->height; - - // Get the closest point in the bounding box (including the edges) to - // the src_flat_loc. To do this, we project the point in L1 space. - float proj_x = std::clamp(src_flat_loc.x, tile_xmin, tile_xmax); - float proj_y = std::clamp(src_flat_loc.y, tile_ymin, tile_ymax); - - // Then compute the L1 distance from the src_flat_loc to the projected - // position. This will be the minimum distance this point needs to move. - float dx = std::abs(proj_x - src_flat_loc.x); - float dy = std::abs(proj_y - src_flat_loc.y); - return dx + dy; -} - /** * @brief Returns the first available sub_tile (both compatible with the given * compressed grid and is empty according the the blk_loc_registry) in @@ -760,7 +733,9 @@ static inline t_pl_loc find_nearest_compatible_loc(const t_flat_pl_loc& src_flat // Note: In compressed space, distances are not what they appear. We are // using the true grid positions to get the truly closest loc. auto grid_loc = compressed_block_grid.compressed_loc_to_grid_loc(loc); - float grid_dist = get_dist_to_tile(src_flat_loc, grid_loc, device_grid); + float grid_dist = get_manhattan_distance_to_tile(src_flat_loc, + grid_loc, + device_grid); // If this distance is worst than the best we have seen. // NOTE: This prune is always safe (i.e. it will never remove a better // solution) since this is a spatial graph and our objective is @@ -1580,7 +1555,9 @@ static inline float get_flat_variance(const t_pl_macro& macro, // Get the amount this atom needs to be displaced in order to be // within the same tile as the centroid. 
- float dist = get_dist_to_tile(atom_pos, centroid_grid_loc, g_vpr_ctx.device().grid); + float dist = get_manhattan_distance_to_tile(atom_pos, + centroid_grid_loc, + g_vpr_ctx.device().grid); // Accumulate the variance. variance += (dist * dist); diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/flowbased_partial_legalizer/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/flowbased_partial_legalizer/config/golden_results.txt index 600428ad62..de446b6643 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/flowbased_partial_legalizer/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/flowbased_partial_legalizer/config/golden_results.txt @@ -1,5 +1,5 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed 
total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time - k6_frac_N10_40nm.xml apex4.pre-vpr.blif common 3.69 vpr 74.85 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 82 9 -1 -1 success v8.0.0-13084-g071ad3865 release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-60-generic x86_64 2025-06-17T09:37:40 betzgrp-wintermute /home/pooladam/vtr-verilog-to-routing 76644 9 19 896 28 0 558 110 16 16 256 -1 mcnc_medium -1 -1 6604.32 6187 4055 330 2657 1068 74.8 MiB 3.14 0.00 5.5006 5.04382 -83.4196 -5.04382 nan 0.00 0.00159178 0.00140179 0.052728 0.048536 74.8 MiB 3.14 74.8 MiB 1.13 9693 17.4022 2573 4.61939 4635 22418 750276 125848 1.05632e+07 4.41931e+06 1.26944e+06 4958.75 18 28900 206586 -1 5.23966 nan -85.8792 -5.23966 0 0 0.13 -1 -1 74.8 MiB 0.25 0.25043 0.231439 31.6 MiB -1 0.05 - k6_frac_N10_40nm.xml des.pre-vpr.blif common 0.98 vpr 75.87 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 60 256 -1 -1 success v8.0.0-13084-g071ad3865 release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-60-generic x86_64 2025-06-17T09:37:40 betzgrp-wintermute /home/pooladam/vtr-verilog-to-routing 77692 256 245 954 501 0 592 561 22 22 484 -1 mcnc_large -1 -1 7794.07 7693 5185 54 899 4232 75.9 MiB 0.51 0.01 5.23302 4.72031 -822.458 -4.72031 nan 0.00 0.0019465 0.00181915 0.0146274 0.014127 75.9 MiB 0.51 75.9 MiB 0.36 10399 17.5659 2857 4.82601 2368 5414 292191 61927 2.15576e+07 3.23364e+06 1.49107e+06 3080.73 14 47664 245996 -1 5.04732 nan -891.503 -5.04732 0 0 0.17 -1 -1 75.9 MiB 0.14 0.105175 
0.100293 33.7 MiB -1 0.06 - k6_frac_N10_40nm.xml ex1010.pre-vpr.blif common 13.53 vpr 105.74 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 295 10 -1 -1 success v8.0.0-13084-g071ad3865 release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-60-generic x86_64 2025-06-17T09:37:40 betzgrp-wintermute /home/pooladam/vtr-verilog-to-routing 108280 10 10 2655 20 0 1258 315 22 22 484 -1 mcnc_large -1 -1 27055.4 24196 24948 3246 18814 2888 105.7 MiB 11.83 0.02 8.14213 6.45814 -63.4873 -6.45814 nan 0.00 0.00723015 0.00601179 0.178647 0.157549 105.7 MiB 11.83 105.7 MiB 3.24 36569 29.0692 9362 7.44197 8387 54711 2280579 294712 2.15576e+07 1.58987e+07 3.51389e+06 7260.09 17 64568 594370 -1 6.99083 nan -66.327 -6.99083 0 0 0.42 -1 -1 105.7 MiB 0.83 0.805444 0.717568 49.7 MiB -1 0.12 - k6_frac_N10_40nm.xml seq.pre-vpr.blif common 3.68 vpr 75.47 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 82 41 -1 -1 success v8.0.0-13084-g071ad3865 release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-60-generic x86_64 2025-06-17T09:37:40 betzgrp-wintermute /home/pooladam/vtr-verilog-to-routing 77284 41 35 1006 76 0 591 158 16 16 256 -1 mcnc_medium -1 -1 6788.51 6434 4001 201 1978 1822 75.5 MiB 3.16 0.00 5.22637 4.95486 -145.087 -4.95486 nan 0.00 0.00148664 0.00128788 0.0309401 0.0286989 75.5 MiB 3.16 75.5 MiB 1.05 9852 16.6701 2636 4.46024 3774 18255 562759 99961 1.05632e+07 4.41931e+06 1.26944e+06 4958.75 18 28900 206586 -1 5.24035 nan -152.337 -5.24035 0 0 0.13 -1 -1 75.5 MiB 0.21 0.224432 0.206859 31.8 MiB -1 0.04 + k6_frac_N10_40nm.xml apex4.pre-vpr.blif common 4.13 vpr 77.07 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 82 9 -1 -1 success v8.0.0-13239-gc574bc5f2 release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-58-generic x86_64 2025-06-28T23:19:15 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 78920 9 19 896 28 0 597 110 16 16 256 -1 mcnc_medium -1 -1 6923.74 6384 3266 257 2173 836 77.1 MiB 3.53 0.01 5.93826 5.04913 -82.6284 -5.04913 nan 0.00 0.00162746 0.00126185 0.0369838 0.0320011 77.1 MiB 
3.53 77.1 MiB 1.45 9854 16.5336 2619 4.39430 4254 19787 655822 113994 1.05632e+07 4.41931e+06 1.26944e+06 4958.75 18 28900 206586 -1 5.61854 nan -86.9247 -5.61854 0 0 0.19 -1 -1 77.1 MiB 0.25 0.236227 0.206716 33.2 MiB -1 0.05 + k6_frac_N10_40nm.xml des.pre-vpr.blif common 1.13 vpr 77.78 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 53 256 -1 -1 success v8.0.0-13239-gc574bc5f2 release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-58-generic x86_64 2025-06-28T23:19:15 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 79648 256 245 954 501 0 598 554 22 22 484 -1 mcnc_large -1 -1 7759.78 7813 32390 260 5540 26590 77.8 MiB 0.62 0.01 5.3774 4.07795 -783.558 -4.07795 nan 0.00 0.00216885 0.00194181 0.0417738 0.0379768 77.8 MiB 0.62 77.8 MiB 0.36 10841 18.1288 2955 4.94147 2557 5883 360499 76612 2.15576e+07 2.85638e+06 1.49107e+06 3080.73 14 47664 245996 -1 4.6034 nan -875.791 -4.6034 0 0 0.21 -1 -1 77.8 MiB 0.17 0.140155 0.129596 35.2 MiB -1 0.07 + k6_frac_N10_40nm.xml ex1010.pre-vpr.blif common 17.36 vpr 108.07 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 287 10 -1 -1 success v8.0.0-13239-gc574bc5f2 release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-58-generic x86_64 2025-06-28T23:19:15 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 110664 10 10 2655 20 0 1394 307 22 22 484 -1 mcnc_large -1 -1 29331.8 25819 17902 2041 13789 2072 108.1 MiB 15.40 0.02 8.02093 6.59208 -64.4571 -6.59208 nan 0.00 0.00521069 0.004042 0.148628 0.1258 108.1 MiB 15.40 108.1 MiB 4.28 37964 27.2339 9782 7.01722 9764 59405 2548944 324595 2.15576e+07 1.54676e+07 3.51389e+06 7260.09 18 64568 594370 -1 6.70317 nan -65.0915 -6.70317 0 0 0.60 -1 -1 108.1 MiB 0.88 0.765326 0.666153 51.7 MiB -1 0.15 + k6_frac_N10_40nm.xml seq.pre-vpr.blif common 4.38 vpr 78.20 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 86 41 -1 -1 success v8.0.0-13239-gc574bc5f2 release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-58-generic x86_64 2025-06-28T23:19:15 srivatsan-Precision-Tower-5810 
/home/alex/vtr-verilog-to-routing 80080 41 35 1006 76 0 650 162 16 16 256 -1 mcnc_medium -1 -1 7393.66 7070 4572 193 2379 2000 78.2 MiB 3.75 0.01 5.40605 5.02754 -145.024 -5.02754 nan 0.00 0.00227909 0.00183827 0.0409561 0.0356465 78.2 MiB 3.75 78.2 MiB 1.23 10921 16.8015 2925 4.50000 4695 22078 725102 126903 1.05632e+07 4.63488e+06 1.26944e+06 4958.75 19 28900 206586 -1 5.1862 nan -149.801 -5.1862 0 0 0.19 -1 -1 78.2 MiB 0.27 0.25655 0.224304 33.2 MiB -1 0.05