Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
32476d7
Fix k blocking for non independent option
iomaganaris Nov 21, 2025
8547aba
PoC for moving temporary outside kloop
iomaganaris Nov 25, 2025
e265480
Working version
iomaganaris Nov 27, 2025
0cb5f5b
Enable maxnreg setting
iomaganaris Nov 27, 2025
dcc74d4
Make sure that the inner loop doesn't get unrolled
iomaganaris Nov 27, 2025
d0d567f
Enable promote_independent_memlets option
iomaganaris Nov 27, 2025
ec11475
Enable loop blocking if there's any independent memlet to promote
iomaganaris Nov 27, 2025
0846ba8
Added initial test and fixed pass
iomaganaris Nov 28, 2025
8c7ce09
Fixing test
iomaganaris Nov 28, 2025
2ac4aa7
If we don't require_independent_nodes always apply
iomaganaris Nov 28, 2025
b8686f9
Fix most of the tests
iomaganaris Nov 28, 2025
21f053b
Fix tests
iomaganaris Dec 1, 2025
b80dc12
Fix memlet promotion number
iomaganaris Dec 2, 2025
56e390f
Added option for independent node promotion threshold
iomaganaris Dec 2, 2025
93e84ab
Extended the tests and fixes
iomaganaris Dec 2, 2025
0305aaa
Skip maps with single sizes
iomaganaris Dec 2, 2025
da784a2
Make formatting happy
iomaganaris Dec 2, 2025
329803d
Set better block size and gpu_maxnreg for kblocking
iomaganaris Dec 3, 2025
cf87306
Make formatting happy
iomaganaris Dec 3, 2025
b1cce65
Improve some hacks for subsets
iomaganaris Dec 5, 2025
0a12898
Merge remote-tracking branch 'origin/main' into extend_loopblocking
iomaganaris Feb 12, 2026
729dca7
Merge remote-tracking branch 'origin/main' into extend_loopblocking
iomaganaris Apr 13, 2026
180d777
Fix unique_name call
iomaganaris Apr 13, 2026
5391431
Merge remote-tracking branch 'origin/main' into extend_loopblocking
iomaganaris Apr 17, 2026
73a8ac0
Remove NVIDIA related options for the loop in kblocking
iomaganaris Apr 17, 2026
87fc4bf
Don't change the maxnreg of a map if it's already set
iomaganaris Apr 19, 2026
eabbbb1
Handling comments from Philip
iomaganaris Apr 19, 2026
7e07bff
Extend test and address more comments
iomaganaris Apr 27, 2026
dbb2ace
Avoid calling self.outer_entry in functions apart from apply and can_…
iomaganaris Apr 27, 2026
ca13254
Don't promote independent memlets if their destination is a map that …
iomaganaris Apr 28, 2026
794ff4a
Handle better adjustment of edges in inner map
iomaganaris Apr 28, 2026
c147920
Add more comments for handling the out edges from inner map and move …
iomaganaris Apr 28, 2026
bdb24d0
Fix how subsets are set to memlets
iomaganaris Apr 28, 2026
197a4f1
Fixed unit test with empty memlet
iomaganaris Apr 28, 2026
f77078e
Instead of a single dimension pass a list of dimensions for loop bloc…
iomaganaris Apr 28, 2026
8488fb9
Use unroll length same as blocking size
iomaganaris Apr 29, 2026
2328a2d
Avoid loop blocking for maps that have scans
iomaganaris Apr 29, 2026
d2947cc
Fix disabling normal loop blocking for scans
iomaganaris Apr 29, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import enum
import warnings
from typing import Any, Callable, Optional, Sequence, TypeAlias, Union
from typing import Any, Callable, List, Optional, Sequence, TypeAlias, Union
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
from typing import Any, Callable, List, Optional, Sequence, TypeAlias, Union
from typing import Any, Callable, Optional, Sequence, TypeAlias, Union


import dace
from dace import data as dace_data
Expand Down Expand Up @@ -118,9 +118,11 @@ def gt_auto_optimize(
gpu_block_size_2d: Optional[Sequence[int | str] | str] = None,
gpu_block_size_3d: Optional[Sequence[int | str] | str] = None,
gpu_maxnreg: Optional[int] = None,
blocking_dim: Optional[gtx_common.Dimension] = None,
blocking_dims: Optional[List[gtx_common.Dimension]] = None,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
blocking_dims: Optional[List[gtx_common.Dimension]] = None,
blocking_dims: Optional[Sequence[gtx_common.Dimension]] = None,

blocking_size: int = 10,
blocking_only_if_independent_nodes: bool = True,
promote_independent_memlets_for_blocking: bool = False,
blocking_independent_node_threshold: Optional[int] = None,
scan_loop_unrolling: bool = False,
scan_loop_unrolling_factor: int = 0,
disable_splitting: bool = False,
Expand Down Expand Up @@ -179,7 +181,7 @@ def gt_auto_optimize(
gpu_block_size_{1, 2, 3}d: Allows to specify the GPU thread block size for
1, 2 and 3 dimension Maps individually. See the `gpu_block_size_spec`
argument of `gt_gpu_transformation()` for more.
blocking_dim: On which dimension blocking should be applied.
blocking_dims: The dimensions on which blocking should be applied; priority follows the order in which the dimensions are passed.
blocking_size: How many elements each block should process.
blocking_only_if_independent_nodes: If `True`, the default, only apply loop
blocking if there are independent nodes in the Map, see the
Expand Down Expand Up @@ -323,9 +325,11 @@ def gt_auto_optimize(
# Optimize the interior of the Maps:
sdfg = _gt_auto_process_dataflow_inside_maps(
sdfg=sdfg,
blocking_dim=blocking_dim,
blocking_dims=blocking_dims,
blocking_size=blocking_size,
blocking_only_if_independent_nodes=blocking_only_if_independent_nodes,
promote_independent_memlets_for_blocking=promote_independent_memlets_for_blocking,
blocking_independent_node_threshold=blocking_independent_node_threshold,
scan_loop_unrolling=scan_loop_unrolling,
scan_loop_unrolling_factor=scan_loop_unrolling_factor,
fuse_tasklets=fuse_tasklets,
Expand Down Expand Up @@ -671,9 +675,11 @@ def _gt_auto_process_top_level_maps(

def _gt_auto_process_dataflow_inside_maps(
sdfg: dace.SDFG,
blocking_dim: Optional[gtx_common.Dimension],
blocking_dims: Optional[list[gtx_common.Dimension]],
blocking_size: int,
blocking_only_if_independent_nodes: Optional[bool],
promote_independent_memlets_for_blocking: Optional[bool],
blocking_independent_node_threshold: Optional[int],
scan_loop_unrolling: bool,
scan_loop_unrolling_factor: int,
fuse_tasklets: bool,
Expand All @@ -694,12 +700,14 @@ def _gt_auto_process_dataflow_inside_maps(
# Separate Tasklets into dependent and independent parts to promote data
# reusability. It is important that this step has to be performed before
# `TaskletFusion` is used.
if blocking_dim is not None:
if blocking_dims is not None:
sdfg.apply_transformations_once_everywhere(
gtx_transformations.LoopBlocking(
blocking_size=blocking_size,
blocking_parameter=blocking_dim,
blocking_parameters=blocking_dims,
require_independent_nodes=blocking_only_if_independent_nodes,
promote_independent_memlets=promote_independent_memlets_for_blocking,
independent_node_threshold=blocking_independent_node_threshold,
),
validate=False,
validate_all=validate_all,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,7 @@ def apply(
block_size[i] = map_size[map_dim_idx_to_inspect]

gpu_map.gpu_block_size = tuple(block_size)
if self.maxnreg is not None:
if self.maxnreg is not None and gpu_map.gpu_maxnreg == 0:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just out of curiosity, what is the intention behind this change?

gpu_map.gpu_maxnreg = self.maxnreg
elif launch_bounds is not None: # Note: empty string has a meaning in DaCe
gpu_map.gpu_launch_bounds = launch_bounds
Expand Down
Loading