Merged
79 commits
cad2f48
Tracing node execution in cudf-polars
TomAugspurger Aug 28, 2025
fcf5531
Merge remote-tracking branch 'upstream/branch-25.10' into tom/log-stats
TomAugspurger Sep 9, 2025
31606d1
Added tests
TomAugspurger Sep 9, 2025
6ba1cc9
More careful with statistics
TomAugspurger Sep 9, 2025
40dc34e
type fixes
TomAugspurger Sep 9, 2025
f7e3b72
Merge remote-tracking branch 'upstream/branch-25.10' into tom/log-stats
TomAugspurger Sep 9, 2025
9c256ef
dead code
TomAugspurger Sep 9, 2025
820680a
more types
TomAugspurger Sep 9, 2025
ad26c4e
Track nvml stats too
TomAugspurger Sep 9, 2025
64da24a
Merge remote-tracking branch 'upstream/branch-25.10' into tom/log-stats
TomAugspurger Sep 10, 2025
c57f7a4
fixups
TomAugspurger Sep 10, 2025
678bb6f
Packaging
TomAugspurger Sep 10, 2025
7ecceda
private HAS_STRUCTLOG
TomAugspurger Sep 10, 2025
3febbd6
docs
TomAugspurger Sep 10, 2025
75ccbec
Merge remote-tracking branch 'upstream/branch-25.10' into tom/log-stats
TomAugspurger Sep 10, 2025
4c34d39
fixes
TomAugspurger Sep 10, 2025
db57c2f
Import-time determination
TomAugspurger Sep 10, 2025
1f45e00
Merge remote-tracking branch 'upstream/branch-25.10' into tom/log-stats
TomAugspurger Sep 10, 2025
6069141
Merge remote-tracking branch 'upstream/branch-25.10' into tom/log-stats
TomAugspurger Sep 15, 2025
5f8c76f
doc fix
TomAugspurger Sep 15, 2025
a45b930
Test coverage up
TomAugspurger Sep 15, 2025
1849128
coverage subprocess config
TomAugspurger Sep 15, 2025
479fbeb
Merge branch 'branch-25.10' into tom/log-stats
TomAugspurger Sep 15, 2025
a9a0897
coverage pragmas
TomAugspurger Sep 15, 2025
375d96d
More coverage
TomAugspurger Sep 15, 2025
b8e64aa
Merge remote-tracking branch 'upstream/branch-25.10' into tom/log-stats
TomAugspurger Sep 15, 2025
a8886da
Fixes
TomAugspurger Sep 16, 2025
3fd15a3
Merge remote-tracking branch 'upstream/branch-25.10' into tom/log-stats
TomAugspurger Sep 16, 2025
2d18e40
Another attempt
TomAugspurger Sep 16, 2025
5002359
Another attempt
TomAugspurger Sep 16, 2025
178426f
Merge remote-tracking branch 'upstream/branch-25.10' into tom/log-stats
TomAugspurger Sep 16, 2025
25d2331
add a separate run
TomAugspurger Sep 17, 2025
9e008c0
Add a separate run
TomAugspurger Sep 17, 2025
e87722e
Merge remote-tracking branch 'upstream/branch-25.10' into tom/log-stats
TomAugspurger Sep 17, 2025
e3a6bcf
Revert "add a separate run"
TomAugspurger Sep 17, 2025
e2b92b0
Revert "Add a separate run"
TomAugspurger Sep 17, 2025
d285979
filter logs
TomAugspurger Sep 17, 2025
43ac2f6
set initial pool size
TomAugspurger Sep 17, 2025
958face
Ensure buffers are freed, even during exceptions
TomAugspurger Sep 18, 2025
8a6d4f8
Merge remote-tracking branch 'upstream/branch-25.10' into tom/log-stats
TomAugspurger Sep 18, 2025
d84d95a
remove duplicate code
TomAugspurger Sep 18, 2025
4278cd1
Align ConditionalJoin.do_evaluate signature
TomAugspurger Sep 18, 2025
aba0fde
Only clear the frames if we're logging traces
TomAugspurger Sep 18, 2025
3eaf54b
Use non-child args to pick out dataframes
TomAugspurger Sep 18, 2025
351988f
remove print
TomAugspurger Sep 18, 2025
b5c17a6
More ConditionalJoin hacking
TomAugspurger Sep 18, 2025
696951d
Remove unnecessary workaround
TomAugspurger Sep 19, 2025
4d8d7b5
Merge remote-tracking branch 'upstream/branch-25.10' into tom/log-stats
TomAugspurger Sep 19, 2025
b79fc08
More memory adjustment
TomAugspurger Sep 19, 2025
46b6cee
Merge remote-tracking branch 'upstream/branch-25.10' into tom/log-stats
TomAugspurger Sep 22, 2025
b988bf0
remove cov config change
TomAugspurger Sep 22, 2025
c79c808
Fix typing with Concatenate
TomAugspurger Sep 22, 2025
96a1e65
Reuse bool_converter
TomAugspurger Sep 22, 2025
6f105e2
Rename group to cudf_polars_trace
bdice Sep 22, 2025
1d568bb
Use cudf_polars_trace dependency list in "all" list and test environm…
bdice Sep 22, 2025
0ffc93f
Merge remote-tracking branch 'upstream/branch-25.10' into tom/log-stats
TomAugspurger Sep 23, 2025
e4391fc
Use `StatisticsResourceAdaptor` instead of cm
TomAugspurger Sep 24, 2025
2be0ccd
Merge remote-tracking branch 'upstream/branch-25.12' into tom/log-stats
TomAugspurger Sep 24, 2025
f3fb7f8
Merge remote-tracking branch 'upstream/branch-25.12' into tom/log-stats
TomAugspurger Sep 25, 2025
63f2170
import cleanup
TomAugspurger Sep 25, 2025
0fe7fe4
Fix coverage pragma
TomAugspurger Sep 25, 2025
409f5bb
Add fixture to cleare MR cache
TomAugspurger Sep 24, 2025
e05d085
Merge remote-tracking branch 'upstream/branch-25.12' into tom/log-stats
TomAugspurger Sep 25, 2025
fcf72dd
Merge remote-tracking branch 'upstream/branch-25.12' into tom/log-stats
TomAugspurger Sep 26, 2025
d14f433
Fix stale comment
TomAugspurger Sep 29, 2025
54d23eb
Merge remote-tracking branch 'upstream/branch-25.12' into tom/log-stats
TomAugspurger Sep 29, 2025
1889e2f
Merge branch 'branch-25.12' into tom/log-stats
TomAugspurger Sep 29, 2025
c5fc4c6
Merge branch 'branch-25.12' into tom/log-stats
TomAugspurger Sep 29, 2025
db916b7
Merge remote-tracking branch 'upstream/branch-25.12' into tom/log-stats
TomAugspurger Sep 30, 2025
c5f9c67
Merge remote-tracking branch 'upstream/branch-25.12' into tom/log-stats
TomAugspurger Sep 30, 2025
6a5c5bd
fixes
TomAugspurger Sep 30, 2025
a4eb89e
Merge remote-tracking branch 'upstream/branch-25.12' into tom/log-stats
TomAugspurger Oct 1, 2025
a0efb2f
Merge remote-tracking branch 'upstream/branch-25.12' into tom/log-stats
TomAugspurger Oct 1, 2025
be92e9f
Cache the cachable
TomAugspurger Oct 1, 2025
138ab2e
Merge remote-tracking branch 'upstream/branch-25.12' into tom/log-stats
TomAugspurger Oct 2, 2025
355cc74
typo
TomAugspurger Oct 2, 2025
23d4ebf
coverage
TomAugspurger Oct 2, 2025
e97b98e
Merge remote-tracking branch 'upstream/branch-25.12' into tom/log-stats
TomAugspurger Oct 2, 2025
bd9662f
doc fixes
TomAugspurger Oct 2, 2025
1 change: 1 addition & 0 deletions conda/environments/all_cuda-129_arch-aarch64.yaml
@@ -96,6 +96,7 @@ dependencies:
- sphinx>=8.1.0
- sphinxcontrib-websupport
- streamz
- structlog
- sysroot_linux-aarch64==2.28
- typing_extensions>=4.0.0
- zlib>=1.2.13
1 change: 1 addition & 0 deletions conda/environments/all_cuda-129_arch-x86_64.yaml
@@ -97,6 +97,7 @@ dependencies:
- sphinx>=8.1.0
- sphinxcontrib-websupport
- streamz
- structlog
- sysroot_linux-64==2.28
- typing_extensions>=4.0.0
- zlib>=1.2.13
1 change: 1 addition & 0 deletions conda/environments/all_cuda-130_arch-aarch64.yaml
@@ -95,6 +95,7 @@ dependencies:
- sphinx>=8.1.0
- sphinxcontrib-websupport
- streamz
- structlog
- sysroot_linux-aarch64==2.28
- typing_extensions>=4.0.0
- zlib>=1.2.13
1 change: 1 addition & 0 deletions conda/environments/all_cuda-130_arch-x86_64.yaml
@@ -96,6 +96,7 @@ dependencies:
- sphinx>=8.1.0
- sphinxcontrib-websupport
- streamz
- structlog
- sysroot_linux-64==2.28
- typing_extensions>=4.0.0
- zlib>=1.2.13
15 changes: 15 additions & 0 deletions dependencies.yaml
@@ -13,6 +13,7 @@ files:
- clang
- cuda
- cuda_version
- cudf_polars_trace
- depends_on_cupy
- depends_on_dask_cuda
- depends_on_libkvikio
@@ -337,6 +338,15 @@ files:
- numpy_run
- test_python_common
- test_python_cudf_polars
- cudf_polars_trace
py_trace_cudf_polars:
output: pyproject
pyproject_dir: python/cudf_polars
extras:
table: project.optional-dependencies
key: trace
includes:
- cudf_polars_trace
py_build_dask_cudf:
output: pyproject
pyproject_dir: python/dask_cudf
@@ -1214,3 +1224,8 @@ dependencies:
- output_types: [conda, requirements, pyproject]
packages:
- narwhals==2.0.1
cudf_polars_trace:
common:
- output_types: [conda, requirements, pyproject]
packages:
- structlog
39 changes: 39 additions & 0 deletions docs/cudf/source/cudf_polars/usage.md
@@ -83,3 +83,42 @@ shape: (3, 3)
│ Scan ┆ 813 ┆ 233993 │
└────────────────────┴───────┴────────┘
```

## Tracing

cudf-polars can optionally trace execution of each node in the query plan.
To enable tracing, set the environment variable ``CUDF_POLARS_LOG_TRACES`` to a
true value ("1", "true", "y", "yes") before starting your process. This will
capture and log information about each node before and after it executes, including information on

- The type of the node being executed (e.g. `Scan`, `Select`, `Join`, `GroupBy`)
- The shape and size (in memory) of each input and output DataFrame
- The GPU memory usage, as reported by [nvml], before and after executing the node
- Start and stop clock readings, which can be used to measure the duration of the node's execution

The implementation uses [structlog] to build log records. You can configure the
output using structlog's [configuration][structlog-configure] and enrich the
records with [context variables][structlog-context].


```
>>> df = pl.DataFrame({"a": ["a", "a", "b"], "b": [1, 2, 3]}).lazy()
>>> df.group_by("a").agg(pl.col("b").min().alias("min"), pl.col("b").max().alias("max")).collect(engine="gpu")
2025-09-10 07:44:01 [info ] Execute IR count_frames_input=0 count_frames_output=1 ... type=DataFrameScan
2025-09-10 07:44:01 [info ] Execute IR count_frames_input=1 count_frames_output=1 ... type=GroupBy
shape: (2, 3)
┌─────┬─────┬─────┐
│ a ┆ min ┆ max │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ b ┆ 3 ┆ 3 │
│ a ┆ 1 ┆ 2 │
└─────┴─────┴─────┘
```

[nvml]: https://developer.nvidia.com/management-library-nvml
[rmm-stats]: https://docs.rapids.ai/api/rmm/stable/guide/#memory-statistics-and-profiling
[structlog]: https://www.structlog.org/
[structlog-configure]: https://www.structlog.org/en/stable/configuration.html
[structlog-context]: https://www.structlog.org/en/stable/contextvars.html
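
As a companion to the Tracing docs added above: the trace records go through structlog's standard pipeline, so they can be reshaped with structlog's own configuration API. Below is a minimal, hypothetical sketch (not part of this PR) that renders records as JSON and binds an extra context variable; it assumes `CUDF_POLARS_LOG_TRACES=1` is already set in the environment and that the GPU engine is available.

```python
# Minimal sketch of customizing trace output with structlog's public API.
# Nothing here is specific to this PR; the processors and functions are
# standard structlog, and the query itself is only illustrative.
import polars as pl
import structlog

structlog.configure(
    processors=[
        structlog.contextvars.merge_contextvars,      # include bound context vars
        structlog.processors.TimeStamper(fmt="iso"),  # ISO-8601 timestamps
        structlog.processors.JSONRenderer(),          # one JSON object per record
    ]
)

# Every trace record emitted below will also carry run_id="example".
structlog.contextvars.bind_contextvars(run_id="example")

lf = pl.LazyFrame({"a": ["a", "a", "b"], "b": [1, 2, 3]})
result = lf.group_by("a").agg(pl.col("b").min()).collect(engine="gpu")
```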
7 changes: 7 additions & 0 deletions python/cudf_polars/cudf_polars/callback.py
@@ -22,6 +22,7 @@
import rmm
from rmm._cuda import gpu

import cudf_polars.dsl.tracing
from cudf_polars.dsl.tracing import CUDF_POLARS_NVTX_DOMAIN
from cudf_polars.dsl.translate import Translator
from cudf_polars.utils.config import _env_get_int, get_total_device_memory
@@ -133,6 +134,12 @@ def set_memory_resource(
!= 0
),
)

if (
cudf_polars.dsl.tracing.LOG_TRACES
): # pragma: no cover; requires CUDF_POLARS_LOG_TRACES=1
mr = rmm.mr.StatisticsResourceAdaptor(mr)

rmm.mr.set_current_device_resource(mr)
try:
yield mr
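For context on the `StatisticsResourceAdaptor` change above: this is an existing RMM adaptor that records allocation statistics for whatever resource it wraps. The sketch below is ordinary RMM usage rather than code from this PR, and it assumes RMM's `allocation_counts` property for reading the tallies.

```python
# Generic illustration of what wrapping a memory resource in
# rmm.mr.StatisticsResourceAdaptor provides; not code from this PR.
import rmm

upstream = rmm.mr.CudaMemoryResource()
mr = rmm.mr.StatisticsResourceAdaptor(upstream)  # count allocs/deallocs on upstream
rmm.mr.set_current_device_resource(mr)

buf = rmm.DeviceBuffer(size=1024)  # one 1 KiB allocation routed through the adaptor

# Reports current/peak/total bytes and allocation counts for this resource.
print(mr.allocation_counts)

del buf
```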
40 changes: 34 additions & 6 deletions python/cudf_polars/cudf_polars/dsl/ir.py
@@ -33,7 +33,7 @@
from cudf_polars.dsl.expressions.base import ExecutionContext
from cudf_polars.dsl.nodebase import Node
from cudf_polars.dsl.to_ast import to_ast, to_parquet_filter
from cudf_polars.dsl.tracing import nvtx_annotate_cudf_polars
from cudf_polars.dsl.tracing import log_do_evaluate, nvtx_annotate_cudf_polars
from cudf_polars.dsl.utils.reshape import broadcast
from cudf_polars.dsl.utils.windows import range_window_bounds
from cudf_polars.utils import dtypes
@@ -479,6 +479,7 @@ def fast_count(self) -> int: # pragma: no cover
return max(total_rows, 0)

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="Scan")
def do_evaluate(
cls,
@@ -975,6 +976,7 @@ def _write_parquet(
plc.io.parquet.write_parquet(writer_options)

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="Sink")
def do_evaluate(
cls,
@@ -1034,6 +1036,7 @@ def is_equal(self, other: Self) -> bool: # noqa: D102
return False

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="Cache")
def do_evaluate(
cls, key: int, refcount: int | None, df: DataFrame
@@ -1114,6 +1117,7 @@ def get_hashable(self) -> Hashable:
)

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="DataFrameScan")
def do_evaluate(
cls,
@@ -1173,6 +1177,7 @@ def _is_len_expr(exprs: tuple[expr.NamedExpr, ...]) -> bool: # pragma: no cover
return False

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="Select")
def do_evaluate(
cls,
@@ -1256,6 +1261,7 @@ def __init__(
self._non_child_args = (self.exprs,)

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="Reduce")
def do_evaluate(
cls,
@@ -1352,6 +1358,7 @@ def __init__(
)

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="Rolling")
def do_evaluate(
cls,
@@ -1476,6 +1483,7 @@ def __init__(
)

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="GroupBy")
def do_evaluate(
cls,
@@ -1603,7 +1611,8 @@ def __reduce__(self) -> tuple[Any, ...]:
tuple[
str,
pl_expr.Operator | Iterable[pl_expr.Operator],
],
]
| None,
bool,
Zlice | None,
str,
@@ -1625,6 +1634,12 @@
) -> None:
self.schema = schema
self.predicate = predicate
# options[0] is a tuple[str, Operator, ...]
# The Operator class can't be pickled, but we don't use it anyway so
# just throw that away
if options[0] is not None:
options = (None, *options[1:])

self.options = options
self.children = (left, right)
predicate_wrapper = self.Predicate(predicate)
@@ -1637,20 +1652,21 @@ def __init__(
raise NotImplementedError(
f"Conditional join with predicate {predicate}"
) # pragma: no cover; polars never delivers expressions we can't handle
self._non_child_args = (predicate_wrapper, zlice, suffix, maintain_order)
self._non_child_args = (predicate_wrapper, options)

@TomAugspurger TomAugspurger Sep 18, 2025

This change, and the change to do_evaluate, are to align with the other IR nodes, where self._non_child_args matches the signature of cls.do_evaluate, followed by dataframes.

This lets us get the dataframes passed to do_evaluate by slicing off the last n values from args + kwargs, rather than introspecting the type of each value.

I don't feel great about b5c17a6, but we have tests ensuring that ConditionalJoin is picklable. Perhaps that's why this was different in the first place.
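
To make the convention in the comment above concrete, here is a hypothetical helper (the name `split_do_evaluate_args` is made up; the real slicing presumably lives in the `log_do_evaluate` decorator):

```python
# Hypothetical illustration of the calling convention described above:
# do_evaluate receives (*non_child_args, *dataframes), so the input
# DataFrames can be split off positionally instead of inspecting types.
from typing import Any


def split_do_evaluate_args(
    args: tuple[Any, ...], n_frames: int
) -> tuple[tuple[Any, ...], tuple[Any, ...]]:
    """Split do_evaluate's positional args into (non-child args, DataFrames)."""
    # n_frames would be len(node.children): one input frame per child IR node.
    n_other = len(args) - n_frames
    return args[:n_other], args[n_other:]
```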


@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="ConditionalJoin")
def do_evaluate(
cls,
predicate_wrapper: Predicate,
zlice: Zlice | None,
suffix: str,
maintain_order: Literal["none", "left", "right", "left_right", "right_left"],
options: tuple,
left: DataFrame,
right: DataFrame,
) -> DataFrame:
"""Evaluate and return a dataframe."""
_, _, zlice, suffix, _, _ = options

lg, rg = plc.join.conditional_inner_join(
left.table,
right.table,
@@ -1875,6 +1891,7 @@ def _build_columns(
return columns

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="Join")
def do_evaluate(
cls,
@@ -2027,6 +2044,7 @@ def __init__(
self.children = (df,)

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="HStack")
def do_evaluate(
cls,
@@ -2092,6 +2110,7 @@ def __init__(
}

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="Distinct")
def do_evaluate(
cls,
@@ -2182,6 +2201,7 @@ def __init__(
self.children = (df,)

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="Sort")
def do_evaluate(
cls,
@@ -2232,6 +2252,7 @@ def __init__(self, schema: Schema, offset: int, length: int | None, df: IR):
self.children = (df,)

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="Slice")
def do_evaluate(cls, offset: int, length: int, df: DataFrame) -> DataFrame:
"""Evaluate and return a dataframe."""
@@ -2253,6 +2274,7 @@ def __init__(self, schema: Schema, mask: expr.NamedExpr, df: IR):
self.children = (df,)

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="Filter")
def do_evaluate(cls, mask_expr: expr.NamedExpr, df: DataFrame) -> DataFrame:
"""Evaluate and return a dataframe."""
@@ -2272,6 +2294,7 @@ def __init__(self, schema: Schema, df: IR):
self.children = (df,)

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="Projection")
def do_evaluate(cls, schema: Schema, df: DataFrame) -> DataFrame:
"""Evaluate and return a dataframe."""
@@ -2305,6 +2328,7 @@ def __init__(self, schema: Schema, key: str, left: IR, right: IR):
self._non_child_args = (key,)

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="MergeSorted")
def do_evaluate(cls, key: str, *dfs: DataFrame) -> DataFrame:
"""Evaluate and return a dataframe."""
@@ -2425,6 +2449,7 @@ def get_hashable(self) -> Hashable:
)

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="MapFunction")
def do_evaluate(
cls, schema: Schema, name: str, options: Any, df: DataFrame
@@ -2525,6 +2550,7 @@ def __init__(self, schema: Schema, zlice: Zlice | None, *children: IR):
schema = self.children[0].schema

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="Union")
def do_evaluate(cls, zlice: Zlice | None, *dfs: DataFrame) -> DataFrame:
"""Evaluate and return a dataframe."""
@@ -2582,6 +2608,7 @@ def _extend_with_nulls(table: plc.Table, *, nrows: int) -> plc.Table:
)

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="HConcat")
def do_evaluate(
cls,
@@ -2627,6 +2654,7 @@ def __init__(self, schema: Schema):
self.children = ()

@classmethod
@log_do_evaluate
@nvtx_annotate_cudf_polars(message="Empty")
def do_evaluate(cls, schema: Schema) -> DataFrame: # pragma: no cover
"""Evaluate and return a dataframe."""
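
The diff above applies `log_do_evaluate` to each IR node's `do_evaluate`. As a rough mental model only, a tracing decorator of this shape could look like the sketch below; it is not this PR's implementation (the real decorator also records RMM and NVML memory statistics and honors `CUDF_POLARS_LOG_TRACES`), and the logged field names simply mirror the example output in the docs.

```python
# Simplified, hypothetical sketch of a do_evaluate tracing decorator. The
# actual log_do_evaluate in cudf_polars.dsl.tracing does more (memory stats,
# an enable/disable flag) and may differ in detail.
import functools
import time

import structlog

logger = structlog.get_logger()


def trace_do_evaluate(func):
    """Log node type, output frame count, and duration around do_evaluate."""

    @functools.wraps(func)
    def wrapper(cls, *args):
        start = time.monotonic()
        result = func(cls, *args)      # evaluate the IR node
        logger.info(
            "Execute IR",
            type=cls.__name__,         # e.g. "DataFrameScan", "GroupBy"
            count_frames_output=1,     # do_evaluate returns a single DataFrame
            duration_s=time.monotonic() - start,
        )
        return result

    return wrapper
```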