Skip to content

Commit 065f1a1

Browse files
Merge pull request #1301 from Kotlin/count_kdocs
count and countDistinct kdocs
2 parents 28781e9 + 5bc26f5 commit 065f1a1

File tree

7 files changed

+381
-7
lines changed

7 files changed

+381
-7
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ public interface DataFrame<out T> :
6868

6969
// region rows
7070

71+
/**
72+
* Returns the total number of rows of this [DataFrame].
73+
*
74+
* @return The number of rows in the [DataFrame].
75+
*/
7176
public fun rowsCount(): Int
7277

7378
public operator fun iterator(): Iterator<DataRow<T>> = rows().iterator()

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/count.kt

Lines changed: 249 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,24 @@ import org.jetbrains.kotlinx.dataframe.Predicate
88
import org.jetbrains.kotlinx.dataframe.RowFilter
99
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
1010
import org.jetbrains.kotlinx.dataframe.annotations.Refine
11+
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
12+
import org.jetbrains.kotlinx.dataframe.documentation.RowFilterDescription
1113
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateValue
1214

1315
// region DataColumn
1416

17+
/**
18+
* Counts the elements in this [DataColumn] that satisfy a given [predicate] or returns the total count
19+
* if no predicate is provided.
20+
*
21+
* For more information: {@include [DocumentationUrls.Count]}
22+
*
23+
* @param predicate An optional predicate used to filter the elements.
24+
* The predicate should return `true` for elements to be counted.
25+
* If `null` (by default), all elements are counted.
26+
* @return The number of elements in the column that match the predicate,
27+
* or the total number of elements if no predicate is provided.
28+
*/
1529
public fun <T> DataColumn<T>.count(predicate: Predicate<T>? = null): Int =
1630
if (predicate == null) {
1731
size()
@@ -23,27 +37,123 @@ public fun <T> DataColumn<T>.count(predicate: Predicate<T>? = null): Int =
2337

2438
// region DataRow
2539

40+
/**
41+
* Returns the number of columns in this [DataRow].
42+
*
43+
* @return the number of columns in this row.
44+
* @see [columnsCount].
45+
*/
2646
public fun AnyRow.count(): Int = columnsCount()
2747

48+
/**
49+
* Counts the number of elements in the current row that satisfy the given [predicate].
50+
*
51+
* @param predicate A predicate function to test each element.
52+
* The predicate should return `true` for elements to be counted.
53+
* @return The number of elements that satisfy the predicate.
54+
*/
2855
public inline fun AnyRow.count(predicate: Predicate<Any?>): Int = values().count(predicate)
2956

3057
// endregion
3158

3259
// region DataFrame
3360

61+
/**
62+
* Returns the total number of rows of this [DataFrame].
63+
*
64+
* For more information: {@include [DocumentationUrls.Count]}
65+
*
66+
* @return The number of rows in the [DataFrame].
67+
*/
3468
public fun <T> DataFrame<T>.count(): Int = rowsCount()
3569

70+
/**
71+
* Counts the number of rows in this [DataFrame] that satisfy the given [predicate].
72+
*
73+
* {@include [RowFilterDescription]}
74+
*
75+
* See also:
76+
* - [filter][DataFrame.filter] — filters rows using a [RowFilter] condition.
77+
* - [countDistinct][DataFrame.countDistinct] — counts distinct rows or values.
78+
*
79+
* For more information: {@include [DocumentationUrls.Count]}
80+
*
81+
* ### Example
82+
* ```kotlin
83+
* // Count rows where the value in the "age" column is greater than 18
84+
* // and the "name/firstName" column starts with 'A'
85+
* df.count { age > 18 && name.firstName.startsWith("A") }
86+
* // Count rows whose previous row exists and has a "length" value of at least 50.0
87+
* df.count { prev()?.let { it.length >= 50.0 } ?: false }
88+
* ```
89+
*
90+
* @param T The schema marker type of the [DataFrame].
91+
* @param predicate A [RowFilter] that returns `true` for rows that should be counted.
92+
* @return The number of rows that satisfy the predicate.
93+
*/
3694
public inline fun <T> DataFrame<T>.count(predicate: RowFilter<T>): Int = rows().count { predicate(it, it) }
3795

3896
// endregion
3997

4098
// region GroupBy
4199

100+
/**
101+
* Aggregates this [GroupBy] by counting the number of rows in each group.
102+
*
103+
* Returns a new [DataFrame] where each row corresponds to a group.
104+
* The resulting frame contains:
105+
* - the original group key columns,
106+
* - a new column (named [resultName], default is `"count"`) that contains the number of rows in each group.
107+
*
108+
* This is equivalent to applying `.aggregate { count() }`, but more efficient.
109+
*
110+
* See also [DataFrame.groupBy] and common [aggregate][Grouped.aggregate].
111+
*
112+
* For more information: {@include [DocumentationUrls.Count]}
113+
*
114+
* ### Example
115+
* ```kotlin
116+
* // Counts number of rows for each city, returning
117+
* // a new DataFrame with columns "city" and "count"
118+
* df.groupBy { city }.count()
119+
* ```
120+
*
121+
* @param resultName The name of the result column that will store the group sizes. Defaults to `"count"`.
122+
* @return A new [DataFrame] with group keys and corresponding group sizes.
123+
*/
42124
@Refine
43125
@Interpretable("GroupByCount0")
44126
public fun <T> Grouped<T>.count(resultName: String = "count"): DataFrame<T> =
45127
aggregateValue(resultName) { count() default 0 }
46128

129+
/**
130+
* Aggregates this [GroupBy] by counting the number of rows in each group
131+
* that satisfy the given [predicate].
132+
*
133+
* {@include [RowFilterDescription]}
134+
*
135+
* Returns a new [DataFrame] where each row corresponds to a group.
136+
* The resulting frame contains:
137+
* - the original group key columns,
138+
* - a new column (named [resultName], defaults to `"count"`)
139+
* that stores the number of rows in each group matching the [predicate].
140+
*
141+
* This is equivalent to calling `.aggregate { count(predicate) }`, but more efficient.
142+
*
143+
* See also [DataFrame.groupBy] and common [aggregate][Grouped.aggregate].
144+
*
145+
* For more information: {@include [DocumentationUrls.Count]}
146+
*
147+
* ### Example
148+
* ```kotlin
149+
* // Count rows for each city where the "income" value is at least 30.0.
150+
* // Returns a new DataFrame with columns "city" and "pointsCount".
151+
* df.groupBy { city }.count("pointsCount") { income >= 30.0 }
152+
* ```
153+
*
154+
* @param resultName The name of the result column containing the group sizes. Defaults to `"count"`.
155+
* @return A new [DataFrame] with group keys and filtered row counts per group.
156+
*/
47157
@Refine
48158
@Interpretable("GroupByCount0")
49159
public inline fun <T> Grouped<T>.count(
@@ -55,20 +165,157 @@ public inline fun <T> Grouped<T>.count(
55165

56166
// region Pivot
57167

168+
/**
169+
* Aggregates this [Pivot] by counting the number of rows in each group.
170+
*
171+
* Returns a single [DataRow] where:
172+
* - each column corresponds to a [pivot] group — if multiple pivot keys were used,
173+
* the result will contain column groups for each pivot key, with columns inside
174+
* corresponding to the values of that key;
175+
* - each value contains the number of rows in that group.
176+
*
177+
* The original [Pivot] column structure is preserved.
178+
* If the [Pivot] was created using multiple or nested keys
179+
* (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]),
180+
* the structure remains unchanged — only the contents of each group
181+
* are replaced with the number of rows in that group.
182+
*
183+
* This is equivalent to calling `.aggregate { count() }`, but more efficient.
184+
*
185+
* See also:
186+
* - [pivot].
187+
* - common [aggregate][Pivot.aggregate].
188+
* - [pivotCounts][DataFrame.pivotCounts] shortcut.
189+
*
190+
* For more information: {@include [DocumentationUrls.Count]}
191+
*
192+
* ### Example
193+
* ```kotlin
194+
* // Count the number of rows for each city.
195+
* // Returns a single DataRow with one column per city and the count of rows in each.
196+
* df.pivot { city }.count()
197+
* ```
198+
*
199+
* @return A single [DataRow] with one column per group and the corresponding group size as its value.
200+
*/
58201
public fun <T> Pivot<T>.count(): DataRow<T> = delegate { count() }
59202

203+
/**
204+
* Aggregates this [Pivot] by counting the number of rows in each group
205+
* that satisfy the given [predicate].
206+
*
207+
* {@include [RowFilterDescription]}
208+
*
209+
* Returns a single [DataRow] where:
210+
* - each column corresponds to a [pivot] group — if multiple pivot keys were used,
211+
* the result will contain column groups for each pivot key, with columns inside
212+
* corresponding to the values of that key;
213+
* - each value contains the number of rows in that group matching the [predicate].
214+
*
215+
* The original [Pivot] column structure is preserved.
216+
* If the [Pivot] was created using multiple or nested keys
217+
* (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]),
218+
* the structure remains unchanged — only the contents of each group
219+
* are replaced with the number of rows (matching the [predicate]) in that group.
220+
*
221+
* This is equivalent to calling `.aggregate { count(predicate) }`, but more efficient.
222+
*
223+
* See also:
224+
* - [pivot].
225+
* - common [aggregate][Pivot.aggregate].
226+
* - [pivotCounts][DataFrame.pivotCounts] shortcut.
227+
*
228+
* For more information: {@include [DocumentationUrls.Count]}
229+
*
230+
* ### Example
231+
* ```kotlin
232+
* // Count rows for each city where the "income" value is greater than 30.0.
233+
* // Returns a single DataRow with one column per city and the count of matching rows.
234+
* df.pivot { city }.count { income > 30.0 }
235+
* ```
236+
*
237+
* @return A single [DataRow] with original [Pivot] columns and filtered row counts per group.
238+
*/
60239
public inline fun <T> Pivot<T>.count(crossinline predicate: RowFilter<T>): DataRow<T> = delegate { count(predicate) }
61240

62241
// endregion
63242

64243
// region PivotGroupBy
65244

245+
/**
246+
* Aggregates this [PivotGroupBy] by counting the number of rows in each
247+
* combined [pivot] + [groupBy] group.
248+
*
249+
* Returns a new [DataFrame] containing the following matrix:
250+
* - one row per [groupBy] key (or set of keys);
251+
* - one column group per [pivot] key, where each inner column corresponds to a value of that key;
252+
* - each cell contains the number of rows in the corresponding pivot–group pair.
253+
*
254+
* The original [Pivot] column structure is preserved.
255+
* If the [Pivot] was created using multiple or nested keys
256+
* (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]),
257+
* the result will contain nested column groups reflecting that key structure,
258+
* with each group containing columns for the values of the corresponding key.
259+
*
260+
* This is equivalent to calling `.aggregate { count() }`, but more efficient.
261+
*
262+
* See also:
263+
* - [pivot], [DataFrame.groupBy], [Pivot.groupBy] and [GroupBy.pivot].
264+
* - common [aggregate][PivotGroupBy.aggregate];
265+
* - [GroupBy.pivotCounts] shortcut.
266+
*
267+
* For more information: {@include [DocumentationUrls.Count]}
268+
*
269+
* ### Example
270+
* ```kotlin
271+
* // Compute a matrix with "city" values horizontally and
272+
* // "age" values vertically, where each cell contains
273+
* // the number of rows with the corresponding age–city pair.
274+
* df.pivot { city }.groupBy { age }.count()
275+
* ```
276+
*
277+
* @return A [DataFrame] with [groupBy] rows and pivoted counts as columns.
278+
*/
66279
public fun <T> PivotGroupBy<T>.count(): DataFrame<T> = aggregate { count() default 0 }
67280

281+
/**
282+
* Aggregates this [PivotGroupBy] by counting the number of rows in each
283+
* combined [pivot] + [groupBy] group, that satisfy the given [predicate].
284+
*
285+
* Returns a new [DataFrame] containing the following matrix:
285+
* - one row per [groupBy] key (or set of keys);
286+
* - one column group per [pivot] key, where each inner column corresponds to a value of that key;
287+
* - each cell contains the number of rows in the corresponding pivot–group pair that satisfy the [predicate].
289+
*
290+
* The original [Pivot] column structure is preserved.
291+
* If the [Pivot] was created using multiple or nested keys
292+
* (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]),
293+
* the result will contain nested column groups reflecting that key structure,
294+
* with each group containing columns for the values of the corresponding key;
294+
* only the rows matching the [predicate] are counted in each cell.
296+
*
297+
* This is equivalent to calling `.aggregate { count(predicate) }`, but more efficient.
298+
*
299+
* See also:
300+
* - [pivot], [DataFrame.groupBy], [Pivot.groupBy] and [GroupBy.pivot].
301+
* - common [aggregate][PivotGroupBy.aggregate];
302+
* - [GroupBy.pivotCounts] shortcut.
303+
*
304+
* For more information: {@include [DocumentationUrls.Count]}
305+
*
306+
* ### Example
307+
* ```kotlin
308+
* // Compute a matrix with "city" values horizontally and
309+
* // "age" values vertically, where each cell contains the number of rows
309+
* // with the corresponding age–city pair and "income" greater than 30.0.
310+
* df.pivot { city }.groupBy { age }.count { income > 30.0 }
312+
* ```
313+
*
314+
* @return A [DataFrame] with [groupBy] rows and, as columns, pivoted counts of rows matching the [predicate].
315+
*/
68316
public inline fun <T> PivotGroupBy<T>.count(crossinline predicate: RowFilter<T>): DataFrame<T> =
69317
aggregate {
70-
count(predicate) default
71-
0
318+
count(predicate) default 0
72319
}
73320

74321
// endregion

0 commit comments

Comments
 (0)