@@ -8,10 +8,24 @@ import org.jetbrains.kotlinx.dataframe.Predicate
8
8
import org.jetbrains.kotlinx.dataframe.RowFilter
9
9
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
10
10
import org.jetbrains.kotlinx.dataframe.annotations.Refine
11
+ import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
12
+ import org.jetbrains.kotlinx.dataframe.documentation.RowFilterDescription
11
13
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateValue
12
14
13
15
// region DataColumn
14
16
17
+ /* *
18
+ * Counts the elements in this [DataColumn] that satisfy a given [predicate] or returns the total count
19
+ * if no predicate is provided.
20
+ *
21
+ * For more information: {@include [DocumentationUrls.Count]}
22
+ *
23
+ * @param predicate An optional predicate used to filter the elements.
24
+ * The predicate should return `true` for elements to be counted.
25
+ * If `null` (by default), all elements are counted.
26
+ * @return The number of elements in the column that match the predicate,
27
+ * or the total number of elements if no predicate is provided.
28
+ */
15
29
public fun <T > DataColumn<T>.count (predicate : Predicate <T >? = null): Int =
16
30
if (predicate == null ) {
17
31
size()
@@ -23,27 +37,123 @@ public fun <T> DataColumn<T>.count(predicate: Predicate<T>? = null): Int =
23
37
24
38
// region DataRow
25
39
40
+ /* *
41
+ * Returns the number of columns in this [DataRow].
42
+ *
43
+ * @return the number of columns in this row.
44
+ * @see columnsCount
45
+ */
26
46
public fun AnyRow.count(): Int {
    // For a single row, "count" means its width: the number of columns.
    return columnsCount()
}
27
47
48
+ /* *
49
+ * Counts the number of elements in the current row that satisfy the given [predicate].
50
+ *
51
+ * @param predicate A predicate function to test each element.
52
+ * The predicate should return `true` for elements to be counted.
53
+ * @return The number of elements that satisfy the predicate.
54
+ */
28
55
public inline fun AnyRow.count(predicate: Predicate<Any?>): Int {
    // Take the row's cell values and tally those accepted by the predicate.
    val cells = values()
    return cells.count(predicate)
}
29
56
30
57
// endregion
31
58
32
59
// region DataFrame
33
60
61
+ /* *
62
+ * Returns the total number of rows of this [DataFrame].
63
+ *
64
+ * For more information: {@include [DocumentationUrls.Count]}
65
+ *
66
+ * @return The number of rows in the [DataFrame].
67
+ */
34
68
public fun <T> DataFrame<T>.count(): Int {
    // Without a predicate, the count of a frame is just its number of rows.
    return rowsCount()
}
35
69
70
+ /* *
71
+ * Counts the number of rows in this [DataFrame] that satisfy the given [predicate].
72
+ *
73
+ * {@include [RowFilterDescription]}
74
+ *
75
+ * See also:
76
+ * - [filter][DataFrame.filter] — filters rows using a [RowFilter] condition.
77
+ * - [countDistinct][DataFrame.countDistinct] — counts distinct rows or values.
78
+ *
79
+ * For more information: {@include [DocumentationUrls.Count]}
80
+ *
81
+ * ### Example
82
+ * ```kotlin
83
+ * // Count rows where the value in the "age" column is greater than 18
84
+ * // and the "name/firstName" column starts with 'A'
85
+ * df.count { age > 18 && name.firstName.startsWith("A") }
86
+ * // Count rows whose previous row has a "length" value of at least 50.0
87
+ * df.count { prev()?.let { it.length >= 50.0 } ?: false }
88
+ * ```
89
+ *
90
+ * @param T The schema marker type of the [DataFrame].
91
+ * @param predicate A [RowFilter] that returns `true` for rows that should be counted.
92
+ * @return The number of rows that satisfy the predicate.
93
+ */
36
94
public inline fun <T> DataFrame<T>.count(predicate: RowFilter<T>): Int =
    rows().count { row ->
        // The filter is invoked with the row as both its receiver and its explicit argument.
        predicate(row, row)
    }
37
95
38
96
// endregion
39
97
40
98
// region GroupBy
41
99
100
+ /* *
101
+ * Aggregates this [GroupBy] by counting the number of rows in each group.
102
+ *
103
+ * Returns a new [DataFrame] where each row corresponds to a group.
104
+ * The resulting frame contains:
105
+ * - the original group key columns,
106
+ * - a new column (named [resultName], default is `"count"`) that contains the number of rows in each group.
107
+ *
108
+ * This is equivalent to applying `.aggregate { count() }`, but more efficient.
109
+ *
110
+ * See also [DataFrame.groupBy] and common [aggregate][Grouped.aggregate].
111
+ *
112
+ * For more information: {@include [DocumentationUrls.Count]}
113
+ *
114
+ * ### Example
115
+ * ```kotlin
116
+ * // Counts number of rows for each city, returning
117
+ * // a new DataFrame with columns "city" and "count"
118
+ * df.groupBy { city }.count()
119
+ * ```
120
+ *
121
+ * @param resultName The name of the result column that will store the group sizes. Defaults to `"count"`.
122
+ * @return A new [DataFrame] with group keys and corresponding group sizes.
123
+ */
42
124
@Refine
43
125
@Interpretable(" GroupByCount0" )
44
126
public fun <T> Grouped<T>.count(resultName: String = "count"): DataFrame<T> {
    // NOTE(review): `default 0` presumably supplies the value used when a group yields
    // nothing to count — confirm against the aggregation DSL.
    return aggregateValue(resultName) { count() default 0 }
}
46
128
129
+ /* *
130
+ * Aggregates this [GroupBy] by counting the number of rows in each group
131
+ * that satisfy the given [predicate].
132
+ *
133
+ * {@include [RowFilterDescription]}
134
+ *
135
+ * Returns a new [DataFrame] where each row corresponds to a group.
136
+ * The resulting frame contains:
137
+ * - the original group key columns,
138
+ * - a new column (named [resultName], defaults to `"count"`)
139
+ * that stores the number of rows in each group matching the [predicate].
140
+ *
141
+ * This is equivalent to calling `.aggregate { count(predicate) }`, but more efficient.
142
+ *
143
+ * See also [DataFrame.groupBy] and common [aggregate][Grouped.aggregate].
144
+ *
145
+ * For more information: {@include [DocumentationUrls.Count]}
146
+ *
147
+ * ### Example
148
+ * ```kotlin
149
+ * // Count rows for each city where the "income" value is greater than 30.0.
150
+ * // Returns a new DataFrame with columns "city" and "pointsCount".
151
+ * df.groupBy { city }.count("pointsCount") { income > 30.0 }
152
+ * ```
153
+ *
154
+ * @param resultName The name of the result column containing the group sizes. Defaults to `"count"`.
155
+ * @return A new [DataFrame] with group keys and filtered row counts per group.
156
+ */
47
157
@Refine
48
158
@Interpretable(" GroupByCount0" )
49
159
public inline fun <T > Grouped<T>.count (
@@ -55,20 +165,157 @@ public inline fun <T> Grouped<T>.count(
55
165
56
166
// region Pivot
57
167
168
+ /* *
169
+ * Aggregates this [Pivot] by counting the number of rows in each group.
170
+ *
171
+ * Returns a single [DataRow] where:
172
+ * - each column corresponds to a [pivot] group — if multiple pivot keys were used,
173
+ * the result will contain column groups for each pivot key, with columns inside
174
+ * corresponding to the values of that key;
175
+ * - each value contains the number of rows in that group.
176
+ *
177
+ * The original [Pivot] column structure is preserved.
178
+ * If the [Pivot] was created using multiple or nested keys
179
+ * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]),
180
+ * the structure remains unchanged — only the contents of each group
181
+ * are replaced with the number of rows in that group.
182
+ *
183
+ * This is equivalent to calling `.aggregate { count() }`, but more efficient.
184
+ *
185
+ * See also:
186
+ * - [pivot].
187
+ * - common [aggregate][Pivot.aggregate].
188
+ * - [pivotCounts][DataFrame.pivotCounts] shortcut.
189
+ *
190
+ * For more information: {@include [DocumentationUrls.Count]}
191
+ *
192
+ * ### Example
193
+ * ```kotlin
194
+ * // Count the number of rows for each city.
195
+ * // Returns a single DataRow with one column per city and the count of rows in each.
196
+ * df.pivot { city }.count()
197
+ * ```
198
+ *
199
+ * @return A single [DataRow] with one column per group and the corresponding group size as its value.
200
+ */
58
201
public fun <T> Pivot<T>.count(): DataRow<T> {
    // The counting is performed by the `count()` that resolves inside the `delegate` lambda.
    return delegate { count() }
}
59
202
203
+ /* *
204
+ * Aggregates this [Pivot] by counting the number of rows in each group
205
+ * that satisfy the given [predicate].
206
+ *
207
+ * {@include [RowFilterDescription]}
208
+ *
209
+ * Returns a single [DataRow] where:
210
+ * - each column corresponds to a [pivot] group — if multiple pivot keys were used,
211
+ * the result will contain column groups for each pivot key, with columns inside
212
+ * corresponding to the values of that key;
213
+ * - each value contains the number of rows in that group matching the [predicate].
214
+ *
215
+ * The original [Pivot] column structure is preserved.
216
+ * If the [Pivot] was created using multiple or nested keys
217
+ * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]),
218
+ * the structure remains unchanged — only the contents of each group
219
+ * are replaced with the number of rows (matching the [predicate]) in that group.
220
+ *
221
+ * This is equivalent to calling `.aggregate { count(predicate) }`, but more efficient.
222
+ *
223
+ * See also:
224
+ * - [pivot].
225
+ * - common [aggregate][Pivot.aggregate].
226
+ * - [pivotCounts][DataFrame.pivotCounts] shortcut.
227
+ *
228
+ * For more information: {@include [DocumentationUrls.Count]}
229
+ *
230
+ * ### Example
231
+ * ```kotlin
232
+ * // Count rows for each city where the "income" value is greater than 30.0.
233
+ * // Returns a single DataRow with one column per city and the count of matching rows.
234
+ * df.pivot { city }.count { income > 30.0 }
235
+ * ```
236
+ *
237
+ * @param predicate A [RowFilter] that returns `true` for rows that should be counted.
+ * @return A single [DataRow] with original [Pivot] columns and filtered row counts per group.
238
+ */
60
239
public inline fun <T> Pivot<T>.count(crossinline predicate: RowFilter<T>): DataRow<T> =
    delegate {
        // Same delegation as the unfiltered overload, with the row filter forwarded.
        count(predicate)
    }
61
240
62
241
// endregion
63
242
64
243
// region PivotGroupBy
65
244
245
+ /* *
246
+ * Aggregates this [PivotGroupBy] by counting the number of rows in each
247
+ * combined [pivot] + [groupBy] group.
248
+ *
249
+ * Returns a new [DataFrame] containing the following matrix:
250
+ * - one row per [groupBy] key (or keys set);
251
+ * - one column group per [pivot] key, where each inner column corresponds to a value of that key;
252
+ * - each cell contains the number of rows in the corresponding pivot–group pair.
253
+ *
254
+ * The original [Pivot] column structure is preserved.
255
+ * If the [Pivot] was created using multiple or nested keys
256
+ * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]),
257
+ * the result will contain nested column groups reflecting that key structure,
258
+ * with each group containing columns for the values of the corresponding key.
259
+ *
260
+ * This is equivalent to calling `.aggregate { count() }`, but more efficient.
261
+ *
262
+ * See also:
263
+ * - [pivot], [DataFrame.groupBy], [Pivot.groupBy] and [GroupBy.pivot].
264
+ * - common [aggregate][PivotGroupBy.aggregate];
265
+ * - [GroupBy.pivotCounts] shortcut.
266
+ *
267
+ * For more information: {@include [DocumentationUrls.Count]}
268
+ *
269
+ * ### Example
270
+ * ```kotlin
271
+ * // Compute a matrix with "city" values horizontally and
272
+ * // "age" values vertically, where each cell contains
273
+ * // the number of rows with the corresponding age–city pair.
274
+ * df.pivot { city }.groupBy { age }.count()
275
+ * ```
276
+ *
277
+ * @return A [DataFrame] with [groupBy] rows and pivoted counts as columns.
278
+ */
66
279
public fun <T> PivotGroupBy<T>.count(): DataFrame<T> {
    // NOTE(review): `default 0` presumably fills pivot–group cells that have no rows
    // — confirm against the aggregation DSL.
    return aggregate { count() default 0 }
}
67
280
281
+ /* *
282
+ * Aggregates this [PivotGroupBy] by counting the number of rows in each
283
+ * combined [pivot] + [groupBy] group, that satisfy the given [predicate].
284
+ *
285
+ * Returns a new [DataFrame] containing the following matrix:
286
+ * - one row per [groupBy] key (or keys set);
287
+ * - one column group per [pivot] key, where each inner column corresponds to a value of that key;
288
+ * - each cell contains the number of rows in the corresponding pivot–group pair that match the [predicate].
289
+ *
290
+ * The original [Pivot] column structure is preserved.
291
+ * If the [Pivot] was created using multiple or nested keys
292
+ * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]),
293
+ * the result will contain nested column groups reflecting that key structure,
294
+ * with each group containing columns for the values
295
+ * of the corresponding key (cells count only the rows matching the [predicate]).
296
+ *
297
+ * This is equivalent to calling `.aggregate { count(predicate) }`, but more efficient.
298
+ *
299
+ * See also:
300
+ * - [pivot], [DataFrame.groupBy], [Pivot.groupBy] and [GroupBy.pivot].
301
+ * - common [aggregate][PivotGroupBy.aggregate];
302
+ * - [GroupBy.pivotCounts] shortcut.
303
+ *
304
+ * For more information: {@include [DocumentationUrls.Count]}
305
+ *
306
+ * ### Example
307
+ * ```kotlin
308
+ * // Compute a matrix with "city" values horizontally and
309
+ * // "age" values vertically, where each cell contains
310
+ * // the number of rows with the corresponding age–city pair where "income" is greater than 30.0.
310
+ * df.pivot { city }.groupBy { age }.count { income > 30.0 }
312
+ * ```
313
+ *
314
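+ * @param predicate A [RowFilter] that returns `true` for rows that should be counted.
+ * @return A [DataFrame] with [groupBy] rows and pivoted columns holding the counts of rows matching the [predicate].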
315
+ */
68
316
public inline fun <T> PivotGroupBy<T>.count(crossinline predicate: RowFilter<T>): DataFrame<T> {
    // NOTE(review): `default 0` presumably fills pivot–group cells in which no row
    // passes the filter — confirm against the aggregation DSL.
    return aggregate { count(predicate) default 0 }
}
73
320
74
321
// endregion
0 commit comments