Skip to content
This repository was archived by the owner on Jan 28, 2023. It is now read-only.

Commit c7ad05c

Browse files
committed
Improved jupyter support for schema
1 parent 862e878 commit c7ad05c

File tree

8 files changed

+440
-32
lines changed

8 files changed

+440
-32
lines changed

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,4 +95,7 @@ ExcelWriteResult.xlsx
9595

9696
# sonatype credentials
9797
local.properties
98-
*.gpg
98+
*.gpg
99+
100+
# jupyter
101+
.ipynb_checkpoints

docs/devel.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,3 +147,30 @@ val df = Dataframe(df.structure().target.selectWhere(column("Column Type").isEqu
147147
```
148148

149149

150+
# Jupyter Integration
151+
152+
dev scratchpad
153+
154+
```bash
155+
export KRANGL_HOME=/d/projects/misc/krangl/
156+
cd $KRANGL_HOME
157+
158+
# start kernel
159+
cmd.exe "/K" C:\Users\brandl\Anaconda3\Scripts\activate.bat C:\Users\brandl\Anaconda3
160+
161+
# no longer needed because no part of ipynb preamble
162+
#rm -rf ~/.ivy2/cache/com.systema/
163+
#rm -rf ~/.ivy2/cache/org.kalasim/
164+
#rm -rf ~/.ivy2/cache/com.github.holgerbrandl/kravis/
165+
166+
#conda install -c jetbrains kotlin-jupyter-kernel
167+
# interactive use
168+
jupyter notebook --kernel=kotlin examples/jupyter/letsplot_example.ipynb
169+
```
170+
171+
References
172+
173+
* https://github.com/Kotlin/kotlin-jupyter/blob/master/docs/libraries.md
174+
* Many `letsplot` examples https://nbviewer.jupyter.org/github/JetBrains/lets-plot-kotlin/blob/master/docs/guide/user_guide.ipynb
175+
e
176+

examples/jupyter/letsplot_example.ipynb

Lines changed: 286 additions & 0 deletions
Large diffs are not rendered by default.

src/main/kotlin/krangl/Extensions.kt

Lines changed: 95 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,8 @@ fun main(args: Array<String>) {
435435
// "median" to { it["Species"].mean() }
436436
// )
437437

438+
irisData.print()
439+
irisData.schema()
438440
irisData.select { startsWith("Length") }.head().print()
439441
irisData.summarizeAt({ startsWith("Length") }) {
440442
add({ mean() }, "mean")
@@ -612,10 +614,10 @@ fun DataFrame.asString(
612614
// do the actual printing
613615
val sb = StringBuilder()
614616

615-
sb.appendln("${title}: ${nrow} x ${ncol}")
617+
sb.appendLine("${title}: ${nrow} x ${ncol}")
616618

617619
if (this is GroupedDataFrame) {
618-
sb.appendln("Groups: ${by.joinToString()} [${groups.size}]")
620+
sb.appendLine("Groups: ${by.joinToString()} [${groups.size}]")
619621
}
620622

621623

@@ -632,15 +634,15 @@ fun DataFrame.asString(
632634
if (colNames) widthTrimmed.cols.mapIndexed { index, col ->
633635
col.name.padStart(padding[index])
634636
}.joinToString("").apply {
635-
sb.appendln(this)
637+
sb.appendLine(this)
636638
}
637639

638640

639641
widthTrimmed.rows.map { it.values }.map { rowData ->
640642
// show null as NA when printing data
641643
rowData.mapIndexed { index, value ->
642644
valuePrinter(value).padStart(padding[index])
643-
}.joinToString("").apply { sb.appendln(this) }
645+
}.joinToString("").apply { sb.appendLine(this) }
644646
}
645647

646648
// similar to dplyr render a summary below the table
@@ -687,34 +689,106 @@ fun List<ColSpec>.asDf() = deparseRecords { mapOf("index" to it.pos, "name" to i
687689

688690
fun List<ColSpec>.print() = asDf().print()
689691

692+
internal val IS_JUPYTER by lazy {
693+
try {
694+
// check if we are in a notebook
695+
Class.forName("jupyter.kotlin.KotlinContext")
696+
true
697+
} catch (e: ClassNotFoundException) {
698+
// it's not jupyter
699+
false
700+
}
701+
}
690702

691-
// see https://spark.apache.org/docs/latest/sql-programming-guide.html#untyped-dataset-operations-aka-dataframe-operations
692-
/**
693-
* Prints the schema (that is column names, types, and the first few values per column) of a dataframe to stdout.
694-
*/
695-
fun DataFrame.schema(maxDigits: Int = 3, maxWidth: Int = PRINT_MAX_WIDTH) {
696-
if (this is GroupedDataFrame) {
697-
ungroup().schema(maxDigits, maxWidth)
698-
return
703+
class DataFrameSchema(
704+
private val df: DataFrame,
705+
private val maxDigits: Int = 3,
706+
private val maxWidth: Int = PRINT_MAX_WIDTH
707+
) {
708+
override fun toString(): String {
709+
val sb = StringBuilder()
710+
711+
with(df) {
712+
val topN = this
713+
sb.appendLine("DataFrame with ${nrow} observations")
714+
715+
val typeLabels = topN.cols.map { col -> getColumnType(col, wrapSquares = true) }
716+
717+
val namePadding = topN.cols.map { it.name.length }.maxOrNull() ?: 0
718+
val typePadding = typeLabels.map { it.length }.maxOrNull() ?: 0
719+
720+
topN.cols.zip(typeLabels).forEach { (col, typeLabel) ->
721+
val stringifiedVals = col.values().take(255).asSequence()
722+
.joinToMaxLengthString(maxLength = maxWidth, transform = createValuePrinter(maxDigits))
723+
724+
725+
sb.appendLine("${col.name.padEnd(namePadding)} ${typeLabel.padEnd(typePadding)} $stringifiedVals")
726+
}
727+
}
728+
729+
return sb.toString()
699730
}
700731

701-
val topN = this
702-
println("DataFrame with ${nrow} observations")
732+
fun toHTML(): String {
733+
return StringBuilder().apply {
734+
735+
append("<html><body>")
736+
737+
append("<table>")
738+
739+
// render header
740+
append("<tr>")
741+
listOf("Name", "Type", "Values").forEach { append("""<th style="text-align:left">${it}</th>""") }
742+
append("</tr>")
703743

704-
val namePadding = topN.cols.map { it.name.length }.maxOrNull() ?: 0
744+
with(df) {
745+
val topN = this
705746

706-
val typeLabels = topN.cols.map { col -> getColumnType(col, wrapSquares = true) }
747+
val typeLabels = topN.cols.map { col -> getColumnType(col, wrapSquares = true) }
707748

708-
val typePadding = typeLabels.map { it.length }.maxOrNull() ?: 0
709749

710-
topN.cols.zip(typeLabels).forEach { (col, typeLabel) ->
711-
val stringifiedVals = col.values().take(255).asSequence()
712-
.joinToMaxLengthString(maxLength = maxWidth, transform = createValuePrinter(maxDigits))
750+
topN.cols.zip(typeLabels).forEach { (col, typeLabel) ->
751+
val stringifiedVals = col.values().take(255).asSequence()
752+
.joinToMaxLengthString(maxLength = maxWidth, transform = createValuePrinter(maxDigits))
713753

714-
println("${col.name.padEnd(namePadding)} ${typeLabel.padEnd(typePadding)} $stringifiedVals")
754+
// cols.forEach { append("""<th style=\\" text -align:left\\">${it.name}</th>""") }
755+
756+
append("</tr>")
757+
758+
append("""<td style="text-align:left">${col.name}</td>""")
759+
append("""<td style="text-align:left">${typeLabel}</td>""")
760+
append("""<td style="text-align:left">${stringifiedVals}</td>""")
761+
762+
append("</tr>")
763+
}
764+
}
765+
766+
append("</table>")
767+
768+
appendLine("DataFrame with ${df.nrow} observations")
769+
770+
append("</body></html>")
771+
}.toString()
772+
}
773+
}
774+
775+
// see https://spark.apache.org/docs/latest/sql-programming-guide.html#untyped-dataset-operations-aka-dataframe-operations
776+
/**
777+
* Prints the schema (that is column names, types, and the first few values per column) of a dataframe to stdout.
778+
*/
779+
fun DataFrame.schema(maxDigits: Int = 3, maxWidth: Int = PRINT_MAX_WIDTH): DataFrameSchema {
780+
if (this is GroupedDataFrame) {
781+
return ungroup().schema(maxDigits, maxWidth)
782+
}
783+
784+
return DataFrameSchema(this, maxDigits, maxWidth).apply {
785+
if (!IS_JUPYTER) {
786+
println(toString())
787+
}
715788
}
716789
}
717790

791+
718792
internal fun getColumnType(col: DataCol, wrapSquares: Boolean = false): String {
719793
return when (col) {
720794
is DoubleCol -> "Dbl"

src/main/kotlin/krangl/LetsPlot.kt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1+
package krangl
2+
13
import jetbrains.letsPlot.intern.GenericAesMapping
2-
import krangl.DataFrame
3-
import krangl.toMap
44

55
/** Plot a data-frame with lets-plot. To use this mapping add `implementation("org.jetbrains.lets-plot:lets-plot-kotlin-jvm:3.0.1")` or via `%use lets-plot` when using jupyter. */
6-
fun DataFrame.letsPlot(mapping: GenericAesMapping.() -> Unit = {}) = jetbrains.letsPlot.letsPlot(toMap())
6+
fun DataFrame.letsPlot(mapping: GenericAesMapping.() -> Unit = {}) = jetbrains.letsPlot.letsPlot(toMap(), mapping)
77

88

99
//fun main() {

src/main/kotlin/krangl/integration/Integration.kt

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package krangl.integration
22

3+
import krangl.DataFrame
4+
import krangl.DataFrameSchema
35
import krangl.GroupedDataFrame
46
import krangl.SimpleDataFrame
57
import org.jetbrains.kotlinx.jupyter.api.HTML
@@ -16,16 +18,20 @@ internal class Integration : JupyterIntegration() {
1618
import("krangl.*")
1719
render<SimpleDataFrame> { HTML(it.toHTML()) }
1820
render<GroupedDataFrame> { HTML(it.toHTML()) }
21+
render<DataFrameSchema> { HTML(it.toHTML()) }
1922
}
2023

21-
fun krangl.DataFrame.toHTML(limit: Int = 20, truncate: Int = 50): String = with(StringBuilder()) {
24+
fun DataFrame.toHTML(title: String="A DataFrame", maxRows: Int = 6, truncate: Int = 50): String = with(StringBuilder()) {
2225
append("<html><body>")
26+
27+
28+
2329
append("<table><tr>")
2430

25-
cols.forEach { append("""<th style=\\" text -align:left\\">${it.name}</th>""") }
31+
cols.forEach { append("""<th style="text-align:left">${it.name}</th>""") }
2632
append("</tr>")
2733

28-
rows.take(limit).forEach {
34+
rows.take(maxRows).forEach {
2935
append("<tr>")
3036
it.values.map { it.toString() }.forEach {
3137
val truncated = if (truncate > 0 && it.length > truncate) {
@@ -41,8 +47,20 @@ internal class Integration : JupyterIntegration() {
4147

4248
append("</table>")
4349

44-
if (limit < rows.count())
45-
append("<p>... only showing top $limit rows</p>")
50+
// render footer
51+
append("<p>")
52+
if (maxRows < rows.count()){
53+
append("... with ${nrow - maxRows} more rows. ")
54+
}
55+
56+
appendLine("Shape: ${nrow} x ${ncol}. ")
57+
58+
if (this@toHTML is GroupedDataFrame) {
59+
appendLine("Grouped by ${by.joinToString()} [${groups.size}]")
60+
}
61+
append("</p>")
62+
63+
4664
append("</body></html>")
4765
}.toString()
4866
}

src/test/kotlin/krangl/test/CoreVerbsTest.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -682,7 +682,7 @@ class CoreTests {
682682
Petal.Width [Dbl] 0.2, 0.2, 0.2, 0.2, ...
683683
Species [Str] setosa, setosa, seto...
684684
id [Regex] foo1, foo2, foo3, fo...
685-
""".trimAndReline()
685+
""".trimIndent()
686686
}
687687

688688
@Test

src/test/kotlin/krangl/test/ReshapingTest.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ class NestingTests {
264264
DataFrame with 3 observations
265265
Species [Str] setosa, versicolor, virginica
266266
data [DataFrame] <DataFrame [50 x 4]>, <DataFrame [50 x 4]>, <DataFrame [50 x 4]>
267-
""".trimAndReline()
267+
""".trimIndent()
268268
}
269269
}
270270

0 commit comments

Comments
 (0)