@@ -435,6 +435,8 @@ fun main(args: Array<String>) {
435435 // "median" to { it["Species"].mean() }
436436 // )
437437
438+ irisData.print ()
439+ irisData.schema()
438440 irisData.select { startsWith(" Length" ) }.head().print ()
439441 irisData.summarizeAt({ startsWith(" Length" ) }) {
440442 add({ mean() }, " mean" )
@@ -612,10 +614,10 @@ fun DataFrame.asString(
612614 // do the actual printing
613615 val sb = StringBuilder ()
614616
615- sb.appendln (" ${title} : ${nrow} x ${ncol} " )
617+ sb.appendLine (" ${title} : ${nrow} x ${ncol} " )
616618
617619 if (this is GroupedDataFrame ) {
618- sb.appendln (" Groups: ${by.joinToString()} [${groups.size} ]" )
620+ sb.appendLine (" Groups: ${by.joinToString()} [${groups.size} ]" )
619621 }
620622
621623
@@ -632,15 +634,15 @@ fun DataFrame.asString(
632634 if (colNames) widthTrimmed.cols.mapIndexed { index, col ->
633635 col.name.padStart(padding[index])
634636 }.joinToString(" " ).apply {
635- sb.appendln (this )
637+ sb.appendLine (this )
636638 }
637639
638640
639641 widthTrimmed.rows.map { it.values }.map { rowData ->
640642 // show null as NA when printing data
641643 rowData.mapIndexed { index, value ->
642644 valuePrinter(value).padStart(padding[index])
643- }.joinToString(" " ).apply { sb.appendln (this ) }
645+ }.joinToString(" " ).apply { sb.appendLine (this ) }
644646 }
645647
646648 // similar to dplyr render a summary below the table
@@ -687,34 +689,106 @@ fun List<ColSpec>.asDf() = deparseRecords { mapOf("index" to it.pos, "name" to i
687689
/** Prints this list of column specs by converting it to a data frame via [asDf] and printing that. */
fun List<ColSpec>.print() = this.asDf().print()
689691
/**
 * `true` when the class `jupyter.kotlin.KotlinContext` can be loaded, which is used
 * as the signal that the code runs inside a Kotlin Jupyter notebook.
 *
 * Evaluated lazily on first access and cached afterwards.
 */
internal val IS_JUPYTER: Boolean by lazy {
    try {
        // The class name must not carry any surrounding whitespace; otherwise the
        // lookup can never succeed and the flag would always be false.
        Class.forName("jupyter.kotlin.KotlinContext")
        true
    } catch (e: ClassNotFoundException) {
        // marker class is absent, so this is not a notebook session
        false
    }
}
690702
691- // see https://spark.apache.org/docs/latest/sql-programming-guide.html#untyped-dataset-operations-aka-dataframe-operations
692- /* *
693- * Prints the schema (that is column names, types, and the first few values per column) of a dataframe to stdout.
694- */
695- fun DataFrame.schema (maxDigits : Int = 3, maxWidth : Int = PRINT_MAX_WIDTH ) {
696- if (this is GroupedDataFrame ) {
697- ungroup().schema(maxDigits, maxWidth)
698- return
/**
 * Schema summary of a [DataFrame]: for every column its name, its type label and a
 * preview of its leading values. Renderable as plain text via [toString] or as an
 * HTML table via [toHTML].
 *
 * @param df the data frame to describe
 * @param maxDigits handed to `createValuePrinter` when formatting the previewed values
 * @param maxWidth maximum length of the joined value preview of a column
 */
class DataFrameSchema(
    private val df: DataFrame,
    private val maxDigits: Int = 3,
    private val maxWidth: Int = PRINT_MAX_WIDTH
) {

    /** One schema entry: column name, type label and the stringified value preview. */
    private class SchemaRow(val name: String, val typeLabel: String, val preview: String)

    /** Builds one [SchemaRow] per column of [df]; shared by the text and the HTML renderer. */
    private fun schemaRows(): List<SchemaRow> = with(df) {
        cols.map { col ->
            // only the first 255 values of a column are considered for the preview
            val preview = col.values().take(255).asSequence()
                .joinToMaxLengthString(maxLength = maxWidth, transform = createValuePrinter(maxDigits))

            SchemaRow(col.name, getColumnType(col, wrapSquares = true), preview)
        }
    }

    /** Replaces the characters that are significant in HTML markup with their entities. */
    private fun escapeHtml(text: String): String = buildString(text.length) {
        for (ch in text) {
            when (ch) {
                '&' -> append("&amp;")
                '<' -> append("&lt;")
                '>' -> append("&gt;")
                '"' -> append("&quot;")
                '\'' -> append("&#39;")
                else -> append(ch)
            }
        }
    }

    /**
     * Renders the schema as text: a header line with the number of observations,
     * followed by one line per column with name and type label padded to a common width.
     */
    override fun toString(): String {
        val rows = schemaRows()
        val sb = StringBuilder()

        sb.appendLine(" DataFrame with ${df.nrow} observations")

        val namePadding = rows.map { it.name.length }.maxOrNull() ?: 0
        val typePadding = rows.map { it.typeLabel.length }.maxOrNull() ?: 0

        rows.forEach { row ->
            sb.appendLine(" ${row.name.padEnd(namePadding)} ${row.typeLabel.padEnd(typePadding)} ${row.preview} ")
        }

        return sb.toString()
    }

    /**
     * Renders the schema as an HTML document containing a table with the columns
     * `Name`, `Type` and `Values`, followed by the number of observations.
     *
     * Column names, type labels and value previews are HTML-escaped, so data
     * containing `<`, `>` or `&` cannot break or inject markup.
     */
    fun toHTML(): String {
        return StringBuilder().apply {

            append(" <html><body>")

            append(" <table>")

            // render header
            append(" <tr>")
            listOf(" Name", " Type", " Values").forEach { append(""" <th style="text-align:left">${it} </th>""") }
            append(" </tr>")

            schemaRows().forEach { row ->
                // every data row needs its own opening tag to pair with the closing one below
                append(" <tr>")

                append(""" <td style="text-align:left">${escapeHtml(row.name)} </td>""")
                append(""" <td style="text-align:left">${escapeHtml(row.typeLabel)} </td>""")
                append(""" <td style="text-align:left">${escapeHtml(row.preview)} </td>""")

                append(" </tr>")
            }

            append(" </table>")

            appendLine(" DataFrame with ${df.nrow} observations")

            append(" </body></html>")
        }.toString()
    }
}
774+
// see https://spark.apache.org/docs/latest/sql-programming-guide.html#untyped-dataset-operations-aka-dataframe-operations
/**
 * Creates the schema (that is column names, types, and the first few values per column) of a dataframe.
 *
 * A grouped data frame is ungrouped first. Unless [IS_JUPYTER] is set, the textual
 * rendering of the schema is also printed to stdout.
 *
 * @param maxDigits forwarded to [DataFrameSchema]
 * @param maxWidth forwarded to [DataFrameSchema]
 * @return the schema object of this data frame
 */
fun DataFrame.schema(maxDigits: Int = 3, maxWidth: Int = PRINT_MAX_WIDTH): DataFrameSchema {
    // grouped frames are described through their ungrouped counterpart
    if (this is GroupedDataFrame) return ungroup().schema(maxDigits, maxWidth)

    val schema = DataFrameSchema(this, maxDigits, maxWidth)

    if (!IS_JUPYTER) {
        println(schema.toString())
    }

    return schema
}
717790
791+
718792internal fun getColumnType (col : DataCol , wrapSquares : Boolean = false): String {
719793 return when (col) {
720794 is DoubleCol -> " Dbl"
0 commit comments