@@ -18,9 +18,10 @@ use arrow::compute::{
1818use arrow:: row:: { RowConverter , SortField } ;
1919use arrow_array:: types:: TimestampNanosecondType ;
2020use arrow_array:: {
21- Array , BooleanArray , PrimitiveArray , RecordBatch , StructArray , TimestampNanosecondArray ,
21+ Array , ArrayRef , BooleanArray , PrimitiveArray , RecordBatch , StructArray ,
22+ TimestampMicrosecondArray , TimestampMillisecondArray , TimestampNanosecondArray , TimestampSecondArray ,
2223} ;
23- use arrow_schema:: { DataType , Field , FieldRef , Schema } ;
24+ use arrow_schema:: { DataType , Field , FieldRef , Schema , TimeUnit } ;
2425use datafusion:: execution:: context:: SessionContext ;
2526use datafusion:: execution:: runtime_env:: RuntimeEnvBuilder ;
2627use datafusion:: execution:: SendableRecordBatchStream ;
@@ -30,6 +31,7 @@ use datafusion_proto::protobuf::PhysicalPlanNode;
3031use futures:: StreamExt ;
3132use prost:: Message ;
3233use std:: collections:: { BTreeMap , HashMap , HashSet } ;
34+ use tracing:: info;
3335use std:: sync:: { Arc , RwLock } ;
3436use std:: time:: { Duration , SystemTime } ;
3537use tokio:: sync:: mpsc:: { unbounded_channel, UnboundedReceiver , UnboundedSender } ;
@@ -315,6 +317,53 @@ fn start_time_for_sorted_batch(batch: &RecordBatch, schema: &FsSchema) -> System
315317 from_nanos ( timestamp_array. value ( 0 ) as u128 )
316318}
317319
320+ /// Appends the stream `_timestamp` column (see [`build_session_output_schema`]) using each
321+ /// session's `window_end` as the row event time.
322+ fn append_output_timestamp_column (
323+ columns : & mut Vec < ArrayRef > ,
324+ session_results : & [ SessionWindowResult ] ,
325+ ts_field : & Field ,
326+ ) -> Result < ( ) > {
327+ let nanos = |r : & SessionWindowResult | to_nanos ( r. window_end ) as i64 - 1 ;
328+ match ts_field. data_type ( ) {
329+ DataType :: Timestamp ( TimeUnit :: Second , tz) => {
330+ let v: Vec < i64 > = session_results
331+ . iter ( )
332+ . map ( |r| ( nanos ( r) / 1_000_000_000 ) )
333+ . collect ( ) ;
334+ columns. push ( Arc :: new (
335+ TimestampSecondArray :: from ( v) . with_timezone_opt ( tz. clone ( ) ) ,
336+ ) ) ;
337+ }
338+ DataType :: Timestamp ( TimeUnit :: Millisecond , tz) => {
339+ let v: Vec < i64 > = session_results
340+ . iter ( )
341+ . map ( |r| ( nanos ( r) / 1_000_000 ) )
342+ . collect ( ) ;
343+ columns. push ( Arc :: new (
344+ TimestampMillisecondArray :: from ( v) . with_timezone_opt ( tz. clone ( ) ) ,
345+ ) ) ;
346+ }
347+ DataType :: Timestamp ( TimeUnit :: Microsecond , tz) => {
348+ let v: Vec < i64 > = session_results
349+ . iter ( )
350+ . map ( |r| ( nanos ( r) / 1000 ) )
351+ . collect ( ) ;
352+ columns. push ( Arc :: new (
353+ TimestampMicrosecondArray :: from ( v) . with_timezone_opt ( tz. clone ( ) ) ,
354+ ) ) ;
355+ }
356+ DataType :: Timestamp ( TimeUnit :: Nanosecond , tz) => {
357+ let v: Vec < i64 > = session_results. iter ( ) . map ( |r| nanos ( r) ) . collect ( ) ;
358+ columns. push ( Arc :: new (
359+ TimestampNanosecondArray :: from ( v) . with_timezone_opt ( tz. clone ( ) ) ,
360+ ) ) ;
361+ }
362+ dt => bail ! ( "unsupported timestamp type for session window output: {dt}" ) ,
363+ }
364+ Ok ( ( ) )
365+ }
366+
318367fn build_session_output_schema (
319368 input : & FsSchema ,
320369 window_field : FieldRef ,
@@ -590,6 +639,9 @@ impl SessionWindowOperator {
590639 columns. insert ( self . config . window_index , Arc :: new ( window_struct_array) ) ;
591640 columns. extend_from_slice ( merged_batch. columns ( ) ) ;
592641
642+ let ts_field = self . config . input_schema_ref . schema . field ( self . config . input_schema_ref . timestamp_index ) ;
643+ append_output_timestamp_column ( & mut columns, & session_results, ts_field) ?;
644+
593645 RecordBatch :: try_new ( self . config . output_schema . clone ( ) , columns)
594646 . context ( "failed to create session window output batch" )
595647 }
0 commit comments