tempoxyz · decofe · Mar 30, 2026 · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026
@@ -60,14 +60,15 @@ function cell(text) {
 }
 
 function fmtMs(v) { return v != null ? v.toFixed(2) + 'ms' : '-'; } // Two-decimal value with an 'ms' suffix; '-' when v is null or undefined.
+function fmtSeconds(v) { return v != null ? v.toFixed(2) + 's' : '-'; } // Two-decimal value with an 's' suffix; '-' when v is null or undefined.
 function fmtVal(v, suffix = '', precision = 2) { return v != null ? v.toFixed(precision) + suffix : '-'; } // v fixed to `precision` decimals (default 2) followed by `suffix` (default ''); '-' when v is null or undefined.
 
-function tempoBlockTimeDeltas(deltas) {
-  return [deltas.block_time_p50, deltas.block_time_p90, deltas.block_time_p99];
+function tempoTimingDeltas(deltas) {
+  return [deltas.wall_clock_s, deltas.block_time_p50, deltas.block_time_p90, deltas.block_time_p99];
 }
 
 function tempoThroughputDeltas(deltas) {
-  return [deltas.tps, deltas.tps_p50, deltas.tps_p90, deltas.tps_p99, deltas.mgas_s];
+  return [deltas.tps_p50, deltas.tps_p90, deltas.tps_p99, deltas.mgas_s];
 }
 
 function fmtDelta(pct) {
@@ -77,6 +78,14 @@ function fmtDelta(pct) {
   return `${sign}${pct.toFixed(2)}% ${emoji}`;
 }
 
+function fmtTimingDelta(pct, relativeStddevPct) { // Formats a percent delta as "<sign><pct>% (±<spread>%) <emoji>"; the spread part is optional.
+  if (pct == null) return ''; // null/undefined delta renders as an empty string
+  const sign = pct >= 0 ? '+' : ''; // negative values already carry their own '-' from toFixed
+  const emoji = classifyDelta(pct); // classification marker comes from classifyDelta, defined elsewhere in this file
+  const spread = relativeStddevPct != null ? ` (±${relativeStddevPct.toFixed(2)}%)` : ''; // omitted only for null/undefined; a numeric 0 still prints "(±0.00%)"
+  return `${sign}${pct.toFixed(2)}%${spread} ${emoji}`;
+}
+
 // For latency: negative = good (faster), positive = bad (slower)
 function classifyDelta(pct) {
   if (Math.abs(pct) < THRESHOLD_PCT) return '⚪';
@@ -97,12 +106,12 @@ function fmtDeltaInverse(pct) {
 }
 
 function verdict(deltas) {
-  const blockTimeDeltas = tempoBlockTimeDeltas(deltas);
+  const timingDeltas = tempoTimingDeltas(deltas);
   const throughputDeltas = tempoThroughputDeltas(deltas);
 
-  const hasBad = blockTimeDeltas.some(d => d != null && d > THRESHOLD_PCT) ||
+  const hasBad = timingDeltas.some(d => d != null && d > THRESHOLD_PCT) ||
                  throughputDeltas.some(d => d != null && d < -THRESHOLD_PCT);
-  const hasGood = blockTimeDeltas.some(d => d != null && d < -THRESHOLD_PCT) ||
+  const hasGood = timingDeltas.some(d => d != null && d < -THRESHOLD_PCT) ||
                   throughputDeltas.some(d => d != null && d > THRESHOLD_PCT);
 
   if (hasBad && hasGood) return { emoji: ':warning:', label: 'Mixed Results' };
@@ -112,7 +121,7 @@ function verdict(deltas) {
 }
 
 function hasSignificantChange(deltas) {
-  const all = [...tempoThroughputDeltas(deltas), ...tempoBlockTimeDeltas(deltas)];
+  const all = [...tempoThroughputDeltas(deltas), ...tempoTimingDeltas(deltas)];
   return all.some(d => d != null && Math.abs(d) >= THRESHOLD_PCT);
 }
 
@@ -134,14 +143,14 @@ function buildMetricRows(summary) {
   const f = summary.results.feature;
   const d = summary.results.deltas;
   return [
-    { label: 'Avg TPS',         baseline: fmtVal(b.tps, '', 0),     feature: fmtVal(f.tps, '', 0),     change: fmtDeltaInverse(d.tps) },
+    { label: 'Wall Clock',      baseline: fmtSeconds(b.wall_clock_s), feature: fmtSeconds(f.wall_clock_s), change: fmtTimingDelta(d.wall_clock_s, d.wall_clock_uncertainty_pct) },
     { label: 'TPS P50',         baseline: fmtVal(b.tps_p50, '', 1), feature: fmtVal(f.tps_p50, '', 1), change: fmtDeltaInverse(d.tps_p50) },
     { label: 'TPS P90',         baseline: fmtVal(b.tps_p90, '', 1), feature: fmtVal(f.tps_p90, '', 1), change: fmtDeltaInverse(d.tps_p90) },
     { label: 'TPS P99',         baseline: fmtVal(b.tps_p99, '', 1), feature: fmtVal(f.tps_p99, '', 1), change: fmtDeltaInverse(d.tps_p99) },
     { label: 'Gas/s',           baseline: fmtVal(b.mgas_s, ' Mgas/s', 1), feature: fmtVal(f.mgas_s, ' Mgas/s', 1), change: fmtDeltaInverse(d.mgas_s) },
-    { label: 'Block P50',       baseline: fmtMs(b.block_time_p50),  feature: fmtMs(f.block_time_p50),  change: fmtDelta(d.block_time_p50) },
-    { label: 'Block P90',       baseline: fmtMs(b.block_time_p90),  feature: fmtMs(f.block_time_p90),  change: fmtDelta(d.block_time_p90) },
-    { label: 'Block P99',       baseline: fmtMs(b.block_time_p99),  feature: fmtMs(f.block_time_p99),  change: fmtDelta(d.block_time_p99) },
+    { label: 'Block Time P50',  baseline: fmtMs(b.block_time_p50),  feature: fmtMs(f.block_time_p50),  change: fmtDelta(d.block_time_p50) },
+    { label: 'Block Time P90',  baseline: fmtMs(b.block_time_p90),  feature: fmtMs(f.block_time_p90),  change: fmtDelta(d.block_time_p90) },
+    { label: 'Block Time P99',  baseline: fmtMs(b.block_time_p99),  feature: fmtMs(f.block_time_p99),  change: fmtDelta(d.block_time_p99) },
   ];
 }
 

diff --git a/tempo.nu b/tempo.nu
@@ -750,6 +750,19 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
         }
     }
 
+    let compute_mean_stddev_stats = { |samples: list<any>|  # Closure: summarizes a list of samples as a record { mean, stddev, rel_stddev_pct }.
+        let count = ($samples | length)
+        let mean = if $count > 0 { $samples | math avg } else { 0 }  # empty list yields mean 0 instead of calling `math avg`
+        let stddev = if $count > 1 { $samples | math stddev } else { 0 }  # fewer than two samples yields 0; NOTE(review): no --sample flag, presumably the population form - confirm
+        {
+            mean: ($mean | math round --precision 2)  # rounded for reporting only
+            stddev: ($stddev | math round --precision 2)  # rounded for reporting only
+            rel_stddev_pct: (if $count > 1 and $mean != 0 {
+                ((($stddev / $mean) * 100) | math round --precision 2)
+            } else { 0 })
+        }  # rel_stddev_pct is derived from the unrounded mean/stddev and is 0 when count <= 1 or mean == 0
+    }
+
     for label in $run_labels {
         let report_path = $"($results_dir)/report-($label).json"
         if not ($report_path | path exists) {
@@ -797,8 +810,6 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
         let total_ok = ($blocks | get ok_count | math sum)
         let total_err = ($blocks | get err_count | math sum)
         let total_gas = ($blocks | get gas_used | math sum)
-        let latencies = ($blocks | where latency_ms != null | get latency_ms | sort)
-        let p50_latency = (percentile $latencies 50 | math round --precision 1)
         let num_blocks = ($blocks | length)
 
         # Compute TPS from block timestamps (timestamps are in milliseconds)
@@ -824,7 +835,7 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
             ok: $total_ok
             err: $total_err
             total_gas: $total_gas
-            p50_latency: $p50_latency
+            wall_clock_s: ($time_span_s | math round --precision 2)
             tps: $actual_tps
             tps_p50: $run_tps.p50
             tps_p90: $run_tps.p90
@@ -842,21 +853,8 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
         return
     }
 
-    # Compute per-block latency percentiles for each group
-    let compute_latency_stats = { |blocks: list<any>|
-        let latencies = ($blocks | where latency_ms != null | get latency_ms | sort)
-        {
-            n: ($blocks | length)
-            mean: (if ($latencies | length) > 0 { $latencies | math avg | math round --precision 1 } else { 0 })
-            stddev: (if ($latencies | length) > 1 { $latencies | math stddev | math round --precision 1 } else { 0 })
-            p50: (percentile $latencies 50 | math round --precision 1)
-            p90: (percentile $latencies 90 | math round --precision 1)
-            p99: (percentile $latencies 99 | math round --precision 1)
-        }
-    }
-
-    let b_lat = do $compute_latency_stats $baseline_blocks
-    let f_lat = do $compute_latency_stats $feature_blocks
+    let b_num_blocks = ($baseline_blocks | length)
+    let f_num_blocks = ($feature_blocks | length)
 
     let b_bt = do $compute_block_time_stats $baseline_intervals
     let f_bt = do $compute_block_time_stats $feature_intervals
@@ -866,6 +864,11 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
     # Aggregate TPS and Mgas/s from per-run totals (total_tx / total_time)
     let baseline_runs = ($run_data | where { |r| $r.label | str starts-with "baseline" })
     let feature_runs = ($run_data | where { |r| $r.label | str starts-with "feature" })
+    let b_wall_samples = if ($baseline_runs | length) > 0 { $baseline_runs | get wall_clock_s } else { [] }
+    let f_wall_samples = if ($feature_runs | length) > 0 { $feature_runs | get wall_clock_s } else { [] }
+    let b_wall = do $compute_mean_stddev_stats $b_wall_samples
+    let f_wall = do $compute_mean_stddev_stats $f_wall_samples
+    let wall_clock_uncertainty_pct = (((($b_wall.rel_stddev_pct * $b_wall.rel_stddev_pct) + ($f_wall.rel_stddev_pct * $f_wall.rel_stddev_pct)) | math sqrt) | math round --precision 2)
 
     let b_tps = if ($baseline_runs | length) > 0 { $baseline_runs | get tps | math avg | math round --precision 0 } else { 0 }
     let f_tps = if ($feature_runs | length) > 0 { $feature_runs | get tps | math avg | math round --precision 0 } else { 0 }
@@ -885,14 +888,14 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
         $"- Target TPS: ($tps)"
         $"- Duration: ($duration)s"
         $"- Snapshot: (if (has-schelk) { 'schelk' } else { 'cp fallback' })"
-        $"- Baseline blocks: ($b_lat.n)"
-        $"- Feature blocks: ($f_lat.n)"
+        $"- Baseline blocks: ($b_num_blocks)"
+        $"- Feature blocks: ($f_num_blocks)"
         ""
         "## Tempo Metrics"
         ""
         "| Metric | Baseline | Feature | Delta |"
         "|--------|----------|---------|-------|"
-        $"| Avg TPS | ($b_tps) | ($f_tps) | (do $delta $b_tps $f_tps)% |"
+        $"| Wall Clock [s] | ($b_wall.mean) | ($f_wall.mean) | (do $delta $b_wall.mean $f_wall.mean)% (±($wall_clock_uncertainty_pct)%) |"
         $"| TPS P50 | ($b_tps_stats.p50) | ($f_tps_stats.p50) | (do $delta $b_tps_stats.p50 $f_tps_stats.p50)% |"
         $"| TPS P90 | ($b_tps_stats.p90) | ($f_tps_stats.p90) | (do $delta $b_tps_stats.p90 $f_tps_stats.p90)% |"
         $"| TPS P99 | ($b_tps_stats.p99) | ($f_tps_stats.p99) | (do $delta $b_tps_stats.p99 $f_tps_stats.p99)% |"
@@ -901,25 +904,15 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
         $"| Block Time P90 [ms] | ($b_bt.p90) | ($f_bt.p90) | (do $delta $b_bt.p90 $f_bt.p90)% |"
         $"| Block Time P99 [ms] | ($b_bt.p99) | ($f_bt.p99) | (do $delta $b_bt.p99 $f_bt.p99)% |"
         ""
-        "## Latency (Secondary)"
-        ""
-        "| Metric | Baseline | Feature | Delta |"
-        "|--------|----------|---------|-------|"
-        $"| Latency Mean [ms] | ($b_lat.mean) | ($f_lat.mean) | (do $delta $b_lat.mean $f_lat.mean)% |"
-        $"| Latency Std Dev [ms] | ($b_lat.stddev) | ($f_lat.stddev) | (do $delta $b_lat.stddev $f_lat.stddev)% |"
-        $"| Latency P50 [ms] | ($b_lat.p50) | ($f_lat.p50) | (do $delta $b_lat.p50 $f_lat.p50)% |"
-        $"| Latency P90 [ms] | ($b_lat.p90) | ($f_lat.p90) | (do $delta $b_lat.p90 $f_lat.p90)% |"
-        $"| Latency P99 [ms] | ($b_lat.p99) | ($f_lat.p99) | (do $delta $b_lat.p99 $f_lat.p99)% |"
-        ""
         "## Per-Run Details"
         ""
-        "| Run | Blocks | Total Tx | Success | Failed | Avg TPS | Block P50 | Mgas/s |"
+        "| Run | Blocks | Total Tx | Success | Failed | Wall Clock [s] | Block Time | Mgas/s |"
         "|-----|--------|----------|---------|--------|---------|-----------|--------|"
     ] | str join "\n")
 
     mut per_run_rows = ""
     for row in $run_data {
-        $per_run_rows = $"($per_run_rows)| ($row.label) | ($row.blocks) | ($row.total_tx) | ($row.ok) | ($row.err) | ($row.tps) | ($row.block_time_p50) | ($row.mgas_s) |\n"
+        $per_run_rows = $"($per_run_rows)| ($row.label) | ($row.blocks) | ($row.total_tx) | ($row.ok) | ($row.err) | ($row.wall_clock_s) | ($row.block_time_p50) | ($row.mgas_s) |\n"
     }
 
     let full_summary = $"($summary)\n($per_run_rows)"
@@ -941,11 +934,8 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
         }
         results: {
             baseline: {
-                latency_mean: $b_lat.mean
-                latency_stddev: $b_lat.stddev
-                latency_p50: $b_lat.p50
-                latency_p90: $b_lat.p90
-                latency_p99: $b_lat.p99
+                wall_clock_s: $b_wall.mean
+                wall_clock_stddev_s: $b_wall.stddev
                 tps: $b_tps
                 tps_p50: $b_tps_stats.p50
                 tps_p90: $b_tps_stats.p90
@@ -954,14 +944,11 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
                 block_time_p50: $b_bt.p50
                 block_time_p90: $b_bt.p90
                 block_time_p99: $b_bt.p99
-                blocks: $b_lat.n
+                blocks: $b_num_blocks
             }
             feature: {
-                latency_mean: $f_lat.mean
-                latency_stddev: $f_lat.stddev
-                latency_p50: $f_lat.p50
-                latency_p90: $f_lat.p90
-                latency_p99: $f_lat.p99
+                wall_clock_s: $f_wall.mean
+                wall_clock_stddev_s: $f_wall.stddev
                 tps: $f_tps
                 tps_p50: $f_tps_stats.p50
                 tps_p90: $f_tps_stats.p90
@@ -970,14 +957,11 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
                 block_time_p50: $f_bt.p50
                 block_time_p90: $f_bt.p90
                 block_time_p99: $f_bt.p99
-                blocks: $f_lat.n
+                blocks: $f_num_blocks
             }
             deltas: {
-                latency_mean: (do $delta $b_lat.mean $f_lat.mean)
-                latency_stddev: (do $delta $b_lat.stddev $f_lat.stddev)
-                latency_p50: (do $delta $b_lat.p50 $f_lat.p50)
-                latency_p90: (do $delta $b_lat.p90 $f_lat.p90)
-                latency_p99: (do $delta $b_lat.p99 $f_lat.p99)
+                wall_clock_s: (do $delta $b_wall.mean $f_wall.mean)
+                wall_clock_uncertainty_pct: $wall_clock_uncertainty_pct
                 tps: (do $delta $b_tps $f_tps)
                 tps_p50: (do $delta $b_tps_stats.p50 $f_tps_stats.p50)
                 tps_p90: (do $delta $b_tps_stats.p90 $f_tps_stats.p90)