Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions .github/scripts/bench-slack-notify.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,15 @@ function cell(text) {
}

/**
 * Render a numeric value with two decimal places and an `ms` suffix.
 *
 * @param {number|null|undefined} v - Value to format.
 * @returns {string} e.g. `'12.30ms'`, or `'-'` when `v` is null/undefined.
 */
function fmtMs(v) {
  // Loose null check on purpose: covers both null and undefined, but not 0.
  if (v == null) {
    return '-';
  }
  return `${v.toFixed(2)}ms`;
}
/**
 * Render a numeric value with two decimal places and an `s` suffix.
 *
 * @param {number|null|undefined} v - Value to format.
 * @returns {string} e.g. `'12.50s'`, or `'-'` when `v` is null/undefined.
 */
function fmtSeconds(v) {
  // Loose null check on purpose: covers both null and undefined, but not 0.
  if (v == null) {
    return '-';
  }
  return `${v.toFixed(2)}s`;
}
/**
 * Render a numeric value with a configurable number of decimals and an
 * optional suffix.
 *
 * @param {number|null|undefined} v - Value to format.
 * @param {string} [suffix=''] - Text appended directly after the number.
 * @param {number} [precision=2] - Digits after the decimal point (passed to `toFixed`).
 * @returns {string} The formatted value, or `'-'` when `v` is null/undefined.
 */
function fmtVal(v, suffix = '', precision = 2) {
  // Loose null check on purpose: covers both null and undefined, but not 0.
  if (v == null) {
    return '-';
  }
  const digits = v.toFixed(precision);
  return digits + suffix;
}

function tempoBlockTimeDeltas(deltas) {
return [deltas.block_time_p50, deltas.block_time_p90, deltas.block_time_p99];
/**
 * Pick the timing-related delta fields out of a deltas record.
 *
 * @param {object} deltas - Record carrying `wall_clock_s` and
 *   `block_time_p50/p90/p99` properties; absent properties yield `undefined`.
 * @returns {Array} `[wall_clock_s, block_time_p50, block_time_p90, block_time_p99]`
 *   in exactly that order.
 */
function tempoTimingDeltas(deltas) {
  const timingKeys = ['wall_clock_s', 'block_time_p50', 'block_time_p90', 'block_time_p99'];
  return timingKeys.map((key) => deltas[key]);
}

function tempoThroughputDeltas(deltas) {
return [deltas.tps, deltas.tps_p50, deltas.tps_p90, deltas.tps_p99, deltas.mgas_s];
return [deltas.tps_p50, deltas.tps_p90, deltas.tps_p99, deltas.mgas_s];
}

function fmtDelta(pct) {
Expand All @@ -77,6 +78,14 @@ function fmtDelta(pct) {
return `${sign}${pct.toFixed(2)}% ${emoji}`;
}

/**
 * Format a timing delta percentage, optionally annotated with a ± spread,
 * followed by the marker returned by `classifyDelta`.
 *
 * @param {number|null|undefined} pct - Percentage change; null/undefined yields `''`.
 * @param {number|null|undefined} relativeStddevPct - Spread in percent; when
 *   null/undefined the ` (±…%)` part is omitted.
 * @returns {string} e.g. `'+1.50% (±0.25%) <marker>'`.
 */
function fmtTimingDelta(pct, relativeStddevPct) {
  if (pct == null) {
    return '';
  }

  // Negative numbers already carry their own '-' from toFixed.
  let prefix = '';
  if (pct >= 0) {
    prefix = '+';
  }

  const marker = classifyDelta(pct);

  let uncertainty = '';
  if (relativeStddevPct != null) {
    uncertainty = ` (±${relativeStddevPct.toFixed(2)}%)`;
  }

  return `${prefix}${pct.toFixed(2)}%${uncertainty} ${marker}`;
}

// For latency: negative = good (faster), positive = bad (slower)
function classifyDelta(pct) {
if (Math.abs(pct) < THRESHOLD_PCT) return '⚪';
Expand All @@ -97,12 +106,12 @@ function fmtDeltaInverse(pct) {
}

function verdict(deltas) {
const blockTimeDeltas = tempoBlockTimeDeltas(deltas);
const timingDeltas = tempoTimingDeltas(deltas);
const throughputDeltas = tempoThroughputDeltas(deltas);

const hasBad = blockTimeDeltas.some(d => d != null && d > THRESHOLD_PCT) ||
const hasBad = timingDeltas.some(d => d != null && d > THRESHOLD_PCT) ||
throughputDeltas.some(d => d != null && d < -THRESHOLD_PCT);
const hasGood = blockTimeDeltas.some(d => d != null && d < -THRESHOLD_PCT) ||
const hasGood = timingDeltas.some(d => d != null && d < -THRESHOLD_PCT) ||
throughputDeltas.some(d => d != null && d > THRESHOLD_PCT);

if (hasBad && hasGood) return { emoji: ':warning:', label: 'Mixed Results' };
Expand All @@ -112,7 +121,7 @@ function verdict(deltas) {
}

function hasSignificantChange(deltas) {
const all = [...tempoThroughputDeltas(deltas), ...tempoBlockTimeDeltas(deltas)];
const all = [...tempoThroughputDeltas(deltas), ...tempoTimingDeltas(deltas)];
return all.some(d => d != null && Math.abs(d) >= THRESHOLD_PCT);
}

Expand All @@ -134,14 +143,14 @@ function buildMetricRows(summary) {
const f = summary.results.feature;
const d = summary.results.deltas;
return [
{ label: 'Avg TPS', baseline: fmtVal(b.tps, '', 0), feature: fmtVal(f.tps, '', 0), change: fmtDeltaInverse(d.tps) },
{ label: 'Wall Clock', baseline: fmtSeconds(b.wall_clock_s), feature: fmtSeconds(f.wall_clock_s), change: fmtTimingDelta(d.wall_clock_s, d.wall_clock_uncertainty_pct) },
{ label: 'TPS P50', baseline: fmtVal(b.tps_p50, '', 1), feature: fmtVal(f.tps_p50, '', 1), change: fmtDeltaInverse(d.tps_p50) },
{ label: 'TPS P90', baseline: fmtVal(b.tps_p90, '', 1), feature: fmtVal(f.tps_p90, '', 1), change: fmtDeltaInverse(d.tps_p90) },
{ label: 'TPS P99', baseline: fmtVal(b.tps_p99, '', 1), feature: fmtVal(f.tps_p99, '', 1), change: fmtDeltaInverse(d.tps_p99) },
{ label: 'Gas/s', baseline: fmtVal(b.mgas_s, ' Mgas/s', 1), feature: fmtVal(f.mgas_s, ' Mgas/s', 1), change: fmtDeltaInverse(d.mgas_s) },
{ label: 'Block P50', baseline: fmtMs(b.block_time_p50), feature: fmtMs(f.block_time_p50), change: fmtDelta(d.block_time_p50) },
{ label: 'Block P90', baseline: fmtMs(b.block_time_p90), feature: fmtMs(f.block_time_p90), change: fmtDelta(d.block_time_p90) },
{ label: 'Block P99', baseline: fmtMs(b.block_time_p99), feature: fmtMs(f.block_time_p99), change: fmtDelta(d.block_time_p99) },
{ label: 'Block Time P50', baseline: fmtMs(b.block_time_p50), feature: fmtMs(f.block_time_p50), change: fmtDelta(d.block_time_p50) },
{ label: 'Block Time P90', baseline: fmtMs(b.block_time_p90), feature: fmtMs(f.block_time_p90), change: fmtDelta(d.block_time_p90) },
{ label: 'Block Time P99', baseline: fmtMs(b.block_time_p99), feature: fmtMs(f.block_time_p99), change: fmtDelta(d.block_time_p99) },
];
}

Expand Down
84 changes: 34 additions & 50 deletions tempo.nu
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,19 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
}
}

# Summarize a list of numeric samples.
# Returns a record with:
#   mean            - average of the samples (0 for an empty list)
#   stddev          - `math stddev` of the samples (0 with fewer than two samples)
#   rel_stddev_pct  - stddev as a percentage of the mean (0 with fewer than two
#                     samples or a zero mean)
# All three values are rounded to two decimals; the relative value is derived
# from the unrounded mean and stddev.
let compute_mean_stddev_stats = { |samples: list<any>|
    let n = ($samples | length)
    let avg = if $n > 0 { $samples | math avg } else { 0 }
    let spread = if $n > 1 { $samples | math stddev } else { 0 }
    let rel_pct = if $n > 1 and $avg != 0 {
        ((($spread / $avg) * 100) | math round --precision 2)
    } else { 0 }
    {
        mean: ($avg | math round --precision 2)
        stddev: ($spread | math round --precision 2)
        rel_stddev_pct: $rel_pct
    }
}

for label in $run_labels {
let report_path = $"($results_dir)/report-($label).json"
if not ($report_path | path exists) {
Expand Down Expand Up @@ -797,8 +810,6 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
let total_ok = ($blocks | get ok_count | math sum)
let total_err = ($blocks | get err_count | math sum)
let total_gas = ($blocks | get gas_used | math sum)
let latencies = ($blocks | where latency_ms != null | get latency_ms | sort)
let p50_latency = (percentile $latencies 50 | math round --precision 1)
let num_blocks = ($blocks | length)

# Compute TPS from block timestamps (timestamps are in milliseconds)
Expand All @@ -824,7 +835,7 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
ok: $total_ok
err: $total_err
total_gas: $total_gas
p50_latency: $p50_latency
wall_clock_s: ($time_span_s | math round --precision 2)
tps: $actual_tps
tps_p50: $run_tps.p50
tps_p90: $run_tps.p90
Expand All @@ -842,21 +853,8 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
return
}

# Compute per-block latency percentiles for each group
let compute_latency_stats = { |blocks: list<any>|
let latencies = ($blocks | where latency_ms != null | get latency_ms | sort)
{
n: ($blocks | length)
mean: (if ($latencies | length) > 0 { $latencies | math avg | math round --precision 1 } else { 0 })
stddev: (if ($latencies | length) > 1 { $latencies | math stddev | math round --precision 1 } else { 0 })
p50: (percentile $latencies 50 | math round --precision 1)
p90: (percentile $latencies 90 | math round --precision 1)
p99: (percentile $latencies 99 | math round --precision 1)
}
}

let b_lat = do $compute_latency_stats $baseline_blocks
let f_lat = do $compute_latency_stats $feature_blocks
let b_num_blocks = ($baseline_blocks | length)
let f_num_blocks = ($feature_blocks | length)

let b_bt = do $compute_block_time_stats $baseline_intervals
let f_bt = do $compute_block_time_stats $feature_intervals
Expand All @@ -866,6 +864,11 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
# Aggregate TPS and Mgas/s from per-run totals (total_tx / total_time)
let baseline_runs = ($run_data | where { |r| $r.label | str starts-with "baseline" })
let feature_runs = ($run_data | where { |r| $r.label | str starts-with "feature" })
let b_wall_samples = if ($baseline_runs | length) > 0 { $baseline_runs | get wall_clock_s } else { [] }
let f_wall_samples = if ($feature_runs | length) > 0 { $feature_runs | get wall_clock_s } else { [] }
let b_wall = do $compute_mean_stddev_stats $b_wall_samples
let f_wall = do $compute_mean_stddev_stats $f_wall_samples
let wall_clock_uncertainty_pct = (((($b_wall.rel_stddev_pct * $b_wall.rel_stddev_pct) + ($f_wall.rel_stddev_pct * $f_wall.rel_stddev_pct)) | math sqrt) | math round --precision 2)

let b_tps = if ($baseline_runs | length) > 0 { $baseline_runs | get tps | math avg | math round --precision 0 } else { 0 }
let f_tps = if ($feature_runs | length) > 0 { $feature_runs | get tps | math avg | math round --precision 0 } else { 0 }
Expand All @@ -885,14 +888,14 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
$"- Target TPS: ($tps)"
$"- Duration: ($duration)s"
$"- Snapshot: (if (has-schelk) { 'schelk' } else { 'cp fallback' })"
$"- Baseline blocks: ($b_lat.n)"
$"- Feature blocks: ($f_lat.n)"
$"- Baseline blocks: ($b_num_blocks)"
$"- Feature blocks: ($f_num_blocks)"
""
"## Tempo Metrics"
""
"| Metric | Baseline | Feature | Delta |"
"|--------|----------|---------|-------|"
$"| Avg TPS | ($b_tps) | ($f_tps) | (do $delta $b_tps $f_tps)% |"
$"| Wall Clock [s] | ($b_wall.mean) | ($f_wall.mean) | (do $delta $b_wall.mean $f_wall.mean)% (±($wall_clock_uncertainty_pct)%) |"
$"| TPS P50 | ($b_tps_stats.p50) | ($f_tps_stats.p50) | (do $delta $b_tps_stats.p50 $f_tps_stats.p50)% |"
$"| TPS P90 | ($b_tps_stats.p90) | ($f_tps_stats.p90) | (do $delta $b_tps_stats.p90 $f_tps_stats.p90)% |"
$"| TPS P99 | ($b_tps_stats.p99) | ($f_tps_stats.p99) | (do $delta $b_tps_stats.p99 $f_tps_stats.p99)% |"
Expand All @@ -901,25 +904,15 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
$"| Block Time P90 [ms] | ($b_bt.p90) | ($f_bt.p90) | (do $delta $b_bt.p90 $f_bt.p90)% |"
$"| Block Time P99 [ms] | ($b_bt.p99) | ($f_bt.p99) | (do $delta $b_bt.p99 $f_bt.p99)% |"
""
"## Latency (Secondary)"
""
"| Metric | Baseline | Feature | Delta |"
"|--------|----------|---------|-------|"
$"| Latency Mean [ms] | ($b_lat.mean) | ($f_lat.mean) | (do $delta $b_lat.mean $f_lat.mean)% |"
$"| Latency Std Dev [ms] | ($b_lat.stddev) | ($f_lat.stddev) | (do $delta $b_lat.stddev $f_lat.stddev)% |"
$"| Latency P50 [ms] | ($b_lat.p50) | ($f_lat.p50) | (do $delta $b_lat.p50 $f_lat.p50)% |"
$"| Latency P90 [ms] | ($b_lat.p90) | ($f_lat.p90) | (do $delta $b_lat.p90 $f_lat.p90)% |"
$"| Latency P99 [ms] | ($b_lat.p99) | ($f_lat.p99) | (do $delta $b_lat.p99 $f_lat.p99)% |"
""
"## Per-Run Details"
""
"| Run | Blocks | Total Tx | Success | Failed | Avg TPS | Block P50 | Mgas/s |"
"| Run | Blocks | Total Tx | Success | Failed | Wall Clock [s] | Block Time | Mgas/s |"
"|-----|--------|----------|---------|--------|---------|-----------|--------|"
] | str join "\n")

mut per_run_rows = ""
for row in $run_data {
$per_run_rows = $"($per_run_rows)| ($row.label) | ($row.blocks) | ($row.total_tx) | ($row.ok) | ($row.err) | ($row.tps) | ($row.block_time_p50) | ($row.mgas_s) |\n"
$per_run_rows = $"($per_run_rows)| ($row.label) | ($row.blocks) | ($row.total_tx) | ($row.ok) | ($row.err) | ($row.wall_clock_s) | ($row.block_time_p50) | ($row.mgas_s) |\n"
}

let full_summary = $"($summary)\n($per_run_rows)"
Expand All @@ -941,11 +934,8 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
}
results: {
baseline: {
latency_mean: $b_lat.mean
latency_stddev: $b_lat.stddev
latency_p50: $b_lat.p50
latency_p90: $b_lat.p90
latency_p99: $b_lat.p99
wall_clock_s: $b_wall.mean
wall_clock_stddev_s: $b_wall.stddev
tps: $b_tps
tps_p50: $b_tps_stats.p50
tps_p90: $b_tps_stats.p90
Expand All @@ -954,14 +944,11 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
block_time_p50: $b_bt.p50
block_time_p90: $b_bt.p90
block_time_p99: $b_bt.p99
blocks: $b_lat.n
blocks: $b_num_blocks
}
feature: {
latency_mean: $f_lat.mean
latency_stddev: $f_lat.stddev
latency_p50: $f_lat.p50
latency_p90: $f_lat.p90
latency_p99: $f_lat.p99
wall_clock_s: $f_wall.mean
wall_clock_stddev_s: $f_wall.stddev
tps: $f_tps
tps_p50: $f_tps_stats.p50
tps_p90: $f_tps_stats.p90
Expand All @@ -970,14 +957,11 @@ def generate-summary [results_dir: string, baseline_ref: string, feature_ref: st
block_time_p50: $f_bt.p50
block_time_p90: $f_bt.p90
block_time_p99: $f_bt.p99
blocks: $f_lat.n
blocks: $f_num_blocks
}
deltas: {
latency_mean: (do $delta $b_lat.mean $f_lat.mean)
latency_stddev: (do $delta $b_lat.stddev $f_lat.stddev)
latency_p50: (do $delta $b_lat.p50 $f_lat.p50)
latency_p90: (do $delta $b_lat.p90 $f_lat.p90)
latency_p99: (do $delta $b_lat.p99 $f_lat.p99)
wall_clock_s: (do $delta $b_wall.mean $f_wall.mean)
wall_clock_uncertainty_pct: $wall_clock_uncertainty_pct
tps: (do $delta $b_tps $f_tps)
tps_p50: (do $delta $b_tps_stats.p50 $f_tps_stats.p50)
tps_p90: (do $delta $b_tps_stats.p90 $f_tps_stats.p90)
Expand Down
Loading