Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 0 additions & 28 deletions src/palabra_ai/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,34 +259,6 @@ def to_ws(self) -> bytes:
}
)

def to_bench(self):
    """Build a compact, log-friendly summary of this audio frame.

    The raw audio payload is never copied into the result; only its size
    and format parameters are reported under ``"__dbg"``.

    Returns:
        dict: A mapping with three keys:
            ``"message_type"`` - always ``"__$bench_audio_frame"``;
            ``"__dbg"`` - payload size in bytes plus ``sample_rate``,
            ``num_channels`` and ``samples_per_channel``;
            ``"data"`` - empty for frames without a ``transcription_id``,
            otherwise ``transcription_id``, ``language`` and ``last_chunk``.
    """
    result = {
        "message_type": "__$bench_audio_frame",
        "__dbg": {
            "size": len(self.data),
            "sample_rate": self.sample_rate,
            "num_channels": self.num_channels,
            "samples_per_channel": self.samples_per_channel,
        },
        "data": {},
    }

    # Include transcription metadata only for output frames
    # (those with a truthy transcription_id).
    if self.transcription_id:
        result["data"].update(
            {
                "transcription_id": self.transcription_id,
                "language": self.language,
                "last_chunk": self.last_chunk,
            }
        )

    # NOTE: the previous version tried to mask result["data"]["data"] with
    # "..." here, but that key is never set above, so the branch could not
    # run. It was removed; the audio bytes are simply never added.
    return result


@dataclass
class AudioBuffer:
Expand Down
150 changes: 24 additions & 126 deletions src/palabra_ai/benchmark/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,13 @@
from palabra_ai import Config, PalabraAI, SourceLang, TargetLang
from palabra_ai.audio import save_wav
from palabra_ai.benchmark.report import BENCHMARK_ALLOWED_MESSAGE_TYPES
from palabra_ai.benchmark.report import format_report
from palabra_ai.benchmark.report import INPUT_CHUNK_DURATION_S
from palabra_ai.benchmark.report import Report
from palabra_ai.benchmark.report import save_benchmark_files
from palabra_ai.config import WsMode
from palabra_ai.lang import Language
from palabra_ai.task.adapter.dummy import DummyWriter
from palabra_ai.task.adapter.file import FileReader
from palabra_ai.util.fileio import save_text
from palabra_ai.util.orjson import to_json
from palabra_ai.util.sysinfo import get_system_info

Expand Down Expand Up @@ -51,20 +50,6 @@ def main():
raise FileNotFoundError(f"Audio file not found: {args.audio}")
mode = WsMode(input_chunk_duration_ms=INPUT_CHUNK_DURATION_S * 1000)

# Setup output directory and timestamp if --out is specified
if args.out:
output_dir = args.out
output_dir.mkdir(parents=True, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Save sysinfo immediately at startup
sysinfo = get_system_info()
sysinfo["command"] = " ".join(sys.argv)
sysinfo["argv"] = sys.argv
sysinfo["cwd"] = str(Path.cwd())
sysinfo_path = output_dir / f"{timestamp}_bench_sysinfo.json"
sysinfo_path.write_bytes(to_json(sysinfo, True))

# Get audio duration for progress tracking
with av.open(str(audio_path)) as container:
audio_duration = container.duration / 1000000 # convert microseconds to seconds
Expand Down Expand Up @@ -98,64 +83,40 @@ def on_transcription(msg):
# Force benchmark mode with 100ms buffer regardless of config
# Config loaded from JSON defaults to 320ms chunks, but benchmark needs 100ms for optimal performance
config.mode = WsMode(input_chunk_duration_ms=INPUT_CHUNK_DURATION_S * 1000)

source_lang = config.source.lang.code
target_lang = config.targets[0].lang.code
else:
if not args.source_lang or not args.target_lang:
parser.error("source_lang and target_lang required without --config")
source_lang = args.source_lang
target_lang = args.target_lang

config = Config(
source=SourceLang(Language.get_or_create(source_lang), reader, on_transcription=on_transcription),
targets=[TargetLang(Language.get_or_create(target_lang), DummyWriter())],
source=SourceLang(Language.get_or_create(args.source_lang), reader, on_transcription=on_transcription),
targets=[TargetLang(Language.get_or_create(args.target_lang), DummyWriter())],
benchmark=True,
mode=mode,
allowed_message_types=BENCHMARK_ALLOWED_MESSAGE_TYPES,
)

# Enable debug mode and logging when --out is specified
if output_dir and timestamp:
config.debug = True
config.log_file = str(output_dir / f"{timestamp}_bench.log")

# Save exact config that goes to set_task (SetTaskMessage.from_config uses to_dict)
config_dict = config.to_dict()
config_path = output_dir / f"{timestamp}_bench_config.json"
config_path.write_bytes(to_json(config_dict, True))
# Enable debug mode and output directory when --out is specified
# Core will auto-save log, trace, result.json, and audio files
if args.out:
# config.debug = True
config.output_dir = Path(args.out)
print(f"Files will be saved to {args.out}")

# Create progress bar with language info
progress_bar[0] = tqdm(
total=100,
desc=f"Processing {source_lang}→{target_lang}",
desc=f"Processing {config.source_lang}→{config.target_lang}",
unit="%",
mininterval=7.0,
bar_format="{desc}: {percentage:3.0f}%|{bar}| [{elapsed}<{remaining}]"
)

print(f"Running benchmark: {source_lang} → {target_lang}")
if args.out:
print(f"Files will be saved to {args.out}")
print(f"Running benchmark: {config.source_lang} → {config.target_lang}")
print("-" * 60)

palabra = PalabraAI()
result = palabra.run(config, no_raise=True)

# Save RunResult in debug mode when --out is specified
if output_dir and timestamp and result is not None:
try:
result_debug_path = output_dir / f"{timestamp}_bench_runresult_debug.json"
result_debug_path.write_bytes(to_json(result.model_dump(), True))
except Exception as e:
# If serialization fails, save error info
error_path = output_dir / f"{timestamp}_bench_runresult_error.txt"
error_path.write_text(
f"Failed to serialize RunResult: {e}\n\n"
f"RunResult repr:\n{repr(result)}\n\n"
f"Exception: {result.exc if result else 'N/A'}"
)

# Complete and close progress bar
if progress_bar[0]:
progress_bar[0].update(100 - progress_bar[0].n)
Expand Down Expand Up @@ -183,25 +144,11 @@ def on_transcription(msg):
print(" - Task was cancelled by timeout")
print(" - Internal cancellation due to error")
print(" - One of the subtasks failed and caused cascade cancellation\n")

# For CancelledError, show ALL logs to understand what happened
if result.log_data and result.log_data.logs:
print(f"Full logs (all {len(result.log_data.logs)} entries):")
for log_line in result.log_data.logs:
print(log_line, end='')
print()
else:
print(f"\n{'='*80}")
print(f"BENCHMARK FAILED: {exc_type}: {exc_msg}")
print(f"{'='*80}\n")

# For other errors, show last 100
if result.log_data and result.log_data.logs:
print("Last 100 log entries:")
for log_line in result.log_data.logs[-100:]:
print(log_line, end='')
print()

# Print traceback from exception if available
if hasattr(result.exc, '__traceback__') and result.exc.__traceback__:
print("\nOriginal exception traceback:")
Expand All @@ -212,47 +159,12 @@ def on_transcription(msg):
raise RuntimeError("Benchmark failed: no io_data")

# Parse report
report, in_audio_canvas, out_audio_canvas = Report.parse(result.io_data)

# Create file paths (used in report and optionally saved with --out)
if not timestamp:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
in_wav_name = f"{timestamp}_bench_in_{source_lang}.wav"
out_wav_name = f"{timestamp}_bench_out_{target_lang}.wav"

# Generate text report
report_text = format_report(
report,
result.io_data,
source_lang,
target_lang,
str(audio_path),
out_wav_name,
config
)

if args.out:
# Use the shared save function
if not output_dir:
output_dir = args.out
save_benchmark_files(
output_dir=output_dir,
timestamp=timestamp,
report=report,
io_data=result.io_data,
config=config,
result=result,
in_audio_canvas=in_audio_canvas,
out_audio_canvas=out_audio_canvas,
source_lang=source_lang,
target_lang=target_lang,
report_text=report_text,
input_file_path=str(audio_path),
file_prefix="bench"
)

# Always print report to console
print("\n" + report_text)
report = Report.parse(result.io_data, Path(args.out))
report.save_all()
else:
report = Report.parse(result.io_data)
print("\n" + report.report_txt)

except Exception as e:
# Capture traceback IMMEDIATELY - must be done in except block!
Expand All @@ -264,34 +176,20 @@ def on_transcription(msg):
print(f"{'='*80}\n")
print(tb_string)

# Save error to file if output directory exists
if output_dir and timestamp:
try:
error_file = output_dir / f"{timestamp}_bench_error.txt"
error_file.write_text(f"Benchmark Error:\n\n{tb_string}")
print(f"\nError details saved to: {error_file}")
except Exception as save_error:
print(f"Failed to save error file: {save_error}")
if config and args.out:
save_text(config.get_out_path(".error.txt"), f"Benchmark Error:\n\n{tb_string}")

# Try to save partial report/audio even on error (for debugging)
if output_dir and timestamp and result and result.io_data:
if result and result.io_data:
try:
print("\nAttempting to save partial results for debugging...")

# Try to parse report
report, in_audio, out_audio = Report.parse(result.io_data)

# Save report files
report_path = output_dir / f"{timestamp}_bench_report_partial.json"
report_path.write_bytes(to_json(report, True))
print(f"✓ Partial report saved to: {report_path}")

# Save audio (always when --out is specified)
in_wav = output_dir / f"{timestamp}_bench_in_partial.wav"
out_wav = output_dir / f"{timestamp}_bench_out_partial.wav"
save_wav(in_audio, in_wav, result.io_data.in_sr, result.io_data.channels)
save_wav(out_audio, out_wav, result.io_data.out_sr, result.io_data.channels)
print(f"✓ Partial audio saved: {in_wav.name}, {out_wav.name}")
if args.out:
output_dir = Path(args.out)
report = Report.parse(result.io_data, output_dir)
report.save_all()
print(f"✓ Something saved to: {args.out}")

except Exception as save_err:
print(f"Could not save partial results: {save_err}")
Expand Down
Loading