jmg049 · Copilot · May 3, 2026 · May 3, 2026
diff --git a/Cargo.toml b/Cargo.toml
@@ -96,6 +96,7 @@ resampling = ["dep:rubato", "dep:rayon", "audioadapter", "audioadapter-buffers"]
 vad = [] # Voice activity detection operations
 psychoacoustic = ["transforms"] # Psychoacoustic analysis: band layouts, masking thresholds, SMR
 parallel = ["dep:rayon"] # Parallel codec encoding/decoding via rayon (works with psychoacoustic)
+opus-codec = ["psychoacoustic"] # Opus-inspired codec: SILK (speech) and CELT (music) modes
 plotting = ["dep:plotly", "dep:base64", "dep:serde_json", "channels", "transforms"]
 envelopes = [
     "dynamic-range",
@@ -127,6 +128,7 @@ full = [
     "random-generation",
     "psychoacoustic",
     "parallel",
+    "opus-codec",
 ]
 
 full_no_plotting = [
@@ -149,6 +151,7 @@ full_no_plotting = [
     "envelopes",
     "random-generation",
     "psychoacoustic",
+    "opus-codec",
 ]
 
 educational = ["dep:explainable", "dep:term-maths", "dep:open", "processing"]

diff --git a/src/codecs/mod.rs b/src/codecs/mod.rs
@@ -8,6 +8,9 @@
 //! together with the [`PerceptualCodec`] and [`StereoPerceptualCodec`] implementations
 //! that use psychoacoustic masking to drive perceptual quantization.
 //!
+//! An Opus-inspired codec ([`opus::OpusCodec`]) is also available under the
+//! `opus-codec` feature. It supports SILK (speech) and CELT (music) modes.
+//!
 //! ## Why
 //!
 //! `audio_samples` exposes the full signal-processing toolkit that perceptual codecs
@@ -35,5 +38,11 @@
 
 pub mod perceptual;
 
+/// Opus-inspired codec skeleton: SILK (speech) and CELT (music) modes.
+///
+/// Requires the `opus-codec` feature flag.
+#[cfg(feature = "opus-codec")]
+pub mod opus;
+
 pub use perceptual::codec::{AudioCodec, decode, encode};
 pub use perceptual::stereo::{StereoPerceptualCodec, StereoPerceptualEncodedAudio};
diff --git a/src/codecs/opus/celt.rs b/src/codecs/opus/celt.rs
@@ -0,0 +1,169 @@
+//! CELT wideband audio codec: MDCT + psychoacoustic bit allocation for Opus's music mode.
+//!
+//! ## What
+//!
+//! Implements one encode/decode cycle for a single Opus CELT audio frame.
+//! Each frame runs the full existing perceptual codec pipeline from
+//! [`crate::codecs::perceptual`]:
+//!
+//! 1. MDCT analysis (window size typically = frame length).
+//! 2. Psychoacoustic masking → per-band importance and allowed noise.
+//! 3. Bit allocation: distribute the bit budget across bands proportional to
+//!    perceptual importance.
+//! 4. Scalar quantisation of MDCT coefficients per band.
+//!
+//! Decoding runs the inverse: dequantise → IMDCT with overlap-add.
+//!
+//! ## Why
+//!
+//! CELT is the wideband, low-latency half of Opus. It analyses and codes the
+//! entire spectrum in one MDCT block matching the frame length, making it ideal
+//! for music and generic audio. Unlike SILK, it makes no speech-specific
+//! assumptions.
+//!
+//! ## Relationship to `PerceptualCodec`
+//!
+//! [`celt_encode_frame`] is essentially one call to the internal
+//! `encode_segment` helper from `PerceptualCodec`, scoped to a single Opus
+//! frame. The [`CeltEncodedFrame`] type mirrors
+//! [`crate::codecs::perceptual::codec::EncodedSegment`] with the addition of
+//! the per-frame sample count.
+
+use std::num::{NonZeroU32, NonZeroUsize};
+
+use non_empty_slice::NonEmptyVec;
+use spectrograms::{MdctParams, WindowType};
+
+use crate::codecs::perceptual::quantization::{BitAllocationResult, allocate_bits, dequantize, quantize};
+use crate::codecs::perceptual::{BandLayout, PsychoacousticConfig, analyse_signal_with_window_size, reconstruct_signal};
+use crate::{AudioSampleResult, AudioSamples, StandardSample};
+
+// ── CeltEncodedFrame ──────────────────────────────────────────────────────────
+
+/// One CELT-encoded Opus audio frame, as produced by [`celt_encode_frame`].
+///
+/// The in-memory representation is equivalent to
+/// [`crate::codecs::perceptual::codec::EncodedSegment`] scoped to a single
+/// Opus frame. When the encoder's `window_size` equals the frame length,
+/// `n_frames` is typically 1 or 2 depending on how the MDCT hop interacts
+/// with the frame boundary.
+///
+/// Holds the MDCT parameters, per-band bit allocation and original sample
+/// count; [`celt_decode_frame`] additionally needs only the sample rate.
+#[derive(Debug, Clone)]
+pub struct CeltEncodedFrame {
+    /// Quantised MDCT coefficients, row-major: index `k × n_frames + f`.
+    pub quantized: NonEmptyVec<i32>,
+    /// Number of MDCT bins per frame (`window_size / 2`).
+    pub n_coefficients: NonZeroUsize,
+    /// Number of MDCT analysis frames produced from this Opus frame.
+    pub n_frames: NonZeroUsize,
+    /// MDCT parameters recorded at analysis time and reused for reconstruction.
+    pub mdct_params: MdctParams,
+    /// Per-band bit allocation used for quantisation and dequantisation.
+    pub allocation: BitAllocationResult,
+    /// Samples per channel in the original frame; handed to reconstruction on decode.
+    pub n_samples: usize,
+}
+
+// ── celt_encode_frame ─────────────────────────────────────────────────────────
+
+/// Encodes a single CELT audio frame.
+///
+/// The frame is analysed via [`analyse_signal_with_window_size`] (MDCT plus
+/// psychoacoustic model), bits are distributed with [`allocate_bits`], and the
+/// resulting coefficients are quantised with that per-band allocation.
+///
+/// # Arguments
+/// - `frame` – Mono audio frame to encode.
+/// - `band_layout` – Perceptual frequency-band partitioning (e.g. [`crate::BandLayout::celt`]).
+/// - `psych_config` – Psychoacoustic masking configuration. Must have the same
+///   number of weights as `band_layout.len()`.
+/// - `window` – MDCT window function. [`spectrograms::WindowType::Hanning`] is a
+///   reasonable default.
+/// - `window_size` – Explicit MDCT window size (typically `= frame_length`, i.e.
+///   the number of samples in `frame`). When `None`, an automatic size ≤ 2048 is
+///   chosen.
+/// - `bit_budget` – Total bits to allocate across all bands.
+/// - `min_bits_per_band` – Minimum bits guaranteed to every band (typically 1).
+///
+/// # Errors
+/// Propagates the analysis error: [`crate::AudioSampleError::Parameter`] if `frame` is not
+/// mono, is fewer than 4 samples, or `psych_config` is incompatible with `band_layout`.
+pub fn celt_encode_frame<T: StandardSample>(
+    frame: &AudioSamples<T>,
+    band_layout: &BandLayout,
+    psych_config: &PsychoacousticConfig,
+    window: WindowType,
+    window_size: Option<NonZeroUsize>,
+    bit_budget: u32,
+    min_bits_per_band: u8,
+) -> AudioSampleResult<CeltEncodedFrame> {
+    let n_samples = frame.samples_per_channel().get(); // stored for the decoder
+    // Only fallible step; yields the coefficients, band metrics and MDCT parameters used below.
+    let result =
+        analyse_signal_with_window_size(frame, window, window_size, band_layout, psych_config)?;
+    // The same allocation drives quantisation here and dequantisation in the decoder.
+    let allocation = allocate_bits(&result.band_metrics, bit_budget, min_bits_per_band);
+    let quantized = quantize(
+        result.coefficients.as_non_empty_slice(),
+        result.n_coefficients,
+        result.n_frames,
+        &allocation,
+    );
+
+    Ok(CeltEncodedFrame {
+        quantized,
+        n_coefficients: result.n_coefficients,
+        n_frames: result.n_frames,
+        mdct_params: result.mdct_params,
+        allocation,
+        n_samples,
+    })
+}
+
+// ── celt_decode_frame ─────────────────────────────────────────────────────────
+
+/// Decodes a CELT-encoded Opus audio frame into a `Vec<f32>` of PCM samples.
+///
+/// Dequantises the MDCT coefficients and applies the IMDCT with overlap-add to
+/// reconstruct the time-domain signal (`frame.n_samples` samples).
+///
+/// # Arguments
+/// - `frame` – A CELT frame produced by [`celt_encode_frame`].
+/// - `sample_rate` – Sample rate for the returned audio.
+///
+/// # Errors
+/// Returns [`crate::AudioSampleError`] if the IMDCT reconstruction fails.
+///
+/// # Panics
+/// Panics if the reconstructed audio has no channel or is not contiguous in memory.
+pub fn celt_decode_frame(
+    frame: CeltEncodedFrame,
+    sample_rate: NonZeroU32,
+) -> AudioSampleResult<Vec<f32>> {
+    let coefficients = dequantize(
+        frame.quantized.as_non_empty_slice(),
+        frame.n_coefficients,
+        frame.n_frames,
+        &frame.allocation,
+    );
+    // NOTE(review): `Some(n_samples)` presumably trims the output to the original length.
+    let audio = reconstruct_signal(
+        &coefficients,
+        frame.n_coefficients,
+        frame.n_frames,
+        &frame.mdct_params,
+        Some(frame.n_samples),
+        sample_rate,
+    )?;
+    // Copy the first channel out into an owned buffer for the caller.
+    let ch = audio
+        .channels()
+        .next()
+        .expect("reconstruct_signal always returns mono");
+    Ok(ch
+        .as_slice()
+        .expect("mono channel is always contiguous")
+        .to_vec())
+}