Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ resampling = ["dep:rubato", "dep:rayon", "audioadapter", "audioadapter-buffers"]
vad = [] # Voice activity detection operations
psychoacoustic = ["transforms"] # Psychoacoustic analysis: band layouts, masking thresholds, SMR
parallel = ["dep:rayon"] # Parallel codec encoding/decoding via rayon (works with psychoacoustic)
opus-codec = ["psychoacoustic"] # Opus-inspired codec: SILK (speech) and CELT (music) modes
plotting = ["dep:plotly", "dep:base64", "dep:serde_json", "channels", "transforms"]
envelopes = [
"dynamic-range",
Expand Down Expand Up @@ -127,6 +128,7 @@ full = [
"random-generation",
"psychoacoustic",
"parallel",
"opus-codec",
]

full_no_plotting = [
Expand All @@ -149,6 +151,7 @@ full_no_plotting = [
"envelopes",
"random-generation",
"psychoacoustic",
"opus-codec",
]

educational = ["dep:explainable", "dep:term-maths", "dep:open", "processing"]
Expand Down
9 changes: 9 additions & 0 deletions src/codecs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
//! together with the [`PerceptualCodec`] and [`StereoPerceptualCodec`] implementations
//! that use psychoacoustic masking to drive perceptual quantization.
//!
//! An Opus-inspired codec ([`opus::OpusCodec`]) is also available under the
//! `opus-codec` feature. It supports SILK (speech) and CELT (music) modes.
//!
//! ## Why
//!
//! `audio_samples` exposes the full signal-processing toolkit that perceptual codecs
Expand Down Expand Up @@ -35,5 +38,11 @@

pub mod perceptual;

/// Opus-inspired codec skeleton: SILK (speech) and CELT (music) modes.
///
/// Requires the `opus-codec` feature flag.
#[cfg(feature = "opus-codec")]
pub mod opus;

pub use perceptual::codec::{AudioCodec, decode, encode};
pub use perceptual::stereo::{StereoPerceptualCodec, StereoPerceptualEncodedAudio};
169 changes: 169 additions & 0 deletions src/codecs/opus/celt.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
//! CELT wideband audio codec: MDCT + psychoacoustic bit allocation for Opus's music mode.
//!
//! ## What
//!
//! Implements one encode/decode cycle for a single Opus CELT audio frame.
//! Each frame runs the full existing perceptual codec pipeline from
//! [`crate::codecs::perceptual`]:
//!
//! 1. MDCT analysis (window size = frame length).
//! 2. Psychoacoustic masking → per-band importance and allowed noise.
//! 3. Bit allocation: distribute the bit budget across bands proportional to
//! perceptual importance.
//! 4. Scalar quantisation of MDCT coefficients per band.
//!
//! Decoding runs the inverse: dequantise → IMDCT with overlap-add.
//!
//! ## Why
//!
//! CELT is the wideband, low-latency half of Opus. It analyses and codes the
//! entire spectrum in one MDCT block matching the frame length, making it ideal
//! for music and generic audio. Unlike SILK, it makes no speech-specific
//! assumptions.
//!
//! ## Relationship to `PerceptualCodec`
//!
//! [`celt_encode_frame`] is essentially one call to the internal
//! `encode_segment` helper from `PerceptualCodec`, scoped to a single Opus
//! frame. The [`CeltEncodedFrame`] type mirrors
//! [`crate::codecs::perceptual::codec::EncodedSegment`] with the addition of
//! the per-frame sample count.

use std::num::{NonZeroU32, NonZeroUsize};

use non_empty_slice::NonEmptyVec;
use spectrograms::{MdctParams, WindowType};

use crate::codecs::perceptual::quantization::{BitAllocationResult, allocate_bits, dequantize, quantize};
use crate::codecs::perceptual::{BandLayout, PsychoacousticConfig, analyse_signal_with_window_size, reconstruct_signal};
use crate::{AudioSampleResult, AudioSamples, StandardSample};

// ── CeltEncodedFrame ──────────────────────────────────────────────────────────

/// One CELT-encoded Opus audio frame.
///
/// This is the value returned by [`celt_encode_frame`] and consumed by
/// [`celt_decode_frame`]. It carries everything the decoder reads: the
/// quantised coefficients and their dimensions, the MDCT parameters, the
/// per-band bit allocation, and the original sample count.
///
/// The in-memory representation is intended to be equivalent to
/// [`crate::codecs::perceptual::codec::EncodedSegment`] scoped to a single
/// Opus frame.
///
/// NOTE(review): when the encoder is called with a `window_size` equal to the
/// frame length, `n_frames` is expected to be 1 or 2 depending on how the MDCT
/// hop interacts with the frame boundary. The encoder also accepts
/// `window_size = None`, in which case the analysis helper picks the size and
/// that expectation may not hold — confirm against the analysis code.
#[derive(Debug, Clone)]
pub struct CeltEncodedFrame {
/// Quantised MDCT coefficients as produced by `quantize` during encoding,
/// row-major: index `k × n_frames + f`.
pub quantized: NonEmptyVec<i32>,
/// Number of MDCT bins per frame (`window_size / 2`), copied from the
/// analysis result.
pub n_coefficients: NonZeroUsize,
/// Number of MDCT analysis frames produced from this Opus frame, copied
/// from the analysis result.
pub n_frames: NonZeroUsize,
/// MDCT parameters used during analysis; the decoder passes them back to
/// `reconstruct_signal`.
pub mdct_params: MdctParams,
/// Per-band bit allocation used for quantisation and dequantisation. The
/// same value must be used on both sides, which is why it is stored here.
pub allocation: BitAllocationResult,
/// Number of PCM samples per channel in the original Opus frame, recorded
/// at encode time and passed to `reconstruct_signal` on decode.
pub n_samples: usize,
}

// ── celt_encode_frame ─────────────────────────────────────────────────────────

/// Runs one CELT encode pass over a single audio frame.
///
/// The pipeline has three stages, each delegated to the perceptual codec:
///
/// 1. [`analyse_signal_with_window_size`] performs the MDCT analysis and
///    produces the per-band metrics.
/// 2. [`allocate_bits`] turns those metrics into a per-band bit allocation
///    under `bit_budget` / `min_bits_per_band`.
/// 3. [`quantize`] quantises the MDCT coefficients with that allocation.
///
/// The results are packed into a [`CeltEncodedFrame`] together with the
/// frame's per-channel sample count so the decoder can request the same
/// length back.
///
/// # Arguments
/// - `frame` – Mono audio frame to encode.
/// - `band_layout` – Perceptual frequency-band partitioning
///   (e.g. [`crate::BandLayout::celt`]).
/// - `psych_config` – Psychoacoustic masking configuration. Must have the same
///   number of weights as `band_layout.len()`.
/// - `window` – MDCT window function. [`spectrograms::WindowType::Hanning`] is
///   a reasonable default.
/// - `window_size` – Explicit MDCT window size, typically the number of
///   samples in `frame`. When `None`, the analysis helper chooses a size
///   automatically (documented as ≤ 2048).
/// - `bit_budget` – Total bits to distribute across all bands.
/// - `min_bits_per_band` – Minimum bits guaranteed to every band (typically 1).
///
/// # Errors
/// Propagates the error from [`analyse_signal_with_window_size`]. That is
/// documented as [`crate::AudioSampleError::Parameter`] when `frame` is not
/// mono, has fewer than 4 samples, or `psych_config` is incompatible with
/// `band_layout`.
pub fn celt_encode_frame<T: StandardSample>(
    frame: &AudioSamples<T>,
    band_layout: &BandLayout,
    psych_config: &PsychoacousticConfig,
    window: WindowType,
    window_size: Option<NonZeroUsize>,
    bit_budget: u32,
    min_bits_per_band: u8,
) -> AudioSampleResult<CeltEncodedFrame> {
    // Recorded before analysis so the decoder can ask for exactly this length.
    let frame_len = frame.samples_per_channel().get();

    let analysis = analyse_signal_with_window_size(
        frame,
        window,
        window_size,
        band_layout,
        psych_config,
    )?;

    // Both dimensions are needed for quantisation and again for the output.
    let bins_per_frame = analysis.n_coefficients;
    let mdct_frames = analysis.n_frames;

    let bit_plan = allocate_bits(&analysis.band_metrics, bit_budget, min_bits_per_band);
    let coded = quantize(
        analysis.coefficients.as_non_empty_slice(),
        bins_per_frame,
        mdct_frames,
        &bit_plan,
    );

    Ok(CeltEncodedFrame {
        quantized: coded,
        n_coefficients: bins_per_frame,
        n_frames: mdct_frames,
        mdct_params: analysis.mdct_params,
        allocation: bit_plan,
        n_samples: frame_len,
    })
}

// ── celt_decode_frame ─────────────────────────────────────────────────────────

/// Decodes a CELT-encoded Opus audio frame.
///
/// Dequantises the MDCT coefficients and applies the IMDCT with overlap-add to
/// reconstruct the time-domain signal.
///
/// # Arguments
/// - `frame` – A CELT frame produced by [`celt_encode_frame`].
/// - `sample_rate` – Sample rate for the returned audio.
///
/// # Errors
/// Returns [`crate::AudioSampleError`] if the IMDCT reconstruction fails.
///
/// # Returns
/// A `Vec<f32>` of `frame.n_samples` reconstructed PCM samples.
pub fn celt_decode_frame(
frame: CeltEncodedFrame,
sample_rate: NonZeroU32,
) -> AudioSampleResult<Vec<f32>> {
let coefficients = dequantize(
frame.quantized.as_non_empty_slice(),
frame.n_coefficients,
frame.n_frames,
&frame.allocation,
);

let audio = reconstruct_signal(
&coefficients,
frame.n_coefficients,
frame.n_frames,
&frame.mdct_params,
Some(frame.n_samples),
sample_rate,
)?;

let ch = audio
.channels()
.next()
.expect("reconstruct_signal always returns mono");
Ok(ch
.as_slice()
.expect("mono channel is always contiguous")
.to_vec())
}
Loading