Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ resampling = ["dep:rubato", "dep:rayon"]
vad = [] # Voice activity detection operations
plotting = ["dep:plotly", "dep:base64", "dep:serde_json"]
envelopes = ["dynamic-range", "editing", "random-generation"] # Envelope generation operations
dithering = ["random-generation"] # Dithering and noise-shaping operations
fixed-size-audio = [] # Support for fixed-size audio buffers (no heap allocation)

full = [
Expand All @@ -101,6 +102,7 @@ full = [
"iir-filtering",
"plotting",
"envelopes",
"dithering",
"random-generation",
]

Expand All @@ -122,6 +124,7 @@ full_no_plotting = [
"vad",
"iir-filtering",
"envelopes",
"dithering",
"random-generation",
]

Expand Down
6 changes: 6 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,12 @@ pub use crate::operations::traits::AudioDecomposition;
#[cfg(feature = "dynamic-range")]
pub use crate::operations::traits::AudioDynamicRange;

#[cfg(feature = "dithering")]
pub use crate::operations::traits::AudioDithering;

#[cfg(feature = "dithering")]
pub use crate::operations::types::NoiseShape;

#[cfg(feature = "iir-filtering")]
pub use crate::operations::traits::AudioIirFiltering;

Expand Down
235 changes: 235 additions & 0 deletions src/operations/dithering.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
//! Dithering and bit-depth reduction operations for [`AudioSamples`].
//!
//! This module implements the [`AudioDithering`] trait, providing TPDF dithering
//! with optional noise shaping and in-place bit-depth reduction (`requantize`).
//!
//! Dithering improves the perceived quality of audio that will be reduced to a
//! lower bit depth by replacing deterministic quantisation distortion with
//! spectrally controlled stochastic noise that is less audible. The recommended
//! workflow is to call [`dither`][AudioDithering::dither] immediately before
//! [`requantize`][AudioDithering::requantize].
//!
//! The dither noise is generated using the `rand` crate, which is enabled
//! transitively by the `dithering` feature flag via `random-generation`.
//!
//! # Noise amplitude
//!
//! For integer sample types (`u8`, `i16`, `I24`, `i32`) the noise amplitude is
//! set to 1 LSB of the native representation (i.e. 1 / MAX in the normalised
//! `[−1, 1]` domain). For floating-point types (`f32`, `f64`) a fixed
//! 24-bit-equivalent noise floor (≈ 2⁻²³) is used so that the dither is
//! effective when combined with a subsequent `requantize(≤24)` call but
//! inaudible at full float precision.
//!
//! # Noise shaping
//!
//! - [`NoiseShape::Flat`]: pure TPDF — two independent uniform random values are
//! subtracted to produce a triangular distribution.
//! - [`NoiseShape::FWeighted`]: first-order high-pass shaped TPDF — the raw noise
//! is filtered with the transfer function `H(z) = 1 − 0.5·z⁻¹`, concentrating
//! noise energy towards higher frequencies where the ear is less sensitive.
//! State is maintained per channel so that multi-channel audio is shaped
//! independently.
//!
//! [`AudioSamples`]: crate::AudioSamples

use crate::operations::traits::AudioDithering;
use crate::operations::types::NoiseShape;
use crate::repr::AudioData;
use crate::traits::StandardSample;
use crate::{AudioSampleError, AudioSampleResult, AudioSamples, ParameterError};

use ndarray::Axis;

impl<T> AudioDithering for AudioSamples<'_, T>
where
T: StandardSample,
{
fn dither(mut self, shape: NoiseShape) -> Self {
// Compute 1-LSB amplitude in the normalised [−1, 1] domain.
//
// For integer types the raw MAX cast to f64 gives the number of
// representable positive levels (e.g. 32 767 for i16), so one LSB
// normalised = 1 / MAX.
//
// For float types (f32::MAX or f64::MAX ≫ 1) the concept of "1 LSB"
// depends on the value; we use an approximation of 2⁻²³ (~24-bit
// noise floor) as a practical default.
let max_raw: f64 = T::MAX.cast_into();
let lsb_norm: f64 = if max_raw > 1.0 {
1.0 / max_raw
} else {
// ~24-bit equivalent for floating-point containers (2^-23)
1.192_093_0e-7_f64
};

match shape {
NoiseShape::Flat => {
// Pure TPDF: u1 − u2 gives triangular distribution on (−1, 1).
// Uses `mapv_inplace` for cache-friendly traversal.
self.data.mapv_inplace(|sample| {
let tpdf: f64 = rand::random::<f64>() - rand::random::<f64>();
let s: f64 = sample.convert_to();
T::convert_from(tpdf.mul_add(lsb_norm, s))
});
}
NoiseShape::FWeighted => {
// First-order high-pass shaped TPDF.
//
// Each channel maintains independent feedback state so that
// inter-channel correlation does not introduce spectral artefacts.
// Transfer function: H(z) = 1 − 0.5·z⁻¹ (single-pole HP).
match &mut self.data {
AudioData::Mono(arr) => {
let mut prev: f64 = 0.0;
for sample in arr.iter_mut() {
let tpdf: f64 = rand::random::<f64>() - rand::random::<f64>();
let shaped = 0.5_f64.mul_add(-prev, tpdf);
prev = tpdf;
let s: f64 = (*sample).convert_to();
*sample = T::convert_from(shaped.mul_add(lsb_norm, s));
}
}
AudioData::Multi(arr) => {
for mut channel in arr.axis_iter_mut(Axis(0)) {
let mut prev: f64 = 0.0;
for sample in &mut channel {
let tpdf: f64 = rand::random::<f64>() - rand::random::<f64>();
let shaped = 0.5_f64.mul_add(-prev, tpdf);
prev = tpdf;
let s: f64 = (*sample).convert_to();
*sample = T::convert_from(shaped.mul_add(lsb_norm, s));
}
}
}
}
}
}

self
}

/// Rounds every sample to the nearest multiple of `1 / 2^(bits − 1)` in the
/// normalised `f64` domain and converts it back to `T`.
///
/// # Arguments
///
/// * `bits` – target bit depth; must lie in `1..=32`.
///
/// # Errors
///
/// Returns [`AudioSampleError::Parameter`] with an out-of-range error when
/// `bits` is `0` or greater than `32`.
fn requantize(mut self, bits: u32) -> AudioSampleResult<Self> {
    if !(1..=32).contains(&bits) {
        return Err(AudioSampleError::Parameter(ParameterError::out_of_range(
            "bits",
            bits.to_string(),
            "1",
            "32",
            "bit depth must be in the range [1, 32]",
        )));
    }

    // Number of grid steps per unit of normalised amplitude. `bits − 1` is at
    // most 31, so the shift fits in a u32 and the conversion to f64 is exact.
    let steps_per_unit = f64::from(1_u32 << (bits - 1));

    // Per sample: T → f64, snap to the nearest grid index, rescale, f64 → T.
    self.data.mapv_inplace(|sample| {
        let normalised: f64 = sample.convert_to();
        let grid_index = (normalised * steps_per_unit).round();
        T::convert_from(grid_index / steps_per_unit)
    });

    Ok(self)
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::operations::types::NoiseShape;
    use crate::sample_rate;
    use ndarray::array;

    #[test]
    fn test_dither_flat_changes_samples() {
        // A constant buffer must not come back bit-identical: with 256 samples
        // the probability that no sample is perturbed is negligible.
        let data: ndarray::Array1<f32> = ndarray::Array1::from_elem(256, 0.5);
        let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap();
        let dithered = audio.dither(NoiseShape::Flat);
        let arr = dithered.as_mono().unwrap();
        assert!(arr.iter().any(|&s| (s - 0.5f32).abs() > 0.0));
    }

    #[test]
    fn test_dither_fweighted_changes_samples() {
        // Same check as the flat case, for the shaped-noise path.
        let data: ndarray::Array1<f32> = ndarray::Array1::from_elem(256, 0.5);
        let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap();
        let dithered = audio.dither(NoiseShape::FWeighted);
        let arr = dithered.as_mono().unwrap();
        assert!(arr.iter().any(|&s| (s - 0.5f32).abs() > 0.0));
    }

    #[test]
    fn test_dither_preserves_approximate_level() {
        // Dither noise is zero-mean and tiny, so the average level of a
        // constant signal must stay essentially where it was.
        let data: ndarray::Array1<f32> = ndarray::Array1::from_elem(1024, 0.5);
        let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap();
        let dithered = audio.dither(NoiseShape::Flat);
        let arr = dithered.as_mono().unwrap();
        let mean: f32 = arr.iter().sum::<f32>() / 1024.0;
        assert!((mean - 0.5).abs() < 0.001, "Mean shifted too much: {mean}");
    }

    #[test]
    fn test_requantize_8bit_exact() {
        // With 8 bits the grid step is 1/128, so ±0.5 (±64/128) and
        // ±0.25 (±32/128) are all exact grid points and must be unchanged.
        let data = array![0.5f32, -0.5, 0.25, -0.25];
        let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap();
        let rq = audio.requantize(8).unwrap();
        assert!((rq[0] - 0.5f32).abs() < 1e-5);
        assert!((rq[1] - (-0.5f32)).abs() < 1e-5);
        assert!((rq[2] - 0.25f32).abs() < 1e-5);
        assert!((rq[3] - (-0.25f32)).abs() < 1e-5);
    }

    #[test]
    fn test_requantize_reduces_precision() {
        // 0.501 * 128 = 64.128 → rounds to 64 → 64 / 128 = 0.5
        // -0.001 * 128 = -0.128 → rounds to 0 → 0.0
        let data = array![0.501f32, -0.001];
        let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap();
        let rq = audio.requantize(8).unwrap();
        // The tolerance must be far smaller than the distance between the
        // input and its grid point (0.001); otherwise the check would also
        // pass if `requantize` did nothing at all.
        assert!((rq[0] - 0.5f32).abs() < 1e-6, "got {}", rq[0]);
        assert!(rq[1].abs() < 1e-6, "got {}", rq[1]);
        // The off-grid input must actually have moved.
        assert!((rq[0] - 0.501f32).abs() > 1e-4);
    }

    #[test]
    fn test_requantize_error_bits_zero() {
        // A bit depth of 0 is below the accepted range.
        let data = array![0.5f32, -0.5];
        let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap();
        assert!(audio.requantize(0).is_err());
    }

    #[test]
    fn test_requantize_error_bits_too_large() {
        // A bit depth of 33 is above the accepted range.
        let data = array![0.5f32, -0.5];
        let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap();
        assert!(audio.requantize(33).is_err());
    }

    #[test]
    fn test_requantize_32bit_is_identity_for_float() {
        // With 32 bits the grid step is 1/2^31 ≈ 4.6e-10, which is finer than
        // the resolution of an f32 at these magnitudes, so snapping to the
        // grid must leave the value essentially unchanged.
        let data = array![0.123_456_789_f32, -0.987_654_3];
        let audio = AudioSamples::new_mono(data.clone(), sample_rate!(44100)).unwrap();
        let rq = audio.requantize(32).unwrap();
        assert!((rq[0] - data[0]).abs() < 1e-5);
        assert!((rq[1] - data[1]).abs() < 1e-5);
    }

    #[test]
    fn test_dither_multichannel() {
        // Flat dithering of multi-channel input must keep the audio
        // multi-channel (the per-channel noise itself is not inspected here).
        let data = ndarray::array![[0.5f32, 0.5, 0.5], [0.3f32, 0.3, 0.3]];
        let audio = AudioSamples::new_multi_channel(data.into(), sample_rate!(44100)).unwrap();
        let dithered = audio.dither(NoiseShape::Flat);
        assert!(dithered.is_multi_channel());
    }
}
6 changes: 6 additions & 0 deletions src/operations/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ pub mod beat;
#[cfg(feature = "channels")]
pub mod channels;

#[cfg(feature = "dithering")]
pub mod dithering;

#[cfg(feature = "dynamic-range")]
pub mod dynamic_range;

Expand Down Expand Up @@ -128,6 +131,9 @@ pub use traits::AudioPitchAnalysis;
#[cfg(feature = "dynamic-range")]
pub use traits::AudioDynamicRange;

#[cfg(feature = "dithering")]
pub use traits::AudioDithering;

#[cfg(feature = "transforms")]
pub use traits::AudioTransforms;

Expand Down
41 changes: 41 additions & 0 deletions src/operations/processing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1020,6 +1020,47 @@ mod tests {
use ndarray::array;

use crate::AudioProcessing;
use crate::AudioStatistics;

#[test]
fn test_normalize_to_dbfs() {
    // Input peak is 0.5; after normalising to a −6 dBFS target the reported
    // peak level must be −6 dBFS (linear ≈ 0.5012) within 0.01 dB.
    let samples = array![0.5f32, -0.25, 0.1];
    let signal = AudioSamples::new_mono(samples, sample_rate!(44100)).unwrap();
    let levelled = signal.normalize_to_dbfs(-6.0).unwrap();
    assert_approx_eq!(levelled.peak_dbfs(), -6.0, 0.01);
}

#[test]
fn test_normalize_to_dbfs_zero() {
    // A 0 dBFS target means the largest absolute sample ends up at 1.0.
    let samples = array![0.25f32, -0.5, 0.125];
    let signal = AudioSamples::new_mono(samples, sample_rate!(44100)).unwrap();
    let levelled = signal.normalize_to_dbfs(0.0).unwrap();
    assert_approx_eq!(levelled.peak() as f64, 1.0, 0.001);
}

#[test]
fn test_apply_gain_db_attenuation() {
    // −6 dB is a linear factor of 10^(−6/20) ≈ 0.5012, so a full-scale
    // signal must come out with that peak.
    let samples = array![1.0f32, -1.0];
    let signal = AudioSamples::new_mono(samples, sample_rate!(44100)).unwrap();
    let quieter = signal.apply_gain_db(-6.0);
    assert_approx_eq!(quieter.peak() as f64, 0.501_187_2, 0.0001);
}

#[test]
fn test_apply_gain_db_unity() {
    // 0 dB is a gain factor of 1, so every sample must come back unchanged.
    // The array is moved into the constructor: it is not needed afterwards,
    // so no clone is required.
    let data = array![0.5f32, -0.3, 0.1];
    let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap();
    let gained = audio.apply_gain_db(0.0);
    // One tolerance for all three samples; 1e-6 comfortably covers the
    // f32 → f64 representation error of 0.1 and −0.3 (about 1e-8).
    assert_approx_eq!(gained[0] as f64, 0.5, 1e-6);
    assert_approx_eq!(gained[1] as f64, -0.3, 1e-6);
    assert_approx_eq!(gained[2] as f64, 0.1, 1e-6);
}

#[test]
fn test_normalize_min_max() {
Expand Down
45 changes: 45 additions & 0 deletions src/operations/statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,51 @@ mod tests {
assert_eq!(audio.max_sample(), 4.0);
}

#[test]
fn test_peak_dbfs_full_scale() {
    // Largest absolute sample is 1.0, i.e. full scale → 0 dBFS.
    let samples = array![1.0f32, -1.0, 0.5];
    let signal = AudioSamples::new_mono(samples, sample_rate!(44100)).unwrap();
    assert_approx_eq!(signal.peak_dbfs(), 0.0, 1e-6);
}

#[test]
fn test_peak_dbfs_half_amplitude() {
    // Peak of 0.5 → 20 · log10(0.5) ≈ −6.0206 dBFS.
    let samples = array![0.5f32, -0.25];
    let signal = AudioSamples::new_mono(samples, sample_rate!(44100)).unwrap();
    assert_approx_eq!(signal.peak_dbfs(), -6.0206, 0.001);
}

#[test]
fn test_peak_dbfs_silence() {
    // An all-zero signal is expected to report the −80 dB floor.
    let samples = array![0.0f32, 0.0, 0.0];
    let signal = AudioSamples::new_mono(samples, sample_rate!(44100)).unwrap();
    assert_approx_eq!(signal.peak_dbfs(), -80.0, 1e-6);
}

#[test]
fn test_rms_dbfs_full_scale_square_wave() {
    // Every sample has magnitude 1.0, so the RMS is 1.0 → 0 dBFS.
    let samples = array![1.0f32, -1.0, 1.0, -1.0];
    let signal = AudioSamples::new_mono(samples, sample_rate!(44100)).unwrap();
    assert_approx_eq!(signal.rms_dbfs(), 0.0, 1e-6);
}

#[test]
fn test_rms_dbfs_sine_wave() {
    // Four-point sampling of a unit-amplitude sine: [1, 0, −1, 0].
    // RMS = sqrt((1 + 0 + 1 + 0) / 4) = sqrt(0.5) ≈ 0.7071
    // rms_dbfs = 20 · log10(sqrt(0.5)) = 10 · log10(0.5) ≈ −3.0103
    let data = array![1.0f32, 0.0, -1.0, 0.0];
    let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap();
    // Pin the derived value instead of accepting anything in (−10, 0) dB,
    // which would let a badly wrong RMS computation pass.
    assert_approx_eq!(audio.rms_dbfs(), -3.0103, 0.001);
}

#[test]
fn test_rms_computation() {
// Simple test case where we can verify RMS manually
Expand Down
Loading