diff --git a/Cargo.toml b/Cargo.toml index d728138..7ffc726 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,6 +80,7 @@ resampling = ["dep:rubato", "dep:rayon"] vad = [] # Voice activity detection operations plotting = ["dep:plotly", "dep:base64", "dep:serde_json"] envelopes = ["dynamic-range", "editing", "random-generation"] # Envelope generation operations +dithering = ["random-generation"] # Dithering and noise-shaping operations fixed-size-audio = [] # Support for fixed-size audio buffers (no heap allocation) full = [ @@ -101,6 +102,7 @@ full = [ "iir-filtering", "plotting", "envelopes", + "dithering", "random-generation", ] @@ -122,6 +124,7 @@ full_no_plotting = [ "vad", "iir-filtering", "envelopes", + "dithering", "random-generation", ] diff --git a/src/lib.rs b/src/lib.rs index 982e186..4209549 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -370,6 +370,12 @@ pub use crate::operations::traits::AudioDecomposition; #[cfg(feature = "dynamic-range")] pub use crate::operations::traits::AudioDynamicRange; +#[cfg(feature = "dithering")] +pub use crate::operations::traits::AudioDithering; + +#[cfg(feature = "dithering")] +pub use crate::operations::types::NoiseShape; + #[cfg(feature = "iir-filtering")] pub use crate::operations::traits::AudioIirFiltering; diff --git a/src/operations/dithering.rs b/src/operations/dithering.rs new file mode 100644 index 0000000..d054d67 --- /dev/null +++ b/src/operations/dithering.rs @@ -0,0 +1,235 @@ +//! Dithering and bit-depth reduction operations for [`AudioSamples`]. +//! +//! This module implements the [`AudioDithering`] trait, providing TPDF dithering +//! with optional noise shaping and in-place bit-depth reduction (`requantize`). +//! +//! Dithering improves the perceived quality of audio that will be reduced to a +//! lower bit depth by replacing deterministic quantisation distortion with +//! spectrally controlled stochastic noise that is less audible. The recommended +//! 
workflow is to call [`dither`][AudioDithering::dither] immediately before +//! [`requantize`][AudioDithering::requantize]. +//! +//! The dither noise is generated using the `rand` crate, which is enabled +//! transitively by the `dithering` feature flag via `random-generation`. +//! +//! # Noise amplitude +//! +//! For integer sample types (`u8`, `i16`, `I24`, `i32`) the noise amplitude is +//! set to 1 LSB of the native representation (i.e. 1 / MAX in the normalised +//! `[−1, 1]` domain). For floating-point types (`f32`, `f64`) a fixed +//! 24-bit-equivalent noise floor (≈ 2⁻²³) is used, which is inaudible at full +//! float precision. +//! +//! The amplitude is fixed by the container type and is not scaled to the target +//! of a later `requantize` call. `requantize(bits)` snaps to a grid with step +//! `1 / 2^(bits−1)`, so the float amplitude equals exactly one grid step only +//! for `requantize(24)`. For coarser targets (e.g. `requantize(16)`, step +//! 2⁻¹⁵) the noise is far smaller than the quantisation step and should not +//! be expected to fully decorrelate the quantisation error. +//! +//! # Noise shaping +//! +//! - [`NoiseShape::Flat`]: pure TPDF — two independent uniform random values are +//! subtracted to produce a triangular distribution. +//! - [`NoiseShape::FWeighted`]: first-order high-pass shaped TPDF — the raw noise +//! is filtered with the transfer function `H(z) = 1 − 0.5·z⁻¹`, concentrating +//! noise energy towards higher frequencies where the ear is less sensitive. +//! State is maintained per channel so that multi-channel audio is shaped +//! independently. +//! +//! [`AudioSamples`]: crate::AudioSamples + +use crate::operations::traits::AudioDithering; +use crate::operations::types::NoiseShape; +use crate::repr::AudioData; +use crate::traits::StandardSample; +use crate::{AudioSampleError, AudioSampleResult, AudioSamples, ParameterError}; + +use ndarray::Axis; + +impl AudioDithering for AudioSamples<'_, T> +where + T: StandardSample, +{ + fn dither(mut self, shape: NoiseShape) -> Self { + // Compute 1-LSB amplitude in the normalised [−1, 1] domain. + // + // For integer types the raw MAX cast to f64 gives the number of + // representable positive levels (e.g. 32 767 for i16), so one LSB + // normalised = 1 / MAX. 
+ // + // For float types the concept of "1 LSB" depends on the value, so a fixed + // approximation of 2⁻²³ (~24-bit noise floor) is used as a practical + // default. That branch is only taken when `T::MAX` casts to a value + // ≤ 1.0; a type whose MAX casts to more than 1.0 takes the `1 / MAX` + // branch below. + // NOTE(review): presumably the float sample types define MAX as 1.0 + // rather than f32::MAX / f64::MAX — confirm against `StandardSample`. + let max_raw: f64 = T::MAX.cast_into(); + let lsb_norm: f64 = if max_raw > 1.0 { + 1.0 / max_raw + } else { + // ~24-bit equivalent for floating-point containers (2^-23) + 1.192_093_0e-7_f64 + }; + + match shape { + NoiseShape::Flat => { + // Pure TPDF: u1 − u2 gives triangular distribution on (−1, 1). + // Uses `mapv_inplace` for cache-friendly traversal. + self.data.mapv_inplace(|sample| { + let tpdf: f64 = rand::random::() - rand::random::(); + let s: f64 = sample.convert_to(); + T::convert_from(tpdf.mul_add(lsb_norm, s)) + }); + } + NoiseShape::FWeighted => { + // First-order high-pass shaped TPDF. + // + // `prev` is reset for each channel, so every channel is shaped + // independently of the others. + // Transfer function: H(z) = 1 − 0.5·z⁻¹ — a single-zero, + // non-recursive (FIR) high-pass: `prev` holds the previous raw + // TPDF value, not the previous shaped output. + match &mut self.data { + AudioData::Mono(arr) => { + let mut prev: f64 = 0.0; + for sample in arr.iter_mut() { + let tpdf: f64 = rand::random::() - rand::random::(); + let shaped = 0.5_f64.mul_add(-prev, tpdf); + prev = tpdf; + let s: f64 = (*sample).convert_to(); + *sample = T::convert_from(shaped.mul_add(lsb_norm, s)); + } + } + AudioData::Multi(arr) => { + for mut channel in arr.axis_iter_mut(Axis(0)) { + let mut prev: f64 = 0.0; + for sample in &mut channel { + let tpdf: f64 = rand::random::() - rand::random::(); + let shaped = 0.5_f64.mul_add(-prev, tpdf); + prev = tpdf; + let s: f64 = (*sample).convert_to(); + *sample = T::convert_from(shaped.mul_add(lsb_norm, s)); + } + } + } + } + } + } + + self + } + + fn requantize(mut self, bits: u32) -> AudioSampleResult { + if bits == 0 || bits > 32 { + return Err(AudioSampleError::Parameter(ParameterError::out_of_range( + "bits", + bits.to_string(), + "1", + "32", + "bit depth must be in the range [1, 32]", + ))); + } + + // Quantise each 
sample to a grid of 2^(bits−1) levels per polarity. + // + // All arithmetic is performed in the normalised f64 domain: + // 1. sample.convert_to() → f64 in [−1, 1] (or wider for float types) + // 2. snap to nearest grid point + // 3. T::convert_from(quantized_f64) → back to T + let levels = (1u64 << (bits - 1)) as f64; + self.data.mapv_inplace(|sample| { + let s: f64 = sample.convert_to(); + let quantized = (s * levels).round() / levels; + T::convert_from(quantized) + }); + + Ok(self) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::operations::types::NoiseShape; + use crate::sample_rate; + use ndarray::array; + + #[test] + fn test_dither_flat_changes_samples() { + // After flat dithering, samples should differ slightly from originals. + // With a large enough buffer the probability of no change is negligible. + let data: ndarray::Array1 = ndarray::Array1::from_elem(256, 0.5); + let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + let dithered = audio.dither(NoiseShape::Flat); + // At least one sample should have been perturbed; check via indexing + let arr = dithered.as_mono().unwrap(); + assert!(arr.iter().any(|&s| (s - 0.5f32).abs() > 0.0)); + } + + #[test] + fn test_dither_fweighted_changes_samples() { + let data: ndarray::Array1 = ndarray::Array1::from_elem(256, 0.5); + let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + let dithered = audio.dither(NoiseShape::FWeighted); + let arr = dithered.as_mono().unwrap(); + assert!(arr.iter().any(|&s| (s - 0.5f32).abs() > 0.0)); + } + + #[test] + fn test_dither_preserves_approximate_level() { + // Dithering should add only tiny noise; the mean should remain close. 
+ let data: ndarray::Array1 = ndarray::Array1::from_elem(1024, 0.5); + let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + let dithered = audio.dither(NoiseShape::Flat); + let arr = dithered.as_mono().unwrap(); + let mean: f32 = arr.iter().sum::() / 1024.0; + assert!((mean - 0.5).abs() < 0.001, "Mean shifted too much: {mean}"); + } + + #[test] + fn test_requantize_8bit_exact() { + // 0.5 * 128 = 64.0 → 64 / 128 = 0.5 (exact grid point) + let data = array![0.5f32, -0.5, 0.25, -0.25]; + let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + let rq = audio.requantize(8).unwrap(); + // 0.5 is a grid point for 8-bit quantization + assert!((rq[0] - 0.5f32).abs() < 1e-5); + // -0.5 is also a grid point + assert!((rq[1] - (-0.5f32)).abs() < 1e-5); + } + + #[test] + fn test_requantize_reduces_precision() { + // 0.501 should be rounded to the nearest 8-bit grid point ≈ 0.5 + let data = array![0.501f32, -0.001]; + let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + let rq = audio.requantize(8).unwrap(); + // 0.501 * 128 = 64.128 → round to 64 → /128 = 0.5 + assert!((rq[0] - 0.5f32).abs() < 0.01); + } + + #[test] + fn test_requantize_error_bits_zero() { + let data = array![0.5f32, -0.5]; + let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + assert!(audio.requantize(0).is_err()); + } + + #[test] + fn test_requantize_error_bits_too_large() { + let data = array![0.5f32, -0.5]; + let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + assert!(audio.requantize(33).is_err()); + } + + #[test] + fn test_requantize_32bit_is_identity_for_float() { + // 32-bit quantization of a float value should be essentially identity + // since float precision exceeds 32-bit integer precision + let data = array![0.123_456_789_f32, -0.987_654_3]; + let audio = AudioSamples::new_mono(data.clone(), sample_rate!(44100)).unwrap(); + let rq = audio.requantize(32).unwrap(); + // levels = 2^31, so step = 
1/2^31 ≈ 4.6e-10 — well below f32 precision + assert!((rq[0] - data[0]).abs() < 1e-5); + } + + #[test] + fn test_dither_multichannel() { + // Multi-channel audio: each channel should receive independent dithering. + let data = ndarray::array![[0.5f32, 0.5, 0.5], [0.3f32, 0.3, 0.3]]; + let audio = AudioSamples::new_multi_channel(data.into(), sample_rate!(44100)).unwrap(); + let dithered = audio.dither(NoiseShape::Flat); + // The audio should still be multi-channel with the same shape + assert!(dithered.is_multi_channel()); + } +} diff --git a/src/operations/mod.rs b/src/operations/mod.rs index 2048e17..2057cbd 100644 --- a/src/operations/mod.rs +++ b/src/operations/mod.rs @@ -55,6 +55,9 @@ pub mod beat; #[cfg(feature = "channels")] pub mod channels; +#[cfg(feature = "dithering")] +pub mod dithering; + #[cfg(feature = "dynamic-range")] pub mod dynamic_range; @@ -128,6 +131,9 @@ pub use traits::AudioPitchAnalysis; #[cfg(feature = "dynamic-range")] pub use traits::AudioDynamicRange; +#[cfg(feature = "dithering")] +pub use traits::AudioDithering; + #[cfg(feature = "transforms")] pub use traits::AudioTransforms; diff --git a/src/operations/processing.rs b/src/operations/processing.rs index ef8e355..2db6a4e 100644 --- a/src/operations/processing.rs +++ b/src/operations/processing.rs @@ -1020,6 +1020,47 @@ mod tests { use ndarray::array; use crate::AudioProcessing; + use crate::AudioStatistics; + + #[test] + fn test_normalize_to_dbfs() { + // A signal with peak 0.5 normalized to -6 dBFS should have peak ≈ 0.5012 + let data = array![0.5f32, -0.25, 0.1]; + let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + let normalized = audio.normalize_to_dbfs(-6.0).unwrap(); + // peak_dbfs should be approximately -6.0 + assert_approx_eq!(normalized.peak_dbfs(), -6.0, 0.01); + } + + #[test] + fn test_normalize_to_dbfs_zero() { + // Normalizing to 0 dBFS should give peak = 1.0 + let data = array![0.25f32, -0.5, 0.125]; + let audio = AudioSamples::new_mono(data, 
sample_rate!(44100)).unwrap(); + let normalized = audio.normalize_to_dbfs(0.0).unwrap(); + assert_approx_eq!(normalized.peak() as f64, 1.0, 0.001); + } + + #[test] + fn test_apply_gain_db_attenuation() { + // Applying -6 dB should multiply amplitude by ~0.5012 + let data = array![1.0f32, -1.0]; + let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + let attenuated = audio.apply_gain_db(-6.0); + // 10^(-6/20) ≈ 0.5012 + assert_approx_eq!(attenuated.peak() as f64, 0.501_187_2, 0.0001); + } + + #[test] + fn test_apply_gain_db_unity() { + // 0 dB gain should leave signal unchanged + let data = array![0.5f32, -0.3, 0.1]; + let audio = AudioSamples::new_mono(data.clone(), sample_rate!(44100)).unwrap(); + let gained = audio.apply_gain_db(0.0); + assert_approx_eq!(gained[0] as f64, 0.5, 1e-6); + assert_approx_eq!(gained[1] as f64, -0.3, 1e-6); + assert_approx_eq!(gained[2] as f64, 0.1, 1e-4); + } #[test] fn test_normalize_min_max() { diff --git a/src/operations/statistics.rs b/src/operations/statistics.rs index 2de4a32..48ffe5b 100644 --- a/src/operations/statistics.rs +++ b/src/operations/statistics.rs @@ -1102,6 +1102,51 @@ mod tests { assert_eq!(audio.max_sample(), 4.0); } + #[test] + fn test_peak_dbfs_full_scale() { + // A peak of 1.0 should give 0 dBFS + let data = array![1.0f32, -1.0, 0.5]; + let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + assert_approx_eq!(audio.peak_dbfs(), 0.0, 1e-6); + } + + #[test] + fn test_peak_dbfs_half_amplitude() { + // A peak of 0.5 should give approximately -6.02 dBFS + let data = array![0.5f32, -0.25]; + let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + // 20 * log10(0.5) ≈ -6.0206 + assert_approx_eq!(audio.peak_dbfs(), -6.020_600_0, 0.001); + } + + #[test] + fn test_peak_dbfs_silence() { + // A zero-amplitude signal should return the -80 dB floor + let data = array![0.0f32, 0.0, 0.0]; + let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + 
assert_approx_eq!(audio.peak_dbfs(), -80.0, 1e-6); + } + + #[test] + fn test_rms_dbfs_full_scale_square_wave() { + // A full-scale square wave has RMS = 1.0, so rms_dbfs should be 0.0 + let data = array![1.0f32, -1.0, 1.0, -1.0]; + let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + assert_approx_eq!(audio.rms_dbfs(), 0.0, 1e-6); + } + + #[test] + fn test_rms_dbfs_sine_wave() { + // A sine wave with amplitude A has RMS = A / sqrt(2) + // rms_dbfs = 20 * log10(A/sqrt(2)) ≈ 20 * log10(A) - 3.01 + let data = array![1.0f32, 0.0, -1.0, 0.0]; // approximates a sine wave + let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + let rms_dbfs = audio.rms_dbfs(); + // RMS of this signal is sqrt((1+0+1+0)/4) = sqrt(0.5) ≈ 0.7071 + // 20 * log10(0.7071) ≈ -3.01 + assert!(rms_dbfs < 0.0 && rms_dbfs > -10.0); + } + #[test] fn test_rms_computation() { // Simple test case where we can verify RMS manually diff --git a/src/operations/traits.rs b/src/operations/traits.rs index 9a27c25..4e9e7c3 100644 --- a/src/operations/traits.rs +++ b/src/operations/traits.rs @@ -21,6 +21,7 @@ //! | [`AudioOnsetDetection`] | `onset-detection` | Onset times, spectral flux, complex ODF | //! | [`AudioBeatTracking`] | `beat-tracking` | Tempo-aware beat detection | //! | [`AudioPlotting`](crate::operations::AudioPlotting) | `plotting` | Waveform, spectrogram, magnitude-spectrum plots | +//! | [`AudioDithering`] | `dithering` | TPDF dithering and bit-depth reduction | //! //! Grouping operations into separate traits keeps compile times low — only the code //! required for the enabled features is compiled — while providing a clean extension @@ -125,12 +126,15 @@ use crate::operations::{ use num_complex::Complex; // "Unused" imports below are required pretty much as soon as any of the traits are implemented, this is cleaner than a huge cfg(any(...)) block. 
#[allow(unused_imports)] -use crate::{AudioSampleResult, AudioSamples, AudioTypeConversion, StandardSample}; +use crate::{AudioSampleResult, AudioSamples, AudioTypeConversion, CastFrom, ConvertTo, StandardSample}; #[allow(unused_imports)] use non_empty_slice::{NonEmptySlice, NonEmptyVec}; #[allow(unused_imports)] use std::num::NonZeroUsize; +#[cfg(feature = "dithering")] +use crate::operations::types::NoiseShape; + /// Statistical analysis operations for audio data. /// /// # Purpose @@ -537,6 +541,58 @@ where /// ``` #[cfg(feature = "transforms")] fn spectral_rolloff(&self, rolloff_percent: f64) -> AudioSampleResult; + + /// Returns the peak level expressed as dBFS (decibels relative to full scale). + /// + /// Computed as `20 × log₁₀(peak)` where `peak` is the value returned by + /// [`AudioStatistics::peak`]. Zero or near-zero signals are clamped to a + /// −80 dB floor rather than producing `−∞`. + /// + /// # Returns + /// The peak level in dBFS. Values range from −80 (silence) to 0 (full scale). + /// + /// # Examples + /// ``` + /// use audio_samples::{AudioSamples, AudioStatistics, sample_rate}; + /// use ndarray::array; + /// + /// let data = array![1.0f32, -0.5, 0.25]; + /// let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + /// let peak_dbfs = audio.peak_dbfs(); + /// assert!((peak_dbfs - 0.0).abs() < 1e-6); // peak is 1.0 → 0 dBFS + /// ``` + #[inline] + fn peak_dbfs(&self) -> f64 { + let peak: f64 = self.peak().convert_to(); + crate::utils::audio_math::amplitude_to_db(peak.abs()) + } + + /// Returns the RMS level expressed as dBFS (decibels relative to full scale). + /// + /// Computed as `20 × log₁₀(rms)` where `rms` is the value returned by + /// [`AudioStatistics::rms`]. Zero RMS is clamped to a −80 dB floor. + /// + /// This is the natural dBFS companion to [`peak_dbfs`][Self::peak_dbfs]: + /// `peak_dbfs` captures the instantaneous maximum while `rms_dbfs` reflects + /// the average energy level. 
+ /// + /// # Returns + /// The RMS level in dBFS. + /// + /// # Examples + /// ``` + /// use audio_samples::{AudioSamples, AudioStatistics, sample_rate}; + /// use ndarray::array; + /// + /// let data = array![1.0f32, -1.0, 1.0, -1.0]; + /// let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap(); + /// let rms_dbfs = audio.rms_dbfs(); + /// assert!((rms_dbfs - 0.0).abs() < 1e-6); // RMS of full-scale square wave is 1.0 → 0 dBFS + /// ``` + #[inline] + fn rms_dbfs(&self) -> f64 { + crate::utils::audio_math::amplitude_to_db(self.rms()) + } } /// Voice Activity Detection (VAD) operations. @@ -1110,6 +1166,74 @@ where ratio: f64, quality: ResamplingQuality, ) -> AudioSampleResult>; + + /// Normalizes to a target level expressed as dBFS. + /// + /// Converts `target` from dBFS to a linear amplitude and delegates to + /// [`normalize`][Self::normalize] with a [`NormalizationConfig::peak`] configuration. + /// This is the ergonomic dBFS complement to the linear + /// `normalize(NormalizationConfig::peak(t))` call. + /// + /// # Arguments + /// - `target` — desired peak level in dBFS (e.g. `-3.0` for −3 dBFS). + /// Use `0.0` for full-scale normalization. + /// + /// # Returns + /// The normalized audio samples. + /// + /// # Errors + /// Propagates any error returned by [`normalize`][Self::normalize]. 
+ /// + /// # Examples + /// ``` + /// use audio_samples::{AudioSamples, AudioProcessing, AudioStatistics, sample_rate}; + /// use ndarray::array; + /// + /// let data = array![0.5f32, -0.25, 0.1]; + /// let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap() + /// .normalize_to_dbfs(-6.0) + /// .unwrap(); + /// // peak should be approximately -6 dBFS (≈ 0.501) + /// assert!((audio.peak_dbfs() - (-6.0)).abs() < 0.1); + /// ``` + #[inline] + fn normalize_to_dbfs(self, target: f64) -> AudioSampleResult { + let target_linear = crate::utils::audio_math::db_to_amplitude(target); + self.normalize(NormalizationConfig::peak( + Self::Sample::cast_from(target_linear), + )) + } + + /// Applies a gain adjustment expressed in decibels. + /// + /// Converts `db` to a linear scale factor (`10^(db/20)`) and delegates to + /// [`scale`][Self::scale]. This is the ergonomic dBFS complement to the linear + /// [`scale`][Self::scale] method. + /// + /// A gain of `0.0 dB` leaves the signal unchanged; negative values attenuate + /// and positive values amplify. + /// + /// # Arguments + /// - `db` — gain in decibels. + /// + /// # Returns + /// The gain-adjusted audio samples. + /// + /// # Examples + /// ``` + /// use audio_samples::{AudioSamples, AudioProcessing, sample_rate}; + /// use ndarray::array; + /// + /// let data = array![1.0f32, -1.0]; + /// let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap() + /// .apply_gain_db(-6.0); // attenuate by 6 dB + /// assert!((audio[0] - 0.501).abs() < 0.001); + /// ``` + #[must_use] + #[inline] + fn apply_gain_db(self, db: f64) -> Self { + self.scale(crate::utils::audio_math::db_to_amplitude(db)) + } } /// Frequency-domain analysis and spectral transformation operations. @@ -5108,3 +5232,94 @@ where hop_size: NonZeroUsize, ) -> NdResult; } + +/// Dithering and bit-depth reduction operations. 
+/// +/// # Purpose +/// +/// Provides noise-shaped dithering and in-container bit-depth reduction +/// for audio signals. Bit-depth reduction is lossy: samples are rounded to a +/// coarser grid. Dithering masks quantisation distortion by adding a small +/// amount of shaped noise before the quantisation step, trading deterministic +/// harmonic distortion for less audible stochastic noise. +/// +/// # Intended Usage +/// +/// Call [`dither`][Self::dither] first to add TPDF (or shaped) noise, then call +/// [`requantize`][Self::requantize] to reduce the effective bit depth while keeping +/// the same sample container type: +/// +/// ```rust,ignore +/// use audio_samples::operations::traits::AudioDithering; +/// use audio_samples::operations::types::NoiseShape; +/// +/// let dithered = audio +/// .dither(NoiseShape::Flat) +/// .requantize(16) +/// .unwrap(); +/// ``` +/// +/// This is distinct from [`AudioTypeConversion::to_type::()`] which rescales +/// the signal to the full integer range; `requantize` reduces precision while the +/// signal remains in its original container type. +/// +/// # Invariants +/// +/// - All methods take `self` by value and return the processed audio; the input +/// value is consumed rather than borrowed. +/// - `requantize` accepts bit depths in the range `[1, 32]`. +#[cfg(feature = "dithering")] +pub trait AudioDithering: AudioTypeConversion +where + Self::Sample: StandardSample, +{ + /// Applies TPDF dithering with the given noise shape. + /// + /// Adds a small amount of triangular-PDF noise to every sample in the signal. + /// In the `AudioSamples` implementation the noise amplitude is 1 LSB of the + /// native sample type for integer containers and a fixed ≈ 2⁻²³ for + /// floating-point containers. The amplitude is not scaled to the target depth + /// of a later [`requantize`][Self::requantize] call, so it matches that + /// quantiser's step only when the two coincide. + /// + /// # Arguments + /// - `shape` — spectral distribution of the added noise (see [`NoiseShape`]). + /// + /// # Returns + /// A new `AudioSamples` with dither noise mixed in. 
+ /// + /// # Examples + /// ```rust,ignore + /// use audio_samples::operations::traits::AudioDithering; + /// use audio_samples::operations::types::NoiseShape; + /// + /// let dithered = audio.dither(NoiseShape::Flat); + /// ``` + #[must_use] + fn dither(self, shape: NoiseShape) -> Self; + + /// Reduces the effective bit depth to `bits` while remaining in the same + /// container type. + /// + /// Each sample is quantised to a grid of `2^bits` evenly-spaced levels spanning + /// the full normalised range `[−1, 1]`. The sample is first converted to f64, + /// snapped to the nearest grid point, then converted back to `T`. + /// + /// This is distinct from [`AudioTypeConversion::to_type::()`] which + /// rescales to the full integer range; `requantize` reduces precision without + /// changing the container type. + /// + /// # Arguments + /// - `bits` — target bit depth. Must be in the range `[1, 32]`. + /// + /// # Returns + /// A new `AudioSamples` quantised to `bits` bit precision. + /// + /// # Errors + /// - [`crate::AudioSampleError::Parameter`] if `bits` is `0` or greater than `32`. + /// + /// # Examples + /// ```rust,ignore + /// use audio_samples::operations::traits::AudioDithering; + /// + /// // Reduce to 16-bit precision while keeping f32 container + /// let requantized = audio.requantize(16).unwrap(); + /// ``` + fn requantize(self, bits: u32) -> AudioSampleResult; +} diff --git a/src/operations/types.rs b/src/operations/types.rs index 88b48c2..271269d 100644 --- a/src/operations/types.rs +++ b/src/operations/types.rs @@ -4224,3 +4224,42 @@ impl Default for PerturbationConfig { }) } } + + +/// Noise shaping algorithm applied during dithering. +/// +/// Selects the spectral distribution of the dither noise added by +/// [`AudioDithering::dither`][crate::operations::traits::AudioDithering::dither]. 
+/// All variants use triangular-probability-density-function (TPDF) dither as the +/// noise source; the variant controls how that noise is spectrally shaped before it +/// is mixed into the signal. +/// +/// ## Intended Usage +/// +/// Pass a `NoiseShape` value to +/// [`AudioDithering::dither`][crate::operations::traits::AudioDithering::dither] +/// to choose between flat or perceptually optimized noise placement. +#[cfg(feature = "dithering")] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum NoiseShape { + /// Flat (white) TPDF dithering. + /// + /// Adds triangular noise with a flat power spectral density across all + /// frequencies. This is the simplest and most widely supported dither + /// variant and is appropriate when downstream processing will handle + /// perceptual weighting, or when the extra computation of noise shaping + /// is undesirable. + Flat, + + /// F-weighted noise shaping. + /// + /// Applies a first-order high-pass filter to the TPDF noise so that + /// noise energy is redistributed towards higher frequencies where the + /// human ear is less sensitive. The result is perceptually quieter + /// dither at the cost of increased noise power near Nyquist. + /// + /// This variant is only a coarse first-order stand-in for the classic + /// Wannamaker F-weighting filter: the implementation applies a single-zero, + /// non-recursive (FIR) filter `H(z) = 1 − 0.5·z⁻¹` to the raw TPDF noise. + FWeighted, +}