//! Encode/decode throughput benchmarks. //! //! Uses the nightly `test` crate harness (`#[bench]`), not criterion. Run //! with `cargo bench`. Results are wall-clock nanoseconds per iteration; to //! convert to samples-per-second, divide the frame sample count by the //! reported ns/iter and multiply by 10⁹. //! //! Representative sizes exercise the encoder's exhaustive search behaviour //! at different `partition_order` ceilings — 256 and 1024 are power-of-two //! frames (all seven partition orders available); 960 and 2880 mimic //! Opus/WebRTC frame sizes (only some partition orders divide evenly, so //! the inner search is sparser). #![feature(test)] extern crate test; use lac::{decode_frame, encode_frame}; use test::Bencher; // ── Synthetic-signal benches ──────────────────────────────────────────────── // // These benches drive the encoder without any WAV I/O overhead, so the // measurement is pure codec work. Four signal shapes cover the space: // // - `silence`: order-0 short-circuit path (skips Levinson entirely). // - `multi_sine`: maximally LPC-friendly — order search converges fast. // - `pseudo_speech`: AR(2) resonance on LFSR excitation, which is the // textbook model of the vocal tract. Residuals are near-Laplacian, // exercising the Rice k-search at realistic speech statistics. // - `filtered_noise`: LFSR through a biquad low-pass. Broad-spectrum // content with no strong tonal structure — the hard case for LPC and // a reasonable proxy for music-like workload. fn silence(n: usize) -> Vec { vec![0i32; n] } fn multi_sine(n: usize) -> Vec { // Three superimposed sinusoids with incommensurate frequencies, scaled // to the 24-bit range. Picks up realistic LPC workload without needing // to read a WAV file in the bench body. (0..n) .map(|i| { let t = i as f64; let a = (t * 0.11).sin() * 3_000_000.0; let b = (t * 0.27).sin() * 1_500_000.0; let c = (t * 0.43).sin() * 750_000.0; (a + b + c) as i32 }) .collect() } /// 32-bit Galois LFSR. Deterministic (seeded) pseudo-random i32 sequence /// in approximately `±2^19` — one-eighth of full 24-bit scale, chosen to /// leave headroom for AR(2) resonance gain without clipping. fn lfsr_noise(n: usize, seed: u32) -> Vec { // Non-zero seed required: the LFSR would otherwise lock at zero. let mut state = if seed == 0 { 0xACE1_ACE1 } else { seed }; (0..n) .map(|_| { // Maximal-length 32-bit Galois polynomial x^32 + x^22 + x^2 + x + 1 // (tap mask 0x8020_0003). Period = 2^32 − 1, which is comfortably // larger than any bench frame size. let lsb = state & 1; state >>= 1; if lsb != 0 { state ^= 0x8020_0003; } // Sign-extend via `as i32`, arithmetic shift narrows to ~±2^19. (state as i32) >> 12 }) .collect() } /// AR(2) pseudo-speech: LFSR excitation filtered through a single formant /// resonance at ~700 Hz / 16 kHz with bandwidth ~100 Hz. The pole pair /// gives speech-shaped spectral envelope; residuals are near-Laplacian, /// matching the statistical profile of real vowel segments. This is the /// content class LPC + Rice is designed for, so it stresses the inner /// encoder loops more realistically than multi_sine (which converges /// instantly) or white noise (which doesn't benefit from LPC). fn pseudo_speech(n: usize) -> Vec { // Q14 coefficients for a pole at r·e^{±jθ} with r = 0.9806, θ = 2π·700/16000. // a1 = 2r·cos(θ) · 2^14 ≈ 30933 // a2 = −r² · 2^14 ≈ −15751 // The implied real-valued poles live inside the unit circle so the // recursion is stable for unbounded input lengths. const A1_Q14: i64 = 30_933; const A2_Q14: i64 = -15_751; let excitation = lfsr_noise(n, 0x5EED); let mut out = Vec::with_capacity(n); let mut y1: i64 = 0; let mut y2: i64 = 0; for &e in &excitation { // Resonance gain at the peak is ~1/(1−r) ≈ 50, so y magnitudes // reach ~2^19 · 50 ≈ 2^25. i64 intermediates prevent the // multiply-accumulate from overflowing; the final clamp keeps // the output inside the codec's 24-bit input contract. let sum = A1_Q14 * y1 + A2_Q14 * y2; // Round-to-nearest for the Q14 → integer demotion. let ar = (sum + (1 << 13)) >> 14; let y = ar + e as i64; let clamped = y.clamp(-((1 << 23) - 1), (1 << 23) - 1); out.push(clamped as i32); y2 = y1; y1 = clamped; } out } /// Broadband low-passed noise — LFSR excitation through a simple /// single-pole IIR low-pass (pole at 0.9 in Q15). Covers the content /// class where LPC cannot predict efficiently: residuals retain most /// of the source entropy, so the Rice coder dominates the output bit /// budget. fn filtered_noise(n: usize) -> Vec { const POLE_Q15: i64 = 29_491; // round(0.9 · 2^15) // (1 − pole) in Q15 is the DC gain correction keeping output // magnitude near the excitation range. const ONE_MINUS_POLE_Q15: i64 = (1 << 15) - POLE_Q15; let excitation = lfsr_noise(n, 0xFEED); let mut out = Vec::with_capacity(n); let mut y: i64 = 0; for &e in &excitation { // y[n] = pole·y[n−1] + (1−pole)·x[n], all in Q15. let sum = POLE_Q15 * y + ONE_MINUS_POLE_Q15 * e as i64; y = (sum + (1 << 14)) >> 15; out.push(y.clamp(-((1 << 23) - 1), (1 << 23) - 1) as i32); } out } macro_rules! encode_bench { ($name:ident, $signal:ident, $size:expr) => { #[bench] fn $name(b: &mut Bencher) { let samples = $signal($size); b.iter(|| encode_frame(test::black_box(&samples))); } }; } macro_rules! decode_bench { ($name:ident, $signal:ident, $size:expr) => { #[bench] fn $name(b: &mut Bencher) { let samples = $signal($size); let encoded = encode_frame(&samples); b.iter(|| decode_frame(test::black_box(&encoded)).unwrap()); } }; } encode_bench!(encode_silence_960, silence, 960); encode_bench!(encode_silence_4096, silence, 4096); encode_bench!(encode_sine_256, multi_sine, 256); encode_bench!(encode_sine_960, multi_sine, 960); encode_bench!(encode_sine_1024, multi_sine, 1024); encode_bench!(encode_sine_2048, multi_sine, 2048); encode_bench!(encode_sine_2880, multi_sine, 2880); encode_bench!(encode_sine_4096, multi_sine, 4096); encode_bench!(encode_speech_320, pseudo_speech, 320); encode_bench!(encode_speech_960, pseudo_speech, 960); encode_bench!(encode_speech_2048, pseudo_speech, 2048); encode_bench!(encode_music_960, filtered_noise, 960); encode_bench!(encode_music_2048, filtered_noise, 2048); encode_bench!(encode_music_4096, filtered_noise, 4096); decode_bench!(decode_silence_4096, silence, 4096); decode_bench!(decode_sine_960, multi_sine, 960); decode_bench!(decode_sine_4096, multi_sine, 4096); decode_bench!(decode_speech_960, pseudo_speech, 960); decode_bench!(decode_music_2048, filtered_noise, 2048); // ── SIMD dot-product kernel isolation benches ─────────────────────────────── // // The encode benches above measure end-to-end encode cost, which is // dominated by per-frame allocations, Rice k-search, and the order // loop. When evaluating a SIMD kernel change these confound the signal. // These benches exercise just the kernel at realistic LPC orders so a // change to `compute_residuals` shows up directly. macro_rules! compute_residuals_bench { ($name:ident, $order:expr, $len:expr) => { #[bench] fn $name(b: &mut Bencher) { let samples = multi_sine($len); let coeffs: Vec = (0..$order) .map(|i| ((i as i16) * 711).wrapping_sub(100)) .collect(); b.iter(|| { lac::compute_residuals(test::black_box(&samples), test::black_box(&coeffs), 1) }); } }; } compute_residuals_bench!(compute_residuals_order_4_n320, 4, 320); compute_residuals_bench!(compute_residuals_order_8_n320, 8, 320); compute_residuals_bench!(compute_residuals_order_16_n320, 16, 320); compute_residuals_bench!(compute_residuals_order_32_n320, 32, 320); compute_residuals_bench!(compute_residuals_order_8_n960, 8, 960); compute_residuals_bench!(compute_residuals_order_32_n960, 32, 960); compute_residuals_bench!(compute_residuals_order_32_n4096, 32, 4096);