//! Synthetic round-trip tests covering bit depths and pathological //! content the real-audio corpus doesn't exercise. //! //! # Why these exist //! //! The `corpus.rs` suite is 16-bit PCM only (AMI + music). The spec //! permits any source in `|sample| ≤ 2²³ − 1`, so 8-bit, 20-bit, and //! 24-bit inputs are supported but untested by corpus data alone. //! Likewise, real audio rarely exhibits exact DC, exact full-scale, //! pure Nyquist, or cleanly bounded white noise — conditions the //! numerical-stability paths inside the encoder are expected to //! handle but which deserve explicit regression fences. //! //! Everything here is deterministic and integer-only: a 32-bit LFSR //! drives the noise cases, and fixed constants drive the pathological //! ones. No corpus files needed; the tests always run in CI. //! //! Frame size is 1024 (power-of-two so every partition order is //! available to the encoder search). Every test round-trips through //! `encode_frame`/`decode_frame` and asserts bit-exact recovery — the //! only acceptable outcome for a lossless codec. use lac::{decode_frame, encode_frame}; const FRAME_SIZE: usize = 1024; // ── LFSR noise generator ──────────────────────────────────────────────────── /// 32-bit Galois LFSR producing deterministic pseudo-random i32 values /// in `[-(2^{bit_depth-1} − 1), 2^{bit_depth-1} − 1]` — the arithmetic /// right shift would in principle include the extra negative value /// `-2^{bit_depth-1}` (from `i32::MIN >> shift`), but LAC's input /// contract (spec §1) excludes that value, so it's clamped out here. /// A fixed seed per call keeps tests reproducible across runs and /// platforms. fn lfsr_noise(n: usize, bit_depth: u8, seed: u32) -> Vec { assert!((1..=24).contains(&bit_depth)); // Non-zero seed: a zero state would lock the LFSR at zero. let mut state = if seed == 0 { 0xACE1_ACE1 } else { seed }; let shift = 32 - bit_depth as u32; // Contract upper bound for this bit depth: ±(2^(bit_depth-1) − 1). // At bit_depth=24 this matches LAC's input contract exactly; at // narrower depths it matches the symmetric-range PCM convention. let max: i32 = (1i32 << (bit_depth - 1)) - 1; (0..n) .map(|_| { // Maximal-length 32-bit Galois polynomial (tap mask 0x8020_0003). // Period 2^32 − 1 dwarfs any frame size this suite uses. let lsb = state & 1; state >>= 1; if lsb != 0 { state ^= 0x8020_0003; } // Sign-extend via `as i32`, then arithmetic-right-shift to // the requested bit depth. Clamp the asymmetric lower edge // up to match the symmetric contract. ((state as i32) >> shift).max(-max) }) .collect() } /// Encode every `FRAME_SIZE`-sample chunk of `samples`, decode, and /// assert exact recovery. Returns `(raw_bytes, encoded_bytes)` under /// the assumption that `bytes_per_sample` reflects the *source* PCM /// width the caller originally packed the signal into — so the /// reported ratio is comparable to what a user would measure when /// running LAC against a file of that depth. fn roundtrip(samples: &[i32], bytes_per_sample: usize) -> (usize, usize) { assert!(!samples.is_empty()); let mut raw = 0usize; let mut encoded_total = 0usize; for chunk in samples.chunks(FRAME_SIZE) { let encoded = encode_frame(chunk); let decoded = decode_frame(&encoded).expect("decode_frame rejected its own output"); assert_eq!( decoded, chunk, "round-trip mismatch on {}-sample frame", chunk.len() ); raw += chunk.len() * bytes_per_sample; encoded_total += encoded.len(); } (raw, encoded_total) } // ── Bit-depth coverage ────────────────────────────────────────────────────── // // The codec's input contract is `|sample| ≤ 2^23 − 1`, but the spec // emphasises that narrower sources (8/16/20-bit) "compress at the bit // cost of their actual values, not a 24-bit ceiling." These tests // verify that claim holds — round-trip is bit-exact at every width, and // the compressed size stays proportional to the source range, not // inflated to a 24-bit ceiling. #[test] fn roundtrip_8bit_noise() { // 8-bit PCM: samples in [-128, 127]. This is the narrowest format // LAC's spec mentions explicitly. Residuals are tiny, so the Rice // k-selection should land at very low k (often 0-2). let samples = lfsr_noise(4 * FRAME_SIZE, 8, 0x8ACE); let (raw, encoded) = roundtrip(&samples, 1); eprintln!( "roundtrip_8bit_noise raw={} encoded={} ratio={:.3}", raw, encoded, encoded as f64 / raw as f64, ); // White noise at 8-bit is incompressible in principle — LPC cannot // predict i.i.d. values, so the Rice coding essentially passes the // samples through. Ratio should be near 1.0; ceiling 1.5× absorbs // the fixed-header + per-partition-k overhead at small frames. assert!( encoded < raw * 3 / 2, "8-bit noise inflated by more than 50% (encoded={encoded}, raw={raw})" ); } #[test] fn roundtrip_16bit_noise() { let samples = lfsr_noise(4 * FRAME_SIZE, 16, 0x16AC); let (raw, encoded) = roundtrip(&samples, 2); eprintln!( "roundtrip_16bit_noise raw={} encoded={} ratio={:.3}", raw, encoded, encoded as f64 / raw as f64, ); // Same reasoning as the 8-bit case. Header overhead is proportionally // smaller at 16-bit, so the ceiling can be tighter (1.1×). assert!( encoded < raw * 11 / 10, "16-bit noise inflated by more than 10% (encoded={encoded}, raw={raw})" ); } #[test] fn roundtrip_20bit_noise() { // 20-bit PCM: studio-mastered material. Residual range is wider so // Rice k ends up in the middle of its domain (~18-19). let samples = lfsr_noise(4 * FRAME_SIZE, 20, 0x20AC); let (raw, encoded) = roundtrip(&samples, 3); eprintln!( "roundtrip_20bit_noise raw={} encoded={} ratio={:.3}", raw, encoded, encoded as f64 / raw as f64, ); // 3 bytes packs 24 bits for a 20-bit source, so ratio below ~1.0 // implies the codec is honouring the source width rather than // charging 24-bit-ceiling rates. assert!( encoded < raw, "20-bit noise inflated past raw size (encoded={encoded}, raw={raw})" ); } #[test] fn roundtrip_24bit_noise() { let samples = lfsr_noise(4 * FRAME_SIZE, 24, 0x24AC); let (raw, encoded) = roundtrip(&samples, 3); eprintln!( "roundtrip_24bit_noise raw={} encoded={} ratio={:.3}", raw, encoded, encoded as f64 / raw as f64, ); assert!( encoded < raw * 11 / 10, "24-bit noise inflated by more than 10% (encoded={encoded}, raw={raw})" ); } #[test] fn roundtrip_24bit_full_scale() { // Every sample at the 24-bit ceiling. Exercises the autocorrelation // accumulator's worst case — `R[0] = N · (2^23 − 1)^2 ≈ 2^46` for a // 1024-sample frame, comfortably inside i64 but worth a regression // fence to catch a future narrowing to i32. let samples = vec![(1 << 23) - 1; 4 * FRAME_SIZE]; let (_raw, encoded) = roundtrip(&samples, 3); eprintln!("roundtrip_24bit_full_scale encoded={}", encoded); } // ── Pathological content ──────────────────────────────────────────────────── #[test] fn roundtrip_all_zeros() { // Degenerate case called out by the spec: prediction_order MUST be 0 // because Levinson-Durbin is undefined at R[0] = 0. This test is a // regression fence on the encoder's order-0 short-circuit. let samples = vec![0i32; 4 * FRAME_SIZE]; let (raw, encoded) = roundtrip(&samples, 2); eprintln!( "roundtrip_all_zeros raw={} encoded={} ratio={:.3}", raw, encoded, encoded as f64 / raw as f64, ); // All-zero frames compress to ~header + one bit per sample // (k=0 unary terminator). At 1024-sample frames the fixed 7-byte // header is still a visible fraction of the output. Measured ratio // is ~0.066; ceiling 0.15 absorbs header-overhead variance at other // frame sizes and keeps a ~2× regression budget. assert!( encoded < raw * 3 / 20, "all-zero frame compressed poorly (encoded={encoded}, raw={raw})" ); } #[test] fn roundtrip_dc_offset() { // Constant non-zero sample — `R[0] > 0` but all autocorrelation // lags are equal, so the LPC model captures the signal perfectly // with order 1 (coefficient = 1.0). Residuals are zero after the // warm-up sample. let samples = vec![12_345i32; 4 * FRAME_SIZE]; let (raw, encoded) = roundtrip(&samples, 2); eprintln!( "roundtrip_dc_offset raw={} encoded={} ratio={:.3}", raw, encoded, encoded as f64 / raw as f64, ); // Measured ratio is ~0.097: header + one big warm-up residual for // the DC level + unary-zero tail. Ceiling 0.20 leaves ~2× regression // headroom without flaking on encoder-tuning changes that shift the // warm-up residual's Rice k by one. assert!( encoded < raw / 5, "DC-offset frame compressed poorly (encoded={encoded}, raw={raw})" ); } #[test] fn roundtrip_nyquist_square() { // Pure Nyquist: alternating +A, −A, +A, −A. An order-1 predictor // with coefficient −1 would give zero residuals, but the encoder's // sparse LPC grid starts at order 2 and the fixed-predictor // post-pass ships FLAC-style orders 1-4 whose coefficients do not // include the `a = −1` Nyquist match — so this signal is // structurally hard for LAC despite its regularity. The result is // that Nyquist compresses only modestly (~52% measured). // // Kept as a regression fence: a future encoder that extends the // grid or adds a Nyquist-aware fixed predictor would dramatically // improve this ratio, and the ceiling here shouldn't fight that; // meanwhile a regression that makes it *worse* than ~60% is real. let a = 1_000_000i32; let samples: Vec = (0..4 * FRAME_SIZE) .map(|i| if i & 1 == 0 { a } else { -a }) .collect(); let (raw, encoded) = roundtrip(&samples, 3); eprintln!( "roundtrip_nyquist_square raw={} encoded={} ratio={:.3}", raw, encoded, encoded as f64 / raw as f64, ); assert!( encoded < raw * 3 / 5, "Nyquist square compressed poorly (encoded={encoded}, raw={raw})" ); } #[test] fn roundtrip_silence_with_click() { // Zero everywhere except a single full-scale impulse partway through. // Exercises the case where one residual is enormous (effectively the // click amplitude itself, since predecessors are zero) while every // other residual is zero. The Rice k-search has to pick a k that // doesn't over-serve the impulse at the cost of the silence. let mut samples = vec![0i32; 4 * FRAME_SIZE]; samples[FRAME_SIZE / 2] = (1 << 22) - 1; let (_raw, encoded) = roundtrip(&samples, 2); eprintln!("roundtrip_silence_with_click encoded={}", encoded); } #[test] fn roundtrip_prime_frame_size() { // Prime frame size forces `partition_order = 0` — the Rice bitstream // has a single partition, and the encoder's partition search is // skipped entirely. Ensures the single-partition path is exercised // independently of the corpus tests (which all use power-of-two // frame sizes). 509 is the largest prime ≤ 512. let samples = lfsr_noise(509, 16, 0x509D); let encoded = encode_frame(&samples); let decoded = decode_frame(&encoded).expect("decode"); assert_eq!(decoded, samples, "prime-length frame round-trip mismatch"); }