lac/tests/synthetic.rs
Kamal Tufekcic 7862cb1d9d
All checks were successful
CI / lint (push) Successful in 5s
CI / fuzz-regression (push) Successful in 14s
CI / build (push) Successful in 4s
CI / test (push) Successful in 6m54s
CI / publish (push) Successful in 8s
initial commit
Signed-off-by: Kamal Tufekcic <kamal@lo.sh>
2026-04-23 14:58:32 +03:00

290 lines
12 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Synthetic round-trip tests covering bit depths and pathological
//! content the real-audio corpus doesn't exercise.
//!
//! # Why these exist
//!
//! The `corpus.rs` suite is 16-bit PCM only (AMI + music). The spec
//! permits any source in `|sample| ≤ 2²³ − 1`, so 8-bit, 20-bit, and
//! 24-bit inputs are supported but untested by corpus data alone.
//! Likewise, real audio rarely exhibits exact DC, exact full-scale,
//! pure Nyquist, or cleanly bounded white noise — conditions the
//! numerical-stability paths inside the encoder are expected to
//! handle but which deserve explicit regression fences.
//!
//! Everything here is deterministic and integer-only: a 32-bit LFSR
//! drives the noise cases, and fixed constants drive the pathological
//! ones. No corpus files needed; the tests always run in CI.
//!
//! Frame size is 1024 (power-of-two so every partition order is
//! available to the encoder search). Every test round-trips through
//! `encode_frame`/`decode_frame` and asserts bit-exact recovery — the
//! only acceptable outcome for a lossless codec.
use lac::{decode_frame, encode_frame};
const FRAME_SIZE: usize = 1024;
// ── LFSR noise generator ────────────────────────────────────────────────────
/// Deterministic pseudo-random sample source backed by a 32-bit Galois
/// LFSR.
///
/// Returns `n` values, each in the symmetric range
/// `[-(2^{bit_depth-1} − 1), 2^{bit_depth-1} − 1]`. The arithmetic right
/// shift used to narrow the register can also yield `-2^{bit_depth-1}`;
/// LAC's input contract (spec §1) excludes that value, so it is clamped
/// up to the symmetric lower bound.
///
/// A `seed` of zero is replaced by a fixed non-zero constant, because an
/// all-zero register would stay at zero forever. The same
/// `(n, bit_depth, seed)` triple always produces the same output.
///
/// NOTE(review): the register advances a single step per sample and the
/// sample is the register's top `bit_depth` bits, so consecutive samples
/// share most of their bits rather than being independent draws.
///
/// # Panics
///
/// Panics if `bit_depth` is outside `1..=24`.
fn lfsr_noise(n: usize, bit_depth: u8, seed: u32) -> Vec<i32> {
    // Maximal-length 32-bit Galois polynomial; its period of 2^32 − 1
    // dwarfs any frame size this suite uses.
    const TAP_MASK: u32 = 0x8020_0003;
    // Stand-in for a zero seed, which would lock the register at zero.
    const FALLBACK_SEED: u32 = 0xACE1_ACE1;

    assert!((1..=24).contains(&bit_depth));

    // Number of low bits discarded when narrowing to `bit_depth`.
    let drop_bits = 32 - u32::from(bit_depth);
    // Symmetric bounds ±(2^(bit_depth-1) − 1). At bit_depth = 24 this is
    // exactly LAC's input contract; at narrower depths it is the
    // symmetric-range PCM convention.
    let ceiling: i32 = (1i32 << (bit_depth - 1)) - 1;
    let floor = -ceiling;

    let mut reg = if seed != 0 { seed } else { FALLBACK_SEED };
    let mut out = Vec::with_capacity(n);
    for _ in 0..n {
        // One Galois step: shift right, fold the taps in when the bit
        // that fell off was set.
        let feedback = reg & 1 != 0;
        reg >>= 1;
        if feedback {
            reg ^= TAP_MASK;
        }
        // Reinterpret as signed so the shift sign-extends, then lift the
        // one out-of-contract negative value onto the lower bound.
        let sample = (reg as i32) >> drop_bits;
        out.push(if sample < floor { floor } else { sample });
    }
    out
}
/// Round-trips `samples` through the codec one frame at a time and
/// checks that every frame is recovered exactly.
///
/// The input is cut into chunks of at most `FRAME_SIZE` samples; each
/// chunk is passed to `encode_frame`, the result to `decode_frame`, and
/// the decoded samples are compared against the chunk.
///
/// Returns `(raw_bytes, encoded_bytes)`. `raw_bytes` is the sample count
/// multiplied by `bytes_per_sample`, which should be the width of the
/// *source* PCM format the signal nominally came from, so that
/// `encoded_bytes / raw_bytes` is comparable to what a user would
/// measure when running LAC on a file of that depth.
///
/// # Panics
///
/// Panics if `samples` is empty, if `decode_frame` rejects the encoder's
/// output, or if any decoded frame differs from its input.
fn roundtrip(samples: &[i32], bytes_per_sample: usize) -> (usize, usize) {
    assert!(!samples.is_empty());
    samples
        .chunks(FRAME_SIZE)
        .fold((0usize, 0usize), |(raw_bytes, coded_bytes), frame| {
            let packet = encode_frame(frame);
            let restored = decode_frame(&packet).expect("decode_frame rejected its own output");
            assert_eq!(
                restored,
                frame,
                "round-trip mismatch on {}-sample frame",
                frame.len()
            );
            (
                raw_bytes + frame.len() * bytes_per_sample,
                coded_bytes + packet.len(),
            )
        })
}
// ── Bit-depth coverage ──────────────────────────────────────────────────────
//
// The codec's input contract is `|sample| ≤ 2^23 − 1`, but the spec
// emphasises that narrower sources (8/16/20-bit) "compress at the bit
// cost of their actual values, not a 24-bit ceiling." These tests
// verify that claim holds — round-trip is bit-exact at every width, and
// the compressed size stays proportional to the source range, not
// inflated to a 24-bit ceiling.
/// 8-bit noise: the narrowest PCM width the LAC spec mentions explicitly.
///
/// The generator clamps to the symmetric range, so samples lie in
/// `[-127, 127]`. Residuals are narrow at this depth, so the Rice
/// parameter search should settle on a small `k` relative to the wider
/// depths.
#[test]
fn roundtrip_8bit_noise() {
    let samples = lfsr_noise(4 * FRAME_SIZE, 8, 0x8ACE);
    let (raw, encoded) = roundtrip(&samples, 1);
    let ratio = encoded as f64 / raw as f64;
    eprintln!(
        "roundtrip_8bit_noise raw={} encoded={} ratio={:.3}",
        raw, encoded, ratio,
    );
    // Noise gives LPC nothing useful to predict, so Rice coding more or
    // less passes the samples through and the ratio should sit near 1.0.
    // The 1.5× ceiling absorbs the fixed header and per-partition `k`
    // overhead, which is proportionally large at one byte per sample.
    let ceiling = raw * 3 / 2;
    assert!(
        encoded < ceiling,
        "8-bit noise inflated by more than 50% (encoded={encoded}, raw={raw})"
    );
}
/// 16-bit noise: same shape as the 8-bit case, at the width the corpus
/// suite uses.
#[test]
fn roundtrip_16bit_noise() {
    let samples = lfsr_noise(4 * FRAME_SIZE, 16, 0x16AC);
    let (raw, encoded) = roundtrip(&samples, 2);
    let ratio = encoded as f64 / raw as f64;
    eprintln!(
        "roundtrip_16bit_noise raw={} encoded={} ratio={:.3}",
        raw, encoded, ratio,
    );
    // Noise is expected to be near-incompressible here as well. Header
    // overhead is a smaller share of a two-byte sample, so the ceiling
    // is tightened to 1.1×.
    let ceiling = raw * 11 / 10;
    assert!(
        encoded < ceiling,
        "16-bit noise inflated by more than 10% (encoded={encoded}, raw={raw})"
    );
}
/// 20-bit noise, the width of studio-mastered material.
///
/// The residual range is wider than at 16-bit, so the Rice `k` is
/// expected to land around the middle of its domain (~18-19).
#[test]
fn roundtrip_20bit_noise() {
    let samples = lfsr_noise(4 * FRAME_SIZE, 20, 0x20AC);
    // A 20-bit source is conventionally packed into 3 bytes (24 bits).
    let (raw, encoded) = roundtrip(&samples, 3);
    let ratio = encoded as f64 / raw as f64;
    eprintln!(
        "roundtrip_20bit_noise raw={} encoded={} ratio={:.3}",
        raw, encoded, ratio,
    );
    // Because the container has 4 spare bits per sample, coming in under
    // the raw size means the codec is charging for the source's real
    // width rather than a 24-bit ceiling.
    assert!(
        encoded < raw,
        "20-bit noise inflated past raw size (encoded={encoded}, raw={raw})"
    );
}
/// 24-bit noise: the widest input the codec's contract allows.
#[test]
fn roundtrip_24bit_noise() {
    let samples = lfsr_noise(4 * FRAME_SIZE, 24, 0x24AC);
    let (raw, encoded) = roundtrip(&samples, 3);
    let ratio = encoded as f64 / raw as f64;
    eprintln!(
        "roundtrip_24bit_noise raw={} encoded={} ratio={:.3}",
        raw, encoded, ratio,
    );
    // Same 1.1× inflation budget as the 16-bit noise case.
    let ceiling = raw * 11 / 10;
    assert!(
        encoded < ceiling,
        "24-bit noise inflated by more than 10% (encoded={encoded}, raw={raw})"
    );
}
/// Every sample pinned at the 24-bit ceiling.
///
/// This is the worst case for the autocorrelation accumulator:
/// `R[0] = N · (2^23 − 1)^2 ≈ 2^56` for a 1024-sample frame. That is
/// comfortably inside i64, but the test is kept as a regression fence in
/// case the accumulator is ever narrowed to i32. Only bit-exact recovery
/// is checked; the encoded size is printed but not bounded.
#[test]
fn roundtrip_24bit_full_scale() {
    const CEILING_24BIT: i32 = (1 << 23) - 1;
    let samples = vec![CEILING_24BIT; 4 * FRAME_SIZE];
    let (_, encoded) = roundtrip(&samples, 3);
    eprintln!("roundtrip_24bit_full_scale encoded={}", encoded);
}
// ── Pathological content ────────────────────────────────────────────────────
/// Digital silence.
///
/// The spec calls this degenerate case out: `prediction_order` MUST be 0
/// because Levinson-Durbin is undefined at `R[0] = 0`. The test is a
/// regression fence on the encoder's order-0 short-circuit.
#[test]
fn roundtrip_all_zeros() {
    let samples = vec![0i32; 4 * FRAME_SIZE];
    let (raw, encoded) = roundtrip(&samples, 2);
    let ratio = encoded as f64 / raw as f64;
    eprintln!(
        "roundtrip_all_zeros raw={} encoded={} ratio={:.3}",
        raw, encoded, ratio,
    );
    // An all-zero frame should cost roughly the header plus one bit per
    // sample (the k=0 unary terminator). At 1024 samples the fixed
    // 7-byte header is still a visible share of the output. The measured
    // ratio is ~0.066; a ceiling of 0.15 tolerates header-overhead
    // variance at other frame sizes and leaves about a 2× regression
    // budget.
    let ceiling = raw * 3 / 20;
    assert!(
        encoded < ceiling,
        "all-zero frame compressed poorly (encoded={encoded}, raw={raw})"
    );
}
/// A constant, non-zero signal.
///
/// Here `R[0] > 0` but the signal never changes, so an order-1 LPC model
/// with coefficient 1.0 captures it perfectly and every residual after
/// the warm-up sample is zero.
#[test]
fn roundtrip_dc_offset() {
    const DC_LEVEL: i32 = 12_345;
    let samples = vec![DC_LEVEL; 4 * FRAME_SIZE];
    let (raw, encoded) = roundtrip(&samples, 2);
    let ratio = encoded as f64 / raw as f64;
    eprintln!(
        "roundtrip_dc_offset raw={} encoded={} ratio={:.3}",
        raw, encoded, ratio,
    );
    // The measured ratio is ~0.097: header, one large warm-up residual
    // carrying the DC level, then a tail of unary zeros. A ceiling of
    // 0.20 leaves about 2× regression headroom without flaking when
    // encoder tuning shifts the warm-up residual's Rice k by one.
    let ceiling = raw / 5;
    assert!(
        encoded < ceiling,
        "DC-offset frame compressed poorly (encoded={encoded}, raw={raw})"
    );
}
/// Pure Nyquist: the sequence +A, −A, +A, −A, …
///
/// An order-1 predictor with coefficient −1 would leave zero residuals.
/// However, the encoder's sparse LPC grid starts at order 2, and the
/// fixed-predictor post-pass ships FLAC-style orders 1-4 whose
/// coefficients do not include the `a = −1` Nyquist match. The signal is
/// therefore structurally hard for LAC despite its regularity, and it
/// compresses only modestly (~52% measured).
///
/// The test is a regression fence. A future encoder that extends the
/// grid or adds a Nyquist-aware fixed predictor would improve the ratio
/// dramatically, and the ceiling must not fight that; a regression that
/// pushes it *past* ~60% is a real one.
#[test]
fn roundtrip_nyquist_square() {
    let a = 1_000_000i32;
    // Even indices carry +a and odd indices −a.
    let samples: Vec<i32> = [a, -a]
        .iter()
        .copied()
        .cycle()
        .take(4 * FRAME_SIZE)
        .collect();
    let (raw, encoded) = roundtrip(&samples, 3);
    let ratio = encoded as f64 / raw as f64;
    eprintln!(
        "roundtrip_nyquist_square raw={} encoded={} ratio={:.3}",
        raw, encoded, ratio,
    );
    let ceiling = raw * 3 / 5;
    assert!(
        encoded < ceiling,
        "Nyquist square compressed poorly (encoded={encoded}, raw={raw})"
    );
}
/// Silence broken by one large impulse.
///
/// Every sample is zero except a single click of amplitude `2^22 − 1`
/// (half of the 24-bit ceiling) in the middle of the first frame. One
/// residual is enormous (effectively the click itself, since its
/// predecessors are zero) while the others are zero, so the Rice
/// k-search has to choose a `k` that does not over-serve the impulse at
/// the expense of the silence. Only bit-exact recovery is checked; the
/// encoded size is printed but not bounded.
#[test]
fn roundtrip_silence_with_click() {
    const CLICK: i32 = (1 << 22) - 1;
    let click_at = FRAME_SIZE / 2;
    let mut samples = vec![0i32; 4 * FRAME_SIZE];
    samples[click_at] = CLICK;
    let (_, encoded) = roundtrip(&samples, 2);
    eprintln!("roundtrip_silence_with_click encoded={}", encoded);
}
/// A single frame whose length is prime.
///
/// A prime frame size forces `partition_order = 0`: the Rice bitstream
/// holds one partition and the encoder's partition search is skipped
/// entirely. This covers the single-partition path independently of the
/// corpus tests, which all use power-of-two frame sizes.
#[test]
fn roundtrip_prime_frame_size() {
    // 509 is the largest prime not exceeding 512.
    const PRIME_LEN: usize = 509;
    let samples = lfsr_noise(PRIME_LEN, 16, 0x509D);
    // Encoded and decoded as one frame, without the chunking helper.
    let packet = encode_frame(&samples);
    let restored = decode_frame(&packet).expect("decode");
    assert_eq!(restored, samples, "prime-length frame round-trip mismatch");
}