initial commit
Signed-off-by: Kamal Tufekcic <kamal@lo.sh>
This commit is contained in:
commit
7862cb1d9d
2884 changed files with 16797 additions and 0 deletions
290
tests/synthetic.rs
Normal file
290
tests/synthetic.rs
Normal file
|
|
@ -0,0 +1,290 @@
|
|||
//! Synthetic round-trip tests covering bit depths and pathological
|
||||
//! content the real-audio corpus doesn't exercise.
|
||||
//!
|
||||
//! # Why these exist
|
||||
//!
|
||||
//! The `corpus.rs` suite is 16-bit PCM only (AMI + music). The spec
|
||||
//! permits any source in `|sample| ≤ 2²³ − 1`, so 8-bit, 20-bit, and
|
||||
//! 24-bit inputs are supported but untested by corpus data alone.
|
||||
//! Likewise, real audio rarely exhibits exact DC, exact full-scale,
|
||||
//! pure Nyquist, or cleanly bounded white noise — conditions the
|
||||
//! numerical-stability paths inside the encoder are expected to
|
||||
//! handle but which deserve explicit regression fences.
|
||||
//!
|
||||
//! Everything here is deterministic and integer-only: a 32-bit LFSR
|
||||
//! drives the noise cases, and fixed constants drive the pathological
|
||||
//! ones. No corpus files needed; the tests always run in CI.
|
||||
//!
|
||||
//! Frame size is 1024 (power-of-two so every partition order is
|
||||
//! available to the encoder search). Every test round-trips through
|
||||
//! `encode_frame`/`decode_frame` and asserts bit-exact recovery — the
|
||||
//! only acceptable outcome for a lossless codec.
|
||||
|
||||
use lac::{decode_frame, encode_frame};
|
||||
|
||||
/// Frame length in samples: `roundtrip` splits its input into chunks of this
/// size before encoding, and most tests below generate `4 * FRAME_SIZE` samples.
const FRAME_SIZE: usize = 1024;
|
||||
|
||||
// ── LFSR noise generator ────────────────────────────────────────────────────
|
||||
|
||||
/// Deterministic pseudo-random sample source built on a 32-bit Galois LFSR.
///
/// Produces `n` values, each inside the symmetric range
/// `[-(2^{bit_depth-1} − 1), 2^{bit_depth-1} − 1]`. The arithmetic right
/// shift alone could also yield `-2^{bit_depth-1}`; that one value is raised
/// to the symmetric lower bound because LAC's input contract (spec §1)
/// excludes it.
///
/// The register is stepped once per sample and the top `bit_depth` bits of
/// the state become the output, so consecutive samples share shifted
/// register bits rather than being statistically independent.
///
/// A `seed` of zero is swapped for a fixed non-zero constant. Identical
/// `(n, bit_depth, seed)` arguments always give the identical sequence,
/// which keeps the tests reproducible across runs and platforms.
///
/// # Panics
///
/// Panics if `bit_depth` is outside `1..=24`.
fn lfsr_noise(n: usize, bit_depth: u8, seed: u32) -> Vec<i32> {
    // Maximal-length 32-bit Galois tap mask; its period of 2^32 − 1 dwarfs
    // any frame size this suite uses.
    const TAPS: u32 = 0x8020_0003;
    // Stand-in for a zero seed, which would lock the register at zero.
    const FALLBACK_SEED: u32 = 0xACE1_ACE1;

    assert!((1..=24).contains(&bit_depth));

    // Number of low-order state bits discarded to reach the requested width.
    let drop_bits = 32 - u32::from(bit_depth);
    // Symmetric lower bound −(2^(bit_depth−1) − 1). At bit_depth = 24 this is
    // exactly LAC's input contract; at narrower depths it follows the
    // symmetric-range PCM convention.
    let floor: i32 = -((1i32 << (bit_depth - 1)) - 1);

    let mut reg = match seed {
        0 => FALLBACK_SEED,
        nonzero => nonzero,
    };

    let mut out = Vec::with_capacity(n);
    for _ in 0..n {
        // One Galois step: shift right, fold the taps in when a 1 fell out.
        let carry = reg & 1 != 0;
        reg >>= 1;
        if carry {
            reg ^= TAPS;
        }
        // Reinterpret the state as signed so the right shift is arithmetic,
        // then lift the single out-of-contract negative value to the floor.
        let sample = (reg as i32) >> drop_bits;
        out.push(sample.max(floor));
    }
    out
}
|
||||
|
||||
/// Encode every `FRAME_SIZE`-sample chunk of `samples`, decode, and
|
||||
/// assert exact recovery. Returns `(raw_bytes, encoded_bytes)` under
|
||||
/// the assumption that `bytes_per_sample` reflects the *source* PCM
|
||||
/// width the caller originally packed the signal into — so the
|
||||
/// reported ratio is comparable to what a user would measure when
|
||||
/// running LAC against a file of that depth.
|
||||
fn roundtrip(samples: &[i32], bytes_per_sample: usize) -> (usize, usize) {
|
||||
assert!(!samples.is_empty());
|
||||
let mut raw = 0usize;
|
||||
let mut encoded_total = 0usize;
|
||||
for chunk in samples.chunks(FRAME_SIZE) {
|
||||
let encoded = encode_frame(chunk);
|
||||
let decoded = decode_frame(&encoded).expect("decode_frame rejected its own output");
|
||||
assert_eq!(
|
||||
decoded,
|
||||
chunk,
|
||||
"round-trip mismatch on {}-sample frame",
|
||||
chunk.len()
|
||||
);
|
||||
raw += chunk.len() * bytes_per_sample;
|
||||
encoded_total += encoded.len();
|
||||
}
|
||||
(raw, encoded_total)
|
||||
}
|
||||
|
||||
// ── Bit-depth coverage ──────────────────────────────────────────────────────
|
||||
//
|
||||
// The codec's input contract is `|sample| ≤ 2^23 − 1`, but the spec
|
||||
// emphasises that narrower sources (8/16/20-bit) "compress at the bit
|
||||
// cost of their actual values, not a 24-bit ceiling." These tests
|
||||
// verify that claim holds — round-trip is bit-exact at every width, and
|
||||
// the compressed size stays proportional to the source range, not
|
||||
// inflated to a 24-bit ceiling.
|
||||
|
||||
#[test]
fn roundtrip_8bit_noise() {
    // 8-bit source, the narrowest format LAC's spec mentions explicitly.
    // `lfsr_noise` emits the symmetric range [-127, 127]; it never produces
    // -128, so that edge value is not exercised here. The raw size is
    // counted at one byte per sample.
    let samples = lfsr_noise(4 * FRAME_SIZE, 8, 0x8ACE);
    let (raw, encoded) = roundtrip(&samples, 1);
    let ratio = encoded as f64 / raw as f64;
    eprintln!(
        "roundtrip_8bit_noise raw={} encoded={} ratio={:.3}",
        raw, encoded, ratio,
    );
    // Noise spanning the full 8-bit range is expected to be essentially
    // incompressible: a linear predictor gains little, so the entropy coder
    // roughly passes the samples through and the ratio should sit near 1.0.
    // The 1.5× ceiling absorbs the fixed-header and per-partition overhead,
    // which is proportionally largest at this sample width.
    let ceiling = raw * 3 / 2;
    assert!(
        encoded < ceiling,
        "8-bit noise inflated by more than 50% (encoded={encoded}, raw={raw})"
    );
}
|
||||
|
||||
#[test]
fn roundtrip_16bit_noise() {
    // 16-bit noise, with the raw size counted at two bytes per sample.
    let samples = lfsr_noise(4 * FRAME_SIZE, 16, 0x16AC);
    let (raw, encoded) = roundtrip(&samples, 2);
    let ratio = encoded as f64 / raw as f64;
    eprintln!(
        "roundtrip_16bit_noise raw={} encoded={} ratio={:.3}",
        raw, encoded, ratio,
    );
    // Same reasoning as the 8-bit noise case, but the header overhead is a
    // smaller share of a 16-bit payload, so the ceiling tightens to 1.1×.
    let ceiling = raw * 11 / 10;
    assert!(
        encoded < ceiling,
        "16-bit noise inflated by more than 10% (encoded={encoded}, raw={raw})"
    );
}
|
||||
|
||||
#[test]
fn roundtrip_20bit_noise() {
    // 20-bit PCM (studio-mastered material). The residual range is wider
    // than at 16-bit, so the encoder is expected to settle on a
    // correspondingly larger Rice parameter.
    let samples = lfsr_noise(4 * FRAME_SIZE, 20, 0x20AC);
    let (raw, encoded) = roundtrip(&samples, 3);
    let ratio = encoded as f64 / raw as f64;
    eprintln!(
        "roundtrip_20bit_noise raw={} encoded={} ratio={:.3}",
        raw, encoded, ratio,
    );
    // The raw figure charges three bytes (24 bits) per 20-bit sample, so an
    // encoded size below raw implies the codec is honouring the actual
    // source width rather than charging 24-bit-ceiling rates.
    assert!(
        encoded < raw,
        "20-bit noise inflated past raw size (encoded={encoded}, raw={raw})"
    );
}
|
||||
|
||||
#[test]
fn roundtrip_24bit_noise() {
    // Noise across the full 24-bit contract range, with the raw size counted
    // at three bytes per sample. Allows at most 10% inflation over raw.
    let samples = lfsr_noise(4 * FRAME_SIZE, 24, 0x24AC);
    let (raw, encoded) = roundtrip(&samples, 3);
    let ratio = encoded as f64 / raw as f64;
    eprintln!(
        "roundtrip_24bit_noise raw={} encoded={} ratio={:.3}",
        raw, encoded, ratio,
    );
    let ceiling = raw * 11 / 10;
    assert!(
        encoded < ceiling,
        "24-bit noise inflated by more than 10% (encoded={encoded}, raw={raw})"
    );
}
|
||||
|
||||
#[test]
fn roundtrip_24bit_full_scale() {
    // Every sample sits at the positive 24-bit ceiling, 2^23 − 1. This is
    // the worst case for an autocorrelation accumulator: for a 1024-sample
    // frame, R[0] = N · (2^23 − 1)^2 ≈ 2^10 · 2^46 = 2^56. That still fits
    // in an i64 (max ≈ 2^63) but is far beyond i32, so the test acts as a
    // regression fence against a future narrowing of the accumulator.
    //
    // Only bit-exact recovery is checked (inside `roundtrip`); no size
    // ceiling is asserted and the encoded size is printed for reference.
    let ceiling_sample: i32 = (1i32 << 23) - 1;
    let samples = vec![ceiling_sample; 4 * FRAME_SIZE];
    let (_raw, encoded) = roundtrip(&samples, 3);
    eprintln!("roundtrip_24bit_full_scale encoded={}", encoded);
}
|
||||
|
||||
// ── Pathological content ────────────────────────────────────────────────────
|
||||
|
||||
#[test]
fn roundtrip_all_zeros() {
    // Degenerate input called out by the spec: prediction_order MUST be 0
    // because Levinson-Durbin is undefined when R[0] = 0. This is a
    // regression fence on the encoder's order-0 short-circuit.
    let samples = vec![0i32; 4 * FRAME_SIZE];
    let (raw, encoded) = roundtrip(&samples, 2);
    let ratio = encoded as f64 / raw as f64;
    eprintln!(
        "roundtrip_all_zeros raw={} encoded={} ratio={:.3}",
        raw, encoded, ratio,
    );
    // An all-zero frame should cost roughly the header plus one bit per
    // sample (the k=0 unary terminator). With 1024-sample frames the fixed
    // 7-byte header is still a visible share of the output. The measured
    // ratio is ~0.066; the 0.15 ceiling absorbs header-overhead variance at
    // other frame sizes while leaving about a 2× regression budget.
    let ceiling = raw * 3 / 20;
    assert!(
        encoded < ceiling,
        "all-zero frame compressed poorly (encoded={encoded}, raw={raw})"
    );
}
|
||||
|
||||
#[test]
fn roundtrip_dc_offset() {
    // A constant non-zero level: R[0] > 0, and an order-1 predictor with
    // coefficient 1.0 reproduces the signal exactly, so every residual
    // after the warm-up sample is expected to be zero.
    let dc_level = 12_345i32;
    let samples = vec![dc_level; 4 * FRAME_SIZE];
    let (raw, encoded) = roundtrip(&samples, 2);
    let ratio = encoded as f64 / raw as f64;
    eprintln!(
        "roundtrip_dc_offset raw={} encoded={} ratio={:.3}",
        raw, encoded, ratio,
    );
    // Measured ratio is ~0.097: header, one large warm-up residual carrying
    // the DC level, then a tail of unary zeros. The 0.20 ceiling leaves
    // about 2× regression headroom without flaking when encoder tuning
    // shifts the warm-up residual's Rice k by one.
    let ceiling = raw / 5;
    assert!(
        encoded < ceiling,
        "DC-offset frame compressed poorly (encoded={encoded}, raw={raw})"
    );
}
|
||||
|
||||
#[test]
fn roundtrip_nyquist_square() {
    // Pure Nyquist: +A, −A, +A, −A, … An order-1 predictor with coefficient
    // −1 would leave zero residuals, but the encoder's sparse LPC grid
    // starts at order 2 and its fixed-predictor post-pass ships FLAC-style
    // orders 1-4, none of which contain the `a = −1` Nyquist match. The
    // signal is therefore structurally hard for LAC despite being perfectly
    // regular, and compresses only modestly (~52% measured).
    //
    // Kept as a regression fence. An encoder that later extends the grid or
    // gains a Nyquist-aware fixed predictor would improve this ratio
    // dramatically, and the ceiling must not fight that; a change that
    // makes it *worse* than ~60% is a real regression.
    let a = 1_000_000i32;
    let samples: Vec<i32> = [a, -a]
        .iter()
        .copied()
        .cycle()
        .take(4 * FRAME_SIZE)
        .collect();
    let (raw, encoded) = roundtrip(&samples, 3);
    let ratio = encoded as f64 / raw as f64;
    eprintln!(
        "roundtrip_nyquist_square raw={} encoded={} ratio={:.3}",
        raw, encoded, ratio,
    );
    let ceiling = raw * 3 / 5;
    assert!(
        encoded < ceiling,
        "Nyquist square compressed poorly (encoded={encoded}, raw={raw})"
    );
}
|
||||
|
||||
#[test]
fn roundtrip_silence_with_click() {
    // Silence with one large positive impulse of 2^22 − 1, roughly half of
    // the 2^23 − 1 contract ceiling. The impulse sits at index
    // FRAME_SIZE / 2, i.e. in the middle of the first frame; the remaining
    // three frames are entirely zero.
    //
    // This exercises the case where a single residual is enormous
    // (effectively the click amplitude itself, since its predecessors are
    // zero) while every other residual is zero. The Rice k-search has to
    // pick a k that does not over-serve the impulse at the cost of the
    // silence.
    //
    // Only bit-exact recovery is checked (inside `roundtrip`). The raw byte
    // total is discarded; three bytes per sample is passed because the
    // impulse does not fit a 16-bit container.
    let mut samples = vec![0i32; 4 * FRAME_SIZE];
    samples[FRAME_SIZE / 2] = (1 << 22) - 1;
    let (_raw, encoded) = roundtrip(&samples, 3);
    eprintln!("roundtrip_silence_with_click encoded={}", encoded);
}
|
||||
|
||||
#[test]
fn roundtrip_prime_frame_size() {
    // A prime-length frame forces `partition_order = 0`: the Rice bitstream
    // has a single partition and the encoder's partition search is skipped
    // entirely. This exercises the single-partition path independently of
    // the corpus tests, which all use power-of-two frame sizes. 509 is the
    // largest prime ≤ 512.
    //
    // The frame goes straight through `encode_frame` / `decode_frame` as a
    // single unit rather than through the chunking helper.
    let frame = lfsr_noise(509, 16, 0x509D);
    let bitstream = encode_frame(&frame);
    let decoded = decode_frame(&bitstream).expect("decode");
    assert_eq!(decoded, frame, "prime-length frame round-trip mismatch");
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue