lac/tests/corpus.rs
Kamal Tufekcic 7862cb1d9d
All checks were successful
CI / lint (push) Successful in 5s
CI / fuzz-regression (push) Successful in 14s
CI / build (push) Successful in 4s
CI / test (push) Successful in 6m54s
CI / publish (push) Successful in 8s
initial commit
Signed-off-by: Kamal Tufekcic <kamal@lo.sh>
2026-04-23 14:58:32 +03:00

351 lines
14 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Real-audio corpus tests: round-trip, compression ratio, FLAC comparison.
//!
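//! These are integration tests — run with
//! `cargo test --test corpus -- --nocapture` to see the printed ratio
//! numbers. A plain `cargo test --test corpus` still runs every assertion,
//! but the test harness captures the output of passing tests, so the
//! ratio lines and the "skipping" hints stay hidden.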
//!
//! Each test is gated by the presence of its corpus file so the suite still
//! passes on a clean checkout without the WAV data.
//!
//! # What we measure
//!
//! - **Round-trip**: encode every frame of the input, decode back, verify
//! sample-for-sample equality. Failure here means the codec is broken.
//! - **Compression ratio**: `encoded_size / raw_size` for the full file,
//! framed at a realistic frame size. Ratios below 0.6 are "good" for a
//! lossless codec; ratios above 0.8 suggest something is wrong with the
//! LPC or Rice tuning.
//! - **FLAC comparison**: invoke `flac` CLI on the same input, compare
//! compressed sizes. We expect LAC to be within 10-25% of FLAC size on
//! most content; consistently much worse indicates a real codec gap (the
//! Q15 coefficient-clamping limitation, most likely).
//! - **LAC encode wall-clock**: printed as `lac_enc_ms`. Not asserted —
//! CI hardware variance makes any ceiling either useless or flaky. The
//! number is visibility-only, meant to be correlated against
//! `bench/compare-flac.sh` output for an engineer-side speed sanity
//! check. Only the encode hot path is timed (decode is excluded: at
//! microsecond scale, decode speed is dominated by allocator noise).
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{Duration, Instant};
use hound::WavReader;
use lac::{decode_frame, encode_frame};
/// Directory that holds the corpus WAV files; `corpus_path` joins file
/// names onto it. The path is relative, so it resolves against the
/// working directory the test binary is run from.
const CORPUS_DIR: &str = "corpus";
/// Encode-side frame length. 4096 is FLAC's default blocksize at
/// compression levels 1-8 for `≤ 48 kHz` content and sits inside LAC's
/// own archival-default band (README "Offline / archival"). Using the
/// same block size on both sides makes the FLAC comparison
/// apples-to-apples: neither codec is charged for a block-size
/// mismatch with the other.
///
/// Every partition order `0..=7` divides 4096, so the encoder stays on
/// the dense search path throughout.
const FRAME_SIZE: usize = 4096;
// ── WAV loading ─────────────────────────────────────────────────────────────
/// Read an integer-PCM WAV file and split it into one `Vec<i32>` per
/// channel.
///
/// Returns `None` when the file cannot be opened, when its sample format
/// is not integer PCM, when it declares more than 24 bits per sample, or
/// when any individual sample fails to read.
///
/// Sample values are stored exactly as the reader yields them — nothing
/// is shifted or rescaled here, so 16-bit material keeps its 16-bit
/// magnitudes inside the `i32` carrier. An earlier loader left-shifted
/// 16-bit input up to 24 bits; because Rice coding tracks residual
/// magnitude, that 256× gain cost 8 extra bits per residual and inflated
/// the output accordingly, so the promotion was dropped.
fn load_wav_channels(path: &Path) -> Option<Vec<Vec<i32>>> {
    let mut reader = WavReader::open(path).ok()?;
    let spec = reader.spec();

    // LAC needs `|sample|` to fit in 24 bits (the ceiling used by its
    // autocorrelation overflow analysis). The corpus is 16- or 24-bit
    // integer PCM in practice, so both rejections are defensive.
    let is_integer_pcm = spec.sample_format == hound::SampleFormat::Int;
    if !is_integer_pcm || spec.bits_per_sample > 24 {
        return None;
    }

    let channel_count = spec.channels as usize;
    let mut channels = vec![Vec::new(); channel_count];

    // The reader yields samples interleaved, so sample `index` belongs to
    // channel `index % channel_count`.
    for (index, sample) in reader.samples::<i32>().enumerate() {
        channels[index % channel_count].push(sample.ok()?);
    }
    Some(channels)
}
fn corpus_path(name: &str) -> PathBuf {
Path::new(CORPUS_DIR).join(name)
}
/// Skip the calling test when its corpus WAV file is absent, so the suite
/// stays green on a clean checkout.
///
/// Expands to an early `return;`, so it can only be used inside a
/// function that returns `()`. When the file is missing, a one-line hint
/// is written to stderr so the operator knows why the test did nothing.
///
/// The path expression is evaluated exactly once and only borrowed, so
/// the caller can keep using the path afterwards and an argument with
/// side effects (or an expensive one) is not run twice.
macro_rules! require_corpus {
    ($path:expr) => {{
        // Bind once: the expression is needed both for the existence
        // check and for the hint message.
        let corpus_file = &$path;
        if !corpus_file.exists() {
            eprintln!("skipping: corpus file not found: {}", corpus_file.display());
            return;
        }
    }};
}
// ── Per-frame round-trip harness ────────────────────────────────────────────
/// Running totals for one measured input: how many bytes went in, how
/// many came out, and how long the encoder spent producing them.
///
/// Only encode time is tracked. Decode time is deliberately left out —
/// it is reported to be an order of magnitude smaller than encode time
/// across the corpus, so the encode side is the figure of interest.
struct Measurement {
    /// Size of the unencoded samples, in bytes.
    raw_bytes: usize,
    /// Size of the encoded output, in bytes.
    encoded_bytes: usize,
    /// Accumulated wall-clock time spent encoding.
    encode_time: Duration,
}

impl Measurement {
    /// An empty measurement: zero bytes either way, zero elapsed time.
    fn new() -> Self {
        Measurement {
            raw_bytes: 0,
            encoded_bytes: 0,
            encode_time: Duration::ZERO,
        }
    }

    /// Accumulate `other` into `self`, field by field.
    fn add(&mut self, other: &Measurement) {
        // Destructure exhaustively so a newly added field cannot be
        // forgotten here without a compile error.
        let &Measurement {
            raw_bytes,
            encoded_bytes,
            encode_time,
        } = other;
        self.raw_bytes += raw_bytes;
        self.encoded_bytes += encoded_bytes;
        self.encode_time += encode_time;
    }
}
/// Encode every `FRAME_SIZE`-sample chunk of `channel`, decode, assert
/// equality. The trailing partial chunk (if any) is encoded at whatever
/// partition_order divides its length; the encoder's search handles that
/// automatically. `encode_time` captures only the encode hot path —
/// decode, allocation of the returned `Vec`, and the round-trip assert
/// are excluded so the number is directly comparable to `flac`'s wall
/// clock at the same input.
fn roundtrip_channel(channel: &[i32], bytes_per_sample: usize) -> Measurement {
let mut m = Measurement::new();
for chunk in channel.chunks(FRAME_SIZE) {
let t = Instant::now();
let encoded = encode_frame(chunk);
m.encode_time += t.elapsed();
let decoded = decode_frame(&encoded).expect("decode_frame failed on own output");
assert_eq!(
decoded,
chunk,
"round-trip mismatch in frame of {} samples",
chunk.len()
);
m.raw_bytes += chunk.len() * bytes_per_sample;
m.encoded_bytes += encoded.len();
}
m
}
/// Round-trip every channel of the WAV file at `path` and return the
/// summed `Measurement`.
///
/// `raw_bytes` is computed from the file's own sample width, so the
/// resulting ratio matches the size comparison a user would make against
/// the same file compressed with FLAC.
///
/// # Panics
///
/// Panics if the file cannot be opened, if `load_wav_channels` rejects
/// it, or if any frame fails its round trip.
fn roundtrip_wav(path: &Path) -> Measurement {
    // The header is read separately just for the sample width; whether
    // the format is actually supported is decided by `load_wav_channels`
    // below, which fails the test otherwise.
    let bits = WavReader::open(path)
        .expect("open for spec")
        .spec()
        .bits_per_sample;
    // Whole bytes per sample, rounding up: 16 → 2, 20 → 3, 24 → 3.
    let bytes_per_sample = bits.div_ceil(8) as usize;

    let channels = load_wav_channels(path).expect("load_wav_channels failed");
    channels
        .iter()
        .fold(Measurement::new(), |mut totals, samples| {
            totals.add(&roundtrip_channel(samples, bytes_per_sample));
            totals
        })
}
// ── FLAC comparison ─────────────────────────────────────────────────────────
/// Compress `path` with the `flac` command-line tool and return the size
/// of the resulting stream in bytes.
///
/// Returns `None` when the tool cannot be launched (for example because
/// it is not installed) or when it exits with a non-zero status.
fn flac_compress_size(path: &Path) -> Option<usize> {
    // `flac --stdout --silent --best <file>` streams the compressed data
    // to stdout, so no output file is ever written. `-o -` would do the
    // same but is not universally supported across FLAC versions.
    let mut flac = Command::new("flac");
    flac.args(["--stdout", "--silent", "--best"]).arg(path);

    let output = flac.output().ok()?;
    output.status.success().then_some(output.stdout.len())
}
// ── Tests ───────────────────────────────────────────────────────────────────
/// Print a one-line size/speed report for one corpus file to stderr.
///
/// `name` labels the line, `m` supplies the raw size, LAC-encoded size
/// and encode time, and `flac_size` — when available — adds the FLAC
/// size and the LAC-to-FLAC comparison columns.
///
/// The whole line is formatted first and written with a single
/// `eprintln!`. Tests run on parallel threads, and emitting the line in
/// several separate writes would let another test's report land in the
/// middle of it when output is not captured.
fn report_ratio(name: &str, m: &Measurement, flac_size: Option<usize>) {
    let ratio = m.encoded_bytes as f64 / m.raw_bytes as f64;
    let enc_ms = m.encode_time.as_secs_f64() * 1000.0;
    let lac_columns = format!(
        "{name:40} raw={:>10} lac={:>10} ratio={ratio:.3} lac_enc_ms={enc_ms:>7.1}",
        m.raw_bytes, m.encoded_bytes,
    );

    let flac_columns = flac_size
        .map(|flac| {
            let flac_ratio = flac as f64 / m.raw_bytes as f64;
            // `lac_vs_flac` > 1.0 means LAC is bigger than FLAC. Anything
            // above ~1.3 on typical content points at the Q15-clamping
            // limitation and motivates adding the coefficient-shift field.
            let lac_vs_flac = m.encoded_bytes as f64 / flac as f64;
            format!(" flac={flac:>10} flac_ratio={flac_ratio:.3} lac/flac={lac_vs_flac:.3}")
        })
        .unwrap_or_default();

    eprintln!("{lac_columns}{flac_columns}");
}
// Music — solo piano from the Open Goldberg Variations project (Kimiko
// Ishizaka's recording of J.S. Bach's BWV 988, released CC0 by the
// project). 96 kHz / 24-bit / stereo studio masters — genuine lossless
// source, redistributable, covering three distinct pianistic content
// classes between them.
//
// This is the file-name prefix shared by the Goldberg tracks; each test
// appends its own ` - NN <title>.wav` suffix. The unusual punctuation
// presumably mirrors the file names as distributed — it has to match the
// files in the corpus directory exactly, or the tests skip themselves.
const GOLDBERG_PREFIX: &str =
"Kimiko Ishizaka - J.S. Bach- -Open- Goldberg Variations, BWV 988 (Piano)";
#[test]
fn bach_aria() {
    // The Aria: a slow, lyrical sarabande — long held notes over a gentle
    // bass. Sustained harmonics, few transients and smooth melodic motion
    // make it the best case for LPC among the piano tracks.
    // 300 s / 96 kHz / 24-bit stereo.
    let file_name = format!("{GOLDBERG_PREFIX} - 01 Aria.wav");
    let wav = corpus_path(&file_name);
    require_corpus!(wav);

    let measured = roundtrip_wav(&wav);
    report_ratio(
        "bach_aria (solo piano, tonal)",
        &measured,
        flac_compress_size(&wav),
    );

    // Measured ~0.483 at FRAME_SIZE=4096 on 96 kHz / 24-bit stereo; the
    // 0.503 ceiling leaves the ~2 pp regression budget used elsewhere.
    let ratio = measured.encoded_bytes as f64 / measured.raw_bytes as f64;
    assert!(
        ratio < 0.503,
        "bach_aria ratio {} exceeds regression ceiling 0.503",
        ratio
    );
}
#[test]
fn bach_variatio_4_fughetta() {
    // A short fugal variation: tight polyphonic counterpoint with
    // interleaved voices. Its fast melodic runs give Rice k-selection
    // richer residual statistics to cope with than the Aria does. At
    // ~68 s it is also the cheapest music test in the suite.
    let file_name = format!("{GOLDBERG_PREFIX} - 05 Variatio 4 a 1 Clav..wav");
    let wav = corpus_path(&file_name);
    require_corpus!(wav);

    let measured = roundtrip_wav(&wav);
    report_ratio(
        "bach_variatio_4 (fugal)",
        &measured,
        flac_compress_size(&wav),
    );

    // Measured ~0.514; the 0.534 ceiling leaves the ~2 pp regression budget.
    let ratio = measured.encoded_bytes as f64 / measured.raw_bytes as f64;
    assert!(
        ratio < 0.534,
        "bach_variatio_4 ratio {} exceeds regression ceiling 0.534",
        ratio
    );
}
#[test]
fn bach_variatio_16_ouverture() {
    // French-overture writing: dotted rhythms, strong attacks and
    // ornamented runs. This is the transient-heavy content class —
    // residual statistics change mid-frame whenever a run cuts across a
    // sustained harmony, which exercises the partition_order search.
    let file_name = format!("{GOLDBERG_PREFIX} - 17 Variatio 16 a 1 Clav. Ouverture.wav");
    let wav = corpus_path(&file_name);
    require_corpus!(wav);

    let measured = roundtrip_wav(&wav);
    report_ratio(
        "bach_variatio_16 (ouverture)",
        &measured,
        flac_compress_size(&wav),
    );

    // Measured ~0.512; the 0.532 ceiling leaves the ~2 pp regression budget.
    let ratio = measured.encoded_bytes as f64 / measured.raw_bytes as f64;
    assert!(
        ratio < 0.532,
        "bach_variatio_16 ratio {} exceeds regression ceiling 0.532",
        ratio
    );
}
// Speech — AMI meeting corpus. Clean headset mic gives near-ideal speech
// conditions; residuals should be near-Laplacian, which is exactly what
// Rice coding is optimal for. Expect the best ratios here.
#[test]
fn ami_headset_speech() {
    // Close-talking headset channel of AMI meeting ES2002a.
    let wav = corpus_path("ES2002a.Headset-0.wav");
    require_corpus!(wav);

    let measured = roundtrip_wav(&wav);
    report_ratio("ami_headset_speech", &measured, flac_compress_size(&wav));

    // Measured ~0.178 at FRAME_SIZE=4096; the 0.195 ceiling gives ~2 pp
    // of regression budget.
    let ratio = measured.encoded_bytes as f64 / measured.raw_bytes as f64;
    assert!(
        ratio < 0.195,
        "headset ratio {} exceeds regression ceiling 0.195",
        ratio
    );
}
#[test]
fn ami_array_speech() {
    // Tabletop array microphone: distant speech coloured by the room's
    // acoustics. Harder to predict than the headset channel, which makes
    // it a useful stress case for LPC.
    let wav = corpus_path("ES2002a.Array1-01.wav");
    require_corpus!(wav);

    let measured = roundtrip_wav(&wav);
    report_ratio("ami_array_speech", &measured, flac_compress_size(&wav));

    // Measured ~0.375 at FRAME_SIZE=4096; the 0.395 ceiling gives ~2 pp
    // of regression budget.
    let ratio = measured.encoded_bytes as f64 / measured.raw_bytes as f64;
    assert!(
        ratio < 0.395,
        "array speech ratio {} exceeds regression ceiling 0.395",
        ratio
    );
}
#[test]
fn ami_mixed_meeting() {
    // Headset mix-down with several people talking at once — the highest
    // spectral complexity of any file in the corpus.
    let wav = corpus_path("ES2002a.Mix-Headset.wav");
    require_corpus!(wav);

    let measured = roundtrip_wav(&wav);
    report_ratio("ami_mixed_meeting", &measured, flac_compress_size(&wav));

    // Measured ~0.292 at FRAME_SIZE=4096; the 0.312 ceiling gives ~2 pp
    // of regression budget.
    let ratio = measured.encoded_bytes as f64 / measured.raw_bytes as f64;
    assert!(
        ratio < 0.312,
        "mixed meeting ratio {} exceeds regression ceiling 0.312",
        ratio
    );
}
// The sparse-vs-exhaustive encoder differential lives in
// `src/frame.rs::tests::sparse_vs_exhaustive_on_headset_speech` — it
// needs `encode_frame_with_grid`, which is `pub(crate)` rather than
// part of the semver surface. Same fixture file, same assertions;
// moved into the crate-private test module when the grid entry-point
// was demoted from `pub #[doc(hidden)]`.