initial commit
Signed-off-by: Kamal Tufekcic <kamal@lo.sh>
This commit is contained in:
commit
7862cb1d9d
2884 changed files with 16797 additions and 0 deletions
351
tests/corpus.rs
Normal file
351
tests/corpus.rs
Normal file
|
|
@ -0,0 +1,351 @@
|
|||
//! Real-audio corpus tests: round-trip, compression ratio, FLAC comparison.
|
||||
//!
|
||||
//! These are integration tests — run them with `cargo test --test corpus`.
//! The ratio numbers are printed to stderr, which the test harness captures
//! for passing tests; use `cargo test --test corpus -- --nocapture` to see
//! them during development.
|
||||
//!
|
||||
//! Each test is gated by the presence of its corpus file so the suite still
|
||||
//! passes on a clean checkout without the WAV data.
|
||||
//!
|
||||
//! # What we measure
|
||||
//!
|
||||
//! - **Round-trip**: encode every frame of the input, decode back, verify
|
||||
//! sample-for-sample equality. Failure here means the codec is broken.
|
||||
//! - **Compression ratio**: `encoded_size / raw_size` for the full file,
|
||||
//! framed at a realistic frame size. Ratios below 0.6 are "good" for a
|
||||
//! lossless codec; ratios above 0.8 suggest something is wrong with the
|
||||
//! LPC or Rice tuning.
|
||||
//! - **FLAC comparison**: invoke `flac` CLI on the same input, compare
|
||||
//! compressed sizes. We expect LAC to be within 10-25% of FLAC size on
|
||||
//! most content; consistently much worse indicates a real codec gap (the
|
||||
//! Q15 coefficient-clamping limitation, most likely).
|
||||
//! - **LAC encode wall-clock**: printed as `lac_enc_ms`. Not asserted —
|
||||
//! CI hardware variance makes any ceiling either useless or flaky. The
|
||||
//! number is visibility-only, meant to be correlated against
|
||||
//! `bench/compare-flac.sh` output for an engineer-side speed sanity
|
||||
//! check. Only the encode hot path is timed (decode is excluded: at
|
||||
//! microsecond scale, decode speed is dominated by allocator noise).
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use hound::WavReader;
|
||||
use lac::{decode_frame, encode_frame};
|
||||
|
||||
const CORPUS_DIR: &str = "corpus";
|
||||
|
||||
/// Encode-side frame length. 4096 is FLAC's default blocksize at
|
||||
/// compression levels 3-8 for `≤ 48 kHz` content and sits inside LAC's
|
||||
/// own archival-default band (README "Offline / archival"). Using the
|
||||
/// same block size on both sides makes the FLAC comparison
|
||||
/// apples-to-apples: neither codec is charged for a block-size
|
||||
/// mismatch with the other.
|
||||
///
|
||||
/// Every partition order `0..=7` divides 4096, so the encoder stays on
|
||||
/// the dense search path throughout.
|
||||
const FRAME_SIZE: usize = 4096;
|
||||
|
||||
// ── WAV loading ─────────────────────────────────────────────────────────────
|
||||
|
||||
/// Load a WAV file as separated mono channels of i32 samples.
|
||||
///
|
||||
/// Samples are passed through at their native width — 16-bit values stay
|
||||
/// 16-bit, inside the i32 carrier. LAC's only hard constraint is that
|
||||
/// `|sample|` fits in 24 bits (ceiling for autocorrelation overflow
|
||||
/// analysis); narrower inputs compress according to their actual
|
||||
/// magnitudes.
|
||||
///
|
||||
/// The earlier version of this loader left-shifted to "promote" 16-bit to
|
||||
/// 24-bit. That was wrong: Rice coding tracks residual magnitude, so a
|
||||
/// 256× amplification costs 8 extra bits per residual and inflates the
|
||||
/// output size proportionally.
|
||||
fn load_wav_channels(path: &Path) -> Option<Vec<Vec<i32>>> {
|
||||
let mut reader = WavReader::open(path).ok()?;
|
||||
let spec = reader.spec();
|
||||
if spec.sample_format != hound::SampleFormat::Int {
|
||||
return None;
|
||||
}
|
||||
let ch = spec.channels as usize;
|
||||
|
||||
// Sanity check: reject inputs whose values won't fit in 24 bits. In
|
||||
// practice the corpus is 16-bit or 24-bit integer PCM so this is just
|
||||
// a defensive guard.
|
||||
if spec.bits_per_sample > 24 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut channels: Vec<Vec<i32>> = (0..ch).map(|_| Vec::new()).collect();
|
||||
for (i, s) in reader.samples::<i32>().enumerate() {
|
||||
let s = s.ok()?;
|
||||
channels[i % ch].push(s);
|
||||
}
|
||||
Some(channels)
|
||||
}
|
||||
|
||||
fn corpus_path(name: &str) -> PathBuf {
|
||||
Path::new(CORPUS_DIR).join(name)
|
||||
}
|
||||
|
||||
/// Skip a test if its WAV file isn't present, so the suite stays green on
|
||||
/// a clean checkout. Prints a one-line hint so the operator knows why.
|
||||
macro_rules! require_corpus {
    ($path:expr) => {{
        // Bind the argument once so an expression with side effects (or an
        // expensive one) is not evaluated a second time for the message.
        let corpus_file = &$path;
        if !corpus_file.exists() {
            eprintln!(
                "skipping: corpus file not found: {}",
                corpus_file.display()
            );
            // Returns from the *calling* function: the test ends early and
            // is reported as passed.
            return;
        }
    }};
}
|
||||
|
||||
// ── Per-frame round-trip harness ────────────────────────────────────────────
|
||||
|
||||
/// Aggregate per-file measurement: raw byte count, encoded byte count,
|
||||
/// and wall-clock time spent inside `encode_frame`. The decode time is
|
||||
/// not reported — it is an order of magnitude smaller than encode for
|
||||
/// every content class in the corpus, so the ratio of interest is
|
||||
/// encode-side.
|
||||
struct Measurement {
    /// Size of the uncompressed samples, in bytes.
    raw_bytes: usize,
    /// Size of the encoded output, in bytes.
    encoded_bytes: usize,
    /// Accumulated wall-clock time attributed to encoding.
    encode_time: Duration,
}

impl Measurement {
    /// An empty measurement: zero bytes on both sides and zero elapsed time.
    fn new() -> Self {
        let (raw_bytes, encoded_bytes) = (0, 0);
        Self {
            raw_bytes,
            encoded_bytes,
            encode_time: Duration::ZERO,
        }
    }

    /// Accumulate `other` into `self`, field by field.
    fn add(&mut self, other: &Measurement) {
        // Destructure exhaustively so a newly added field cannot be
        // silently left out of the sum.
        let Measurement {
            raw_bytes,
            encoded_bytes,
            encode_time,
        } = other;
        self.raw_bytes += *raw_bytes;
        self.encoded_bytes += *encoded_bytes;
        self.encode_time += *encode_time;
    }
}
|
||||
|
||||
/// Encode every `FRAME_SIZE`-sample chunk of `channel`, decode, assert
|
||||
/// equality. The trailing partial chunk (if any) is encoded at whatever
|
||||
/// partition_order divides its length; the encoder's search handles that
|
||||
/// automatically. `encode_time` captures only the encode hot path —
|
||||
/// decode, allocation of the returned `Vec`, and the round-trip assert
|
||||
/// are excluded so the number is directly comparable to `flac`'s wall
|
||||
/// clock at the same input.
|
||||
fn roundtrip_channel(channel: &[i32], bytes_per_sample: usize) -> Measurement {
|
||||
let mut m = Measurement::new();
|
||||
for chunk in channel.chunks(FRAME_SIZE) {
|
||||
let t = Instant::now();
|
||||
let encoded = encode_frame(chunk);
|
||||
m.encode_time += t.elapsed();
|
||||
let decoded = decode_frame(&encoded).expect("decode_frame failed on own output");
|
||||
assert_eq!(
|
||||
decoded,
|
||||
chunk,
|
||||
"round-trip mismatch in frame of {} samples",
|
||||
chunk.len()
|
||||
);
|
||||
m.raw_bytes += chunk.len() * bytes_per_sample;
|
||||
m.encoded_bytes += encoded.len();
|
||||
}
|
||||
m
|
||||
}
|
||||
|
||||
/// Aggregate round-trip over every channel in a WAV file. Returns a
|
||||
/// `Measurement` whose `raw_bytes` uses the WAV's actual sample width,
|
||||
/// so the ratio corresponds to the over-the-wire comparison a user
|
||||
/// would do against the same file encoded with FLAC.
|
||||
fn roundtrip_wav(path: &Path) -> Measurement {
|
||||
// Probe the spec once to know the bytes-per-sample for the raw-size
|
||||
// denominator. `load_wav_channels` filters unsupported formats, so the
|
||||
// spec read here is guaranteed to be a supported integer format.
|
||||
let spec = WavReader::open(path).expect("open for spec").spec();
|
||||
// Bits per sample rounds up to bytes: 16 → 2, 20 → 3, 24 → 3.
|
||||
let bytes_per_sample = spec.bits_per_sample.div_ceil(8) as usize;
|
||||
|
||||
let channels = load_wav_channels(path).expect("load_wav_channels failed");
|
||||
let mut totals = Measurement::new();
|
||||
for ch in &channels {
|
||||
let m = roundtrip_channel(ch, bytes_per_sample);
|
||||
totals.add(&m);
|
||||
}
|
||||
totals
|
||||
}
|
||||
|
||||
// ── FLAC comparison ─────────────────────────────────────────────────────────
|
||||
|
||||
/// Invoke the `flac` CLI to compress `path`, return the resulting byte
|
||||
/// count. Returns `None` if the FLAC tool isn't installed.
|
||||
fn flac_compress_size(path: &Path) -> Option<usize> {
    // The compressed stream is written to stdout and captured in memory, so
    // no output file is ever created. `--stdout` is used rather than
    // `-o -`, which is not universally supported across FLAC versions.
    let run = Command::new("flac")
        .args(["--stdout", "--silent", "--best"])
        .arg(path)
        .output();

    // A failed launch and a non-zero exit status both yield `None`.
    match run {
        Ok(out) if out.status.success() => Some(out.stdout.len()),
        _ => None,
    }
}
|
||||
|
||||
// ── Tests ───────────────────────────────────────────────────────────────────
|
||||
|
||||
fn report_ratio(name: &str, m: &Measurement, flac_size: Option<usize>) {
|
||||
let ratio = m.encoded_bytes as f64 / m.raw_bytes as f64;
|
||||
let enc_ms = m.encode_time.as_secs_f64() * 1000.0;
|
||||
eprint!(
|
||||
"{name:40} raw={:>10} lac={:>10} ratio={ratio:.3} lac_enc_ms={enc_ms:>7.1}",
|
||||
m.raw_bytes, m.encoded_bytes,
|
||||
);
|
||||
if let Some(flac) = flac_size {
|
||||
let flac_ratio = flac as f64 / m.raw_bytes as f64;
|
||||
// `lac_vs_flac` > 1.0 means LAC is bigger than FLAC. Anything
|
||||
// above ~1.3 on typical content points at the Q15-clamping
|
||||
// limitation and motivates adding the coefficient-shift field.
|
||||
let lac_vs_flac = m.encoded_bytes as f64 / flac as f64;
|
||||
eprint!(" flac={flac:>10} flac_ratio={flac_ratio:.3} lac/flac={lac_vs_flac:.3}");
|
||||
}
|
||||
eprintln!();
|
||||
}
|
||||
|
||||
// Music — solo piano from the Open Goldberg Variations project (Kimiko
|
||||
// Ishizaka's recording of J.S. Bach's BWV 988, released CC0 by the
|
||||
// project). 96 kHz / 24-bit / stereo studio masters — genuine lossless
|
||||
// source, redistributable, covering three distinct pianistic content
|
||||
// classes between them.
|
||||
|
||||
const GOLDBERG_PREFIX: &str =
|
||||
"Kimiko Ishizaka - J.S. Bach- -Open- Goldberg Variations, BWV 988 (Piano)";
|
||||
|
||||
#[test]
fn bach_aria() {
    // The Aria is a slow, lyrical sarabande: long held notes over a gentle
    // bass, i.e. sustained harmonics, few transients and smooth melodic
    // motion — the most LPC-friendly piano content in the corpus.
    // 300 s / 96 kHz / 24-bit stereo.
    let file_name = format!("{GOLDBERG_PREFIX} - 01 Aria.wav");
    let path = corpus_path(&file_name);
    require_corpus!(path);

    let m = roundtrip_wav(&path);
    report_ratio("bach_aria (solo piano, tonal)", &m, flac_compress_size(&path));

    // Measured ~0.483 at FRAME_SIZE=4096 on 96 kHz / 24-bit stereo. The
    // ceiling leaves the ~2 pp regression budget used elsewhere.
    const CEILING: f64 = 0.503;
    let ratio = m.encoded_bytes as f64 / m.raw_bytes as f64;
    assert!(
        ratio < CEILING,
        "bach_aria ratio {} exceeds regression ceiling 0.503",
        ratio
    );
}
|
||||
|
||||
#[test]
fn bach_variatio_4_fughetta() {
    // A short fugal variation: tight polyphonic counterpoint with
    // interleaved voices. Its fast melodic runs give Rice k-selection
    // richer residual statistics to cope with than the Aria does. At ~68 s
    // it is the cheapest music test in the suite.
    let file_name = format!("{GOLDBERG_PREFIX} - 05 Variatio 4 a 1 Clav..wav");
    let path = corpus_path(&file_name);
    require_corpus!(path);

    let m = roundtrip_wav(&path);
    report_ratio("bach_variatio_4 (fugal)", &m, flac_compress_size(&path));

    // Measured ~0.514; the ceiling leaves the ~2 pp regression budget.
    const CEILING: f64 = 0.534;
    let ratio = m.encoded_bytes as f64 / m.raw_bytes as f64;
    assert!(
        ratio < CEILING,
        "bach_variatio_4 ratio {} exceeds regression ceiling 0.534",
        ratio
    );
}
|
||||
|
||||
#[test]
fn bach_variatio_16_ouverture() {
    // French-overture style: dotted rhythms, strong attacks and ornamented
    // melodic runs. This is the transient-heavy content class — residual
    // statistics shift mid-frame as runs punctuate sustained harmonies,
    // which exercises the partition_order search.
    let file_name = format!("{GOLDBERG_PREFIX} - 17 Variatio 16 a 1 Clav. Ouverture.wav");
    let path = corpus_path(&file_name);
    require_corpus!(path);

    let m = roundtrip_wav(&path);
    report_ratio("bach_variatio_16 (ouverture)", &m, flac_compress_size(&path));

    // Measured ~0.512; the ceiling leaves the ~2 pp regression budget.
    const CEILING: f64 = 0.532;
    let ratio = m.encoded_bytes as f64 / m.raw_bytes as f64;
    assert!(
        ratio < CEILING,
        "bach_variatio_16 ratio {} exceeds regression ceiling 0.532",
        ratio
    );
}
|
||||
|
||||
// Speech — AMI meeting corpus. Clean headset mic gives near-ideal speech
|
||||
// conditions; residuals should be near-Laplacian, which is exactly what
|
||||
// Rice coding is optimal for. Expect the best ratios here.
|
||||
|
||||
#[test]
fn ami_headset_speech() {
    // Close-talking headset microphone recording from the AMI corpus.
    let path = corpus_path("ES2002a.Headset-0.wav");
    require_corpus!(path);

    let m = roundtrip_wav(&path);
    report_ratio("ami_headset_speech", &m, flac_compress_size(&path));

    // Measured ~0.178 at FRAME_SIZE=4096; the ceiling gives ~2 pp budget.
    const CEILING: f64 = 0.195;
    let ratio = m.encoded_bytes as f64 / m.raw_bytes as f64;
    assert!(
        ratio < CEILING,
        "headset ratio {} exceeds regression ceiling 0.195",
        ratio
    );
}
|
||||
|
||||
#[test]
fn ami_array_speech() {
    // Tabletop array microphone: distant speech coloured by room
    // acoustics. It is less predictable than the headset signal, which
    // makes it a useful stress case for LPC.
    let path = corpus_path("ES2002a.Array1-01.wav");
    require_corpus!(path);

    let m = roundtrip_wav(&path);
    report_ratio("ami_array_speech", &m, flac_compress_size(&path));

    // Measured ~0.375 at FRAME_SIZE=4096; the ceiling gives ~2 pp budget.
    const CEILING: f64 = 0.395;
    let ratio = m.encoded_bytes as f64 / m.raw_bytes as f64;
    assert!(
        ratio < CEILING,
        "array speech ratio {} exceeds regression ceiling 0.395",
        ratio
    );
}
|
||||
|
||||
#[test]
fn ami_mixed_meeting() {
    // Mixed headset channel: several speakers talking at once, giving the
    // highest spectral complexity in the corpus.
    let path = corpus_path("ES2002a.Mix-Headset.wav");
    require_corpus!(path);

    let m = roundtrip_wav(&path);
    report_ratio("ami_mixed_meeting", &m, flac_compress_size(&path));

    // Measured ~0.292 at FRAME_SIZE=4096; the ceiling gives ~2 pp budget.
    const CEILING: f64 = 0.312;
    let ratio = m.encoded_bytes as f64 / m.raw_bytes as f64;
    assert!(
        ratio < CEILING,
        "mixed meeting ratio {} exceeds regression ceiling 0.312",
        ratio
    );
}
|
||||
|
||||
// The sparse-vs-exhaustive encoder differential lives in
|
||||
// `src/frame.rs::tests::sparse_vs_exhaustive_on_headset_speech` — it
|
||||
// needs `encode_frame_with_grid`, which is `pub(crate)` rather than
|
||||
// part of the semver surface. Same fixture file, same assertions;
|
||||
// moved into the crate-private test module when the grid entry-point
|
||||
// was demoted from `pub #[doc(hidden)]`.
|
||||
Loading…
Add table
Add a link
Reference in a new issue