libsoliton/soliton/benches/bench.rs
Kamal Tufekcic 1d99048c95
Some checks failed
CI / lint (push) Successful in 1m37s
CI / test-python (push) Successful in 1m49s
CI / test-zig (push) Successful in 1m39s
CI / test-wasm (push) Successful in 1m54s
CI / test (push) Successful in 14m44s
CI / miri (push) Successful in 14m18s
CI / build (push) Successful in 1m9s
CI / fuzz-regression (push) Successful in 9m9s
CI / publish (push) Failing after 1m10s
CI / publish-python (push) Failing after 1m46s
CI / publish-wasm (push) Has been cancelled
initial commit
Signed-off-by: Kamal Tufekcic <kamal@lo.sh>
2026-04-02 23:48:10 +03:00

704 lines
27 KiB
Rust
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#![feature(test)]
extern crate test;
use rayon::prelude::*;
use soliton::{
constants,
identity::{
GeneratedIdentity, IdentityPublicKey, generate_identity, hybrid_sign, hybrid_verify,
},
kex::{
PreKeyBundle, build_first_message_aad, initiate_session, receive_session, sign_prekey,
verify_bundle,
},
primitives::{
argon2::{Argon2Params, argon2id},
hkdf::hkdf_sha3_256,
random::random_array,
xwing,
},
ratchet::{RatchetHeader, RatchetState},
streaming::{stream_decrypt_init, stream_encrypt_init},
};
use test::{Bencher, black_box};
// ── Setup helpers ──────────────────────────────────────────────────────────────
/// Runs one complete LO-KEX handshake and returns `(alice, bob, fp_a, fp_b)`.
///
/// * `alice` is produced by `RatchetState::init_alice`, fed with the ephemeral key pair
///   (`ek_pk` / `ek_sk`) that `initiate_session` generated.
/// * `bob` is produced by `RatchetState::init_bob`, fed with a copy of Alice's ephemeral
///   public key as his `peer_ek`.
/// * `fp_a` / `fp_b` are the raw identity fingerprints of Alice and Bob.
///
/// Benchmarks call this before `b.iter()` so that handshake cost stays outside the timed
/// closure. Every step is `unwrap`ped, so any handshake failure panics.
fn setup_ratchet() -> (RatchetState, RatchetState, [u8; 32], [u8; 32]) {
    // Long-term identities for both parties.
    let GeneratedIdentity {
        public_key: alice_pk,
        secret_key: alice_sk,
        ..
    } = generate_identity().unwrap();
    let GeneratedIdentity {
        public_key: bob_pk,
        secret_key: bob_sk,
        ..
    } = generate_identity().unwrap();

    // Bob's signed pre-key and the bundle Alice consumes (no one-time pre-key).
    let (signed_prekey_pk, signed_prekey_sk) = xwing::keygen().unwrap();
    let prekey_signature = sign_prekey(&bob_sk, &signed_prekey_pk).unwrap();
    let bundle = PreKeyBundle {
        ik_pub: IdentityPublicKey::from_bytes(bob_pk.as_bytes().to_vec()).unwrap(),
        crypto_version: constants::CRYPTO_VERSION.to_string(),
        spk_pub: signed_prekey_pk,
        spk_id: 1,
        spk_sig: prekey_signature,
        opk_pub: None,
        opk_id: None,
    };

    let fp_a = alice_pk.fingerprint_raw();
    let fp_b = bob_pk.fingerprint_raw();
    let verified = verify_bundle(bundle, &bob_pk).unwrap();

    // Alice initiates and seals the first message.
    let mut initiated = initiate_session(&alice_pk, &alice_sk, &verified).unwrap();
    let aad = build_first_message_aad(&fp_a, &fp_b, &initiated.session_init).unwrap();
    let (opening_ct, alice_chain) =
        RatchetState::encrypt_first_message(initiated.take_initial_chain_key(), b"hello", &aad)
            .unwrap();

    // Bob receives the session and opens the first message.
    let mut received = receive_session(
        &bob_pk,
        &bob_sk,
        &alice_pk,
        &initiated.session_init,
        &initiated.sender_sig,
        &signed_prekey_sk,
        None,
    )
    .unwrap();
    let (_, bob_chain) =
        RatchetState::decrypt_first_message(received.take_initial_chain_key(), &opening_ct, &aad)
            .unwrap();

    // Copy the ephemeral key bytes out of `initiated` so that independent key objects can
    // be handed to both ratchet constructors (each takes its keys by value).
    let ek_pk_raw = initiated.ek_pk.as_bytes().to_vec();
    let ek_sk_raw = initiated.ek_sk().as_bytes().to_vec();
    let bob_peer_ek = xwing::PublicKey::from_bytes(ek_pk_raw.clone()).unwrap();
    let alice_ek_pk = xwing::PublicKey::from_bytes(ek_pk_raw).unwrap();
    let alice_ek_sk = xwing::SecretKey::from_bytes(ek_sk_raw).unwrap();

    let alice = RatchetState::init_alice(
        *initiated.take_root_key(),
        *alice_chain,
        fp_a,
        fp_b,
        alice_ek_pk,
        alice_ek_sk,
    )
    .unwrap();
    let bob = RatchetState::init_bob(
        *received.take_root_key(),
        *bob_chain,
        fp_b,
        fp_a,
        bob_peer_ek,
    )
    .unwrap();

    (alice, bob, fp_a, fp_b)
}
// ── Identity ────────────────────────────────────────────────────────────────
/// Full identity generation (ML-DSA-65 + X-Wing + Ed25519 keygen).
///
/// This is the most expensive single call — the "first launch cost".
#[bench]
fn identity_keygen(b: &mut Bencher) {
    b.iter(|| {
        // Nothing to set up: the whole closure is the measured operation.
        black_box(generate_identity().unwrap());
    });
}
/// Hybrid signature (Ed25519 + ML-DSA-65) over a 64-byte message.
///
/// Corresponds to Alice's signing step during session initiation. The identity is
/// generated once, outside the timed closure.
#[bench]
fn hybrid_sign_bench(b: &mut Bencher) {
    let signing_key = generate_identity().unwrap().secret_key;
    let message = [0u8; 64];
    b.iter(|| {
        black_box(hybrid_sign(&signing_key, &message).unwrap());
    });
}
/// Hybrid verify (Ed25519 + ML-DSA-65): Bob's first operation — sets the floor for receive_session.
///
/// The key pair and a valid signature over a 64-byte message are produced once, before
/// the timed closure. Inside the closure the three inputs are routed through `black_box`
/// so the optimizer must treat them as opaque on every iteration; the `unwrap` keeps the
/// verification result observable (a failed verification panics).
#[bench]
fn hybrid_verify_bench(b: &mut Bencher) {
    let GeneratedIdentity {
        public_key: pk,
        secret_key: sk,
        ..
    } = generate_identity().unwrap();
    let msg = [0u8; 64];
    let sig = hybrid_sign(&sk, &msg).unwrap();
    b.iter(|| {
        // Shadow the inputs with black-boxed references of the same types. The previous
        // `black_box(())` guarded nothing, because a unit value carries no data.
        let (pk, msg, sig) = (black_box(&pk), black_box(&msg), black_box(&sig));
        hybrid_verify(pk, msg, sig).unwrap();
    });
}
// ── X-Wing ──────────────────────────────────────────────────────────────────
/// X-Wing encapsulation in isolation (ML-KEM-768 + X25519 encapsulation cost).
///
/// The recipient key pair is generated once; only `xwing::encapsulate` is timed.
#[bench]
fn xwing_encap(b: &mut Bencher) {
    let (recipient_pk, _recipient_sk) = xwing::keygen().unwrap();
    b.iter(|| {
        // The (ciphertext, shared secret) pair is passed to black_box as a whole.
        black_box(xwing::encapsulate(&recipient_pk).unwrap());
    });
}
/// X-Wing decapsulation in isolation (ML-KEM-768 + X25519 decapsulation cost).
///
/// One ciphertext is produced up front; every iteration decapsulates that same
/// ciphertext with the matching secret key.
#[bench]
fn xwing_decap(b: &mut Bencher) {
    let (recipient_pk, recipient_sk) = xwing::keygen().unwrap();
    let (kem_ct, _shared) = xwing::encapsulate(&recipient_pk).unwrap();
    b.iter(|| {
        black_box(xwing::decapsulate(&recipient_sk, &kem_ct).unwrap());
    });
}
// ── KEX ─────────────────────────────────────────────────────────────────────
/// Alice's side of session setup: ephemeral keygen + 3 encaps + HKDF + sign.
///
/// Identities, Bob's signed pre-key bundle and its verification are all prepared
/// before the timed closure, so the only measured call is `initiate_session`.
#[bench]
fn initiate_session_bench(b: &mut Bencher) {
    let GeneratedIdentity {
        public_key: alice_pk,
        secret_key: alice_sk,
        ..
    } = generate_identity().unwrap();
    let GeneratedIdentity {
        public_key: bob_pk,
        secret_key: bob_sk,
        ..
    } = generate_identity().unwrap();

    // Bob's bundle: signed pre-key only, no one-time pre-key.
    let (signed_prekey_pk, _signed_prekey_sk) = xwing::keygen().unwrap();
    let prekey_signature = sign_prekey(&bob_sk, &signed_prekey_pk).unwrap();
    let bundle = PreKeyBundle {
        ik_pub: IdentityPublicKey::from_bytes(bob_pk.as_bytes().to_vec()).unwrap(),
        crypto_version: constants::CRYPTO_VERSION.to_string(),
        spk_pub: signed_prekey_pk,
        spk_id: 1,
        spk_sig: prekey_signature,
        opk_pub: None,
        opk_id: None,
    };
    let verified = verify_bundle(bundle, &bob_pk).unwrap();

    b.iter(|| {
        black_box(initiate_session(&alice_pk, &alice_sk, &verified).unwrap());
    });
}
/// Bob's side of session setup: verify + 3 decaps + HKDF.
///
/// Alice's `initiate_session` runs once before the timed closure, so her cost is
/// excluded. `receive_session` borrows every input, which lets each iteration replay
/// the same session-init message (the call is idempotent: identical ciphertexts yield
/// the same root key each time).
#[bench]
fn receive_session_bench(b: &mut Bencher) {
    let GeneratedIdentity {
        public_key: alice_pk,
        secret_key: alice_sk,
        ..
    } = generate_identity().unwrap();
    let GeneratedIdentity {
        public_key: bob_pk,
        secret_key: bob_sk,
        ..
    } = generate_identity().unwrap();

    // Bob's bundle: signed pre-key only, no one-time pre-key.
    let (signed_prekey_pk, signed_prekey_sk) = xwing::keygen().unwrap();
    let prekey_signature = sign_prekey(&bob_sk, &signed_prekey_pk).unwrap();
    let bundle = PreKeyBundle {
        ik_pub: IdentityPublicKey::from_bytes(bob_pk.as_bytes().to_vec()).unwrap(),
        crypto_version: constants::CRYPTO_VERSION.to_string(),
        spk_pub: signed_prekey_pk,
        spk_id: 1,
        spk_sig: prekey_signature,
        opk_pub: None,
        opk_id: None,
    };
    let verified = verify_bundle(bundle, &bob_pk).unwrap();
    let initiated = initiate_session(&alice_pk, &alice_sk, &verified).unwrap();

    b.iter(|| {
        black_box(
            receive_session(
                &bob_pk,
                &bob_sk,
                &alice_pk,
                &initiated.session_init,
                &initiated.sender_sig,
                &signed_prekey_sk,
                None,
            )
            .unwrap(),
        );
    });
}
// ── Ratchet: per-message hot paths ──────────────────────────────────────────
/// Same-epoch encrypt of a 256-byte payload: HMAC-SHA3-256 key derivation +
/// XChaCha20-Poly1305 encrypt.
///
/// The ratchet pair is built by `setup_ratchet` before `b.iter()`. Alice keeps sending
/// on the same state, so each call advances `send_count` within one epoch and never
/// triggers a KEM step.
#[bench]
fn ratchet_encrypt_same_epoch(b: &mut Bencher) {
    // Bob and the fingerprints are unused but kept alive for the whole benchmark.
    let (mut sender, _receiver, _fp_sender, _fp_receiver) = setup_ratchet();
    let payload = [0u8; 256];
    b.iter(|| {
        black_box(sender.encrypt(&payload).unwrap());
    });
}
/// Same-epoch decrypt: HMAC-SHA3-256 key derivation + XChaCha20-Poly1305 decrypt.
///
/// Bob's `recv_seen` duplicate-detection set is bounded at 65,536 entries and
/// never resets within an epoch. For a ~4 µs operation the harness would run
/// ~230,000 iterations, exhausting the set and returning `ChainExhausted`.
/// To avoid this, Bob's state is restored from a pre-serialized blob each
/// iteration via `from_bytes_with_min_epoch` (~817 ns overhead per the
/// `ratchet_from_bytes` benchmark). Alice pre-encrypts a single message so no
/// encryption cost is included in the measurement.
///
/// The `RatchetHeader` is loop-invariant and `decrypt` only borrows it, so it is
/// built once before the timed closure. The timed region therefore contains exactly
/// two calls: the state restore and `decrypt`.
#[bench]
fn ratchet_decrypt_same_epoch(b: &mut Bencher) {
    let (mut alice, bob, _fp_a, _fp_b) = setup_ratchet();
    let msg = [0u8; 256];
    // Pre-encrypt one message to produce a valid ciphertext for Bob to decrypt.
    let enc = alice.encrypt(&msg).unwrap();
    // Rebuild the header once from the encrypted message's fields.
    // Alice's first encrypt is same-epoch (no direction change): kem_ct is None.
    let header = RatchetHeader {
        ratchet_pk: xwing::PublicKey::from_bytes(enc.header.ratchet_pk.as_bytes().to_vec())
            .unwrap(),
        kem_ct: None,
        n: enc.header.n,
        pn: enc.header.pn,
    };
    let ciphertext = enc.ciphertext.clone();
    // Serialize Bob's state before any decryption so it can be restored each
    // iteration. to_bytes consumes bob; epoch is not needed (min_epoch = 0).
    let (bob_bytes, _) = bob.to_bytes().unwrap();
    let bob_bytes: Vec<u8> = bob_bytes.to_vec();
    b.iter(|| {
        // Restore Bob each iteration to keep recv_seen empty, preventing
        // ChainExhausted after 65,536 iterations.
        let mut bob_fresh = RatchetState::from_bytes_with_min_epoch(&bob_bytes, 0).unwrap();
        let pt = bob_fresh.decrypt(&header, &ciphertext).unwrap();
        black_box(pt);
    });
}
// ── Ratchet: direction-change paths ─────────────────────────────────────────
/// Direction-change encrypt: X-Wing keygen + encapsulation to the peer's ratchet key +
/// `kdf_root`, followed by encryption of a 256-byte payload.
///
/// A real KEX is run once to obtain Bob's root key, chain key and Alice's ephemeral
/// public key. Inside the timed closure a fresh Bob is built with `init_bob` every
/// iteration (described in the original notes as plain struct initialization, no KEM),
/// and his first `encrypt` — with no send ratchet key yet — performs the KEM ratchet
/// step being measured. The per-iteration `PublicKey::from_bytes` + `init_bob` are
/// therefore part of the timed region as well.
#[bench]
fn ratchet_encrypt_direction_change(b: &mut Bencher) {
    // --- One-off KEX to obtain realistic key material -------------------------------
    let GeneratedIdentity {
        public_key: alice_pk,
        secret_key: alice_sk,
        ..
    } = generate_identity().unwrap();
    let GeneratedIdentity {
        public_key: bob_pk,
        secret_key: bob_sk,
        ..
    } = generate_identity().unwrap();

    let (signed_prekey_pk, signed_prekey_sk) = xwing::keygen().unwrap();
    let prekey_signature = sign_prekey(&bob_sk, &signed_prekey_pk).unwrap();
    let bundle = PreKeyBundle {
        ik_pub: IdentityPublicKey::from_bytes(bob_pk.as_bytes().to_vec()).unwrap(),
        crypto_version: constants::CRYPTO_VERSION.to_string(),
        spk_pub: signed_prekey_pk,
        spk_id: 1,
        spk_sig: prekey_signature,
        opk_pub: None,
        opk_id: None,
    };

    let fp_a = alice_pk.fingerprint_raw();
    let fp_b = bob_pk.fingerprint_raw();
    let verified = verify_bundle(bundle, &bob_pk).unwrap();

    let mut initiated = initiate_session(&alice_pk, &alice_sk, &verified).unwrap();
    let aad = build_first_message_aad(&fp_a, &fp_b, &initiated.session_init).unwrap();
    // Alice's chain key is not needed here; only the first ciphertext is.
    let (opening_ct, _alice_chain) =
        RatchetState::encrypt_first_message(initiated.take_initial_chain_key(), b"hello", &aad)
            .unwrap();

    let mut received = receive_session(
        &bob_pk,
        &bob_sk,
        &alice_pk,
        &initiated.session_init,
        &initiated.sender_sig,
        &signed_prekey_sk,
        None,
    )
    .unwrap();
    let (_, bob_chain) =
        RatchetState::decrypt_first_message(received.take_initial_chain_key(), &opening_ct, &aad)
            .unwrap();

    // --- Raw material reused by every iteration -------------------------------------
    // Dereferencing copies the 32-byte arrays out of their wrappers.
    let bob_root_key: [u8; 32] = *received.take_root_key();
    let bob_chain_key: [u8; 32] = *bob_chain;
    // Alice's ephemeral public key is what Bob encapsulates to on his first
    // direction-change encrypt.
    let alice_ek_raw = initiated.ek_pk.as_bytes().to_vec();
    let payload = [0u8; 256];

    b.iter(|| {
        // `init_bob` takes the peer key by value, so a new key object is needed per run.
        let peer_ek = xwing::PublicKey::from_bytes(alice_ek_raw.clone()).unwrap();
        let mut fresh_bob =
            RatchetState::init_bob(bob_root_key, bob_chain_key, fp_b, fp_a, peer_ek).unwrap();
        // First encrypt on a fresh Bob: keygen + encapsulate (to peer_ek) + kdf_root.
        black_box(fresh_bob.encrypt(&payload).unwrap());
    });
}
/// Direction-change decrypt cost: X-Wing decapsulate (using send_ratchet_sk) + kdf_root.
///
/// Pre-computes a direction-change message from Bob outside the iter closure.
/// Alice's ratchet state is serialized before the iter and restored via
/// `from_bytes_with_min_epoch` each iteration so she can re-process the same
/// direction-change message with a fresh decapsulation key.
///
/// The `RatchetHeader` for Bob's message is loop-invariant and `decrypt` only borrows
/// it, so it is built once before the timed closure. Inside the closure there are no
/// header allocations; the timed region is the state restore plus `decrypt`.
#[bench]
fn ratchet_decrypt_direction_change(b: &mut Bencher) {
    let (alice, mut bob, _fp_a, _fp_b) = setup_ratchet();
    // Bob has recv_ratchet_pk = Some(alice's ek_pk). His first encrypt triggers a
    // direction change: keygen new ratchet keypair, encapsulate to alice's ek_pk,
    // kdf_root, then AEAD-encrypt the payload. Alice needs her ek_sk (stored in
    // her send_ratchet_sk) to decapsulate.
    let bob_dc_msg = bob.encrypt(&[0u8; 256]).unwrap();
    // Serialize Alice's ratchet state (includes send_ratchet_sk = ek_sk).
    // to_bytes() consumes Alice; the returned epoch is not needed (min_epoch = 0).
    let (alice_bytes, _epoch) = alice.to_bytes().unwrap();
    let alice_bytes: Vec<u8> = alice_bytes.to_vec();
    // Rebuild the header once from the raw bytes of Bob's message. A direction-change
    // message must carry a KEM ciphertext, hence the unwrap on `kem_ct`.
    let kem_ct_bytes = bob_dc_msg
        .header
        .kem_ct
        .as_ref()
        .unwrap()
        .as_bytes()
        .to_vec();
    let header = RatchetHeader {
        ratchet_pk: xwing::PublicKey::from_bytes(
            bob_dc_msg.header.ratchet_pk.as_bytes().to_vec(),
        )
        .unwrap(),
        // The kem_ct triggers Alice to run perform_kem_ratchet_recv:
        // xwing::decapsulate(ek_sk, ct) + kdf_root.
        kem_ct: Some(xwing::Ciphertext::from_bytes(kem_ct_bytes).unwrap()),
        n: bob_dc_msg.header.n,
        pn: bob_dc_msg.header.pn,
    };
    let ciphertext = bob_dc_msg.ciphertext.clone();
    b.iter(|| {
        // Restore Alice's state. from_bytes_with_min_epoch validates and
        // deserializes the blob — its cost is small relative to the decap + kdf
        // in the decrypt call below.
        let mut alice_restored = RatchetState::from_bytes_with_min_epoch(&alice_bytes, 0).unwrap();
        let pt = alice_restored.decrypt(&header, &ciphertext).unwrap();
        black_box(pt);
    });
}
// ── Ratchet: KDF isolation ───────────────────────────────────────────────────
/// Root-key derivation alone: HKDF-SHA3-256(root_key, kem_ss) → 64 bytes.
///
/// Mirrors the construction the ratchet's `kdf_root` is documented to use — salt is the
/// root key, input keying material is the KEM shared secret, info is
/// `RATCHET_HKDF_INFO`, output is 64 bytes — so the KDF share of a direction-change
/// cycle can be separated from the KEM share.
#[bench]
fn kdf_root_isolated(b: &mut Bencher) {
    let salt_root_key: [u8; 32] = random_array();
    let ikm_kem_ss: [u8; 32] = random_array();
    let mut okm = [0u8; 64];
    b.iter(|| {
        hkdf_sha3_256(
            &salt_root_key,
            &ikm_kem_ss,
            constants::RATCHET_HKDF_INFO,
            &mut okm,
        )
        .unwrap();
        // Passing the array by value hands black_box a copy of the derived bytes.
        black_box(okm);
    });
}
// ── Ratchet: serialization ───────────────────────────────────────────────────
/// Ratchet serialization (`to_bytes`): mobile clients serialize after every message.
///
/// `to_bytes` consumes the state, so each iteration first rebuilds Alice from key
/// material captured during a one-off KEX and then serializes her. Note that the rebuild
/// (two `from_bytes` calls on cloned key bytes plus `init_alice`) sits inside the timed
/// closure too; the original notes describe it as cheap (no KEM) relative to `to_bytes`.
#[bench]
fn ratchet_to_bytes(b: &mut Bencher) {
    // --- One-off KEX to obtain realistic init material ------------------------------
    let GeneratedIdentity {
        public_key: alice_pk,
        secret_key: alice_sk,
        ..
    } = generate_identity().unwrap();
    let GeneratedIdentity {
        public_key: bob_pk,
        secret_key: bob_sk,
        ..
    } = generate_identity().unwrap();

    let (signed_prekey_pk, signed_prekey_sk) = xwing::keygen().unwrap();
    let prekey_signature = sign_prekey(&bob_sk, &signed_prekey_pk).unwrap();
    let bundle = PreKeyBundle {
        ik_pub: IdentityPublicKey::from_bytes(bob_pk.as_bytes().to_vec()).unwrap(),
        crypto_version: constants::CRYPTO_VERSION.to_string(),
        spk_pub: signed_prekey_pk,
        spk_id: 1,
        spk_sig: prekey_signature,
        opk_pub: None,
        opk_id: None,
    };

    let fp_a = alice_pk.fingerprint_raw();
    let fp_b = bob_pk.fingerprint_raw();
    let verified = verify_bundle(bundle, &bob_pk).unwrap();

    let mut initiated = initiate_session(&alice_pk, &alice_sk, &verified).unwrap();
    let aad = build_first_message_aad(&fp_a, &fp_b, &initiated.session_init).unwrap();
    let (opening_ct, alice_chain_key) =
        RatchetState::encrypt_first_message(initiated.take_initial_chain_key(), b"hello", &aad)
            .unwrap();

    // Bob's half is completed as well; its outputs are not used by the timed loop.
    let mut received = receive_session(
        &bob_pk,
        &bob_sk,
        &alice_pk,
        &initiated.session_init,
        &initiated.sender_sig,
        &signed_prekey_sk,
        None,
    )
    .unwrap();
    let (_, _bob_chain_key) =
        RatchetState::decrypt_first_message(received.take_initial_chain_key(), &opening_ct, &aad)
            .unwrap();

    // --- Raw material reused by every iteration -------------------------------------
    let alice_root: [u8; 32] = *initiated.take_root_key();
    let alice_chain: [u8; 32] = *alice_chain_key;
    let ek_pk_raw = initiated.ek_pk.as_bytes().to_vec();
    let ek_sk_raw = initiated.ek_sk().as_bytes().to_vec();

    b.iter(|| {
        // Fresh key objects: `init_alice` takes both by value.
        let ek_pk = xwing::PublicKey::from_bytes(ek_pk_raw.clone()).unwrap();
        let ek_sk = xwing::SecretKey::from_bytes(ek_sk_raw.clone()).unwrap();
        let fresh_alice =
            RatchetState::init_alice(alice_root, alice_chain, fp_a, fp_b, ek_pk, ek_sk).unwrap();
        // The (blob, epoch) pair goes to black_box as a whole.
        black_box(fresh_alice.to_bytes().unwrap());
    });
}
/// Ratchet deserialization (`from_bytes_with_min_epoch`): load on app resume.
///
/// Alice's state is serialized once up front; every iteration parses that same blob.
/// Per the original notes the measured path covers the version check, field parsing,
/// `recv_seen` reconstruction and the anti-rollback epoch check.
#[bench]
fn ratchet_from_bytes(b: &mut Bencher) {
    let (alice, _bob, _fp_a, _fp_b) = setup_ratchet();
    let (serialized, _epoch) = alice.to_bytes().unwrap();
    let serialized: Vec<u8> = serialized.to_vec();
    b.iter(|| {
        // A minimum epoch of 0 accepts any blob: rollback protection is not what this
        // benchmark is about.
        black_box(RatchetState::from_bytes_with_min_epoch(&serialized, 0).unwrap());
    });
}
// ── Argon2id ────────────────────────────────────────────────────────────────
/// Argon2id with `Argon2Params::OWASP_MIN` (OWASP interactive floor: 19 MiB, 2 passes,
/// 1 lane).
///
/// Backs the "interactive login floor" claim in the documentation. Expect wide variance
/// (±20% is typical); treat the numbers as indicative only.
#[bench]
fn argon2id_owasp_min(b: &mut Bencher) {
    let password = b"bench-password";
    let salt: [u8; 16] = random_array();
    let mut derived = [0u8; 32];
    b.iter(|| {
        argon2id(password, &salt, Argon2Params::OWASP_MIN, &mut derived).unwrap();
        black_box(derived);
    });
}
/// Argon2id with `Argon2Params::RECOMMENDED` (keypair-protection settings: 64 MiB,
/// 3 passes, 4 lanes).
///
/// Backs the "0.1-1 s on modern hardware" claim in the documentation. Expect wide
/// variance (±20% is typical); treat the numbers as indicative only.
#[bench]
fn argon2id_recommended(b: &mut Bencher) {
    let password = b"bench-password";
    let salt: [u8; 16] = random_array();
    let mut derived = [0u8; 32];
    b.iter(|| {
        argon2id(password, &salt, Argon2Params::RECOMMENDED, &mut derived).unwrap();
        black_box(derived);
    });
}
// ── Streaming AEAD ──────────────────────────────────────────────────────────
/// Streaming encryption of a single 1 MiB chunk: throughput baseline.
///
/// Each iteration initializes an encryptor and encrypts one `STREAM_CHUNK_SIZE` buffer
/// as the final chunk. Throughput in MB/s is `1_048_576_000 / ns_per_iter`.
#[bench]
fn stream_encrypt_1mib(b: &mut Bencher) {
    let key: [u8; 32] = random_array();
    let aad = b"bench-aad";
    // The plaintext buffer is allocated once, outside the timed closure.
    let one_chunk = vec![0u8; constants::STREAM_CHUNK_SIZE];
    b.iter(|| {
        let mut encryptor = stream_encrypt_init(&key, aad, false).unwrap();
        // A full-size chunk is accepted as the last (and only) chunk of the stream.
        black_box(encryptor.encrypt_chunk(&one_chunk, true).unwrap());
    });
}
/// Streaming encryption of 4 MiB, one chunk after another: baseline for the parallel
/// variants.
///
/// Four `STREAM_CHUNK_SIZE` chunks go through the sequential `encrypt_chunk` API, the
/// fourth flagged as last. Throughput in MB/s is `4_194_304_000 / ns_per_iter`. Compare
/// with `stream_encrypt_4mib_parallel` to see the wall-clock gain from rayon on this
/// machine.
#[bench]
fn stream_encrypt_4mib_sequential(b: &mut Bencher) {
    let key: [u8; 32] = random_array();
    let one_chunk = vec![0u8; constants::STREAM_CHUNK_SIZE];
    b.iter(|| {
        let mut encryptor = stream_encrypt_init(&key, b"", false).unwrap();
        // Three intermediate chunks followed by the final one.
        for is_last in [false, false, false, true] {
            black_box(encryptor.encrypt_chunk(&one_chunk, is_last).unwrap());
        }
    });
}
/// Streaming encryption of 4 MiB across rayon workers: measures parallel speedup.
///
/// Four `STREAM_CHUNK_SIZE` chunks are encrypted via the index-keyed `encrypt_chunk_at`,
/// which is called through a shared reference to the encryptor, so the workers need no
/// coordination among themselves. The last index (3) is flagged as the final chunk.
/// Rayon's global pool is touched once before `b.iter()` so that pool start-up is not
/// charged to the first iteration.
///
/// `stream_encrypt_4mib_sequential` ns/iter divided by this benchmark's ns/iter gives
/// the effective parallelism factor on this machine.
#[bench]
fn stream_encrypt_4mib_parallel(b: &mut Bencher) {
    const CHUNK_COUNT: u64 = 4;
    let key: [u8; 32] = random_array();
    let one_chunk = vec![0u8; constants::STREAM_CHUNK_SIZE];
    // Querying the thread count from outside the pool makes rayon set up its global pool.
    rayon::current_num_threads();
    b.iter(|| {
        let encryptor = stream_encrypt_init(&key, b"", false).unwrap();
        let encrypted: Vec<Vec<u8>> = (0..CHUNK_COUNT)
            .into_par_iter()
            .map(|index| {
                let is_last = index == CHUNK_COUNT - 1;
                encryptor
                    .encrypt_chunk_at(index, is_last, &one_chunk)
                    .unwrap()
            })
            .collect();
        black_box(encrypted);
    });
}
/// Streaming encryption of 8 MiB across rayon workers: saturation check.
///
/// Identical to `stream_encrypt_4mib_parallel` except that eight chunks are encrypted
/// and index 7 is the final one. Read the result against the 4-chunk benchmark:
///
/// * ns/iter roughly flat — the four extra chunks were absorbed by otherwise idle
///   workers, i.e. the 4-chunk run had not saturated the machine.
/// * ns/iter approaching 2× — total throughput did not improve, so some shared resource
///   (presumably available cores or memory bandwidth) was already saturated at four
///   chunks.
#[bench]
fn stream_encrypt_8mib_parallel(b: &mut Bencher) {
    const CHUNK_COUNT: u64 = 8;
    let key: [u8; 32] = random_array();
    let one_chunk = vec![0u8; constants::STREAM_CHUNK_SIZE];
    // Touch rayon's global pool before timing starts.
    rayon::current_num_threads();
    b.iter(|| {
        let encryptor = stream_encrypt_init(&key, b"", false).unwrap();
        let encrypted: Vec<Vec<u8>> = (0..CHUNK_COUNT)
            .into_par_iter()
            .map(|index| {
                let is_last = index == CHUNK_COUNT - 1;
                encryptor
                    .encrypt_chunk_at(index, is_last, &one_chunk)
                    .unwrap()
            })
            .collect();
        black_box(encrypted);
    });
}
/// Streaming decryption of a single 1 MiB chunk: throughput baseline.
///
/// The stream header and one encrypted `STREAM_CHUNK_SIZE` chunk are produced once,
/// before the timed closure. Each iteration initializes a decryptor from that header
/// and decrypts the chunk. Throughput in MB/s is `1_048_576_000 / ns_per_iter`.
#[bench]
fn stream_decrypt_1mib(b: &mut Bencher) {
    let key: [u8; 32] = random_array();
    let aad = b"bench-aad";
    let one_chunk = vec![0u8; constants::STREAM_CHUNK_SIZE];

    // One-off encryption: yields the stream header and the ciphertext to replay.
    let mut encryptor = stream_encrypt_init(&key, aad, false).unwrap();
    let stream_header = encryptor.header();
    let sealed_chunk = encryptor.encrypt_chunk(&one_chunk, true).unwrap();

    b.iter(|| {
        let mut decryptor = stream_decrypt_init(&key, &stream_header, aad).unwrap();
        let (plaintext, _is_last) = decryptor.decrypt_chunk(&sealed_chunk).unwrap();
        black_box(plaintext);
    });
}