libsoliton/soliton/src/verification.rs

//! Key verification phrases.
//!
//! Generates a human-readable phrase from two identity public keys that both
//! parties can compare out-of-band to verify identity key continuity.
//! This is a display-layer concern outside the formal cryptographic model;
//! the phrase is a human-readable encoding of the fingerprint comparison.
//!
//! Algorithm:
//! 1. Sort the two public keys lexicographically.
//! 2. Concatenate with domain separation: `"lo-verification-v1" || sorted_pk_a || sorted_pk_b`.
//! 3. Compute `hash = SHA3-256(concatenation)`.
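//! 4. Read the hash as big-endian 2-byte chunks and map each accepted chunk to a
//!    word from the EFF large wordlist (7776 words) until 7 words are selected.
//!    Chunks >= 62208 (7776 * 8) are rejected to avoid modular bias; if the
//!    32-byte hash runs out, it is re-hashed with an expansion label and a
//!    round counter.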
//!
//! Each word carries log2(7776) ≈ 12.92 bits of entropy. 7 words ≈ 90.5 bits of
//! second-preimage resistance (the relevant metric for pairwise verification:
//! given a specific key pair, how hard is it to find another pair producing the
//! same phrase). Birthday-bound collision resistance is ~45 bits.

use crate::constants;
use crate::error::{Error, Result};
use crate::primitives::sha3_256;

// Generated by build.rs from the EFF large wordlist.
include!(concat!(env!("OUT_DIR"), "/eff_wordlist.rs"));
// Compile-time guard: the entropy figures and the rejection-sampling bound
// documented in this module assume a 7776-word list, so fail the build if the
// generated list has any other length.
const _: () = assert!(
    EFF_WORDLIST.len() == 7776,
    "EFF_WORDLIST must contain exactly 7776 words"
);

/// Generate a verification phrase from two identity public keys.
///
/// The phrase is deterministic and order-independent: `verification_phrase(a, b)`
/// produces the same result as `verification_phrase(b, a)`.
///
/// Returns 7 space-separated words from the EFF large wordlist.
#[must_use = "verification phrase must be displayed to the user"]
pub fn verification_phrase(pk_a: &[u8], pk_b: &[u8]) -> Result<String> {
    if pk_a.len() != constants::LO_PUBLIC_KEY_SIZE {
        return Err(Error::InvalidLength {
            expected: constants::LO_PUBLIC_KEY_SIZE,
            got: pk_a.len(),
        });
    }
    if pk_b.len() != constants::LO_PUBLIC_KEY_SIZE {
        return Err(Error::InvalidLength {
            expected: constants::LO_PUBLIC_KEY_SIZE,
            got: pk_b.len(),
        });
    }
    // Self-pair produces a valid phrase but verifies nothing — a UI bug
    // passing the user's own key twice would give a false sense of security.
    if pk_a == pk_b {
        return Err(Error::InvalidData);
    }

    // Step 1: Sort keys lexicographically.
    let (first, second) = if pk_a <= pk_b {
        (pk_a, pk_b)
    } else {
        (pk_b, pk_a)
    };

    // Step 2: Concatenate with domain separation label and hash.
    // The label prevents collisions with any other protocol function that
    // hashes two concatenated public keys.
    let mut input =
        Vec::with_capacity(constants::PHRASE_HASH_LABEL.len() + first.len() + second.len());
    input.extend_from_slice(constants::PHRASE_HASH_LABEL);
    input.extend_from_slice(first);
    input.extend_from_slice(second);
    let mut hash = sha3_256::hash(&input);

    // Step 3: Map hash bytes to word indices.
    // We need 7 indices into a 7776-word list.
    // Take 2 bytes per word (u16). Accept only values in 0..62208
    // (62208 = 7776 * 8) to eliminate modular bias: val % 7776 is uniform
    // for val in [0, 62208) because 62208 divides evenly. Rejection
    // probability: 3328/65536 ≈ 5.1% per sample. Expected ~7.4 pairs
    // consumed per phrase; ~8.6 spare slots in the first 32-byte hash
    // before rehash. Across 20 rounds × 16 pairs = 320 samples,
    // termination failure < 2^-150.
    const LIMIT: u16 = 7776 * 8;
    // Compile-time guard: if wordlist size or multiplier changes, the u16
    // cast would silently truncate. This makes the invariant robust.
    const _: () = assert!(7776 * 8 <= u16::MAX as u32);
    let mut words = Vec::with_capacity(7);
    let mut offset = 0;
    let mut rehash_count = 0u32;
    while words.len() < 7 {
        // Rehash before reading if we've exhausted all 16 pairs (32 bytes).
        if offset + 2 > 32 {
            // Cap at 19 rehash rounds (range 1..=19). Each round has ~95%
            // acceptance per sample, so failing to fill 7 words in
            // 16 + 19 × 16 = 320 samples is astronomically improbable
            // (~2^-150). This makes termination unconditional in the code
            // rather than probabilistic.
            rehash_count += 1;
            if rehash_count >= 20 {
                // Structurally unreachable: 320 samples with ~94.9% acceptance
                // each → failure probability < 2^-150. Returns Internal rather
                // than unreachable!() to preserve panic=abort safety.
                return Err(Error::Internal);
            }
            // Include the round counter as a single byte to make each rehash
            // round a distinct function, preventing degenerate hash chain cycles.
            // rehash_count is in [1, 20], fits in u8. try_from makes this
            // fail-fast if the cap is ever raised above 255.
            let mut expand_input = Vec::with_capacity(19 + 1 + 32);
            expand_input.extend_from_slice(constants::PHRASE_EXPAND_LABEL);
            expand_input.push(u8::try_from(rehash_count).map_err(|_| Error::Internal)?);
            expand_input.extend_from_slice(&hash);
            hash = sha3_256::hash(&expand_input);
            offset = 0;
        }
        let val = u16::from_be_bytes([hash[offset], hash[offset + 1]]);
        offset += 2;
        if val < LIMIT {
            let index = (val as usize) % EFF_WORDLIST.len();
            words.push(EFF_WORDLIST[index]);
        }
    }

    Ok(words.join(" "))
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::Error;
    use crate::identity::generate_identity;

    /// Calling twice with the same arguments yields the same phrase.
    #[test]
    fn deterministic() {
        let pk_a = generate_identity().unwrap().public_key;
        let pk_b = generate_identity().unwrap().public_key;
        let first_run = verification_phrase(pk_a.as_bytes(), pk_b.as_bytes()).unwrap();
        let second_run = verification_phrase(pk_a.as_bytes(), pk_b.as_bytes()).unwrap();
        assert_eq!(first_run, second_run);
    }

    /// Swapping the two keys does not change the phrase.
    #[test]
    fn order_independent() {
        let pk_a = generate_identity().unwrap().public_key;
        let pk_b = generate_identity().unwrap().public_key;
        let forward = verification_phrase(pk_a.as_bytes(), pk_b.as_bytes()).unwrap();
        let reversed = verification_phrase(pk_b.as_bytes(), pk_a.as_bytes()).unwrap();
        assert_eq!(forward, reversed);
    }

    /// The phrase consists of exactly seven space-separated words.
    #[test]
    fn seven_words() {
        let pk_a = generate_identity().unwrap().public_key;
        let pk_b = generate_identity().unwrap().public_key;
        let phrase = verification_phrase(pk_a.as_bytes(), pk_b.as_bytes()).unwrap();
        assert_eq!(phrase.split(' ').count(), 7);
    }

    /// Every word of the phrase is an entry of the EFF wordlist.
    #[test]
    fn all_words_in_wordlist() {
        let pk_a = generate_identity().unwrap().public_key;
        let pk_b = generate_identity().unwrap().public_key;
        let phrase = verification_phrase(pk_a.as_bytes(), pk_b.as_bytes()).unwrap();
        for word in phrase.split(' ') {
            let known = EFF_WORDLIST.contains(&word);
            assert!(known, "word '{}' not in EFF_WORDLIST", word);
        }
    }

    /// Pairing the same key with two different peers gives different phrases.
    #[test]
    fn different_keys_differ() {
        let pk_a = generate_identity().unwrap().public_key;
        let pk_b = generate_identity().unwrap().public_key;
        let pk_c = generate_identity().unwrap().public_key;
        let with_b = verification_phrase(pk_a.as_bytes(), pk_b.as_bytes()).unwrap();
        let with_c = verification_phrase(pk_a.as_bytes(), pk_c.as_bytes()).unwrap();
        assert_ne!(with_b, with_c);
    }

    /// A first key of the wrong size is reported as `InvalidLength`.
    #[test]
    fn wrong_pk_a_size() {
        let pk_b = generate_identity().unwrap().public_key;
        let outcome = verification_phrase(&[0u8; 100], pk_b.as_bytes());
        assert!(matches!(
            outcome,
            Err(Error::InvalidLength {
                expected: 3200,
                got: 100
            })
        ));
    }

    /// A second key of the wrong size is reported as `InvalidLength`.
    #[test]
    fn wrong_pk_b_size() {
        let pk_a = generate_identity().unwrap().public_key;
        let outcome = verification_phrase(pk_a.as_bytes(), &[0u8; 100]);
        assert!(matches!(
            outcome,
            Err(Error::InvalidLength {
                expected: 3200,
                got: 100
            })
        ));
    }

    /// Passing the same key for both sides is rejected as `InvalidData`.
    #[test]
    fn self_pair_rejected() {
        let pk_a = generate_identity().unwrap().public_key;
        let outcome = verification_phrase(pk_a.as_bytes(), pk_a.as_bytes());
        assert!(matches!(outcome, Err(Error::InvalidData)));
    }
}