From b3190449a4e6be85e627d2bbd403863ea940a08c Mon Sep 17 00:00:00 2001 From: iAmMichaelConnor Date: Thu, 4 Sep 2025 19:47:45 +0000 Subject: [PATCH] mike audit of zkpassport circuits and verifier contract --- package.json | 3 +- src/noir/bin/bind/evm/src/main.nr | 3 +- src/noir/bin/compare/age/evm/src/main.nr | 8 + .../bin/compare/birthdate/evm/src/main.nr | 2 + src/noir/bin/compare/citizenship/src/main.nr | 14 ++ src/noir/bin/compare/expiry/evm/src/main.nr | 1 + src/noir/bin/data-check/expiry/src/main.nr | 2 + .../integrity/sa_sha256/dg_sha256/src/main.nr | 4 + src/noir/bin/disclose/bytes/evm/src/main.nr | 4 +- .../bin/disclose/bytes/standard/src/main.nr | 4 +- src/noir/bin/disclose/flags/src/main.nr | 3 + .../issuing-country/evm/src/main.nr | 1 + .../nationality/evm/src/main.nr | 1 + .../exclusion-check/sanctions/evm/src/main.nr | 10 + .../issuing-country/evm/src/main.nr | 1 + .../nationality/evm/src/main.nr | 1 + src/noir/bin/main/outer/count_9/src/main.nr | 13 +- .../ecdsa/nist/p384/sha256/src/main.nr | 3 + .../ecdsa/nist/p384/sha256/src/main.nr | 4 +- src/noir/lib/commitment/common/src/lib.nr | 23 +- src/noir/lib/commitment/csc-to-dsc/src/lib.nr | 3 + .../integrity-to-disclosure/src/lib.nr | 1 + .../commitment/scoped-nullifier/src/lib.nr | 5 +- src/noir/lib/compare/age/src/lib.nr | 15 ++ src/noir/lib/compare/citizenship/src/lib.nr | 1 + src/noir/lib/compare/date/src/lib.nr | 14 ++ src/noir/lib/data-check/expiry/src/lib.nr | 12 +- src/noir/lib/data-check/integrity/src/lib.nr | 37 +++ src/noir/lib/data-check/tbs-pubkey/src/lib.nr | 10 +- src/noir/lib/disclose/src/lib.nr | 30 ++- .../lib/exclusion-check/country/src/lib.nr | 142 ++++++++++- .../lib/exclusion-check/sanctions/src/lib.nr | 26 ++ .../sanctions/src/ordered_mt.nr | 27 +- .../sanctions/src/param_commit.nr | 6 + .../lib/inclusion-check/country/src/lib.nr | 28 +++ src/noir/lib/outer/src/lib.nr | 17 +- src/noir/lib/sig-check/common/src/lib.nr | 15 ++ src/noir/lib/utils/src/lib.nr | 232 +++++++++++++++++- src/noir/lib/utils/src/tests.nr | 7 + src/noir/lib/utils/src/types.nr | 2 + src/solidity/src/ArrayUtils.sol | 1 + src/solidity/src/Constants.sol | 14 ++ src/solidity/src/DateUtils.sol | 2 + src/solidity/src/IRootRegistry.sol | 1 + src/solidity/src/SampleContract.sol | 23 +- src/solidity/src/StringUtils.sol | 1 + src/solidity/src/ZKPassportVerifier.sol | 188 +++++++++++++- src/ts/scripts/circuit-builder.ts | 18 +- 48 files changed, 935 insertions(+), 48 deletions(-) diff --git a/package.json b/package.json index 4b459b830..cf223701b 100644 --- a/package.json +++ b/package.json @@ -48,5 +48,6 @@ "ts-jest": "^29.1.2", "tsx": "^4.19.3", "typescript": "^5.0.0" - } + }, + "packageManager": "yarn@3.5.1+sha512.8cd0e31bd60779ef4ca92b855fb3462c7ec35ce8b345752b7349a68239776417f46d41d79c8047242d9c93b48a1516f64c7444ebe747d9a02bf26868e6fa1f2b" } diff --git a/src/noir/bin/bind/evm/src/main.nr b/src/noir/bin/bind/evm/src/main.nr index 23160d0f1..8872d1af1 100644 --- a/src/noir/bin/bind/evm/src/main.nr +++ b/src/noir/bin/bind/evm/src/main.nr @@ -1,6 +1,7 @@ use bind::calculate_param_commitment_sha2; use commitment::nullify; +// Visually inspected. fn main( comm_in: pub Field, salt: Field, @@ -9,7 +10,7 @@ fn main( // @committed // The data is public (via the parameter commitment) so verifiers can check the data // provided to the proof is correct - data: [u8; 500], + data: [u8; 500], // Why the choice of 500 bytes? I'd have expected a nice round number like 256 or 512, to enable neat hashing of large amounts of bind data. 
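+    // (sha256 processes data in 64-byte blocks, so if the aim is neat hashing, a multiple of 64, e.g. 512, would at least align with block boundaries.)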
service_scope: pub Field, service_subscope: pub Field, ) -> pub (Field, Field) { diff --git a/src/noir/bin/compare/age/evm/src/main.nr b/src/noir/bin/compare/age/evm/src/main.nr index 4ae32e4f1..37f39ac39 100644 --- a/src/noir/bin/compare/age/evm/src/main.nr +++ b/src/noir/bin/compare/age/evm/src/main.nr @@ -9,6 +9,14 @@ fn main( // @committed // The current date timestamp is public (via the parameter commitment) so verifiers can check the date // provided to the proof is correct + // Danger: need to assert that this `current_date` is equal to the `current_date` public input + // of the "Data Integrity Check" circuit. I can't find where that check is happening in the codebase. + // It's not happening in the "outer circuit". Perhaps it happens in the Verifier code? + // I read somewhere that as a lovely optimisation, a user generates the first 3 proofs when they + // first scan their passport, and then they only need to generate the disclosure proof & outer proof on demand. + // This particular disclosure circuit might not be amenable to that optimisation, since the Data Integrity Check + // circuit would have to be re-proven with the current date. + // Maybe you've already thought of all this :) current_date: u64, // @committed // The minimum age required is public (via the parameter commitment) so verifiers can check diff --git a/src/noir/bin/compare/birthdate/evm/src/main.nr b/src/noir/bin/compare/birthdate/evm/src/main.nr index 9bf6532bc..753318f32 100644 --- a/src/noir/bin/compare/birthdate/evm/src/main.nr +++ b/src/noir/bin/compare/birthdate/evm/src/main.nr @@ -2,6 +2,7 @@ use commitment::nullify; use compare_date_lib::{calculate_param_commitment_sha2, compare_date, get_birthdate}; use utils::PROOF_TYPE_BIRTHDATE; +// Visually inspected. fn main( comm_in: pub Field, salt: Field, @@ -10,6 +11,7 @@ fn main( // @committed // The current date timestamp is public (via the parameter commitment) so verifiers can check the date // provided to the proof is correct + // Danger: see concerns in `compare/age/evm/src/main.nr`. current_date: u64, // @committed // The minimum date required is public (via the parameter commitment) so verifiers can check diff --git a/src/noir/bin/compare/citizenship/src/main.nr b/src/noir/bin/compare/citizenship/src/main.nr index f9a2ce5dc..cfe658ef9 100644 --- a/src/noir/bin/compare/citizenship/src/main.nr +++ b/src/noir/bin/compare/citizenship/src/main.nr @@ -1,6 +1,7 @@ use commitment::nullify; use compare_citizenship::compare_citizenship; +// Q: Is this superfluous, given that this can be achieved with `disclose`? fn main( comm_in: pub Field, salt: Field, @@ -8,6 +9,19 @@ fn main( dg1: [u8; 95], // The country is public so verifiers can check // the country provided to the proof is correct + // Danger: these public inputs are inconsistent with those of other disclosure circuits that I've looked at so far. + // In particular, I suspect this is breaking the rigid ordering that is expected by the Outer circuit. + // The outer circuit expects public inputs of disclosure proofs to be in the order: + // - comm_in + // - service_scope + // - service_subscope + // - param_commitment + // - scoped_nullifier + // + // But this circuit has no `param_commitment`, and although the `country` could feasibly replace + // that `param_commitment` field, it is in the wrong position, and it is a str<3> type, which I + // presume is naively serialized into 3 distinct fields. + // So how is this circuit's public inputs "shape" compatible with the Outer circuit? 
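+    // For illustration, the "shape" I'd have expected here, mirroring the other disclosure
+    // circuits. A rough sketch only: `PROOF_TYPE_CITIZENSHIP` and the param-commitment helper
+    // are hypothetical names, and the `nullify` arguments are guessed:
+    //
+    //   fn main(
+    //       comm_in: pub Field,
+    //       salt: Field,
+    //       private_nullifier: Field,
+    //       dg1: [u8; 95],
+    //       country: [u8; 3], // @committed via the param commitment, instead of a raw public input
+    //       service_scope: pub Field,
+    //       service_subscope: pub Field,
+    //   ) -> pub (Field, Field) {
+    //       compare_citizenship(dg1, country);
+    //       let param_commitment = calculate_param_commitment_sha2(PROOF_TYPE_CITIZENSHIP, country);
+    //       let scoped_nullifier = nullify(comm_in, salt, dg1, private_nullifier, service_scope, service_subscope);
+    //       (param_commitment, scoped_nullifier)
+    //   }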
country: pub str<3>, service_scope: pub Field, service_subscope: pub Field, diff --git a/src/noir/bin/compare/expiry/evm/src/main.nr b/src/noir/bin/compare/expiry/evm/src/main.nr index 7ec70820e..3c3e9bf8e 100644 --- a/src/noir/bin/compare/expiry/evm/src/main.nr +++ b/src/noir/bin/compare/expiry/evm/src/main.nr @@ -10,6 +10,7 @@ fn main( // @committed // The current date timestamp is public so verifiers can check the date // provided to the proof is correct + // Danger: see my similar concern in `compare/age/evm/src/main.nr` current_date: u64, // @committed // The minimum date required is public so verifiers can check diff --git a/src/noir/bin/data-check/expiry/src/main.nr b/src/noir/bin/data-check/expiry/src/main.nr index 6400521cf..62c9c6a8d 100644 --- a/src/noir/bin/data-check/expiry/src/main.nr +++ b/src/noir/bin/data-check/expiry/src/main.nr @@ -1,5 +1,7 @@ use data_check_expiry::check_expiry; +// Suggestion: Use DG1Data type +// Question: How is this circuit used, and how does it fit in with all the other circuits? fn main(dg1: [u8; 95], current_date_timestamp: pub u64) { // Check the ID is not expired first check_expiry(dg1, current_date_timestamp); diff --git a/src/noir/bin/data-check/integrity/sa_sha256/dg_sha256/src/main.nr b/src/noir/bin/data-check/integrity/sa_sha256/dg_sha256/src/main.nr index 69e4602fd..f60b93575 100644 --- a/src/noir/bin/data-check/integrity/sa_sha256/dg_sha256/src/main.nr +++ b/src/noir/bin/data-check/integrity/sa_sha256/dg_sha256/src/main.nr @@ -18,6 +18,10 @@ fn main( // Get the length of e_content by parsing the ASN.1 // Safety: This is safe because the length must be correct for econtent to hash to // the expected digest in signed attributes as checked below in check_signed_attributes_sha256 + // Warning: A pattern in other circuits has been to constrain that the length returned from one of these unconstrained calls + // is somewhat correct, by asserting that all elements _after_ the claimed length are all zero. + // I can't see that being done within any of the functions of this circuit, so e_content might contain + // nonzero entries after this `e_content_size`. 
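+    // To make that pattern concrete, a minimal sketch of the kind of padding check I mean
+    // (hypothetical helper, not currently in the codebase; would need vetting):
+    //
+    //   fn assert_zero_padded<let N: u32>(arr: [u8; N], claimed_len: u32) {
+    //       for i in 0..N {
+    //           // every byte is either within the claimed length, or zero
+    //           assert((i < claimed_len) | (arr[i] == 0), "nonzero byte after claimed length");
+    //       }
+    //   }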
let e_content_size = unsafe { utils::unsafe_get_asn1_element_length(e_content) }; // Check the integrity of the data check_dg1_sha256(dg1, e_content, e_content_size); diff --git a/src/noir/bin/disclose/bytes/evm/src/main.nr b/src/noir/bin/disclose/bytes/evm/src/main.nr index 945d9a1c9..a77fce751 100644 --- a/src/noir/bin/disclose/bytes/evm/src/main.nr +++ b/src/noir/bin/disclose/bytes/evm/src/main.nr @@ -7,13 +7,13 @@ fn main( private_nullifier: Field, dg1: [u8; 95], // @committed - // The disclose mask is public (via the parameter commitment) so verifiers can check the mask + // The disclose mask is public (via the `param_commitment`) so verifiers can check the mask disclose_mask: [u8; 90], service_scope: pub Field, service_subscope: pub Field, ) -> pub (Field, Field) { // @committed - // The disclose bytes are public (via the parameter commitment) so verifiers can check the bytes + // The disclosed bytes are public (via the `param_commitment`) so verifiers can check the bytes let disclosed_bytes = get_disclosed_bytes(dg1, disclose_mask); let scoped_nullifier = nullify( comm_in, diff --git a/src/noir/bin/disclose/bytes/standard/src/main.nr b/src/noir/bin/disclose/bytes/standard/src/main.nr index 9343b1cbb..adc973363 100644 --- a/src/noir/bin/disclose/bytes/standard/src/main.nr +++ b/src/noir/bin/disclose/bytes/standard/src/main.nr @@ -5,10 +5,12 @@ fn main( comm_in: pub Field, salt: Field, private_nullifier: Field, +// Suggestion: Use DG1Data type dg1: [u8; 95], // @committed // The disclose mask is public (via the parameter commitment) so verifiers can check the mask - disclose_mask: [u8; 90], + // ^^^ I see. I was going to comment that these masks should be `bool` types, or the existing `u8` types should be constrained to be `0 or 1` only, to prevent malicious usage of the `mask[i] * mrz[i]` calculation in `get_disclosed_bytes`. + disclose_mask: [u8; 90], // Consider replacing `90` with a global constant. service_scope: pub Field, service_subscope: pub Field, ) -> pub (Field, Field) { diff --git a/src/noir/bin/disclose/flags/src/main.nr b/src/noir/bin/disclose/flags/src/main.nr index 469ea6a34..d2865f9ca 100644 --- a/src/noir/bin/disclose/flags/src/main.nr +++ b/src/noir/bin/disclose/flags/src/main.nr @@ -2,10 +2,13 @@ use commitment::nullify; use disclose::get_disclosed_data; use utils::{DisclosedData, DiscloseFlags}; +// Apparently this is not used. Consider removing. +// Not audited. 
fn main( comm_in: pub Field, salt: Field, private_nullifier: Field, +// Suggestion: Use DG1Data type dg1: [u8; 95], disclose_flags: DiscloseFlags, service_scope: pub Field, diff --git a/src/noir/bin/exclusion-check/issuing-country/evm/src/main.nr b/src/noir/bin/exclusion-check/issuing-country/evm/src/main.nr index f139bd0e8..f8816e2da 100644 --- a/src/noir/bin/exclusion-check/issuing-country/evm/src/main.nr +++ b/src/noir/bin/exclusion-check/issuing-country/evm/src/main.nr @@ -2,6 +2,7 @@ use commitment::nullify; use exclusion_check_country::{calculate_param_commitment_sha2, check_issuing_country_exclusion}; use utils::PROOF_TYPE_ISSUING_COUNTRY_EXCLUSION; +// Visually inspected fn main( comm_in: pub Field, salt: Field, diff --git a/src/noir/bin/exclusion-check/nationality/evm/src/main.nr b/src/noir/bin/exclusion-check/nationality/evm/src/main.nr index 1238acb7f..6517b9174 100644 --- a/src/noir/bin/exclusion-check/nationality/evm/src/main.nr +++ b/src/noir/bin/exclusion-check/nationality/evm/src/main.nr @@ -2,6 +2,7 @@ use commitment::nullify; use exclusion_check_country::{calculate_param_commitment_sha2, check_nationality_exclusion}; use utils::PROOF_TYPE_NATIONALITY_EXCLUSION; +// Visually inspected fn main( comm_in: pub Field, salt: Field, diff --git a/src/noir/bin/exclusion-check/sanctions/evm/src/main.nr b/src/noir/bin/exclusion-check/sanctions/evm/src/main.nr index 934f5357a..d67fb8825 100644 --- a/src/noir/bin/exclusion-check/sanctions/evm/src/main.nr +++ b/src/noir/bin/exclusion-check/sanctions/evm/src/main.nr @@ -5,6 +5,7 @@ use exclusion_check_sanctions::{ }; use utils::PROOF_TYPE_SANCTIONS_EXCLUSION; + fn main( comm_in: pub Field, salt: Field, @@ -15,6 +16,15 @@ fn main( service_scope: pub Field, service_subscope: pub Field, ) -> pub (Field, Field) { + // So is the plan to always hard-code the root in the circuit, or to instead + // have the root be a public input (or committed into param_commitment) so that + // the list can be more-easily updated onchain? + // Iiuc, it might make more sense to not have a hard-coded root, and instead + // instantiate this as: + // `trees = SanctionsSparseMerkleTrees { root };` using the circuit private input `root`, + // then checking that root within the Verifier. + // Also "trees" (plural) is confusing, because it appears to be just one big tree, + // which contains leaves that might represent one of three different concepts. let trees = SanctionsSparseMerkleTrees::default(); assert(trees.root() == root); // Check that nationality of the passport holder is not in the list of countries diff --git a/src/noir/bin/inclusion-check/issuing-country/evm/src/main.nr b/src/noir/bin/inclusion-check/issuing-country/evm/src/main.nr index 3a14b15ec..e047091af 100644 --- a/src/noir/bin/inclusion-check/issuing-country/evm/src/main.nr +++ b/src/noir/bin/inclusion-check/issuing-country/evm/src/main.nr @@ -2,6 +2,7 @@ use commitment::nullify; use inclusion_check_country::{calculate_param_commitment_sha2, check_issuing_country_inclusion}; use utils::PROOF_TYPE_ISSUING_COUNTRY_INCLUSION; +// Visually inspected. 
fn main( comm_in: pub Field, salt: Field, diff --git a/src/noir/bin/inclusion-check/nationality/evm/src/main.nr b/src/noir/bin/inclusion-check/nationality/evm/src/main.nr index 58fd48fc4..497ca29dd 100644 --- a/src/noir/bin/inclusion-check/nationality/evm/src/main.nr +++ b/src/noir/bin/inclusion-check/nationality/evm/src/main.nr @@ -2,6 +2,7 @@ use commitment::nullify; use inclusion_check_country::{calculate_param_commitment_sha2, check_nationality_inclusion}; use utils::PROOF_TYPE_NATIONALITY_INCLUSION; +// Visually inspected. fn main( comm_in: pub Field, salt: Field, diff --git a/src/noir/bin/main/outer/count_9/src/main.nr b/src/noir/bin/main/outer/count_9/src/main.nr index 6016717e5..d44c94b9e 100644 --- a/src/noir/bin/main/outer/count_9/src/main.nr +++ b/src/noir/bin/main/outer/count_9/src/main.nr @@ -30,6 +30,7 @@ use outer_lib::{ use std::verify_proof_with_type; global PROOF_TYPE_HONK_ZK: u32 = 7; +// Visually inspected. fn verify_subproofs( // Root of the certificate merkle tree certificate_registry_root: Field, @@ -128,8 +129,8 @@ fn verify_subproofs( // Commitment out from CSC to DSC circuit == commitment in from DSC to ID Data circuit assert_eq( - csc_to_dsc_proof.public_inputs[0], - dsc_to_id_data_proof.public_inputs[0], + csc_to_dsc_proof.public_inputs[0], // comm_out + dsc_to_id_data_proof.public_inputs[0], // comm_in "Commitment out from CSC to DSC circuit != commitment in from DSC to ID Data circuit", ); @@ -146,8 +147,8 @@ fn verify_subproofs( // Commitment out from DSC to ID Data circuit == commitment in from integrity check circuit assert_eq( - dsc_to_id_data_proof.public_inputs[1], - integrity_check_proof.public_inputs[0], + dsc_to_id_data_proof.public_inputs[1], // comm_out + integrity_check_proof.public_inputs[0], // comm_in "Commitment out from DSC to ID Data circuit != commitment in from integrity check circuit", ); @@ -166,8 +167,8 @@ fn verify_subproofs( for i in 0..disclosure_proofs.len() { // Commitment out from integrity check circuit == commitment in from disclosure circuit assert_eq( - integrity_check_proof.public_inputs[1], - disclosure_proofs[i].public_inputs[0], + integrity_check_proof.public_inputs[1], // comm_out + disclosure_proofs[i].public_inputs[0], // comm_in "Commitment out from integrity check circuit != commitment in from disclosure circuit", ); diff --git a/src/noir/bin/sig-check/dsc/tbs_700/ecdsa/nist/p384/sha256/src/main.nr b/src/noir/bin/sig-check/dsc/tbs_700/ecdsa/nist/p384/sha256/src/main.nr index c74eb135a..81f261378 100644 --- a/src/noir/bin/sig-check/dsc/tbs_700/ecdsa/nist/p384/sha256/src/main.nr +++ b/src/noir/bin/sig-check/dsc/tbs_700/ecdsa/nist/p384/sha256/src/main.nr @@ -4,6 +4,7 @@ use sig_check_common::sha256_and_check_data_to_sign; use sig_check_ecdsa::verify_nist_p384; use utils::{concat_array, split_array}; +// Visually inspected this example. fn main( certificate_registry_root: pub Field, certificate_registry_index: Field, @@ -18,6 +19,8 @@ fn main( ) -> pub Field { // Get the length of tbs_certificate by parsing the ASN.1 // Safety: This is safe because the length must be correct for the hash and signature to be valid + // There's a check that the tbs_certificate is all zeroes after `tbs_certificate_len` (within `verify..()`). Consider moving + // that check to immediately after this unsafe call, to make it clear that the check is being done. 
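+    // i.e. something like (sketch, assuming a padding-check helper along the lines of the
+    // `assert_zero_padded` I sketched in my data-check/integrity comments):
+    //
+    //   let tbs_certificate_len = unsafe { utils::unsafe_get_asn1_element_length(tbs_certificate) };
+    //   assert_zero_padded(tbs_certificate, tbs_certificate_len); // constrain the hint immediately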
let tbs_certificate_len = unsafe { utils::unsafe_get_asn1_element_length(tbs_certificate) }; let (r, s) = split_array(dsc_signature); let msg_hash = sha256_and_check_data_to_sign(tbs_certificate, tbs_certificate_len); diff --git a/src/noir/bin/sig-check/id-data/tbs_700/ecdsa/nist/p384/sha256/src/main.nr b/src/noir/bin/sig-check/id-data/tbs_700/ecdsa/nist/p384/sha256/src/main.nr index f23ee1ab9..f16298960 100644 --- a/src/noir/bin/sig-check/id-data/tbs_700/ecdsa/nist/p384/sha256/src/main.nr +++ b/src/noir/bin/sig-check/id-data/tbs_700/ecdsa/nist/p384/sha256/src/main.nr @@ -5,11 +5,13 @@ use sig_check_common::sha256_and_check_data_to_sign; use sig_check_ecdsa::verify_nist_p384; use utils::split_array; +// Visually inspected this binary, as an example. +// Consider using globals for all of the magic numbers that are the same for all circuit variants (95, 700), or better still: custom types, as has been begun in `types.nr`. fn main( comm_in: pub Field, salt_in: Field, salt_out: Field, - dg1: [u8; 95], + dg1: [u8; 95], // Consider using the DG1Data type dsc_pubkey_x: [u8; 48], dsc_pubkey_y: [u8; 48], sod_signature: [u8; 96], diff --git a/src/noir/lib/commitment/common/src/lib.nr b/src/noir/lib/commitment/common/src/lib.nr index 9d74e4135..e3f472155 100644 --- a/src/noir/lib/commitment/common/src/lib.nr +++ b/src/noir/lib/commitment/common/src/lib.nr @@ -40,6 +40,7 @@ fn has_value_in_tags( result } +// Visually inspected. pub fn calculate_certificate_registry_leaf( tags: [Field; 3], cert_type: u8, @@ -48,14 +49,15 @@ pub fn calculate_certificate_registry_leaf( ) -> Field { // Pack certificate type and country code into a single field let country_bytes: [u8; 3] = country.as_bytes(); + // No need to cast cert_type `as u8`. let unpacked_bytes: [u8; 4] = [cert_type as u8, country_bytes[0], country_bytes[1], country_bytes[2]]; // Prepare and hash Poseidon2 inputs - let mut poseidon_inputs: [Field; 4 + (N + 30) / 31] = [0; 4 + (N + 30) / 31]; + let mut poseidon_inputs: [Field; 4 + (N + 30) / 31] = [0; 4 + (N + 30) / 31]; // [Field; 4 + ceil(N/31)] poseidon_inputs[0] = tags[0]; poseidon_inputs[1] = tags[1]; poseidon_inputs[2] = tags[2]; - poseidon_inputs[3] = utils::pack_be_bytes_into_fields::<4, 1, 31>(unpacked_bytes)[0]; + poseidon_inputs[3] = utils::pack_be_bytes_into_fields::<4, 1, 31>(unpacked_bytes)[0]; // consider using pack_be_bytes_into_field (singular) let packed_pubkey = utils::pack_be_bytes_into_fields::(public_key); for i in 0..packed_pubkey.len() { poseidon_inputs[4 + i] = packed_pubkey[i] as Field; @@ -159,8 +161,10 @@ pub fn test_calculate_certificate_registry_leaf() { assert(leaf_hash == 0x10370abe147332c5b039fb820a32964f7567998e9c54f93595bb95a1dc4ba62f); } +// Duplicate of `get_issuing_country_from_mrz` in utils/src/lib.nr. Consider deleting this one and only using that one. (Also see my comments on that one). pub fn get_country_from_dg1(dg1: [u8; 95]) -> str<3> { // There 5 padding bytes in the dg1 before the actual MRZ + // Consider globals for the magic numbers 5, 3. let country_offset = 5 + PASSPORT_MRZ_COUNTRY_INDEX; let mut country_bytes: [u8; 3] = [0; 3]; for i in 0..3 { @@ -174,16 +178,18 @@ pub fn get_country_from_dg1(dg1: [u8; 95]) -> str<3> { country } +// Visually inspected. 
pub fn hash_salt_country_tbs( salt: Field, country: str<3>, tbs: [u8; TBS_MAX_SIZE], ) -> Field { let country_bytes: [u8; 3] = country.as_bytes(); - let mut result: [Field; 2 + ((TBS_MAX_SIZE + 30) / 31)] = [0; 2 + ((TBS_MAX_SIZE + 30) / 31)]; + // Consider renaming `result` to something like `hash_input` or `commitment_preimage` + let mut result: [Field; 2 + ((TBS_MAX_SIZE + 30) / 31)] = [0; 2 + ((TBS_MAX_SIZE + 30) / 31)]; // [Field; 2 + ceil(TBS_MAX_SIZE/31)] result[0] = salt; - let packed_country: Field = pack_be_bytes_into_fields::<3, 1, 31>(country_bytes)[0]; + let packed_country: Field = pack_be_bytes_into_fields::<3, 1, 31>(country_bytes)[0]; // Technically already packed within `calculate_certificate_registry_leaf` -- albeit it was mixed-in with `cert_type` in that fn. result[1] = packed_country; let packed_tbs: [Field; (TBS_MAX_SIZE + 30) / 31] = @@ -200,6 +206,8 @@ pub fn hash_salt_dg1_private_nullifier( dg1: [u8; N], private_nullifier: Field, ) -> Field { + // You might be able to just write `= std::mem::zeroed()` to reduce verbosity. + // Or `let mut result = [0 as Field; 2 + ((N + 30) / 31)];` let mut result: [Field; 2 + ((N + 30) / 31)] = [0; 2 + ((N + 30) / 31)]; result[0] = salt; @@ -211,11 +219,13 @@ pub fn hash_salt_dg1_private_nullifier( Poseidon2::hash(result, 2 + ((N + 30) / 31)) } +// Visually inspected. pub fn calculate_private_nullifier( dg1: [u8; DG1], e_content: [u8; ECONTENT], sod_sig: [u8; SIG], ) -> Field { + // You might be able to just write `= std::mem::zeroed()` to reduce verbosity. let mut result: [Field; (DG1 + 30) / 31 + (ECONTENT + 30) / 31 + (SIG + 30) / 31] = [0; (DG1 + 30) / 31 + (ECONTENT + 30) / 31 + (SIG + 30) / 31]; @@ -241,6 +251,7 @@ pub fn calculate_private_nullifier( salt: Field, country: str<3>, @@ -252,6 +263,7 @@ pub fn hash_salt_country_signed_attr_dg1_e_content_private_nullifier Field { let country_bytes: [u8; 3] = country.as_bytes(); + // You might be able to just write `= std::mem::zeroed()` to reduce verbosity. let mut result: [Field; 4 + (SA + 30) / 31 + ((DG1 + 30) / 31) + ((ECONTENT + 30) / 31)] = [0; 4 + (SA + 30) / 31 + ((DG1 + 30) / 31) + ((ECONTENT + 30) / 31)]; result[0] = salt; @@ -285,6 +297,7 @@ pub fn hash_salt_country_signed_attr_dg1_e_content_private_nullifier(leaf: Field, index: Field, hash_path: [Field; N]) -> Field { @@ -302,6 +315,8 @@ pub fn compute_merkle_root(leaf: Field, index: Field, hash_path: [Fi current } +// Recommendation: a negative test that fails a merkle root inclusion check. + #[test] fn test_compute_merkle_root1() { let leaf = 0x2fe190f39de3fcf4cbc2eb334d0dc76e4d06f2350aa6056c91ff5f11ded9fb4a; diff --git a/src/noir/lib/commitment/csc-to-dsc/src/lib.nr b/src/noir/lib/commitment/csc-to-dsc/src/lib.nr index 10e21375b..11c1b8144 100644 --- a/src/noir/lib/commitment/csc-to-dsc/src/lib.nr +++ b/src/noir/lib/commitment/csc-to-dsc/src/lib.nr @@ -38,6 +38,9 @@ pub fn commit_to_dsc Field { // Verify csc_pubkey exists in certificate registry + // How is this registry merkle tree secured / maintained? + // Where is the root published, for reference? + // Who has access to manipulate the root? 
let leaf: Field = calculate_certificate_registry_leaf(certificate_tags, CSC_CERT_TYPE, country, csc_pubkey); let root = compute_merkle_root( diff --git a/src/noir/lib/commitment/integrity-to-disclosure/src/lib.nr b/src/noir/lib/commitment/integrity-to-disclosure/src/lib.nr index d7d847949..b8b95caf0 100644 --- a/src/noir/lib/commitment/integrity-to-disclosure/src/lib.nr +++ b/src/noir/lib/commitment/integrity-to-disclosure/src/lib.nr @@ -49,6 +49,7 @@ pub fn commit_to_disclosure( ), "Commitment from 2nd subproof doesn't match in 3rd subproof", ); + // Lingering print statements :) // println(f"comm_in: {comm_in}"); // println(f"private_nullifier: {private_nullifier}"); let comm_out = hash_salt_dg1_private_nullifier(salt_out, dg1, private_nullifier); diff --git a/src/noir/lib/commitment/scoped-nullifier/src/lib.nr b/src/noir/lib/commitment/scoped-nullifier/src/lib.nr index 9c5fec165..2226ce4af 100644 --- a/src/noir/lib/commitment/scoped-nullifier/src/lib.nr +++ b/src/noir/lib/commitment/scoped-nullifier/src/lib.nr @@ -4,12 +4,13 @@ use utils::get_issuing_country_from_mrz; // The ZKR (or Zero Knowledge Republic) is a mock country used for testing purposes global ZKR_COUNTRY_CODE_BYTES: [u8; 3] = [90, 75, 82]; +// Visually inspected. /* ############################################################ # Circuit D ############################################################ # Generates a scoped nullifier that is scoped by service -# Allows selective disclosure of dg1 via a reveal bitmask +# Allows selective disclosure of dg1 via a reveal bitmask // <-- perhaps there was a refactor, because this responsibility is now in a different file. ############################################################ # Inputs/Outputs @@ -24,7 +25,7 @@ scoped_nullifier `H(private_nullifier, service_scope, service_subscope)` # Checks ############################################################ -- Checks that dg1_reveal is the correct reveal of dg1 for the given dg1_mask +- Checks that dg1_reveal is the correct reveal of dg1 for the given dg1_mask // <-- this file doesn't do this. - Constrains scoped_nullifier to be `H(private_nullifier, service_scope, service_subscope)` */ pub fn nullify( diff --git a/src/noir/lib/compare/age/src/lib.nr b/src/noir/lib/compare/age/src/lib.nr index fbe445d7e..024848300 100644 --- a/src/noir/lib/compare/age/src/lib.nr +++ b/src/noir/lib/compare/age/src/lib.nr @@ -9,6 +9,7 @@ fn get_birthdate(dg1: [u8; 95], timestamp: u64) -> Date { let mut birthdate_bytes = [0 as u8; 6]; let mrz = get_mrz_from_dg1(dg1); + // There's `get_dob_from_mrz` in utils; consider using that. // Get the slice of the MRZ representing the birthdate if is_id_card(dg1) { birthdate_bytes = get_array_slice( @@ -25,6 +26,7 @@ fn get_birthdate(dg1: [u8; 95], timestamp: u64) -> Date { ); } + // Date library not audited. let current_date = Date::from_timestamp(timestamp); // Create a Date object from the birthdate using the current date as @@ -55,14 +57,27 @@ pub fn compare_age(dg1: [u8; 95], min_age: u8, max_age: u8, timestamp: u64) { } else if (max_age != 0) & (min_age == 0) { // Check if age is below max age // The maximum age is more likely to be exclusive, so we use lt + // Danger: from a quick glance at `add_years`, there might be a bug. `add_years` will result in a non-existent (invalid) date if the person was born on Feb 29th, + // when adding a number of years that is not a multiple of 4 (or when landing on century years that are not a multiple of 400). 
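+        // e.g. (assuming `add_years` naively increments the year number): someone born 2004-02-29,
+        // with max_age 21, would get birthdate.add_years(21) = 2025-02-29, which doesn't exist;
+        // whether that then compares the way you intend depends on the Date library's internals.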
assert(current_date.lt(birthdate.add_years(max_age as u32)), "Age is not below max age"); } else { + // I don't understand the approach for `max_age` in this `else` block. It's inconsistent with the previous + // `else if` block's approach to comparing against the max_age. + // It says in the zkpassport docs: + // "The lower bound for age comparison is inclusive, but the upper bound is exclusive. + // Hence, the available operators for age are gte (greater than or equal to) and lt (less than)." + // Given that intention, I think this function should disallow `min_age = max_age`, and enforce + // a strict `<`. assert(min_age <= max_age, "Min age must be less than or equal to max age"); assert( current_date.gte(birthdate.add_years(min_age as u32)), "Age is not above or equal to min age", ); + + // If the max_age is specified as 50, based on the comments and docs, you only want to + // allow someone who is <= 49 (hasn't had their 50th birthday yet). But this function will allow + // someone who has already been age 50 for up to ~364 days (i.e. until the day before their 51st birthday). // This way if max_age = min_age, the proof will be valid whenever the age // is equal to min_age = max_age assert( diff --git a/src/noir/lib/compare/citizenship/src/lib.nr b/src/noir/lib/compare/citizenship/src/lib.nr index cabee34dd..e05f98be8 100644 --- a/src/noir/lib/compare/citizenship/src/lib.nr +++ b/src/noir/lib/compare/citizenship/src/lib.nr @@ -1,5 +1,6 @@ use utils::get_nationality_from_mrz; +// Visually inspected. pub fn compare_citizenship(dg1: [u8; 95], country: [u8; 3]) { let country_bytes = get_nationality_from_mrz(dg1); diff --git a/src/noir/lib/compare/date/src/lib.nr b/src/noir/lib/compare/date/src/lib.nr index b664e37c0..04f6c416b 100644 --- a/src/noir/lib/compare/date/src/lib.nr +++ b/src/noir/lib/compare/date/src/lib.nr @@ -16,10 +16,13 @@ fn get_date(dg1: [u8; 95], threshold_date: Date) -> Date { Date::from_bytes_short_year(date_bytes, threshold_date) } +// Duplicate of `get_birthdate` in `lib/compare/age/...`? Consider consolidating, to reduce code surface. +// Consider moving to utils. pub fn get_birthdate(dg1: [u8; 95], timestamp: u64) -> Date { let current_date = Date::from_timestamp(timestamp); let mut birthdate = Date::new(1, 1, 1); + // There's `get_dob_from_mrz` in utils; consider using that. // Get the slice of the MRZ representing the birthdate if is_id_card(dg1) { birthdate = get_date::(dg1, current_date); @@ -39,6 +42,7 @@ pub fn get_expiry_date(dg1: [u8; 95], timestamp: u64) -> Date { // So with 20 years we should be safe let threshold_year = current_date.add_years(20); + // Consider moving this logic to utils, to live with all its similar "dg1 extraction" friends. // Get the slice of the MRZ representing the expiry date if is_id_card(dg1) { expiry_date = get_date::(dg1, threshold_year); @@ -49,6 +53,15 @@ pub fn get_expiry_date(dg1: [u8; 95], timestamp: u64) -> Date { expiry_date } +// The upper bound for age comparison vs date comparison is inconsistent, in the sense that one +// is exclusive and the other is inclusive. This might lead to user confusion, since the `QueryBuilder::range` +// interface is the same for both kinds (age and date), but the behaviour is different. (Unless of course +// the Typescript code is already mutating the user's inputs to adhere to the differences in these circuits). +// According to the docs, the `range` method: "Verifies if a numeric field is within a range (inclusive start, exclusive end)."
+// Consider making both exclusive, unless typescript is already mutating the user's inputs before reaching the circuit +// (but even so, the inconsistency could lead to bugs for whoever is maintaining these circuits, and could lead +// to bugs in the verifier contracts that users will be handling themselves). +// Tentative recommendation: make all date/age upper bounds be exclusive. pub fn compare_date( date_to_compare: Date, min_date_timestamp: u64, @@ -78,6 +91,7 @@ pub fn compare_date( } } +// Somewhat inefficient packing. /// Calculate the commitment of the current date, min date and max date using Poseidon2 /// /// This is less demanding on the prover but much more demanding on EVM verifiers diff --git a/src/noir/lib/data-check/expiry/src/lib.nr b/src/noir/lib/data-check/expiry/src/lib.nr index 48eab1617..8461ca01d 100644 --- a/src/noir/lib/data-check/expiry/src/lib.nr +++ b/src/noir/lib/data-check/expiry/src/lib.nr @@ -5,22 +5,28 @@ use utils::{ }; pub fn check_expiry(dg1: [u8; 95], current_date_timestamp: u64) { + // Date library not audited. let current_date = Date::from_timestamp(current_date_timestamp); let mrz = get_mrz_from_dg1(dg1); + // Recommend explaining what "threshold year" means, here. From clicking through the code, + // it seems "threshold_year" establishes 0 mod 100 for the YY date format? + // So the YYYY of `expiry_date` chooses between 19XX and 20XX based on the threshold_year. + // We base the threshold year for the expiry date on the current date plus 30 years // As most documents will have a 10 year validity (and some maybe 15 years?) // So with 30 years we should be safe let threshold_year = current_date.add_years(30); // Consider using global instead of magic number - let mut expiry_date_bytes = [0 as u8; 6]; + let mut expiry_date_bytes = [0 as u8; 6]; // Consider using global instead of magic number + // This logic to extract bytes could potentially live in utils. if is_id_card(dg1) { expiry_date_bytes = get_array_slice( mrz, ID_CARD_MRZ_EXPIRY_DATE_INDEX, - ID_CARD_MRZ_EXPIRY_DATE_INDEX + 6, + ID_CARD_MRZ_EXPIRY_DATE_INDEX + 6, // Consider using global instead of magic number 6 ); } else { expiry_date_bytes = get_array_slice( diff --git a/src/noir/lib/data-check/integrity/src/lib.nr b/src/noir/lib/data-check/integrity/src/lib.nr index ccd46b4c8..f7814e577 100644 --- a/src/noir/lib/data-check/integrity/src/lib.nr +++ b/src/noir/lib/data-check/integrity/src/lib.nr @@ -3,6 +3,10 @@ use sha256::{sha224_var, sha256_var}; use sha512::{sha384, sha512}; use utils::{find_subarray_index, is_id_card, is_subarray_in_array}; +// Recommend replacing this file's magic constants (95, 700, 93, 20, 28, 32, 48, 64) with globals (e.g. from utils/lib.nr). +// Recommend using the DG1Data type. +// Recommend introducing a type `EContent` in types.nr for the `e_content: [u8; 700]` + /** * Computes the hash (using SHA1) of the first data group (containing the MRZ) and checks it is the same as the one * included in eContent at the offset provided (along with the rest of the data group hashes) @@ -28,6 +32,7 @@ pub fn check_dg1_sha1(dg1: [u8; 95], e_content: [u8; 700], e_content_size: u32) let dg1_offset_in_e_content = find_subarray_index(dg1_hash, e_content); // Check that the hash of dg1 is in the e_content, we use the actual size rather than generic size // of econtent to ignore the padding bytes at the end + // Not clear what `20` is: recommend replacing with a global constant.
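+    // (For what it's worth: 20 is the sha1 digest size in bytes, just as the 28/32/48/64 below are the
+    // sha224/sha256/sha384/sha512 digest sizes; so e.g. a `SHA1_DIGEST_SIZE_BYTES` global.)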
assert(dg1_offset_in_e_content + 20 <= e_content_size, "Hash of dg1 not found in eContent"); } @@ -75,6 +80,7 @@ pub fn check_dg1_sha224(dg1: [u8; 95], e_content: [u8; 700], e_content_size: u32 let dg1_offset_in_e_content = find_subarray_index(dg1_hash, e_content); // Check that the hash of dg1 is in the e_content, we use the actual size rather than generic size // of econtent to ignore the padding bytes at the end + // Not clear what `28` is: recommend replacing with a global constant. assert(dg1_offset_in_e_content + 28 <= e_content_size, "Hash of dg1 not found in eContent"); } @@ -101,8 +107,13 @@ pub fn check_signed_attributes_sha224( * included in eContent at the offset provided (along with the rest of the data group hashes) */ pub fn check_dg1_sha256(dg1: [u8; 95], e_content: [u8; 700], e_content_size: u32) { + // If the e_content_size is constrained to be correct in a later function (check_signed_attributes_sha256), + // why is this assertion needed? (Of course, it doesn't hurt to be extra careful if you can afford the constraints). + // Or if it is needed, consider placing this line immediately after the unconstrained call which assigns this witness. assert(e_content_size <= 700, "eContent size is too large"); + // Consider extracting the assignment of `dg1_size` (based on dg1) into a utils function, because it's repeated a few + // times in the codebase. // For passports we ignore the last padding characters let mut dg1_size: u64 = 93; @@ -117,9 +128,25 @@ pub fn check_dg1_sha256(dg1: [u8; 95], e_content: [u8; 700], e_content_size: u32 let dg1_hash = sha256_var(dg1, dg1_size); // Find the offset of the hash of dg1 in e_content + // Danger: see the comments on `find_subarray_index`. + // Danger: this function doesn't seem to explicitly handle a `HAYSTACK_SIZE` return value. + // Consider renaming from `dg1_offset_in_e_content` to `dg1_hash_offset_in_e_content`; I was confused for a little bit. + // Possible Danger (TODO: check this later in audit): if this offset is not rigid, then a malicious + // prover has the ability to choose the offset. If there are any other items in the econtent which + // are a sha256 hash, and if the prover also knows the preimage to that sha256 hash, then there's an + // outside chance that the prover could open that other data and pretend it's dg1. TODO(me): understand the + // layout and contents of e_content better. + // Presumably the position of the needle in the haystack might vary from country to country, + // hence this "find" complexity? let dg1_offset_in_e_content = find_subarray_index(dg1_hash, e_content); // Check that the hash of dg1 is in the e_content, we use the actual size rather than generic size // of econtent to ignore the padding bytes at the end + // Danger: this will only catch a "not found" return value from `find_subarray_index` if the + // dg1_hash is consistently always the last 32 bytes of the e_content. (I'm not familiar enough with + // passports to know whether that is indeed always the case). If dg1_hash is sometimes earlier in the + // e_content, then a "not found" result won't be caught by this assertion. + // Please could you add a comment here to assuage any concerns that a reader might have + // (or consider fixing the code if dg1_hash is not always the last 32 bytes).
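+    // An explicit guard would make the "not found" handling unambiguous, e.g. (sketch, assuming
+    // "not found" is signalled by returning the haystack size, i.e. 700 here):
+    //   assert(dg1_offset_in_e_content != 700, "Hash of dg1 not found in eContent");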
assert(dg1_offset_in_e_content + 32 <= e_content_size, "Hash of dg1 not found in eContent"); } @@ -135,6 +162,14 @@ pub fn check_signed_attributes_sha256( let computed_final_hash = sha256_var(e_content, e_content_size as u64); // Check that the computed final hash is in the signed attributes + // There are some concerns with `find_subarray_index`, although they're unlikely to bite + // here, because: + // - we can rely on the collision resistance of sha256 to assume there are unlikely to be + // any other substrings within signed_attributes that might result in a malicious match. + // - we're explicitly disallowing a `HAYSTACK_SIZE` return value. + // + // Presumably the position of the needle in the haystack might vary from country to country, + // hence this "find" complexity? assert( is_subarray_in_array(computed_final_hash, signed_attributes), "Computed final hash not found in signed attributes", @@ -166,6 +201,7 @@ pub fn check_dg1_sha384(dg1: [u8; 95], e_content: [u8; 700], e_content_size: u32 let dg1_offset_in_e_content = find_subarray_index(dg1_hash, e_content); // Check that the hash of dg1 is in the e_content, we use the actual size rather than generic size // of econtent to ignore the padding bytes at the end + // Not clear what `48` is: recommend replacing with a global constant. assert(dg1_offset_in_e_content + 48 <= e_content_size, "Hash of dg1 not found in eContent"); } @@ -214,6 +250,7 @@ pub fn check_dg1_sha512(dg1: [u8; 95], e_content: [u8; 700], e_content_size: u32 let dg1_offset_in_e_content = find_subarray_index(dg1_hash, e_content); // Check that the hash of dg1 is in the e_content, we use the actual size rather than generic size // of econtent to ignore the padding bytes at the end + // Not clear what `64` is: recommend replacing with a global constant. assert(dg1_offset_in_e_content + 64 <= e_content_size, "Hash of dg1 not found in eContent"); } diff --git a/src/noir/lib/data-check/tbs-pubkey/src/lib.nr b/src/noir/lib/data-check/tbs-pubkey/src/lib.nr index 344aee800..001ce4d2b 100644 --- a/src/noir/lib/data-check/tbs-pubkey/src/lib.nr +++ b/src/noir/lib/data-check/tbs-pubkey/src/lib.nr @@ -3,6 +3,7 @@ pub fn verify_rsa_pubkey_in_tbs( pubkey: [u8; PUBKEY_SIZE], tbs: [u8; TBS_SIZE], ) { + // Similar comments to `verify_ecdsa_pubkey_in_tbs` below. let pubkey_offset = utils::find_subarray_index(pubkey, tbs); for i in 0..PUBKEY_SIZE { assert(tbs[i + pubkey_offset] == pubkey[i], "Public key of DSC not found in TBS"); @@ -15,12 +16,19 @@ pub fn verify_ecdsa_pubkey_in_tbs( pubkey_y: [u8; PUBKEY_SIZE], tbs: [u8; TBS_SIZE], ) { + // This assumes there's no possibility for the x-coordinate to match any other substring in the tbs-certificate + // (i.e. no colliding bytes at some other position in `tbs`), because `find_subarray_index` does not necessarily guarantee that the returned value is the _first_ occurrence (see comments in that function). + // That seems like a reasonable assumption, but I don't know the contents of a tbs-certificate well enough to validate that assumption. + // Danger: see concerns in `find_subarray_index`. + // Danger: this function does not handle a return value of `HAYSTACK_SIZE` meaning "not found": it will blindly use that value as an offset + // for the y-coord check below. Now that might fail just because it's unlikely that there will be a matching y-coordinate at the `HAYSTACK_SIZE` + // offset, but it's difficult to validate that the circuit would indeed always fail in such a case. 
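+    // e.g. (sketch, under the same "not found => haystack size" assumption):
+    //   assert(pubkey_offset != TBS_SIZE, "Public key x coord not found in TBS");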
let pubkey_offset = utils::find_subarray_index(pubkey_x, tbs); for i in 0..PUBKEY_SIZE { // We only need to check that the y coord is found after the x coord, since // find_substring_index() already guarantees that the x coord exists in the TBS assert( - tbs[i + pubkey_offset + PUBKEY_SIZE] == pubkey_y[i], + tbs[i + pubkey_offset + PUBKEY_SIZE] == pubkey_y[i], // hopefully the compiler is good enough to remove the redundant `pubkey_offset + PUBKEY_SIZE` operation each loop. "Public key y coord not found in TBS", ); } diff --git a/src/noir/lib/disclose/src/lib.nr b/src/noir/lib/disclose/src/lib.nr index 32acc46ca..959522fe0 100644 --- a/src/noir/lib/disclose/src/lib.nr +++ b/src/noir/lib/disclose/src/lib.nr @@ -10,6 +10,13 @@ use utils::{ PROOF_TYPE_DISCLOSE, }; +// Missing tests +// Visually inspected. +// Recommendation: copy this over to `utils/` to avoid duplicated code (replacing the one that's currently there, because this one looks to be more efficient). +// Recommendation: get rid of `end` and call this function with turbofish syntax, if you need to convey the expected return length. +// The function could then be called as: +// - `get_array_slice::<_, M>(arr, start);` or +// - `let sliced_arr: [u8; M] = get_array_slice(arr, start);` fn get_array_slice(array: [u8; N], start: u32, end: u32) -> [u8; M] { let mut slice = [0 as u8; M]; for i in 0..M { @@ -18,8 +25,13 @@ fn get_array_slice(array: [u8; N], start: u32, end: u32) slice } + +// Apparently this is not used. Consider removing. pub fn get_disclosed_data(dg1: [u8; 95], flags: DiscloseFlags) -> DisclosedData { let mrz = get_mrz_from_dg1(dg1); + + // Recommendation: don't use magic numbers: define globals in `utils/` for these magic number byte-lengths. + // You could then also write tests to reconcile these globals against the `PASSPORT_MRZ__INDEX` and `ID_CARD_MRZ__INDEX` indices. let mut disclosed_data = DisclosedData { issuing_country: [0 as u8; 3], date_of_birth: [0 as u8; 6], @@ -36,7 +48,7 @@ pub fn get_disclosed_data(dg1: [u8; 95], flags: DiscloseFlags) -> DisclosedData disclosed_data.issuing_country = get_array_slice( mrz, ID_CARD_MRZ_COUNTRY_INDEX, - ID_CARD_MRZ_COUNTRY_INDEX + 3, + ID_CARD_MRZ_COUNTRY_INDEX + 3, // Recommendation: avoid magic numbers (3). (And so on for the rest of this function). ); } if flags.date_of_birth { @@ -145,11 +157,27 @@ pub fn get_disclosed_bytes(dg1: [u8; 95], mask: [u8; 90]) -> [u8; 90] { let mut disclosed_bytes = [0 as u8; 90]; let mrz = get_mrz_from_dg1(dg1); for i in 0..90 { + // Danger: If `mask[i]` is NOT being constrained to be only `0` or `1`, then the disclosed bytes can be manipulated! + // E.g. if `mask[i]` is maliciously set to `2`, then someone can effectively change their birthday or country code etc. by a factor of 2. + // It's arguably a "critical" bug, but... + // There's a comment in `bin/disclose/standard/src/main` that the disclosure mask is public, so verifiers can validate the mask. + // If verifiers do indeed validate that each of the mask bytes is only 0 or 1 (and no other number), then all is fine. + // If you're leaving it to zkpassport sdk users (i.e. app developers) to write their own verifiers, then this would arguably be too dangerous, as they + // might forget to constrain the mask bytes to be bools, because this is a very low-level thing to understand.
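+    // To make the fix concrete before going further, a minimal sketch of the bool-threaded
+    // variant I recommend a few lines below (names otherwise as in the current code; would need vetting):
+    //
+    //   pub fn get_disclosed_bytes(dg1: [u8; 95], mask: [bool; 90]) -> [u8; 90] {
+    //       let mut disclosed_bytes = [0 as u8; 90];
+    //       let mrz = get_mrz_from_dg1(dg1);
+    //       for i in 0..90 {
+    //           // a bool cast to u8 can only ever be 0 or 1, so no malicious scaling is possible
+    //           disclosed_bytes[i] = (mask[i] as u8) * mrz[i];
+    //       }
+    //       disclosed_bytes
+    //   }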
+ // From a quick investigation into the Solidity and Typescript, I can't see any auto-generated verifier code (which might protect sdk users), + // and I can't see any verifier code that's validating that each mask is 0 or 1. + // To be extra extra safe, I'd recommend passing the `mask` as a `mask: [bool; 90]` type to `main.nr` instead, + // and threading bools through all these functions instead of bytes. + // Alternatively, you could continue to pass `mask: [u8; 90]` type, as long as you add constraints to assert that these bytes are actually bools (but that feels more brittle). + // OR, if the intention is to actually be able to do proper masking of individual bits of each passport field (e.g. extracting just the first letter of their name, or extracting just the year of birth), + // then consider using "more traditional" masking using the `&` operator. But even then, it could be an sdk-user footgun if they're expected to write their own verifier to check the masking is correct. disclosed_bytes[i] = mask[i] * mrz[i]; } disclosed_bytes } +// Inefficient packing. Each byte occupies a Field, instead of packing bytes into fields. +// Possibly several thousand constraints of "wastage". /// Calculate the commitment of the disclose mask and disclosed bytes using Poseidon2 /// /// This is less demanding on the prover but much more demanding on EVM verifiers diff --git a/src/noir/lib/exclusion-check/country/src/lib.nr b/src/noir/lib/exclusion-check/country/src/lib.nr index 48f2b6f6a..f57b2079a 100644 --- a/src/noir/lib/exclusion-check/country/src/lib.nr +++ b/src/noir/lib/exclusion-check/country/src/lib.nr @@ -1,6 +1,14 @@ use poseidon::poseidon2::Poseidon2; use utils::{get_issuing_country_from_mrz, get_nationality_from_mrz}; +// Missing tests. +// Consider wrapping this in a constrained function that performs validation checks +// on this result, that are currently bleeding into both `check_nationality_exclusion` +// and `check_issuing_country_exclusion`. +// Edit: I've written such a function as an example, below, for your consideration. +// Returns the index of the item in the list which is closest (strictly greater than) the value. +// Even if there is a match in the list, it will return the index of the next-highest item +// in the list, because the intention is for this function to be used for _non-membership_ proofs. unconstrained fn get_closest_index(sorted_list: [u32; N], value: u32) -> i64 { let mut index: i64 = -1; for i in 0..N { @@ -12,6 +20,12 @@ unconstrained fn get_closest_index(sorted_list: [u32; N], value: u32 index } +// Missing tests. +// Assumes the list is all 0`s after the first `0`. +// Consider wrapping this in a constrained function that performs validation checks +// on this result, that are currently bleeding into both `check_nationality_exclusion` +// and `check_issuing_country_exclusion`. +// Edit: I've written such a function as an example, below, for your consideration. unconstrained fn get_last_index(sorted_list: [u32; N]) -> i64 { let mut index: i64 = -1; for i in 0..N { @@ -23,17 +37,28 @@ unconstrained fn get_last_index(sorted_list: [u32; N]) -> i64 { index } +// This was added after I did my first pass of this file, so some of my earlier comments might +// be stale (but most will still be good). +// This is technically checking that it's sorted up until the first zero, and zero thereafter. 
pub fn check_country_list_sorted(country_list: [u32; N]) -> bool { let mut result = true; let mut foundAZero = false; assert(country_list[0] != 0, "Country list cannot start with 0"); for i in 0..N - 1 { // Zeroes mark the end of the list (i.e. start of the padding) + // Consider replacing this line with: + // if country_list[i + 1] == 0 { if !foundAZero & (country_list[i + 1] == 0) { foundAZero = true; } // If the current element is greater than the next element, then the list is not sorted - // We also want to make sure there are no duplicates so we actually check greater than or equal + // We also want to make sure there are no duplicates so we actually check greater than or equal. + // Question: what's the motivation behind enabling invalid inputs to gracefully result in + // a proof that can be verified? Usually, circuit designs just die and are unprovable if the inputs + // aren't valid, since gracefully allowing invalid inputs costs constraints (and could be + // harder to test). + // I ask, because I was going to suggest "just use assert and don't bother returning false", + // but then it dawned on me what you were trying to achieve. if !foundAZero & (country_list[i] >= country_list[i + 1]) { result = false; } @@ -47,6 +72,14 @@ pub fn check_country_list_sorted(country_list: [u32; N]) -> bool { result } +// Missing tests. +// Strongly recommend tests for all of these branches, because it's difficult to follow. +// Also recommend _negative_ tests; at least one for every assert (and error messages to ensure the correct assertion is throwing). +// Tests should also be written to test for malicious values returned from the two unconstrained functions, +// perhaps by writing test-only versions where the results of unconstrained functions can be mocked. +// +// Edit after some consideration: I've proposed an alternative layout for this logic in some functions below, +// for your consideration. (They would need to be audited independently). fn check_exclusion(country_sum: u32, country_list: [u32; N]) { // Safety: since the list is assumed to be sorted in ascending order, we can get the index to check against // from an unconstrained function @@ -79,6 +112,101 @@ fn check_exclusion(country_sum: u32, country_list: [u32; N]) { // If those two checks pass, then the nationality is not in the country list } +// SUGGESTION to improve encapsulation and testability and auditability (but would need to be vetted). +// Would need tests. +// The current `check_exclusion` fn took me a while to work through, because it combines several checks. +// It would also be difficult to test the current `check_exclusion` thoroughly. +fn get_last_index_constrained(sorted_list: [u32; N]) -> i64 { + // Safety: the returned value is constrained below (with the assumptions that: + // - the list is sorted + // - every item after the first 0 is also 0 + // ) + let last_index = unsafe { get_last_index(sorted_list) }; + + constrain_last_index(sorted_list, last_index); + + last_index +} + +// Separated into a separate function, so that you can test your constraining logic +// with malicious `last_index` values, because you can't trust the returned value +// from `unsafe { get_last_index(sorted_list) }` +fn constrain_last_index(sorted_list: [u32; N], last_index: i64) { + if last_index != (N - 1) as i64 { // I.e. if it's not the last index in the array. + // This will fail for last_index >= N - 1, due to out of bounds errors. 
+ + // If there are still 0s after the last nonzero element, + // then we need to check that the next element is 0 + assert_eq(sorted_list[last_index as u32 + 1], 0); + } + + if last_index != -1 { // I.e. if it's not the first index in the array. + // This will fail if last_index <= -2 also, due to out of bounds errors. + assert(sorted_list[last_index as u32] != 0); + } +} + +// SUGGESTION to improve encapsulation and testability and auditability (but would need to be vetted). +// Would need tests. +fn check_exclusion_ALTERNATIVE(sorted_list: [u32; N], value: u32) { + // Safety: the returned value is constrained below, with the assumptions that: + // - the list is sorted + // - every item after the first 0 is also 0 + let closest_index_from_above = unsafe { get_closest_index(sorted_list, value) }; + + constrain_closest_index(sorted_list, value, closest_index_from_above); +} + +// Separated into a separate function, so that you can test your constraining logic +// with malicious `last_index` values, because you can't trust the returned value +// from `unsafe { get_closest_index(sorted_list) }` +fn constrain_closest_index(sorted_list: [u32; N], value: u32, closest_index_from_above: i64) { + // assert(closest_index_from_above < 0x80000000); // < 2^31 // technically needed, due to the `as u32` conversion below, but practically not a problem. + + // Recommendation for a big block comment to show what's going on: + // + // [b , d , f , h , j , l , n] + // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ + // | | | | | | | | | | | | | | | + // 0 1 1 2 2 3 3 4 4 5 5 6 6 -1 -1 <-- the returned closest index (from above), if the + // `value` sits at or between these + // elements in the array. + // + // [b , d , f , h , 0 , 0 , 0] + // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ + // | | | | | | | | | | | | | | | + // 0 1 1 2 2 3 3 -1 -1 -1 -1 -1 -1 -1 -1 <-- the returned closest index (from above), if the + // `value` sits at or between these + // elements in the array. + // + // if 0: + // assert(value < list[0]); // which implies the whole list is > value, since it's sorted. So value is not in the list. Great. + // else if -1: + // // Then the `value` might still be at the "last" nonzero position in the list. + // // So we ensure that the `value` is > the last item, which implies the value is not in the list (since it's sorted). Great. + // Get the last_index (which is constrained to indeed be the last index) + // assert(list[last_index] < value); + // else: + // // The `value` is somewhere in the middle of the first and last element of the list. + // // We don't want the `value` to _equal_ any item in the list, so we sandwich it between + // // two consecutive items in the list: + // assert(list[closest_index_from_above - 1] < value); + // assert(value < list[closest_index_from_above]); + // + if closest_index_from_above == 0 { + assert(value < sorted_list[0]); + } + else if closest_index_from_above == -1 { + let last_index = get_last_index_constrained(sorted_list); + assert(sorted_list[last_index as u32] < value); + } else { + let index = closest_index_from_above as u32; + // Will fail with out of bounds errors if index >= N. 
assert(sorted_list[index - 1] < value); + assert(value < sorted_list[index]); + } +} + /// Check if the nationality from the MRZ is not in the country list /// The list of countries is assumed to be sorted in ascending order /// So it must come either from a public input that can be independently checked @@ -91,6 +219,7 @@ fn check_exclusion(country_sum: u32, country_list: [u32; N]) { /// * `country_list` - The sorted list of countries to check against, must be formatted as a list of u32 /// which are the Alpha-3 codes of the countries with each letter ASCII code put together using a weighted sum. /// e.g. for "FRA", the sum is 70 * 2^16 (0x10000) + 82 * 2^8 (0x100) + 65 = 4587520 + 20992 + 65 = 4608577 +// Q: why is the country list u32's instead of u8's, given that each u32 entry just packs three individual bytes (u8's)? pub fn check_nationality_exclusion(dg1: [u8; 95], country_list: [u32; N]) { // Check that the list is sorted in ascending order assert( @@ -106,6 +235,7 @@ pub fn check_nationality_exclusion(dg1: [u8; 95], country_list: [u32 check_exclusion(nationality_sum, country_list); } +// Missing tests. /// Check if the issuing country from the MRZ is not in the country list /// The list of countries is assumed to be sorted in ascending order /// So it must come either from a public input that can be independently checked @@ -126,6 +256,11 @@ pub fn check_issuing_country_exclusion(dg1: [u8; 95], country_list: check_exclusion(issuing_country_sum, country_list); } +// Visually inspected. +// Missing tests against some hard-coded param commitment somewhere. +// Inefficient packing. Instead of converting each u32 into its own field, you could more-tightly pack them. +// Why is the country_list for "exclusion" a `[u32; N]`, but the country list for "inclusion" is `[str<3>; N]`? +// Presumably both inclusion and exclusion could use the same `calculate_param_commitment` fn? /// Calculate the commitment of the country list using Poseidon2 /// /// This is less demanding on the prover but much more demanding on EVM verifiers @@ -147,6 +282,9 @@ pub fn calculate_param_commitment(proof_type: u8, country_list: [u32 Poseidon2::hash(params, N + 1) } +// Visually inspected. +// Missing tests against some hard-coded param commitment somewhere. +// Inconsistent vs the `inclusion-check/` version of `calculate_param_commitment_sha2`. Why is the country list a different type? /// Calculate the commitment of the country list using SHA2-256 /// /// This is more demanding on the prover but less demanding on EVM verifiers @@ -174,6 +312,8 @@ pub fn calculate_param_commitment_sha2( utils::pack_be_bytes_into_field::<32, 31>(hash) } +// Consider adding extra tests that have full country lists (instead of 0s at the end), +// to ensure the `check_country_list_sorted` function works for that edge case. #[test] fn test_country_list_sorting_check() { // List in descending order, so not sorted in ascending order diff --git a/src/noir/lib/exclusion-check/sanctions/src/lib.nr b/src/noir/lib/exclusion-check/sanctions/src/lib.nr index 576d98d72..b944fe096 100644 --- a/src/noir/lib/exclusion-check/sanctions/src/lib.nr +++ b/src/noir/lib/exclusion-check/sanctions/src/lib.nr @@ -21,17 +21,38 @@ use utils::{ types::{DG1Data, MRZDOB, MRZDocumentNumber, MRZName, MRZNationality, MRZYOB}, }; +// How is this global maintained? global TREE_ROOT: Field = 0x099699583ea7729a4a05821667645e927b74feb4e6e5382c6e4370e35ed2b23c; +// I'm not sure "sparse merkle tree" is the correct term. A sparse merkle tree would have 2^254 leaves.
+// It appears to be a tree whose leaf values +// are sorted in ascending order. A non-membership proof for value x would then show left < x < right +// for two consecutive leaves left & right (i.e. demonstrating that there is no "space" for x in the +// sorted tree). The sorting will have to be verified independently, by anyone who wants to inspect +// the correctness of the hard-coded tree. +// Edit after writing that big comment ^^^: The term OrderedMerkleTree is being used in other files, +// so maybe "Sparse" here is just lingering from earlier in the design process, and can be renamed? +// +// Note: it might be difficult to update such a tree efficiently in any verifier contract, as any +// insertion would need to shift any leaves to the right of the new value. It's likely that the root +// would be overwritten as part of an update. +// Recommendation (nice to have): clearly publish a lightweight tool (with clear instructions and explanation of why the tool exists) +// that can be used by anyone to independently validate the contents of all of the zkpassport protocol's hard-coded tree roots, +// by recomputing those roots from lists of leaves. This ensures "nothing up my sleeve". +// E.g. in the case of this sanctions tree, the tool +// would compute the leaves from human-readable data, then validate that the leaves are strictly increasing, and then compute and output the root. /// The Sanctions sparse merkle trees /// This is a collection of the three trees that are used to check the Sanctions exclusion /// for passports and ID cards /// +// I'm confused by this comment below, because there appears to only be one tree (with one root) in +// all of the code below (not 3 distinct trees). /// The trees are: /// 1. Passport No and Nationality /// 2. Name and DOB /// 3. Name and YOB pub struct SanctionsSparseMerkleTrees { + // Consider renaming this property to `root`, because it appears to always represent a root. pub tree: Field, } @@ -49,6 +70,7 @@ impl SanctionsSparseMerkleTrees { } impl SanctionsSparseMerkleTrees { + // Visually inspected. /// Check the Sanctions exclusion for a given DG1 /// /// # Arguments @@ -67,13 +89,17 @@ impl SanctionsSparseMerkleTrees { let document_number: MRZDocumentNumber = get_document_number_from_mrz(dg1); let nationality: MRZNationality = get_nationality_from_mrz(dg1); + // Casting each u8 to a field is then inefficient to poseidon2 hash together. + // Tightly-packing the bytes into fields would save ~200 gates here. let passportNoAndNationality = array_concat_as_field(document_number, nationality); let passportNoAndNationalityHash: Field = Poseidon2::hash(passportNoAndNationality, passportNoAndNationality.len()); + // Tightly-packing the bytes into fields would save ~1k gates here. let nameAndDob = array_concat_as_field(name, dob); let nameAndDobHash: Field = Poseidon2::hash(nameAndDob, nameAndDob.len()); + // Tightly-packing the bytes into fields would save ~1k gates here. let nameAndYob = array_concat_as_field(name, yob); let nameAndYobHash: Field = Poseidon2::hash(nameAndYob, nameAndYob.len()); diff --git a/src/noir/lib/exclusion-check/sanctions/src/ordered_mt.nr b/src/noir/lib/exclusion-check/sanctions/src/ordered_mt.nr index 8b8e14f99..81731c740 100644 --- a/src/noir/lib/exclusion-check/sanctions/src/ordered_mt.nr +++ b/src/noir/lib/exclusion-check/sanctions/src/ordered_mt.nr @@ -5,6 +5,7 @@ use crate::types::{ }; use poseidon::poseidon2::Poseidon2; +// Visually inspected. 
/// 
/// Ordered Merkle Tree Inclusion Proof
///
@@ -14,11 +15,15 @@ pub fn ordered_mt_verify_inclusion(
     root: Field,
     inclusion_proof: SanctionsOrderedMerkleTreeInclusionProof,
 ) {
+    // Nit: Inconsistent endianness vs `compute_merkle_root`.
+    // It's so close to being possible to nuke this function and have just one merkle inclusion function (`compute_merkle_root`).
+    // (You'd then be able to rely on the tests for `compute_merkle_root`, because there are no tests for this function).
     let path_in_bits: [u1; SANCTIONS_ORDERED_MERKLE_TREE_LEAF_DEPTH] =
         inclusion_proof.leaf_index.to_be_bits::<SANCTIONS_ORDERED_MERKLE_TREE_LEAF_DEPTH>();
     let mut current = inclusion_proof.leaf;
     for i in 0..SANCTIONS_ORDERED_MERKLE_TREE_LEAF_DEPTH {
+        // Nit: Accessing the `path_in_bits` and the `sibling_path` in opposite directions is jarring.
         let (left, right) = if path_in_bits[SANCTIONS_ORDERED_MERKLE_TREE_LEAF_DEPTH - i - 1] == 1 {
             (inclusion_proof.sibling_path[i], current)
         } else {
@@ -30,8 +35,17 @@ pub fn ordered_mt_verify_inclusion(
     assert(current == root);
 }
 
+// Visually inspected.
+// Calling it `leaf` is a bit confusing, because it's not a leaf of the tree. It's a value that
+// doesn't exist as a leaf in the tree.
+// Danger: I'm not sure this would work if the `leaf` (the value being checked for non-membership) is:
+// - gt the largest leaf (because there is no `right` to compare against); or
+// - lt the smallest leaf (leaf index `0`) (because there is no `left` to compare against).
+// Unless... there are hard-coded dummy leaves of `0` at the start and `-1` at the end of the tree's leaves?
+// If you are relying on such an assumption (dummy start and end leaves), consider asserting their
+// existence within this circuit.
 pub fn ordered_mt_non_membership(root: Field, leaf: Field, proof: SanctionsOrderedMerkleTreeProof) {
-    // Perform inclusion checks for bothe the closest upper and the closest lower
+    // Perform inclusion checks for both the closest upper and the closest lower
     ordered_mt_verify_inclusion(root, proof.left);
     ordered_mt_verify_inclusion(root, proof.right);
 
@@ -39,13 +53,22 @@ pub fn ordered_mt_non_membership(root: Field, leaf: Field, proof: SanctionsOrder
     assert(proof.left.leaf.lt(leaf));
     assert(leaf.lt(proof.right.leaf));
 
-    // Check that their indexes are adjacent
+    // Check that their indexes are adjacent, meaning there is no "room" for the `leaf` because
+    // the leaf values are strictly increasing. (Validation that they're strictly increasing is
+    // the responsibility of the verifier).
     assert(proof.left.leaf_index + 1 == proof.right.leaf_index);
 }
 
+// Recommend tests for:
+// - Non-membership of a leaf at the very beginning of the tree (presumably against a `left` leaf of `0`?).
+// - Non-membership of a leaf at the very end of the tree (presumably against a `right` leaf of `-1`??).
+// - Negative tests: at least as many as trigger each of the 3 assertions in the above function to _fail_.
+
 #[test]
 fn test_ordered_mt_non_membership() {
+    // I can't find evidence of this command in the repo.
     // Run yarn generate-sanction-test-data to generate the test data
+    // Why is there typescript in these comments?
// const passportNoAndCountrySMT = await AsyncOrderedMT.fromSerialized(passportNoAndCountryJson, poseidon2) // const passportNoAndCountryNonMembershipProof = await passportNoAndCountrySMT.createNonMembershipProof(1n); // console.log(passportNoAndCountryNonMembershipProof); diff --git a/src/noir/lib/exclusion-check/sanctions/src/param_commit.nr b/src/noir/lib/exclusion-check/sanctions/src/param_commit.nr index dfd33060d..fa657c6b9 100644 --- a/src/noir/lib/exclusion-check/sanctions/src/param_commit.nr +++ b/src/noir/lib/exclusion-check/sanctions/src/param_commit.nr @@ -17,6 +17,12 @@ global Sanctions_PARAM_COMMITMENT_SIZE: u32 = 33; pub fn calculate_param_commitment(proof_type: u8, root_hash: Field) -> Field { let mut params = [0 as Field; Sanctions_PARAM_COMMITMENT_SIZE]; params[0] = proof_type as Field; + // This seems unintentional (maybe a copy-pasta from the sha256 version below). + // It seems unnecessary to convert an input that's already + // a Field into bytes and then back into a Field. + // Consider just going directly to: + // `Poseidon2::hash([proof_type as Field, root_hash], Sanctions_PARAM_COMMITMENT_SIZE)`, where + // Sanctions_PARAM_COMMITMENT_SIZE is redefined to be `2`. let root_hash_bytes: [u8; 32] = root_hash.to_be_bytes(); for i in 1..root_hash_bytes.len() + 1 { params[i] = root_hash_bytes[i - 1] as Field; diff --git a/src/noir/lib/inclusion-check/country/src/lib.nr b/src/noir/lib/inclusion-check/country/src/lib.nr index f4cae24e5..a139774ab 100644 --- a/src/noir/lib/inclusion-check/country/src/lib.nr +++ b/src/noir/lib/inclusion-check/country/src/lib.nr @@ -1,6 +1,15 @@ use poseidon::poseidon2::Poseidon2; use utils::{get_issuing_country_from_mrz, get_nationality_from_mrz}; +// Missing tests. +// Visually inspected. +// We should technically static_assert that the list size N is less than 2^31, +// since otherwise the conversion from the i32 result of this function back to a u32 (`as u32`) +// in other functions will yield a positive number. This would enable an attacker to access +// elements 2^31..2^32 of a list (e.g. in your `check_nationality_inclusion` function). +// But of course, with the way this function is actually called, the list size is actually size 200, +// and the plonk trusted setup is much smaller than 2^31 points, so we're probably already safe. +// (I just get nervous when I see signed integers). unconstrained fn get_index(list: [str<3>; N], value: [u8; 3]) -> i32 { let mut index: i32 = -1; for i in 0..N { @@ -12,6 +21,9 @@ unconstrained fn get_index(list: [str<3>; N], value: [u8; 3]) -> i32 index } +// Missing tests. +// Visually inspected. +// TODO: ensure the country_list can't be padded (near the end) with malicious / duplicate country codes. /// Check if the nationality from the MRZ is in the country list /// /// # Arguments @@ -29,11 +41,16 @@ pub fn check_nationality_inclusion(dg1: [u8; 95], country_list: [str // Assert that the nationality is equal to the country in the list // at the index found by the unconstrained function let country_code = country_list[country_index as u32].as_bytes(); + + // You could instead compare equality of the entire length-3 arrays in a single assertion, + // since arrays implement the Eq trait. assert_eq(nationality_bytes[0], country_code[0], "Nationality does not match the country code"); assert_eq(nationality_bytes[1], country_code[1], "Nationality does not match the country code"); assert_eq(nationality_bytes[2], country_code[2], "Nationality does not match the country code"); } +// Missing tests. 
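+// A sketch of the kind of test that could live here (hypothetical values; this assumes the MRZ
+// sits at a fixed 5-byte offset inside DG1, and that the issuing-country code occupies MRZ bytes
+// 2..5, as per PASSPORT_MRZ_COUNTRY_INDEX / PASSPORT_MRZ_COUNTRY_LENGTH):
+// ```
+// #[test]
+// fn test_check_issuing_country_inclusion() {
+//     let mut dg1 = [0 as u8; 95];
+//     // Write "FRA" at the issuing-country position (MRZ index 2 => DG1 index 7).
+//     dg1[7] = 0x46; // 'F'
+//     dg1[8] = 0x52; // 'R'
+//     dg1[9] = 0x41; // 'A'
+//     check_issuing_country_inclusion(dg1, ["FRA", "DEU", "GBR"]);
+// }
+// ```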
+// Visually inspected. /// Check if the issuing country from the MRZ is in the country list /// /// # Arguments @@ -51,6 +68,9 @@ pub fn check_issuing_country_inclusion(dg1: [u8; 95], country_list: // Assert that the issuing country is equal to the country in the list // at the index found by the unconstrained function let country_code = country_list[country_index as u32].as_bytes(); + + // You could instead compare equality of the entire length-3 arrays in a single assertion, + // since arrays implement the Eq trait. assert_eq( issuing_country_bytes[0], country_code[0], @@ -68,6 +88,14 @@ pub fn check_issuing_country_inclusion(dg1: [u8; 95], country_list: ); } +// Visually inspected. +// Missing tests. +// TODO: check that the verifier actually checks the preimage of this param commitment (or has already precomputed this param commitment). We don't want someone sneaking a country code in the final entries of the list. +// Visually inspected. +// Inefficient packing. This uses N + 1 fields instead of roughly ceil((3*N + 1) / 31) fields, +// so is technically doing unnecessary poseidon2 constraints. Roughly ~13k unnecessary constraints. +// You could instead flatten entire country list into one bytes array, and then convert those +// bytes into fields, using a function in the utils.nr file. /// Calculate the commitment of the country list using Poseidon2 /// /// This is less demanding on the prover but much more demanding on EVM verifiers diff --git a/src/noir/lib/outer/src/lib.nr b/src/noir/lib/outer/src/lib.nr index 51a186525..f6889e71c 100644 --- a/src/noir/lib/outer/src/lib.nr +++ b/src/noir/lib/outer/src/lib.nr @@ -3,8 +3,9 @@ use poseidon::poseidon2::Poseidon2; global PROOF_SIZE: u32 = 508; global VKEY_SIZE: u32 = 115; +// Visually inspected. pub fn prepare_integrity_check_inputs( - timestamp: u64, + timestamp: u64, // consider renaming to `current_date`, for consistency comm_in: Field, comm_out: Field, ) -> [Field; 3] { @@ -15,6 +16,7 @@ pub fn prepare_integrity_check_inputs( result } +// Visually inspected. pub fn prepare_disclosure_inputs( comm_in: Field, param_commitment: Field, @@ -31,6 +33,7 @@ pub fn prepare_disclosure_inputs( result } +// Visually inspected. 
pub fn poseidon2_hash(input: [Field; INPUT_LEN]) -> Field { Poseidon2::hash(input, INPUT_LEN) } @@ -40,7 +43,7 @@ pub struct CSCtoDSCProof { pub proof: [Field; PROOF_SIZE], // Excluding the certificate registry root // which is passed separately - pub public_inputs: [Field; 1], + pub public_inputs: [Field; 1], // comm_out pub key_hash: Field, // Hash path for circuit A key hash in the sub-circuit merkle tree // Allows for up to 4,096 leaves @@ -52,7 +55,7 @@ pub struct CSCtoDSCProof { pub struct DSCtoIDDataProof { pub vkey: [Field; VKEY_SIZE], pub proof: [Field; PROOF_SIZE], - pub public_inputs: [Field; 2], + pub public_inputs: [Field; 2], // comm_in, comm_out pub key_hash: Field, // Hash path for circuit B key hash in the sub-circuit merkle tree pub tree_hash_path: [Field; 12], @@ -63,7 +66,9 @@ pub struct DSCtoIDDataProof { pub struct IntegrityCheckProof { pub vkey: [Field; VKEY_SIZE], pub proof: [Field; PROOF_SIZE], - pub public_inputs: [Field; 2], + // Excluding the current_date + // which is passed separately + pub public_inputs: [Field; 2], // comm_in, comm_out pub key_hash: Field, // Hash path for circuit C key hash in the sub-circuit merkle tree pub tree_hash_path: [Field; 12], @@ -74,7 +79,9 @@ pub struct IntegrityCheckProof { pub struct DisclosureProof { pub vkey: [Field; VKEY_SIZE], pub proof: [Field; PROOF_SIZE], - pub public_inputs: [Field; 1], + // Excluding: service_scope, service_subscope, param_commitment, scoped_nullifier, + // which are passed separately + pub public_inputs: [Field; 1], // comm_in pub key_hash: Field, // Hash path for circuit D key hash in the sub-circuit merkle tree pub tree_hash_path: [Field; 12], diff --git a/src/noir/lib/sig-check/common/src/lib.nr b/src/noir/lib/sig-check/common/src/lib.nr index 58e69d111..22dd37296 100644 --- a/src/noir/lib/sig-check/common/src/lib.nr +++ b/src/noir/lib/sig-check/common/src/lib.nr @@ -3,6 +3,9 @@ use sha256::{sha224_var, sha256_var}; use sha512::{sha384, sha512}; use utils::check_zero_padding; +// File visually inspected. +// Technically missing tests to ensure the outputs are as expected, +// and to catch examples of badly-padded inputs. pub fn sha1_and_check_data_to_sign( data_to_sign: [u8; DATA_TO_SIGN_MAX_LEN], data_to_sign_len: u32, @@ -10,6 +13,15 @@ pub fn sha1_and_check_data_to_sign( // Ensure all bytes beyond data_to_sign_len are zero to prevent them // from being used by the prover check_zero_padding(data_to_sign, data_to_sign_len); + + // Consider using `::from_parts_unchecked`, since `::from_parts` is doing redundant work, + // given the above call to `check_zero_padding`. + // Also `from_parts` mutates the contents of + // the array which seems unnecessarily risky (even though that mutation won't bite in this case, + // given the call to `check_zero_padding`). + // Same comment for all other functions in this file that use BoundedVec. + // (If you do choose to use `from_parts_unchecked`, please do add a comment to say that it's only safe to do so because of the `check_zero_padding` call, in case someone removes that call in future). + // Calculate the hash of data_to_sign up to data_to_sign_len let data_to_sign_vec = BoundedVec::from_parts(data_to_sign, data_to_sign_len); sha1::sha1_var(data_to_sign_vec) @@ -22,6 +34,9 @@ pub fn sha224_and_check_data_to_sign( // Ensure all bytes beyond data_to_sign_len are zero to prevent them // from being used by the prover check_zero_padding(data_to_sign, data_to_sign_len); + + // Strange that some sha functions take a BoundedVec and others take a vanilla array. 
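+    // For the BoundedVec-based functions in this file, the `from_parts_unchecked` swap suggested
+    // above would look something like this (a sketch only):
+    // ```
+    // // Safe: bytes beyond `data_to_sign_len` were already constrained to zero by the
+    // // `check_zero_padding` call above -- do not remove that call.
+    // let data_to_sign_vec = BoundedVec::from_parts_unchecked(data_to_sign, data_to_sign_len);
+    // sha1::sha1_var(data_to_sign_vec)
+    // ```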
+
+    // Calculate the hash of data_to_sign up to data_to_sign_len
     sha224_var(data_to_sign, data_to_sign_len as u64)
 }
 
diff --git a/src/noir/lib/utils/src/lib.nr b/src/noir/lib/utils/src/lib.nr
index db5877c79..a85f10b27 100644
--- a/src/noir/lib/utils/src/lib.nr
+++ b/src/noir/lib/utils/src/lib.nr
@@ -21,8 +21,10 @@ use types::{
     DG1Data, MRZData, MRZName, MRZDOB, MRZYOB, MRZDocumentNumber, MRZNationality, MRZIssuingCountry,
 };
 
+// Consider creating a dedicated constants.nr file for all these globals.
+
 // Index for the country of issuance of the passport
-pub global PASSPORT_MRZ_COUNTRY_INDEX: u32 = 2;
+pub global PASSPORT_MRZ_COUNTRY_INDEX: u32 = 2; // Consider renaming to "PASSPORT_MRZ_ISSUING_COUNTRY_INDEX"
 // Length of the country code in the MRZ
 pub global PASSPORT_MRZ_COUNTRY_LENGTH: u32 = 3;
 // Index for the three letter code of the country of citizenship
@@ -201,14 +203,24 @@ pub struct DisclosedData {
     pub gender: [u8; 1],
 }
 
+// Missing tests
+// Visually inspected.
+// `end` is not technically needed, if it's always anticipated that `end - start == M`.
+// In fact, for safety, consider removing `end` (or at least static_asserting that `end - start == M`).
+// The function could then be called as:
+// - `get_array_slice::<_, M>(arr, start);` or
+// - `let sliced_arr: [u8; M] = get_array_slice(arr, start);`
 pub fn get_array_slice(arr: [u8; N], start: u32, end: u32) -> [u8; M] {
     let mut slice = [0 as u8; M];
     for i in start..end {
+        // Avoid writing to a dynamic index (relatively expensive): it's cheaper to _read_ from a dynamic index.
+        // Consider using the implementation in `disclose/src/lib.nr`
         slice[i - start] = arr[i];
     }
     slice
 }
 
+// Not used. Not checked. Consider removing.
 pub fn get_array_slice_constant(arr: [u8; N]) -> [u8; M] {
     let mut slice = [0 as u8; M];
     for i in 0..M {
@@ -217,6 +229,7 @@
     slice
 }
 
+// Not used. Not checked. Consider removing.
 // Reverse the bytes of an array so you can switch from
 // big endian to little endian order and vice versa
 pub fn reverse_bytes_array(arr: [u8; N]) -> [u8; N] {
@@ -228,6 +241,7 @@
     reversed_arr
 }
 
+// Not used. Not checked. Consider removing.
 pub fn insert_into_array(
     mut arr: [u8; N],
     sub_arr: [u8; M],
@@ -239,6 +253,7 @@
     arr
 }
 
+// Not used. Not checked. Consider removing.
 pub fn dynamic_insert_into_array(
     mut arr: [u8; N],
     sub_arr: [u8; M],
@@ -253,6 +268,7 @@
     arr
 }
 
+// Visually inspected.
 pub fn is_id_card(dg1: DG1Data) -> bool {
     // For passport, the last two bytes are 0
     // since the real length is 93 for passports
@@ -260,6 +276,10 @@
     (dg1[93] != 0) & (dg1[94] != 0)
 }
 
+// Missing Tests.
+// Consider renaming MAX_FIELD_SIZE -> MAX_FIELD_SIZE_IN_BYTES.
+// Consider adding std::static_assert(MAX_FIELD_SIZE <= 31, "...");
+// - Edit: I just saw in the bind circuit (and all `calculate_param_commitment_sha2` functions) that
+//   sometimes you want a 32-byte input to be truncated to 31 bytes to fit within a field. Consider
+//   adding a dedicated "unsafe" variant of this function so that it's clear when you're willing to
+//   allow truncation. E.g. it's likely alright to truncate a byte from the output of a sha256
+//   function (as is done in the bind circuit), but it wouldn't be alright to accidentally truncate
+//   a byte from raw data.
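+// A sketch of what that split could look like (names hypothetical; the call below reuses this
+// function's existing `<NBytes, MAX_FIELD_SIZE>` generics, as seen at the `::<32, 31>` call site):
+// ```
+// // Strict variant: compile-time guarantee that no bytes are dropped.
+// pub fn pack_be_bytes_into_field_strict<let NBytes: u32>(x: [u8; NBytes]) -> Field {
+//     std::static_assert(NBytes <= 31, "input would be truncated");
+//     pack_be_bytes_into_field::<NBytes, 31>(x)
+// }
+// ```
+// ...and the existing function could be renamed to something like `pack_be_bytes_into_field_truncating`,
+// so that truncation is always a visible, deliberate choice at the call site.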
+// Consider adding std::static_assert(NBytes <= MAX_FIELD_SIZE, "...");
 pub fn pack_be_bytes_into_field(
     x: [u8; NBytes],
 ) -> Field {
@@ -272,6 +292,12 @@
     result
 }
 
+// Visually inspected.
+// Consider adding more test cases for the various numeric generics that are
+// actually used within the zkpassport circuits.
+// Consider renaming MAX_FIELD_SIZE -> MAX_FIELD_SIZE_IN_BYTES.
+// Consider adding std::static_assert(NBytes <= MAX_FIELD_SIZE, "...");
+// Consider adding std::static_assert(MAX_FIELD_SIZE <= 31, "...");
 pub fn pack_be_bytes_into_u128s(
     x: [u8; NBytes],
 ) -> [u128; N] {
@@ -302,6 +328,13 @@ pub fn pack_be_bytes_into_u128s(
+// Consider renaming MAX_FIELD_SIZE -> MAX_FIELD_SIZE_IN_BYTES or MAX_BYTES_PER_FIELD
+// Consider adding std::static_assert(NBytes <= MAX_FIELD_SIZE, "...");
+// Consider adding std::static_assert(MAX_FIELD_SIZE <= 31, "...");
+// Consider adding std::static_assert((NBytes + MAX_FIELD_SIZE - 1) / MAX_FIELD_SIZE <= N); to ensure N is large enough to not accidentally truncate the output. (although the `MAX_FIELD_SIZE - (N * MAX_FIELD_SIZE - NBytes)` for-loop-bound might achieve this... but I'm not sure if Noir would throw an error for too-small an N, or whether it would just silently skip the loop and truncate the output).
 pub fn pack_be_bytes_into_fields(
     x: [u8; NBytes],
 ) -> [Field; N] {
@@ -332,6 +365,9 @@ pub fn pack_be_bytes_into_fields(
 pub fn get_mrz_from_dg1(dg1: DG1Data) -> MRZData {
     let mut mrz: MRZData = [0 as u8; 90];
     for i in 0..90 {
@@ -340,6 +376,9 @@ pub fn get_mrz_from_dg1(dg1: DG1Data) -> MRZData {
     mrz
 }
 
+// Missing tests.
+// Visually inspected.
+// Note: this only works for an array of even length.
 pub fn split_array(array: [u8; N * 2]) -> ([u8; N], [u8; N]) {
     let mut array_x = [0 as u8; N];
     let mut array_y = [0 as u8; N];
@@ -350,6 +389,8 @@
     (array_x, array_y)
 }
 
+// Missing tests.
+// Recommend using Noir's native array .concat() method, which has been tested.
 pub fn concat_array(array_x: [u8; N], array_y: [u8; N]) -> [u8; N * 2] {
     let mut array = [0 as u8; N * 2];
     for i in 0..N {
@@ -359,6 +400,10 @@
     array
 }
 
+// Missing tests.
+// Suggestion: The output of this function seems to always be hashed.
+// This approach to packing (1 byte into 1 field) is very inefficient, and will result in more hashing
+// constraints than necessary. Consider more-tightly packing the data before hashing.
 /**
  * Concatenate two arrays (of different sizes) into one array
  */
@@ -376,6 +421,9 @@
     result
 }
 
+// Not used. Not checked. Consider removing.
+// Inconsistent leading underscore. Consider removing `_`, or aligning all unconstrained functions to
+// have such a leading underscore.
 pub unconstrained fn _find_subarray_index(
     array: [u8; N],
     subarray: [u8; M],
@@ -397,6 +445,18 @@
     index
 }
 
+// Missing tests.
+// Visually inspected.
+// Potential optimisation (sometimes this marginally improves constraints vs using an inequality `>=` inside a loop):
+// let mut hit_len = false;
+// for i in 0..N {
+//     if i == len {
+//         hit_len = true;
+//     }
+//     if hit_len {
+//         assert_eq(padded_array[i], T::default());
+//     }
+// }
 pub fn check_zero_padding(padded_array: [T; N], len: u32)
 where
     T: Eq,
@@ -404,11 +464,20 @@
 {
     for i in 0..N {
         if i >= len {
+            // This presumes `default()` has similar meaning to `zeroed`, which won't be true for all types T.
Consider using `std::mem::zeroed();`, or concrete types (like `u8`) instead of T.
             assert_eq(padded_array[i], T::default());
         }
     }
 }
 
+// Missing Tests.
+// Visually inspected, with the assumption that the global constants are correct.
+// Potential optimisation:
+// Establish the `start` and `end` indices as variables, and then only
+// call `get_array_slice` once (rather than the two times currently).
+// Having said that, I _think_ the calls to `get_array_slice` might evaluate at
+// comptime, because you're passing global constants in as args. So maybe this
+// is optimal.
 pub fn get_nationality_from_mrz(dg1: DG1Data) -> MRZNationality {
     let mrz = get_mrz_from_dg1(dg1);
 
@@ -436,10 +505,15 @@ pub fn get_nationality_from_mrz(dg1: DG1Data) -> MRZNationality {
     country_bytes
 }
 
+// Missing Tests.
+// Visually inspected, with the assumption that the global constants are correct.
 pub fn get_issuing_country_from_mrz(dg1: DG1Data) -> MRZIssuingCountry {
     let mrz = get_mrz_from_dg1(dg1);
     // No need to check if it's an ID card since the issuing country
     // is always at the same index for both passports and ID cards
+    // Suggestion:
+    // `std::static_assert(PASSPORT_MRZ_COUNTRY_INDEX == ID_CARD_MRZ_COUNTRY_INDEX, "Country index mismatch");`
+    // `std::static_assert(PASSPORT_MRZ_COUNTRY_LENGTH == ID_CARD_MRZ_COUNTRY_LENGTH, "Country length mismatch");`
     let mut country_bytes = get_array_slice(
         mrz,
         PASSPORT_MRZ_COUNTRY_INDEX,
@@ -454,6 +528,11 @@ pub fn get_issuing_country_from_mrz(dg1: DG1Data) -> MRZIssuingCountry {
     country_bytes
 }
 
+
+//***********************************************************************
+
+
+// Visually inspected.
 /// Get the name from the MRZ
 ///
 /// Conditionally calls the correct function based on the type of document
@@ -466,6 +545,7 @@ pub fn get_name_from_mrz(dg1: DG1Data) -> MRZName {
     }
 }
 
+// Visually inspected.
 /// Get the name from the MRZ for a passport
 pub fn get_name_passport(mrz: MRZData) -> MRZName {
     get_array_slice(
@@ -475,6 +555,7 @@ pub fn get_name_passport(mrz: MRZData) -> MRZName {
     )
 }
 
+// Visually inspected.
 /// Get the name from the MRZ for an ID card
 pub fn get_name_id_card(mrz: MRZData) -> MRZName {
     let mut mrz_name: MRZName = get_array_slice(
@@ -491,6 +572,8 @@ pub fn get_name_id_card(mrz: MRZData) -> MRZName {
     mrz_name
 }
 
+// Missing Tests.
+// Visually inspected.
 /// Get the date of birth from the MRZ
 ///
 /// Conditionally calls the correct function based on the type of document
@@ -503,6 +586,8 @@ pub fn get_dob_from_mrz(dg1: DG1Data) -> MRZDOB {
     }
 }
 
+// Missing Tests.
+// Visually inspected.
 /// Get the date of birth from the MRZ for a passport
 pub fn get_date_of_birth_passport(mrz: MRZData) -> MRZDOB {
     get_array_slice(
@@ -512,6 +597,8 @@ pub fn get_date_of_birth_passport(mrz: MRZData) -> MRZDOB {
     )
 }
 
+// Missing Tests.
+// Visually inspected.
 /// Get the date of birth from the MRZ for an ID card
 pub fn get_date_of_birth_id_card(mrz: MRZData) -> MRZDOB {
     get_array_slice(
@@ -521,6 +608,8 @@ pub fn get_date_of_birth_id_card(mrz: MRZData) -> MRZDOB {
     )
 }
 
+// Missing Tests.
+// Visually inspected.
 /// Get the year of birth from the MRZ
 ///
 /// Conditionally calls the correct function based on the type of document
@@ -533,6 +622,8 @@ pub fn get_yob_from_mrz(dg1: DG1Data) -> MRZYOB {
     }
 }
 
+// Missing Tests.
+// Visually inspected.
 /// Get the year of birth from the MRZ for an ID card
 pub fn get_yob_from_mrz_passport(mrz: MRZData) -> MRZYOB {
     get_array_slice(
@@ -542,6 +633,8 @@ pub fn get_yob_from_mrz_passport(mrz: MRZData) -> MRZYOB {
     )
 }
 
+// Missing Tests.
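+// For example, something like this (a sketch only; `ID_CARD_MRZ_YOB_INDEX` is a hypothetical name
+// for whichever global this function actually slices with, and this assumes MRZYOB is 2 bytes):
+// ```
+// #[test]
+// fn test_get_yob_from_mrz_id_card() {
+//     let mut mrz: MRZData = [0 as u8; 90];
+//     mrz[ID_CARD_MRZ_YOB_INDEX] = 0x39;     // '9'
+//     mrz[ID_CARD_MRZ_YOB_INDEX + 1] = 0x30; // '0'
+//     assert_eq(get_yob_from_mrz_id_card(mrz), [0x39, 0x30]);
+// }
+// ```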
+// Visually inspected.
 /// Get the year of birth from the MRZ for an ID card
 pub fn get_yob_from_mrz_id_card(mrz: MRZData) -> MRZYOB {
     get_array_slice(
@@ -551,6 +644,8 @@ pub fn get_yob_from_mrz_id_card(mrz: MRZData) -> MRZYOB {
     )
 }
 
+// Missing Tests.
+// Visually inspected.
 /// Get the document number from the MRZ
 ///
 /// Conditionally calls the correct function based on the type of document
@@ -563,6 +658,8 @@ pub fn get_document_number_from_mrz(dg1: DG1Data) -> MRZDocumentNumber {
     }
 }
 
+// Missing Tests.
+// Visually inspected.
 /// Get the document number from the MRZ for a passport
 pub fn get_document_number_passport(mrz: MRZData) -> MRZDocumentNumber {
     get_array_slice(
@@ -572,6 +669,8 @@ pub fn get_document_number_passport(mrz: MRZData) -> MRZDocumentNumber {
     )
 }
 
+// Missing Tests.
+// Visually inspected.
 /// Get the document number from the MRZ for an ID card
 pub fn get_document_number_id_card(mrz: MRZData) -> MRZDocumentNumber {
     get_array_slice(
@@ -581,21 +680,70 @@ pub fn get_document_number_id_card(mrz: MRZData) -> MRZDocumentNumber {
     )
 }
 
+// TO CHECK (in the constrained code):
+// Are the class (bits 6 & 7) and the form (bit 5) being checked to match the
+// expectations of passport encodings?
+// This code accepts non-DER encodings (e.g. length 0x81 0x05 instead of 0x05). Is that ok? My reading suggests it's not ok, and passports prefer unambiguous encodings (DER).
+// I.e. it is not strict to DER rules - is that ok?
+// Do you want to allow "null" = 0x05 0x00? It is currently allowed.
+
 /// Returns total TLV length (tag + length field + content) for any
 /// ASN.1 element using DER/BER **definite-length** encoding with a
 /// single-byte tag (tag number field < 31)
+///
 pub unconstrained fn unsafe_get_asn1_element_length<let N: u32>(asn1: [u8; N]) -> u32 {
     let tag: u8 = asn1[0];
     let elem_len: u8 = asn1[1];
 
+    /*
+     * Tag encoding:
+     *
+     * | 7 6 | 5 | 4 3 2 1 0 |  <- bit index
+     * |     |   |           |  <- bits
+     *    ^    ^       ^
+     *    |    |       |
+     *    |    |       Tag - a number < 31 (so 11111 is not allowed).
+     *    |    |
+     *    |    Form: 0=primitive type, 1=constructed type
+     *    |
+     *    class: 00=universal, 01=application, 10=context-specific, 11=private
+     */
 
+    /*
+     * Length encoding ("short form"):
+     *
+     * | 7 | 6 5 4 3 2 1 0 |  <- bit index
+     * | 0 | L e n g t h   |  <- bits
+     *   ^         ^
+     *   |         |
+     *   |         Length - a number between 0 - 127 (inclusive)
+     *   |
+     *   delineates that this is the "short form" encoding.
+     *
+     * Length encoding ("long form"):
+     *
+     * | 7 | 6 5 4 3 2 1 0 | 7 6 5 4 3 2 1 0 | ... | 7 6 5 4 3 2 1 0 |  <- bit index
+     * | 1 |               | L e n g t h N   | ... | L e n g t h 1   |  <- bits
+     *   ^         ^
+     *   |         |
+     *   |         Length of the length (not 0), in bytes
+     *   |
+     *   delineates that this is the "long form" encoding.
+     */
 
     // Need at least tag(1) + length(1)
     assert(N >= 2, "TLV too short");
 
     // Only support single-byte tag number (no high-tag-number 0x1F)
+    // 0x1F = 31
     assert((tag & 0x1F) < 0x1F, "High-tag-number form not supported");
 
-    // BER indefinite-length (0x80) is not supported here
+    // BER indefinite-length (0x80) is not supported here.
+    // (Indefinite length has the entire first byte encoded as 0x80).
+ // 0x80 = 128 assert(elem_len != 0x80, "Indefinite length not supported"); // Short form: content length is in low 7 bits + // 0x80 = 128 + // 0x7F = 127 if (elem_len & 0x80) == 0 { let content_len: u32 = (elem_len & 0x7F) as u32; let total: u32 = 2 + content_len; @@ -604,9 +752,10 @@ pub unconstrained fn unsafe_get_asn1_element_length(asn1: [u8; N]) - } // Long form: low 7 bits = number of following length bytes else { + // 0x7F = 127 let nlen: u32 = (elem_len & 0x7F) as u32; assert(nlen > 0, "Zero length-of-length"); - assert(nlen <= 4, "Length field too large"); + assert(nlen <= 4, "Length field too large"); // Consider renaming error to "Length > 4 not supported" assert(2 + nlen <= N, "Length bytes exceed buffer"); // Parse big-endian content length let mut content_len: u32 = 0; @@ -619,6 +768,29 @@ pub unconstrained fn unsafe_get_asn1_element_length(asn1: [u8; N]) - } } +// NOTE: this does NOT check that it is the FIRST occurrence of the needle; only that +// the needle exists somewhere in the haystack. If you want it to be the FIRST, you'll +// need to add some constraints (see example below). +// NOTE: the result of these constraints (as they stand) can be misleading and incorrect: +// If a malicious offset is returned by the unconstrained function, then this function could return +// `HAYSTACK_SIZE`, which will be inferred to mean "the needle is not in this haystack"... +// but the needle MIGHT ACTUALLY BE in the haystack; it's just that a bad hint +// was returned by the unconstrained function. +// E.g. Finding [3, 4, 5] in [0, 1, 2, 3, 4, 5, 6, 7, 8]. +// The correct answer is 3. +// But if a malicious prover returns `5` from the unconstrained function, +// this function will return `9`, which the calling function will infer to mean +// "needle not in this haystack". +// +// Some more comments below. +// +// Suggestion: add regression tests to catch these observations in future. +// In fact, we need a way to mock the return values of unconstrained calls in our tests. This has come +// up a few times on various Aztec projects too. +// +// Suggestion: consider throwing if the needle is not found. I haven't seen a case (yet) in this +// codebase where the circuit should gracefully continue if a call to this function fails to find the needle. +// /// Find the index of the first occurrence of the needle in the haystack /// Returns the index of the first occurrence of the needle in the haystack /// Returns HAYSTACK_SIZE if the needle is not found @@ -626,11 +798,58 @@ pub fn find_subarray_index( needle: [u8; NEEDLE_SIZE], haystack: [u8; HAYSTACK_SIZE], ) -> u32 { + // Suggestion: static_assert(NEEDLE_SIZE <= HAYSTACK_SIZE, "needle cannot be bigger than the haystack"); + // Safety: This is safe because the offset is only used as a starting point // to verify the substring exists + // Style: prefer snake_case for offset_unchecked. let offsetUnchecked = unsafe { find_subarray_index_unsafe(needle, haystack) }; let mut offset = offsetUnchecked; // Check if offset is valid before attempting verification + // Might be slightly more efficient to check: + // ``` + // std::static_assert(NEEDLE_SIZE != 0); // I can't think of a reason to call this function with an empty array... 
+    // // And then, now we know the NEEDLE_SIZE is >= 1, and we know u32 operations can't wrap:
+    // if (offset_unchecked + NEEDLE_SIZE <= HAYSTACK_SIZE) {
+    //     for i in 0..NEEDLE_SIZE {
+    //         // And then, to avoid the bug, I'd just do assertions, so that we fail if they're not met:
+    //         assert_eq(haystack[offset_unchecked + i], needle[i]); // cheaper to assert equal than to assert not equal.
+    //     }
+    // } else {
+    //     assert(offset_unchecked == HAYSTACK_SIZE);
+    // }
+    // ```
+    //
+    // ^^^ Notice that this suggestion still doesn't fix the observation that
+    // this function is not asserting that this is the FIRST occurrence. To do that, you'd
+    // need to iterate through all earlier elements (which in circuit-land actually means
+    // iterating through all elements) to assert that they DON'T contain the needle.
+    // It would be something like:
+    // ```
+    // let mut before_offset = true; // will flip when we reach the claimed offset.
+    // for i in 0..(HAYSTACK_SIZE - NEEDLE_SIZE + 1) {
+    //     if i == offset_unchecked {
+    //         before_offset = false;
+    //         for j in 0..NEEDLE_SIZE {
+    //             assert_eq(haystack[offset_unchecked + j], needle[j]);
+    //         }
+    //     }
+    //     let mut mismatch = false; // we want to find at least one mismatch at earlier offsets.
+    //     for j in 0..NEEDLE_SIZE {
+    //         mismatch |= (haystack[i + j] != needle[j]);
+    //     }
+    //     if before_offset {
+    //         assert(mismatch);
+    //     }
+    // }
+    //
+    // if before_offset == true {
+    //     assert(offset_unchecked == HAYSTACK_SIZE);
+    // }
+    // let offset = offset_unchecked;
+    // offset
+    // ```
+
     if (offsetUnchecked < HAYSTACK_SIZE) & (offsetUnchecked + NEEDLE_SIZE <= HAYSTACK_SIZE) {
         for i in 0..NEEDLE_SIZE {
             if haystack[i + offsetUnchecked] != needle[i] {
@@ -644,13 +863,16 @@ pub fn find_subarray_index(
     offset
 }
 
+// Visually inspected.
+// Missing tests.
 pub fn is_subarray_in_array<let NEEDLE_SIZE: u32, let HAYSTACK_SIZE: u32>(
     needle: [u8; NEEDLE_SIZE],
     haystack: [u8; HAYSTACK_SIZE],
 ) -> bool {
-    find_subarray_index(needle, haystack) < HAYSTACK_SIZE
+    find_subarray_index(needle, haystack) < HAYSTACK_SIZE // slightly more efficient to do `!=` here.
 }
 
+// Visually inspected.
 /// Safety: This is safe because the offset is only used as a starting point
 /// to verify the substring exists
 pub unconstrained fn find_subarray_index_unsafe(
@@ -660,7 +882,7 @@ pub unconstrained fn find_subarray_index_unsafe Self {
         // Set the last two bytes to NON 0 to indicate an ID card
         self.dg1[93] = 1;
@@ -342,6 +344,9 @@ fn test_find_substring_index_various_cases() {
     assert_eq(index4, 2);
 
     // Test case 5: Empty needle (should return 0)
+    // The comment above the function says `/// Returns HAYSTACK_SIZE if the needle is not found`,
+    // so this test isn't returning the value I'd expect. To my intuition, the empty array is not in the haystack,
+    // so index `0` (as this test asserts) seems wrong. And actually, the element at index 0 is not `[]`; it's `0x01`.
let needle5: [u8; 0] = []; let haystack5 = [0x01, 0x02, 0x03, 0x04, 0x05]; let index5 = find_subarray_index(needle5, haystack5); @@ -376,6 +381,8 @@ fn test_find_substring_index_various_cases() { let haystack10 = [0x01, 0x02, 0x03, 0x05, 0x06, 0x07, 0x08, 0x09]; let index10 = find_subarray_index(needle10, haystack10); assert_eq(index10, haystack10.len()); // Should equal haystack size + + // Missing negative tests which mock malicious return values from `find_subarray_index_unsafe` } pub fn verify_substring_in_string( diff --git a/src/noir/lib/utils/src/types.nr b/src/noir/lib/utils/src/types.nr index 001a11a43..11b26fff0 100644 --- a/src/noir/lib/utils/src/types.nr +++ b/src/noir/lib/utils/src/types.nr @@ -1,3 +1,5 @@ +// Recommendation: Replace all these magic numbers with the `_LENGTH` globals from `../lib.nr` (and introduce globals if any are missing). + // DG1 types pub type DG1Data = [u8; 95]; pub type MRZData = [u8; 90]; diff --git a/src/solidity/src/ArrayUtils.sol b/src/solidity/src/ArrayUtils.sol index ba502c755..986763688 100644 --- a/src/solidity/src/ArrayUtils.sol +++ b/src/solidity/src/ArrayUtils.sol @@ -4,6 +4,7 @@ pragma solidity >=0.8.21; import {StringUtils} from "./StringUtils.sol"; +// Missing tests. library ArrayUtils { function isSortedAscending(string[] memory array) internal pure returns (bool) { for (uint256 i = 1; i < array.length; i++) { diff --git a/src/solidity/src/Constants.sol b/src/solidity/src/Constants.sol index 25d5944f5..44e7e2de9 100644 --- a/src/solidity/src/Constants.sol +++ b/src/solidity/src/Constants.sol @@ -2,8 +2,12 @@ // Copyright 2025 ZKPassport pragma solidity >=0.8.21; +// Is this not updateable? bytes32 constant SANCTIONS_TREES_ROOT = 0x27cea23b989f5246d6577568d11cff22537f10fb47729dc004d1bf464ce37bd3; +// Not checked. +// Suggested comment to put here: +// The lengths of the preimages of the `param_commitments` of the various disclosure circuits. library CommittedInputLen { uint256 constant COMPARE_AGE = 11; uint256 constant COMPARE_BIRTHDATE = 25; @@ -16,3 +20,13 @@ library CommittedInputLen { uint256 constant BIND = 501; uint256 constant SANCTIONS = 33; } + +// For the items above which have the same number, consider adding a test which +// simply asserts that they're all equal, so that if a maintainer of this repo +// ever does change one of these values, the test will fail and it'll prompt the +// dev to think about consequences of this. +// E.g. assert(COMPARE_BIRTHDATE == COMPARE_EXPIRY, "It looks like you've changed one of these constants. +// There are baked-in assumptions that these values must always be equal. This assumption is relied upon by +// 'getDateProofInputs'. You might break the protocol unless you think very carefully about the change you're making" ); +// +// Similar for the `601` items. diff --git a/src/solidity/src/DateUtils.sol b/src/solidity/src/DateUtils.sol index 2ee06f46c..0d9af866d 100644 --- a/src/solidity/src/DateUtils.sol +++ b/src/solidity/src/DateUtils.sol @@ -2,6 +2,8 @@ // Copyright 2025 ZKPassport pragma solidity >=0.8.21; +// Visually inspected. +// Missing tests. library DateUtils { /** * @dev Validates if a date is within a validity period diff --git a/src/solidity/src/IRootRegistry.sol b/src/solidity/src/IRootRegistry.sol index c6533018d..92324e219 100644 --- a/src/solidity/src/IRootRegistry.sol +++ b/src/solidity/src/IRootRegistry.sol @@ -5,6 +5,7 @@ pragma solidity ^0.8.20; * @title IRootRegistry * @dev Interface for a root registry */ + // I guess the impl is in some other repo? 
interface IRootRegistry {
     function latestRoot(bytes32 registryId) external view returns (bytes32);
 
diff --git a/src/solidity/src/SampleContract.sol b/src/solidity/src/SampleContract.sol
index 91ee6f793..10cc65486 100644
--- a/src/solidity/src/SampleContract.sol
+++ b/src/solidity/src/SampleContract.sol
@@ -55,6 +55,10 @@ contract SampleContract {
             committedInputs,
             committedInputCounts
         );
+        // Possible bug: the `currentDate` from this disclosure proof is not being asserted to equal
+        // the `currentDate` public input of the Outer proof. This means they could be different dates!
+        // That feels like it could lead to vulnerabilities / unexpected behaviour.
+
         // Make sure the date used for the proof makes sense
         require(block.timestamp >= currentDate, "Date used in proof is in the future");
 
         // This is the condition for checking the age is 18 or above
@@ -76,6 +80,7 @@ contract SampleContract {
         // Get the nationality from the disclosed data and ignore the rest
         // Passing the disclosed bytes returned by the previous function
         // this function will format it for you so you can use the data you need
+        // Potentially dangerous if a developer miscounts these commas. Consider returning structs instead.
         (, , string memory nationality, , , , , ) = zkPassportVerifier.getDisclosedData(
             disclosedBytes,
             isIDCard
@@ -83,6 +88,11 @@ contract SampleContract {
         return nationality;
     }
 
+    // UX suggestion: it looks like `committedInputs` and `committedInputCounts` will always be passed
+    // around as a pair everywhere. Consider wrapping them in a struct, and giving the struct a less
+    // jargony name, like `RevealedData` (or something...) so that this app-developer-side code is easier for a non-cryptographer to grasp.
+    // The app developer (who will write a version of this smart contract) can then just pass around this
+    // new struct as an opaque blob of data to any of the neat getter functions you've exposed.
     function checkNationalityExclusion(
         bytes calldata committedInputs,
         uint256[] calldata committedInputCounts
@@ -93,11 +103,14 @@ contract SampleContract {
             ProofType.NATIONALITY_EXCLUSION
         );
         // The exclusion check relies on the country list being sorted in
-        // ascending order, if it is not, then the proof has no value
+        // ascending order, if it is not, then the proof has no value.
+        // ^^^ Consider amending this comment, since now the circuit enforces
+        // the ordering.
         require(
             ArrayUtils.isSortedAscending(nationalityExclusionList),
             "Nationality exclusion countries must be sorted in ascending order"
-        );
+        ); // You can get rid of this `require` now.
+
         // Let's check the exclusion list checked what we expect
         // Here we expect Spain, Italy and Portugal
         require(
@@ -119,6 +132,12 @@ contract SampleContract {
         // passports and ID cards
         // You can ask the user to disclose their document type
        // and the SDK will tell you which one they have
+        // As a possible alternative to this `isIDCard` percolating through your stack,
+        // perhaps the disclosure circuit could output data in a format that's uniform
+        // for both passports and ID cards: for each data type, choose the larger byte length,
+        // and expose those uniform lengths from the circuit to the verifier.
+        // It looks like `isIDCard` is only needed for the "disclose" proof type, so it might
+        // not be too big a change? It's up to you, of course :)
         bool isIDCard
     ) public returns (bytes32) {
         (bool verified, bytes32 uniqueIdentifier) = zkPassportVerifier.verifyProof(params);
diff --git a/src/solidity/src/StringUtils.sol b/src/solidity/src/StringUtils.sol
index 31b072c16..90c31fd57 100644
--- a/src/solidity/src/StringUtils.sol
+++ b/src/solidity/src/StringUtils.sol
@@ -2,6 +2,7 @@
 // Copyright 2025 ZKPassport
 pragma solidity >=0.8.21;
 
+// Missing tests for all fns.
 library StringUtils {
     function equals(string memory a, string memory b) internal pure returns (bool) {
         return keccak256(bytes(a)) == keccak256(bytes(b));
diff --git a/src/solidity/src/ZKPassportVerifier.sol b/src/solidity/src/ZKPassportVerifier.sol
index e49148fe2..735ffce10 100644
--- a/src/solidity/src/ZKPassportVerifier.sol
+++ b/src/solidity/src/ZKPassportVerifier.sol
@@ -30,14 +30,30 @@ enum BoundDataIdentifier {
 }
 
 // Add this struct to group parameters
+// Suggested comment for here (I had to add this to be able to follow the contract, so it feels like a worthwhile comment):
+//
+// publicInputs:
+// - 0: certificate_registry_root: pub Field,
+// - 1: circuit_registry_root: pub Field,
+// - 2: current_date: pub u64,
+// - 3: service_scope: pub Field,
+// - 4: service_subscope: pub Field,
+// - 5:5+N: param_commitments: pub [Field; N],
+// - 5+N: scoped_nullifier: pub Field,
+//
+// committedInputs: the preimages of the `param_commitments` of the disclosure proofs.
+// committedInputCounts: offsets to locate the committedInputs of each of the param_commitments of the public_inputs.
 struct ProofVerificationParams {
     bytes32 vkeyHash;
     bytes proof;
     bytes32[] publicInputs;
     bytes committedInputs;
-    uint256[] committedInputCounts;
+    uint256[] committedInputCounts; // TODO: consider whether this agency (over the offsets) would enable attacks.
+    // ... It might allow someone to _skip_ some of the disclosure checks, by providing an offset that jumps over them.
+    // We should assert that the length of the param_commitments matches the length of the committedInputCounts.
+    // Otherwise, a user could claim `committedInputCounts` is a size-1 array, when in fact it could be a
+    // size-9 array (meaning 9 disclosure proofs).
     uint256 validityPeriodInSeconds;
-    string domain;
+    string domain; // consider aligning with naming of circuits: domain -> scope, scope -> subscope. At the moment `scope` means two different things, depending on which file you're reading.
     string scope;
     bool devMode;
 }
@@ -94,6 +110,7 @@ contract ZKPassportVerifier {
     address public admin;
     bool public paused;
 
+    // Which vkey hashes are these? Vkey hashes of outer circuits? Please comment storage variables.
     mapping(bytes32 => address) public vkeyHashToVerifier;
 
     // Maybe make this immutable as this should most likely not change
@@ -163,6 +180,7 @@ contract ZKPassportVerifier {
         rootRegistry = IRootRegistry(_rootRegistry);
     }
 
+    // Visually inspected.
     function checkDate(
         bytes32[] memory publicInputs,
         uint256 validityPeriodInSeconds
@@ -171,6 +189,7 @@ contract ZKPassportVerifier {
         return DateUtils.isDateValid(currentDateTimeStamp, validityPeriodInSeconds);
     }
 
+    // Visually inspected.
     function getDisclosedData(
         bytes calldata discloseBytes,
         bool isIDCard
@@ -188,6 +207,7 @@ contract ZKPassportVerifier {
         string memory documentType
     ) {
+        // Recommendation: create globals to replace all of these magic numbers (as per the circuits).
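+        // e.g. (a sketch; names hypothetical, values taken from the slices below, declared
+        // at contract level):
+        // uint256 internal constant PASSPORT_MRZ_NAME_LENGTH = 39;
+        // ...and similar for the other index/length pairs used in this function.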
if (!isIDCard) {
             name = string(discloseBytes[PASSPORT_MRZ_NAME_INDEX:PASSPORT_MRZ_NAME_INDEX + 39]);
             issuingCountry = string(
@@ -233,6 +253,24 @@ contract ZKPassportVerifier {
         }
     }
 
+    // Visually inspected.
+    // Possibly-critical bug (as mentioned in the corresponding circuit):
+    // Neither the circuits nor this contract is enforcing that each `discloseMask` byte is
+    // actually a bit (0 or 1 only). If the mask is not properly enforced by the verifier,
+    // then a user could mutate the details of their passport, using the mask, to lie about their
+    // passport details. E.g. they could multiply any disclosed byte by 2 -- e.g. to shift which
+    // country they're from.
+    // This arguably places quite a high mental burden on the app developer, who must write
+    // a smart contract which enacts these missing checks.
+    // Slightly cheekily, I should point out that even the example SampleContract.sol is neglecting
+    // to check the `discloseMask`; it is throwing away that data. So that means a malicious user
+    // could indeed trick the SampleContract into thinking they're from a different country, by
+    // cleverly choosing the `disclose_mask` inputs to their "disclosure" circuit. Or they could
+    // mutate their date of birth. They could mutate anything.
+    // Yay!!! Dopamine for me as an auditor!
+    //
+    // I suspect an app developer might blindly follow the patterns in SampleContract.sol, and
+    // so it's very possible that an app developer could fall afoul of this bug.
     function getDiscloseProofInputs(
         bytes calldata committedInputs,
         uint256[] calldata committedInputCounts
@@ -241,11 +279,35 @@ contract ZKPassportVerifier {
         bool found = false;
         for (uint256 i = 0; i < committedInputCounts.length; i++) {
             // Disclose circuits have 181 bytes of committed inputs
-            // The first byte is the proof type
+            // The first byte is the proof type, so we search for that.
             if (committedInputCounts[i] == CommittedInputLen.DISCLOSE_BYTES) {
+                // This only works because `181` is unique in the CommittedInputLen's. If it wasn't unique,
+                // this `require` would throw an error if we encountered another ProofType with `181` bytes.
+                // It's something to be aware of, in case the number of bytes ever changes with refactors. Indeed,
+                // there _are_ some proof types which have matching CommittedInputLen's.
+                // Consider instead something like:
+                // ```
+                // if (
+                //     committedInputCounts[i] == CommittedInputLen.DISCLOSE_BYTES &&
+                //     committedInputs[offset] == bytes1(uint8(ProofType.DISCLOSE))
+                // ) {
+                // ```
+                // ^^^ this would also be consistent with some other `get...ProofInputs` functions in this file.
                 require(committedInputs[offset] == bytes1(uint8(ProofType.DISCLOSE)), "Invalid proof type");
-                discloseMask = committedInputs[offset + 1:offset + 91];
+                discloseMask = committedInputs[offset + 1:offset + 91]; // Replace 91 and 181 with global constants.
                 discloseBytes = committedInputs[offset + 91:offset + 181];
+                // Concern: is it possible to chain multiple "DISCLOSE" proof types into a single Outer proof?
+                // I don't think I've seen anything which ensures that each `ProofType` can only be used at most once.
+                // Consider asserting that each ProofType can only be used once, within the Outer circuit (unless
+                // I'm overlooking a use case that would require multiple of one ProofType?).
+                // My concern came from me thinking that this `for` loop should `break;` once it finds a match,
+                // to save gas.
And then I wondered what would happen if there were two `DISCLOSE` proofs
+                // within this Outer proof: it would mean we'd never be able to extract the _first_ occurrence,
+                // because the loop would carry on and overwrite the output `discloseBytes` with the _second_
+                // occurrence.
+                // This concern is probably valid for all `get...ProofInputs` functions in this file (this is the
+                // first one that I've looked at).
+                // Consider adding `break;` here, to save some gas.
                 found = true;
             }
             offset += committedInputCounts[i];
@@ -253,6 +315,13 @@ contract ZKPassportVerifier {
         require(found, "Disclose proof inputs not found");
     }
 
+    // Which kinds of proof is this actually for, in all?
+    // compare/expiry?
+    // Also compare/birthdate? It's not clear because of the hard-coded COMPARE_EXPIRY name below. COMPARE_BIRTHDATE doesn't appear in this file.
+    // Consider instead:
+    // `if ((committedInputCounts[i] == CommittedInputLen.COMPARE_EXPIRY) || (committedInputCounts[i] == CommittedInputLen.COMPARE_BIRTHDATE)) && ...`
+    // Or: remove `COMPARE_BIRTHDATE` and rename `COMPARE_EXPIRY` to `COMPARE_DATE` with a comment to say that this covers BIRTHDATE and EXPIRY DATE use cases.
+    // Also consider renaming this function to getCompareDateProofInputs or something that makes it clear that it's the "compare" flow.
     function getDateProofInputs(
         bytes calldata committedInputs,
         uint256[] calldata committedInputCounts,
@@ -272,6 +341,8 @@ contract ZKPassportVerifier {
             currentDate = uint256(bytes32(committedInputs[offset + 1:offset + 9])) >> 192;
             minDate = uint256(bytes32(committedInputs[offset + 9:offset + 17])) >> 192;
             maxDate = uint256(bytes32(committedInputs[offset + 17:offset + 25])) >> 192;
+            // See concern of the getDiscloseProofInputs fn above, relating to multiple of the same ProofType being used in a single Outer circuit.
+            // Also consider `break;` here, once found.
             found = true;
         }
             offset += committedInputCounts[i];
@@ -288,6 +359,18 @@ contract ZKPassportVerifier {
         for (uint256 i = 0; i < committedInputCounts.length; i++) {
             // The age circuit has 11 bytes of committed inputs
             // The first byte is the proof type
+            // This only works because `11` is unique in the CommittedInputLen's. If it wasn't unique,
+            // this `require` would throw an error if we encountered another ProofType with `11` bytes.
+            // It's something to be aware of, in case the number of bytes ever changes with refactors. Indeed,
+            // there _are_ some proof types which have matching CommittedInputLen's.
+            // Consider instead something like:
+            // ```
+            // if (
+            //     committedInputCounts[i] == CommittedInputLen.COMPARE_AGE &&
+            //     committedInputs[offset] == bytes1(uint8(ProofType.AGE))
+            // ) {
+            // ```
+            // (^^^ this pattern is followed in some of the other fns in this file)
             if (committedInputCounts[i] == CommittedInputLen.COMPARE_AGE) {
                 require(committedInputs[offset] == bytes1(uint8(ProofType.AGE)), "Invalid proof type");
                 // Get rid of the padding 0s bytes as the timestamp is contained within the first 64 bits
@@ -295,6 +378,8 @@
                 currentDate = uint256(bytes32(committedInputs[offset + 1:offset + 9])) >> 192;
                 minAge = uint8(committedInputs[offset + 9]);
                 maxAge = uint8(committedInputs[offset + 10]);
+            // See concern of the getDiscloseProofInputs fn above, relating to multiple of the same ProofType being used in a single Outer circuit.
+            // Also consider `break;` here, once found.
found = true;
             }
             offset += committedInputCounts[i];
         }
         require(found, "Age proof inputs not found");
     }
 
+    // Recommendation: add a doc comment to say which circuits this covers. I think:
+    // INCL_ISSUING_COUNTRY, EXCL_ISSUING_COUNTRY, INCL_NATIONALITY, EXCL_NATIONALITY
     function getCountryProofInputs(
         bytes calldata committedInputs,
         uint256[] calldata committedInputCounts,
@@ -316,9 +403,16 @@
             committedInputCounts[i] == CommittedInputLen.INCL_NATIONALITY &&
             committedInputs[offset] == bytes1(uint8(proofType))
         ) {
+                // Consider using a global instead of the magic number 200.
                 countryList = new string[](200);
                 for (uint256 j = 0; j < 200; j++) {
+                    // Each country code is 3 bytes long.
+                    // `+ 1` skips over the proofType byte at the start.
+                    // Consider caching `offset + 1` in a local variable before this loop begins, instead of
+                    // recomputing `+ 1` in every iteration in the line below. (Don't mutate `offset` itself,
+                    // since the outer loop's `offset += committedInputCounts[i]` accounting relies on it.)
                     if (committedInputs[offset + j * 3 + 1] == 0) {
+                        // The circuit constrains that once we've reached the first `0`,
+                        // we won't encounter any further nonzero values.
                         // We don't need to include the padding bytes
                         break;
                     }
@@ -340,21 +434,55 @@
         for (uint256 i = 0; i < committedInputCounts.length; i++) {
             // The bind data circuit has 501 bytes of committed inputs
             // The first byte is the proof type
+            // See concern of the getDiscloseProofInputs fn above, relating to multiple of the same ProofType being used in a single Outer circuit.
+            //
+            // Also:
+            // This only works because `501` is unique in the CommittedInputLen's. If it wasn't unique,
+            // this `require` would throw an error if we encountered another ProofType with `501` bytes.
+            // It's something to be aware of, in case the number of bytes ever changes with refactors. Indeed,
+            // there _are_ some proof types which have matching CommittedInputLen's.
+            // Consider instead something like:
+            // ```
+            // if (
+            //     committedInputCounts[i] == CommittedInputLen.BIND &&
+            //     committedInputs[offset] == bytes1(uint8(ProofType.BIND))
+            // ) {
+            // ```
+            // (^^^ this pattern is followed in some of the other fns in this file)
             if (committedInputCounts[i] == CommittedInputLen.BIND) {
                 require(committedInputs[offset] == bytes1(uint8(ProofType.BIND)), "Invalid proof type");
                 // Get the length of the data from the tag length encoded in the data
                 // The developer should check on their side the actual data returned before
                 // the padding bytes by asserting the values returned from getBoundData meets
                 // what they expect
+                // This `while` loop seems like quite a complex preamble just to check for zero padding at the
+                // end of the bytes. Especially since the logic of this `while` loop is effectively repeated in `getBoundData`.
+                // You could perhaps instead assert padding within `getBoundData`, after its `while` loop.
                 uint256 dataLength = 0;
                 while (dataLength < 500) {
+                    // Consider caching `offset + 1` in a local before we enter this loop, to remove all the below `+ 1`'s.
+                    //
+                    // Each `if` statement in this loop would benefit from a comment explaining the layout
+                    // of the bytes. It's not clear to me (having come here from all the circuits, which
+                    // don't assume any layout structure for these 500 bytes).
                     if (
                         committedInputs[offset + 1 + dataLength] ==
                         bytes1(uint8(BoundDataIdentifier.USER_ADDRESS))
                     ) {
+                    // It makes me nervous that we're not checking the claimed lengths against known constants.
+                    // E.g.
we know an address should be 20 bytes, but that's not being asserted here.
+                    // E.g. we know a chainId should be of a certain length (2 bytes?), but that's not being asserted here.
+                    // I'm worried about an attack where a user could provide an incorrect addressLength or chainIdLength,
+                    // which the code below would blindly accept and just jump over that many bytes. Perhaps it
+                    // would allow a user to somehow skip over some bytes to avoid being bound to those bytes.
+                    // Oh, it's being checked in getBoundData.
+                    //
+                    // Looks like each length is encoded as 2 bytes? That makes sense, since 1 byte only encodes values up to 255, but the array is 500 long.
                     uint16 addressLength = uint16(
                         bytes2(committedInputs[offset + 1 + dataLength + 1:offset + 1 + dataLength + 3])
                     );
+                    // Consider something like:
+                    // dataLength += 1 /* BoundDataIdentifier */ + 2 /* length bytes */ + addressLength; // for readability.
                     dataLength += 2 + addressLength + 1;
                 } else if (
                     committedInputs[offset + 1 + dataLength] == bytes1(uint8(BoundDataIdentifier.CHAIN_ID))
@@ -383,6 +511,8 @@
             }
 
             data = committedInputs[offset + 1:offset + 501];
+            // See concern of the getDiscloseProofInputs fn above, relating to multiple of the same ProofType being used in a single Outer circuit.
+            // Also consider `break;` here, once found, to save gas.
             found = true;
         }
             offset += committedInputCounts[i];
@@ -397,6 +527,21 @@
         uint256 offset = 0;
         bool found = false;
         for (uint256 i = 0; i < committedInputCounts.length; ++i) {
+            // See concern of the getDiscloseProofInputs fn above, relating to multiple of the same ProofType being used in a single Outer circuit.
+            //
+            // Also:
+            // This only works because `33` is unique in the CommittedInputLen's. If it wasn't unique,
+            // this `require` would throw an error if we encountered another ProofType with `33` bytes.
+            // It's something to be aware of, in case the number of bytes ever changes with refactors. Indeed,
+            // there _are_ some proof types which have matching CommittedInputLen's.
+            // Consider instead something like:
+            // ```
+            // if (
+            //     committedInputCounts[i] == CommittedInputLen.SANCTIONS &&
+            //     committedInputs[offset] == bytes1(uint8(ProofType.SANCTIONS))
+            // ) {
+            // ```
+            // (^^^ this pattern is followed in some of the other fns in this file)
             if (committedInputCounts[i] == CommittedInputLen.SANCTIONS) {
                 require(
                     committedInputs[offset] == bytes1(uint8(ProofType.SANCTIONS)),
@@ -404,6 +549,7 @@
                 );
 
                 sanctionsTreesCommitment = bytes32(committedInputs[offset + 1:offset + 33]);
+                // Consider `break;` here, once found, to save gas.
                 found = true;
             }
             offset += committedInputCounts[i];
@@ -419,6 +565,7 @@
         _validateSanctionsRoot(proofSanctionsRoot);
     }
 
+    // Consider moving this function up to be directly below the `getBindProofInputs` fn.
     function getBoundData(
         bytes calldata data
     ) public pure returns (address senderAddress, uint256 chainId, string memory customData) {
@@ -447,6 +594,8 @@
         }
     }
 
+    // Visually inspected.
+    // Missing tests.
     function verifyScopes(
         bytes32[] calldata publicInputs,
         string calldata domain,
@@ -479,6 +628,9 @@
             require(calculatedCommitment == paramCommitments[i], "Invalid commitment");
             offset += committedInputCounts[i];
         }
+
+        // Missing check:
+        // assert(committedInputs.length == offset); // to ensure there are no extra bytes of committedInputs, to avoid unexpected behaviour.
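+        // In Solidity that would be something like (a sketch):
+        // require(committedInputs.length == offset, "Unconsumed committedInputs bytes");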
     _validateCertificateRoot(params.publicInputs[0]);
 
     // Validate circuit registry root
@@ -535,6 +699,20 @@
     require(verifyScopes(params.publicInputs, params.domain, params.scope), "Invalid scopes");
 
     // Verifies the commitments against the committed inputs
+    // Possibly critical bug: The user can provide `committedInputCounts = []` (for example),
+    // to skip over _all_ of the param_commitments, meaning none of their preimages would be checked.
+    // To fix (in pseudocode):
+    // ```
+    // let NUM_PUBLIC_INPUT_FIELDS = 7; // certificate_registry_root, circuit_registry_root, current_date, service_scope, service_subscope, param_commitments, scoped_nullifier.
+    // assert(params.publicInputs.length >= NUM_PUBLIC_INPUT_FIELDS); // sanity check.
+    // let num_param_commitments = params.publicInputs.length - NUM_PUBLIC_INPUT_FIELDS + 1;
+    // assert(params.committedInputCounts.length == num_param_commitments); // this is the critical check that's missing.
+    // ```
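+    //
+    // In Solidity terms, that critical check could look something like this (a sketch, untested;
+    // it assumes the seven logical fields listed above, i.e. five fixed fields, then the
+    // param_commitments array, then the scoped_nullifier):
+    // ```
+    // // 5 leading fields + 1 trailing scoped_nullifier surround the param_commitments:
+    // uint256 numParamCommitments = params.publicInputs.length - 6;
+    // require(
+    //   params.committedInputCounts.length == numParamCommitments,
+    //   "committedInputCounts length mismatch"
+    // );
+    // ```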
"unconstrained " : ""}fn main( exponent: u32, ) -> pub Field { // Get the length of tbs_certificate by parsing the ASN.1 - // Safety: This is safe because the length must be correct for the hash and signature to be valid + // Safety: This is safe because the length must be correct for the hash and signature to be valid. + // The tbs_certificate bytes are also asserted to be all-zero after tbs_certificate_len, within + // the 'verify_signature' call below. let tbs_certificate_len = unsafe { utils::unsafe_get_asn1_element_length(tbs_certificate) }; assert(verify_signature::<${Math.ceil(bit_size / 8)}, ${ @@ -262,7 +266,7 @@ use sig_check_ecdsa::verify_${curve_family}_${curve_name}; use utils::split_array; ${unconstrained ? "unconstrained " : ""}fn main( - comm_in: pub Field, + comm_in: pub Field, // hash(salt_in, country, tbs_certificate), as output by the preceding "dsc circuit". salt_in: Field, salt_out: Field, dg1: [u8; 95], @@ -274,7 +278,9 @@ ${unconstrained ? "unconstrained " : ""}fn main( e_content: [u8; 700], ) -> pub Field { // Get the length of signed_attributes by parsing the ASN.1 - // Safety: This is safe because the length must be correct for the hash and signature to be valid + // Safety: This is safe because the length must be correct for the hash and signature to be valid. + // The signed_attributes bytes are also asserted to be all-zero after signed_attributes_size, within + // the '${hash_algorithm}' call below. let signed_attributes_size = unsafe { utils::unsafe_get_asn1_element_length(signed_attributes) }; let (r, s) = split_array(sod_signature); @@ -326,7 +332,9 @@ ${unconstrained ? "unconstrained " : ""}fn main( ) -> pub Field { verify_rsa_pubkey_in_tbs(dsc_pubkey, tbs_certificate); // Get the length of signed_attributes by parsing the ASN.1 - // Safety: This is safe because the length must be correct for the hash and signature to be valid + // Safety: This is safe because the length must be correct for the hash and signature to be valid. + // The signed_attributes bytes are also asserted to be all-zero after signed_attributes_size, within + // the 'verify_signature' call below. let signed_attributes_size = unsafe { utils::unsafe_get_asn1_element_length(signed_attributes) }; assert(verify_signature::<${Math.ceil(bit_size / 8)}, ${