Skip to content

Commit

Permalink
feat: improve GitOid trait bounds (#111)
Browse files Browse the repository at this point in the history
This commit fully hides the implementation of the hash algorithms
by augmenting the HashAlgorithm trait to hide the digester types
inside of it. This makes for some slightly more complicated
internal code, but a much nicer external interface. It also means we
no longer need to re-export the cryptographic crates we use.

Signed-off-by: Andrew Lilley Brinker <[email protected]>
  • Loading branch information
alilleybrinker authored Feb 19, 2024
1 parent 4bfdc8f commit d34ad01
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 91 deletions.
20 changes: 18 additions & 2 deletions gitoid/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,31 @@ version = "0.4.0"
crate-type = ["rlib", "cdylib"]

[dependencies]

## Core Dependencies

# Match the version used in sha1 and sha2.
digest = "0.10.7"
format-bytes = "0.3.0"
# Match the version used in sha1, sha2, and digest.
generic-array = "0.14.7"
hex = { version = "0.4.3", default-features = false, features = ["std"] }
paste = "1.0.14"

## Hash Algorithms

sha1 = { version = "0.10.6", default-features = false, features = ["std"] }
sha1collisiondetection = "0.3.3"
sha2 = { version = "0.10.8", default-features = false }

## Async Support

tokio = { version = "1.36.0", features = ["io-util"] }

## Representations

hex = { version = "0.4.3", default-features = false, features = ["std"] }
url = "2.4.1"

[dev-dependencies]

# Need "rt" and "fs" additionally for tests.
tokio = { version = "1.36.0", features = ["io-util", "fs", "rt", "rt-multi-thread"] }
64 changes: 34 additions & 30 deletions gitoid/src/gitoid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use core::marker::PhantomData;
use core::ops::Not as _;
use core::str::FromStr;
use core::str::Split;
use digest::Digest;
use digest::OutputSizeUser;
use format_bytes::format_bytes;
use generic_array::sequence::GenericSequence;
Expand Down Expand Up @@ -44,7 +45,7 @@ where
_phantom: PhantomData<O>,

#[doc(hidden)]
value: H::ARRAY,
value: H::Array,
}

const GITOID_URL_SCHEME: &str = "gitoid";
Expand All @@ -54,14 +55,6 @@ where
H: HashAlgorithm,
O: ObjectType,
{
/// Helper function to construct GitOid from raw hash.
fn from_hash(arr: GenericArray<u8, H::OutputSize>) -> GitOid<H, O> {
GitOid {
_phantom: PhantomData,
value: H::array_from_generic(arr),
}
}

/// Create a new `GitOid` based on a slice of bytes.
pub fn from_bytes<B: AsRef<[u8]>>(content: B) -> GitOid<H, O> {
fn inner<H, O>(content: &[u8]) -> GitOid<H, O>
Expand Down Expand Up @@ -160,7 +153,7 @@ where

/// Get the length of the hash in bytes.
pub fn hash_len(&self) -> usize {
<H as OutputSizeUser>::output_size()
<H::Alg as OutputSizeUser>::output_size()
}
}

Expand Down Expand Up @@ -304,7 +297,10 @@ where
.and_then(|_| self.validate_object_type())
.and_then(|_| self.validate_hash_algorithm())
.and_then(|_| self.parse_hash())
.map(GitOid::from_hash)
.map(|hash| GitOid {
_phantom: PhantomData,
value: H::array_from_generic(hash),
})
}

fn validate_url_scheme(&self) -> Result<()> {
Expand Down Expand Up @@ -349,18 +345,19 @@ where
Ok(())
}

fn parse_hash(&mut self) -> Result<GenericArray<u8, H::OutputSize>> {
fn parse_hash(&mut self) -> Result<GenericArray<u8, <H::Alg as OutputSizeUser>::OutputSize>> {
let hex_str = self
.segments
.next()
.and_then(some_if_not_empty)
.ok_or_else(|| Error::MissingHash(self.url.clone()))?;

// TODO(abrinker): When `sha1` et al. move to generic-array 1.0, update this to use the `arr!` macro.
// TODO(alilleybrinker): When `sha1` et al. move to generic-array 1.0,
// update this to use the `arr!` macro.
let mut value = GenericArray::generate(|_| 0);
hex::decode_to_slice(hex_str, &mut value)?;

let expected_size = <H as OutputSizeUser>::output_size();
let expected_size = <H::Alg as OutputSizeUser>::output_size();
if value.len() != expected_size {
return Err(Error::UnexpectedHashLength {
expected: expected_size,
Expand Down Expand Up @@ -395,7 +392,7 @@ where
/// `expected_length`. If the actual bytes hashed differs, then something went
/// wrong and the hash is not valid.
fn gitoid_from_buffer<H, O, R>(
digester: H,
digester: H::Alg,
reader: R,
expected_read_length: usize,
) -> Result<GitOid<H, O>>
Expand All @@ -404,8 +401,9 @@ where
O: ObjectType,
R: Read,
{
let expected_hash_length = <H as OutputSizeUser>::output_size();
let (hash, amount_read) = hash_from_buffer::<H, O, R>(digester, reader, expected_read_length)?;
let expected_hash_length = <H::Alg as OutputSizeUser>::output_size();
let (hash, amount_read) =
hash_from_buffer::<H::Alg, O, R>(digester, reader, expected_read_length)?;

if amount_read != expected_read_length {
return Err(Error::UnexpectedHashLength {
Expand All @@ -421,7 +419,10 @@ where
});
}

Ok(GitOid::from_hash(hash))
Ok(GitOid {
_phantom: PhantomData,
value: H::array_from_generic(hash),
})
}

// Helper extension trait to give a convenient way to iterate over
Expand Down Expand Up @@ -458,13 +459,13 @@ impl<R: BufRead> ForEachChunk for R {
///
/// This function handles actually constructing the hash with the GitOID prefix,
/// and delegates to a buffered reader for performance of the chunked reading.
fn hash_from_buffer<H, O, R>(
mut digester: H,
fn hash_from_buffer<D, O, R>(
mut digester: D,
reader: R,
expected_read_length: usize,
) -> Result<(GenericArray<u8, H::OutputSize>, usize)>
) -> Result<(GenericArray<u8, D::OutputSize>, usize)>
where
H: HashAlgorithm,
D: Digest,
O: ObjectType,
R: Read,
{
Expand All @@ -480,7 +481,7 @@ where

/// Async version of `gitoid_from_buffer`.
async fn gitoid_from_async_buffer<H, O, R>(
digester: H,
digester: H::Alg,
reader: R,
expected_read_length: usize,
) -> Result<GitOid<H, O>>
Expand All @@ -489,9 +490,9 @@ where
O: ObjectType,
R: AsyncRead + Unpin,
{
let expected_hash_length = <H as OutputSizeUser>::output_size();
let expected_hash_length = <H::Alg as OutputSizeUser>::output_size();
let (hash, amount_read) =
hash_from_async_buffer::<H, O, R>(digester, reader, expected_read_length).await?;
hash_from_async_buffer::<H::Alg, O, R>(digester, reader, expected_read_length).await?;

if amount_read != expected_read_length {
return Err(Error::UnexpectedHashLength {
Expand All @@ -507,17 +508,20 @@ where
});
}

Ok(GitOid::from_hash(hash))
Ok(GitOid {
_phantom: PhantomData,
value: H::array_from_generic(hash),
})
}

/// Async version of `hash_from_buffer`.
async fn hash_from_async_buffer<H, O, R>(
mut digester: H,
async fn hash_from_async_buffer<D, O, R>(
mut digester: D,
reader: R,
expected_read_length: usize,
) -> Result<(GenericArray<u8, H::OutputSize>, usize)>
) -> Result<(GenericArray<u8, D::OutputSize>, usize)>
where
H: HashAlgorithm,
D: Digest,
O: ObjectType,
R: AsyncRead + Unpin,
{
Expand Down
60 changes: 47 additions & 13 deletions gitoid/src/hash_algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@ use core::fmt::Debug;
use core::hash::Hash;
use core::ops::Deref;
use digest::Digest;
use digest::OutputSizeUser;
use generic_array::GenericArray;
use sha1::Sha1;
use sha1collisiondetection::Sha1CD as Sha1Cd;
use sha2::Sha256;

/// Hash algorithms that can be used to make a [`GitOid`].
///
Expand All @@ -23,33 +21,69 @@ use sha2::Sha256;
/// Gruevski's ["A Definitive Guide to Sealed Traits in Rust"][1].
///
/// [1]: https://predr.ag/blog/definitive-guide-to-sealed-traits-in-rust/
pub trait HashAlgorithm: Digest + Sealed {
pub trait HashAlgorithm: Sealed {
/// The name of the hash algorithm in lowercase ASCII.
const NAME: &'static str;

/// The actual digest type used by the algorithm.
type Alg: Digest;

/// The array type generated by the hash.
type ARRAY: Copy + PartialEq + Ord + Hash + Debug + Deref<Target = [u8]>;
type Array: Copy + PartialEq + Ord + Hash + Debug + Deref<Target = [u8]>;

/// Helper function to convert the GenericArray type to Self::Array
fn array_from_generic(
arr: GenericArray<u8, <Self::Alg as OutputSizeUser>::OutputSize>,
) -> Self::Array;

/// Helper function to convert the GenericArray type to Self::ARRAY
fn array_from_generic(arr: GenericArray<u8, Self::OutputSize>) -> Self::ARRAY;
/// Get an instance of the digester.
fn new() -> Self::Alg;
}

macro_rules! impl_hash_algorithm {
( $type:ty, $name:literal ) => {
( $type:ident, $alg_ty:ty, $name:literal ) => {
impl Sealed for $type {}

impl HashAlgorithm for $type {
const NAME: &'static str = $name;

type ARRAY = GenericArray<u8, Self::OutputSize>;
type Alg = $alg_ty;

fn array_from_generic(arr: GenericArray<u8, Self::OutputSize>) -> Self::ARRAY {
type Array = GenericArray<u8, <Self::Alg as OutputSizeUser>::OutputSize>;

fn array_from_generic(
arr: GenericArray<u8, <Self::Alg as OutputSizeUser>::OutputSize>,
) -> Self::Array {
arr
}

fn new() -> Self::Alg {
Self::Alg::new()
}
}
};
}

impl_hash_algorithm!(Sha1, "sha1");
impl_hash_algorithm!(Sha256, "sha256");
impl_hash_algorithm!(Sha1Cd, "sha1cd");
/// SHA-1 algorithm.
///
/// Marker type used to name SHA-1 as the hash algorithm of a `GitOid`.
/// It carries no hashing state itself; the `impl_hash_algorithm!`
/// invocation that follows this definition ties it to `sha1::Sha1` as the
/// digest type.
pub struct Sha1 {
// Private unit field: prevents the struct from being constructed outside this module.
#[doc(hidden)]
_private: (),
}

impl_hash_algorithm!(Sha1, sha1::Sha1, "sha1");

/// SHA-256 algorithm.
///
/// Marker type used to name SHA-256 as the hash algorithm of a `GitOid`.
/// It carries no hashing state itself; the `impl_hash_algorithm!`
/// invocation that follows this definition ties it to `sha2::Sha256` as the
/// digest type.
pub struct Sha256 {
// Private unit field: prevents the struct from being constructed outside this module.
#[doc(hidden)]
_private: (),
}

impl_hash_algorithm!(Sha256, sha2::Sha256, "sha256");

/// SHA-1CD (SHA-1 with collision detection) algorithm.
///
/// Marker type used to name SHA-1CD as the hash algorithm of a `GitOid`.
/// It carries no hashing state itself; the `impl_hash_algorithm!`
/// invocation that follows this definition ties it to
/// `sha1collisiondetection::Sha1CD` as the digest type.
pub struct Sha1Cd {
// Private unit field: prevents the struct from being constructed outside this module.
#[doc(hidden)]
_private: (),
}

impl_hash_algorithm!(Sha1Cd, sha1collisiondetection::Sha1CD, "sha1cd");
11 changes: 3 additions & 8 deletions gitoid/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,7 @@ pub mod hash {
#[cfg(doc)]
use crate::GitOid;

/// SHA-1 hasher.
pub type Sha1 = sha1::Sha1;

/// SHA-1CD hasher.
pub type Sha1Cd = sha1collisiondetection::Sha1CD;

/// SHA-256 hasher.
pub type Sha256 = sha2::Sha256;
pub use crate::hash_algorithm::Sha1;
pub use crate::hash_algorithm::Sha1Cd;
pub use crate::hash_algorithm::Sha256;
}
Loading

0 comments on commit d34ad01

Please sign in to comment.