feat(mater): add unixfs wrapping support #690

Closed
9 changes: 5 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -159,7 +159,7 @@ blstrs = "0.7"
filecoin-hashers = "13.1.0"
filecoin-proofs = "18.1.0"
fr32 = "11.1.0"
generic-array = "1.1.0"
generic-array = "=1.1.0"
storage-proofs-core = "18.1.0"
storage-proofs-porep = "18.1.0"
storage-proofs-post = "18.1.0"
55 changes: 18 additions & 37 deletions mater/cli/src/convert.rs
@@ -9,34 +9,27 @@ use crate::error::Error;
pub(crate) async fn convert_file_to_car(
input_path: &PathBuf,
output_path: &PathBuf,
overwrite: bool,
config: Config,
) -> Result<Cid, Error> {
let source_file = File::open(input_path).await?;
let output_file = if overwrite {
File::create(output_path).await
} else {
File::create_new(output_path).await
}?;
let cid = create_filestore(source_file, output_file, Config::default()).await?;

let output_file = File::create(output_path).await?;
let cid = create_filestore(source_file, output_file, config).await?;
Ok(cid)
}

/// Tests for file conversion.
/// MaterError cases are not handled because these are tested in the mater library.
#[cfg(test)]
mod tests {
use std::str::FromStr;

use anyhow::Result;
use mater::Cid;
use mater::Config;
use std::str::FromStr;
use tempfile::tempdir;
use tokio::{fs::File, io::AsyncWriteExt};

use crate::{convert::convert_file_to_car, error::Error};

#[tokio::test]
async fn convert_file_to_car_success() -> Result<()> {
async fn convert_file_to_car_raw_success() -> Result<()> {
// Setup: Create a dummy input file
let temp_dir = tempdir()?;
let input_path = temp_dir.path().join("test_input.txt");
@@ -49,18 +42,14 @@ mod tests {
// Define output path
let output_path = temp_dir.path().join("test_output.car");

// Call the function under test
let result = convert_file_to_car(&input_path, &output_path, false).await;
// Configure in raw mode
let config = Config::balanced_raw(256 * 1024, 174);

// Assert the result is Ok
// Call the function under test
let result = super::convert_file_to_car(&input_path, &output_path, config).await;
assert!(result.is_ok());

// Verify that the CID is as expected
assert_eq!(result?, expected_cid);

// Close temporary directory
temp_dir.close()?;

Ok(())
}

@@ -69,19 +58,15 @@ mod tests {
// Define non-existent input path
let temp_dir = tempdir()?;
let input_path = temp_dir.path().join("non_existent_input.txt");

// Define output path
let output_path = temp_dir.path().join("test_output.car");

// Call the function under test
let result = convert_file_to_car(&input_path, &output_path, false).await;
let config = Config::default();

// Assert the result is an error
// Call the function under test
let result = super::convert_file_to_car(&input_path, &output_path, config).await;
assert!(result.is_err());
assert!(matches!(result, Err(Error::IoError(..))));

// Close temporary directory
temp_dir.close()?;
assert!(matches!(result, Err(super::Error::IoError(..))));

Ok(())
}
@@ -97,17 +82,13 @@ mod tests {
// Create output file
let output_path = temp_dir.path().join("output_file");
File::create_new(&output_path).await?;
println!("gets here");

// Call the function under test
let result = convert_file_to_car(&input_path, &output_path, false).await;
let config = Config::default();

// Assert the result is an error
// Call the function under test
let result = super::convert_file_to_car(&input_path, &output_path, config).await;
assert!(result.is_err());
assert!(matches!(result, Err(Error::IoError(..))));

// Close temporary directory
temp_dir.close()?;
assert!(matches!(result, Err(super::Error::IoError(..))));

Ok(())
}
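The CLI helper now takes a `Config` instead of an `overwrite` flag. A minimal sketch of how a caller inside the CLI crate might drive both modes, assuming only the constructors shown above (file names and the helper itself are illustrative):

```rust
use std::path::PathBuf;

use mater::Config;

// Hypothetical caller; `convert_file_to_car` and `Error` are the CLI crate's own items.
async fn convert_both_modes() -> Result<(), crate::error::Error> {
    let input = PathBuf::from("photo.jpg");

    // Default: UnixFS wrapping, 256 KiB chunks, tree width 174.
    let unixfs_out = PathBuf::from("photo.unixfs.car");
    let unixfs_cid =
        crate::convert::convert_file_to_car(&input, &unixfs_out, Config::default()).await?;

    // Raw mode: same chunking parameters, no UnixFS metadata.
    let raw_out = PathBuf::from("photo.raw.car");
    let raw_cid = crate::convert::convert_file_to_car(
        &input,
        &raw_out,
        Config::balanced_raw(256 * 1024, 174),
    )
    .await?;

    // The two encodings are expected to produce different roots.
    assert_ne!(unixfs_cid, raw_cid);
    Ok(())
}
```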
43 changes: 30 additions & 13 deletions mater/cli/src/main.rs
@@ -1,9 +1,7 @@
use std::path::PathBuf;

use clap::Parser;

use crate::{convert::convert_file_to_car, error::Error, extract::extract_file_from_car};

use clap::Parser;
use mater::Config;
use std::path::PathBuf;
mod convert;
mod error;
mod extract;
@@ -19,21 +17,32 @@ enum MaterCli {
input_path: PathBuf,

/// Optional path to output CARv2 file.
/// If no output path is given it will store the `.car` file in the same location.
/// If no output path is given it will store the .car file in the same location.
output_path: Option<PathBuf>,

/// If enabled, only the resulting CID will be printed.
#[arg(short, long, action)]
quiet: bool,

/// If enabled, the output will overwrite any existing files.
/// If enabled, content will be stored directly without UnixFS wrapping.
/// By default, content is wrapped in UnixFS format for IPFS compatibility.
#[arg(long, action)]
overwrite: bool,
raw: bool,

/// Size of each chunk in bytes. Defaults to 256 KiB.
#[arg(long)]
chunk_size: Option<usize>,

/// Maximum number of children per parent node. Defaults to 174.
#[arg(long)]
tree_width: Option<usize>,
},

/// Convert a CARv2 file to its original format
Extract {
/// Path to CARv2 file
input_path: PathBuf,

/// Path to output file
output_path: Option<PathBuf>,
},
@@ -46,14 +55,24 @@ async fn main() -> Result<(), Error> {
input_path,
output_path,
quiet,
overwrite,
raw,
chunk_size,
tree_width,
} => {
let output_path = output_path.unwrap_or_else(|| {
let mut new_path = input_path.clone();
new_path.set_extension("car");
new_path
});
let cid = convert_file_to_car(&input_path, &output_path, overwrite).await?;

// Build config with UnixFS wrapping by default
let config = Config::balanced(
chunk_size.unwrap_or(256 * 1024),
tree_width.unwrap_or(174),
raw,
);

let cid = convert_file_to_car(&input_path, &output_path, config).await?;

if quiet {
println!("{}", cid);
@@ -75,14 +94,12 @@ async fn main() -> Result<(), Error> {
new_path
});
extract_file_from_car(&input_path, &output_path).await?;

println!(
"Successfully converted CARv2 file {} and saved it to to {}",
"Successfully converted CARv2 file {} and saved it to {}",
input_path.display(),
output_path.display()
);
}
}

Ok(())
}
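Since the root CID's codec reflects whether UnixFS wrapping was applied, one (assumed) way to sanity-check the `--raw` flag from Rust is to inspect the codec of the printed CID using the codes re-exported by `mater`; a sketch, assuming the codec constants are plain `u64` consts:

```rust
use mater::{Cid, DAG_PB_CODE, RAW_CODE};

// Sketch only: for a small single-chunk input, raw mode yields a raw-codec root,
// while the default UnixFS wrapping yields a dag-pb root.
fn describe_root(cid: &Cid) -> &'static str {
    match cid.codec() {
        DAG_PB_CODE => "UnixFS-wrapped (dag-pb) root",
        RAW_CODE => "raw root (no UnixFS wrapping)",
        _ => "unexpected codec",
    }
}
```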
1 change: 1 addition & 0 deletions mater/lib/Cargo.toml
@@ -29,6 +29,7 @@ thiserror.workspace = true
tokio = { workspace = true, features = ["fs", "macros", "rt-multi-thread"] }
tokio-stream.workspace = true
tokio-util = { workspace = true, features = ["io"] }
tracing = { workspace = true }

# Optional dependencies
blockstore = { workspace = true, optional = true }
9 changes: 8 additions & 1 deletion mater/lib/src/lib.rs
@@ -21,7 +21,9 @@ mod v2;
// We need to re-expose this because `read_block` returns `(Cid, Vec<u8>)`.
pub use ipld_core::cid::Cid;
pub use multicodec::{DAG_PB_CODE, IDENTITY_CODE, RAW_CODE};
pub use stores::{create_filestore, Blockstore, Config, FileBlockstore};
pub use stores::{
create_filestore, Blockstore, Config, FileBlockstore, DEFAULT_CHUNK_SIZE, DEFAULT_TREE_WIDTH,
};
pub use v1::{Header as CarV1Header, Reader as CarV1Reader, Writer as CarV1Writer};
pub use v2::{
verify_cid, Characteristics, Header as CarV2Header, Index, IndexEntry, IndexSorted,
@@ -111,6 +113,11 @@ pub enum Error {
/// See [`DagPbError`](ipld_dagpb::Error) for more information.
#[error(transparent)]
DagPbError(#[from] ipld_dagpb::Error),

/// Error returned when attempting to encode an incorrect node type.
/// For example, when attempting to encode a Leaf node as a Stem node.
#[error("Invalid node type: {0}")]
InvalidNodeType(String),
}

#[cfg(test)]
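A hedged sketch of how downstream code might check for the new error variant (the helper name is illustrative; `Error` is the crate-root error type re-exported by mater):

```rust
use mater::Error;

// Returns true when an error came from encoding the wrong UnixFS node type.
fn is_invalid_node_type(err: &Error) -> bool {
    matches!(err, Error::InvalidNodeType(_))
}
```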
18 changes: 11 additions & 7 deletions mater/lib/src/stores/blockstore.rs
@@ -12,10 +12,10 @@ use tokio::io::{AsyncRead, AsyncWrite};
use tokio_stream::StreamExt;
use tokio_util::io::ReaderStream;

use super::{DEFAULT_BLOCK_SIZE, DEFAULT_TREE_WIDTH};
use super::{DEFAULT_CHUNK_SIZE, DEFAULT_TREE_WIDTH};
use crate::{
multicodec::SHA_256_CODE, unixfs::stream_balanced_tree, CarV1Header, CarV2Header, CarV2Writer,
Error, Index, IndexEntry, MultihashIndexSorted, SingleWidthIndex,
Config, Error, Index, IndexEntry, MultihashIndexSorted, SingleWidthIndex,
};

/// The [`Blockstore`] stores pairs of [`Cid`] and [`Bytes`] in memory.
@@ -76,7 +76,7 @@ impl Blockstore {
root: None,
blocks: IndexMap::new(),
indexed: HashSet::new(),
chunk_size: chunk_size.unwrap_or(DEFAULT_BLOCK_SIZE),
chunk_size: chunk_size.unwrap_or(DEFAULT_CHUNK_SIZE),
tree_width: tree_width.unwrap_or(DEFAULT_TREE_WIDTH),
}
}
@@ -85,10 +85,14 @@ impl Blockstore {
/// converting the contents into a CARv2 file.
pub async fn read<R>(&mut self, reader: R) -> Result<(), Error>
where
R: AsyncRead + Unpin,
R: AsyncRead + Unpin + Send + 'static,
{
let chunks = ReaderStream::with_capacity(reader, self.chunk_size);

let config = Config::Balanced {
chunk_size: self.chunk_size,
tree_width: self.tree_width,
raw_mode: true,
};
// The `stream -> pin -> peekable` combo instead of `stream -> peekable -> pin` feels weird
// but it has to do with two things:
// - The fact that the stream can be self-referential:
@@ -98,7 +102,7 @@ impl Blockstore {
// https://github.com/tokio-rs/tokio/blob/14c17fc09656a30230177b600bacceb9db33e942/tokio-stream/src/stream_ext/peekable.rs#L26-L37
// - futures::Peekable::peek(self: Pin<&mut Self>)
// https://github.com/rust-lang/futures-rs/blob/c507ff833728e2979cf5519fc931ea97308ec876/futures-util/src/stream/stream/peek.rs#L38-L40
let tree = stream_balanced_tree(chunks, self.tree_width);
let tree = stream_balanced_tree(chunks, self.tree_width, &config);
tokio::pin!(tree);
let mut tree = tree.peekable();

@@ -206,7 +210,7 @@ impl Default for Blockstore {
root: None,
blocks: IndexMap::new(),
indexed: HashSet::new(),
chunk_size: DEFAULT_BLOCK_SIZE,
chunk_size: DEFAULT_CHUNK_SIZE,
tree_width: DEFAULT_TREE_WIDTH,
}
}
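The `stream -> pin -> peekable` comment above is easier to see in isolation. A small sketch of the `futures`-style variant used in `filestore.rs` (peekable first, then pin, then `as_mut().peek()`), where peeking for `None` is what detects the last node yielded by the tree stream:

```rust
use futures::stream::{self, StreamExt};

#[tokio::main]
async fn main() {
    // futures' Peekable::peek takes `self: Pin<&mut Self>`, so the peekable
    // stream has to be pinned before it can be peeked.
    let nodes = stream::iter([1, 2, 3]).peekable();
    tokio::pin!(nodes);

    while let Some(node) = nodes.next().await {
        // The last yielded node is treated as the root of the tree stream.
        let is_root = nodes.as_mut().peek().await.is_none();
        println!("node {node}, root: {is_root}");
    }
}
```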
276 changes: 205 additions & 71 deletions mater/lib/src/stores/filestore.rs
@@ -1,33 +1,40 @@
use bytes::BytesMut;
use futures::stream::StreamExt;
use ipld_core::cid::Cid;
use sha2::{Digest, Sha256};
use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt, AsyncWrite};

use super::Config;
use crate::{
multicodec::SHA_256_CODE, unixfs::stream_balanced_tree, CarV1Header, CarV2Header, CarV2Writer,
Error, Index, IndexEntry, MultihashIndexSorted, SingleWidthIndex,
};
use bytes::BytesMut;
use futures::StreamExt;
use ipld_core::cid::Cid;
use std::collections::HashMap;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt, AsyncWrite};
use tracing::trace;

/// Converts a source stream into a CARv2 file and writes it to an output stream.
///
/// The expanded trait bounds are required because:
/// - `Send + 'static`: The async stream operations require the ability to move the source/output
/// between threads and ensure they live long enough for the entire async operation
/// - `AsyncSeek`: Required for the output to write the final header after processing all blocks
/// - `Unpin`: Required because we need to move the source/output around during async operations
async fn balanced_import<Src, Out>(
mut source: Src,
mut output: Out,
chunk_size: usize,
tree_width: usize,
config: &Config,
) -> Result<Cid, Error>
where
Src: AsyncRead + Unpin,
Out: AsyncWrite + AsyncSeek + Unpin,
Src: AsyncRead + Unpin + Send + 'static,
Out: AsyncWrite + AsyncSeek + Unpin + Send + 'static,
Comment on lines +28 to +29
Collaborator: I'm not the most versed in async Rust, could you elaborate why you needed these bounds?
Contributor Author: added code comments

{
// This custom stream gathers incoming buffers into a single byte chunk of `chunk_size`
// `tokio_util::io::ReaderStream` does a very similar thing, however, it does not attempt
// to fill its buffer before returning, voiding the whole promise of properly sized chunks
// There is an alternative implementation (untested & uses unsafe) in the following GitHub Gist:
// https://gist.github.com/jmg-duarte/f606410a5e0314d7b5cee959a240b2d8
let chunker = async_stream::try_stream! {
let chunker = Box::pin(async_stream::try_stream! {
let mut buf = BytesMut::with_capacity(chunk_size);

loop {
if buf.capacity() < chunk_size {
// BytesMut::reserve *may* allocate more memory than requested to avoid further
@@ -45,82 +52,102 @@ where
// this means there is no right way of knowing when the reader is fully exhausted!
// If we need to support a case like that, we just need to track how many times
// the reader returned 0 and break at a certain point
if source.read_buf(&mut buf).await? == 0 {
// EOF but there's still content to yield -> yield it
if buf.len() > 0 {
let chunk = buf.split();
yield chunk.freeze();
}
break
} else if buf.len() >= chunk_size {
// The buffer may have a larger capacity than chunk_size due to reserve
// this also means that our read may have read more bytes than we expected,
// thats why we check if the length if bigger than the chunk_size and if so
// we split the buffer to the chunk_size, then freeze and return
let read_bytes = source.read_buf(&mut buf).await?;
trace!(bytes_read = read_bytes, buffer_size = buf.len(), "Buffer read status");

while buf.len() >= chunk_size {
let chunk = buf.split_to(chunk_size);
yield chunk.freeze();
} // otherwise, the buffer is not full, so we don't do a thing
}

if read_bytes == 0 && !buf.is_empty() {
let chunk = buf.split();
yield chunk.freeze();
break;
} else if read_bytes == 0 {
break;
}
}
};
});

let nodes = stream_balanced_tree(chunker, tree_width).peekable();
// Create the balanced tree stream
let nodes = stream_balanced_tree(chunker, tree_width, config).peekable();
tokio::pin!(nodes);

// Initialize the writer
let mut writer = CarV2Writer::new(&mut output);
let mut position = 0;

let placeholder_header = CarV2Header::default();
position += writer.write_header(&placeholder_header).await?;
let car_v1_start = position;
// Write placeholder header to be updated later
position += writer.write_header(&CarV2Header::default()).await?;

let placeholder_header_v1 = CarV1Header::default();
position += writer.write_v1_header(&placeholder_header_v1).await?;
// Write CARv1 header and track its start position for index offsets
let car_v1_start = position;
position += writer.write_v1_header(&CarV1Header::default()).await?;

let mut root = None;
let mut entries = vec![];
let mut seen_blocks = HashMap::new();

// Process all blocks from the balanced tree
while let Some(node) = nodes.next().await {
let (node_cid, node_bytes) = node?;
let digest = node_cid.hash().digest().to_owned();
let entry = IndexEntry::new(digest, (position - car_v1_start) as u64);
entries.push(entry);
position += writer.write_block(&node_cid, &node_bytes).await?;

// Handle deduplication
if let Some(existing_offset) = seen_blocks.get(&node_cid) {
entries.push(IndexEntry::new(
node_cid.hash().digest().to_vec(),
(*existing_offset - car_v1_start) as u64,
));
} else {
entries.push(IndexEntry::new(
node_cid.hash().digest().to_vec(),
(position - car_v1_start) as u64,
));
position += writer.write_block(&node_cid, &node_bytes).await?;
seen_blocks.insert(node_cid, position);
}

// Check if this is the root node
if nodes.as_mut().peek().await.is_none() {
root = Some(node_cid);
}
}

let Some(root) = root else {
return Err(Error::EmptyRootsError);
// Create and write index
let index = {
let single_width_index = SingleWidthIndex::try_from(entries)?;
Index::MultihashIndexSorted(MultihashIndexSorted::from_single_width(
SHA_256_CODE,
single_width_index.into(),
))
};
position += writer.write_index(&index).await?;

let index_offset = position;
let single_width_index =
SingleWidthIndex::new(Sha256::output_size() as u32, entries.len() as u64, entries);
let index = Index::MultihashIndexSorted(MultihashIndexSorted::from_single_width(
SHA_256_CODE,
single_width_index.into(),
));
writer.write_index(&index).await?;

// Go back to the beginning of the file
writer.get_inner_mut().rewind().await?;
let header = CarV2Header::new(
false,
(car_v1_start) as u64,
(index_offset - car_v1_start) as u64,
(index_offset) as u64,
);
writer.write_header(&header).await?;
// Update header with final values
let header = {
let data_size = position - car_v1_start;
let data_offset = CarV2Header::SIZE;
CarV2Header::new(
false,
data_offset,
data_size.try_into().unwrap(),
position.try_into().unwrap(),
)
};

// If the length of the roots doesn't match the previous one, you WILL OVERWRITE parts of the file
let header_v1 = CarV1Header::new(vec![root]);
writer.write_v1_header(&header_v1).await?;
// Seek back and write final header
writer
.get_inner_mut()
.seek(std::io::SeekFrom::Start(0))
.await?;
writer.write_header(&header).await?;

// Flush even if the caller doesn't - we did our best
// Finalize writer
writer.finish().await?;

Ok(root)
// Return root CID
Ok(root.expect("the stream yielded at least one element"))
}

/// Convert a `source` stream into a CARv2 file and write it to an `output` stream.
@@ -130,29 +157,30 @@ pub async fn create_filestore<Src, Out>(
config: Config,
) -> Result<Cid, Error>
where
Src: AsyncRead + Unpin,
Out: AsyncWrite + AsyncSeek + Unpin,
Src: AsyncRead + Unpin + Send + 'static,
Out: AsyncWrite + AsyncSeek + Unpin + Send + 'static,
{
match config {
Config::Balanced {
chunk_size,
tree_width,
} => balanced_import(source, output, chunk_size, tree_width).await,
..
} => balanced_import(source, output, chunk_size, tree_width, &config).await,
}
}

#[cfg(test)]
mod test {
use std::path::Path;

use super::*;
use crate::test_utils::assert_buffer_eq;
use crate::unixfs::Data;
use ipld_core::codec::Codec;
use ipld_dagpb::{DagPbCodec, PbNode};
use quick_protobuf::MessageRead;
use std::{collections::HashSet, path::Path};
use tempfile::tempdir;
use tokio::fs::File;

use crate::{
stores::{filestore::create_filestore, Config},
test_utils::assert_buffer_eq,
};

async fn test_filestore_roundtrip<P1, P2>(original: P1, expected: P2)
where
P1: AsRef<Path>,
@@ -174,7 +202,7 @@ mod test {
}

#[tokio::test]
async fn test_filestore_lorem() {
async fn test_lorem_roundtrip() {
test_filestore_roundtrip(
"tests/fixtures/original/lorem.txt",
"tests/fixtures/car_v2/lorem.car",
@@ -183,11 +211,117 @@ mod test {
}

#[tokio::test]
async fn test_filestore_spaceglenda() {
async fn test_spaceglenda_roundtrip() {
test_filestore_roundtrip(
"tests/fixtures/original/spaceglenda.jpg",
"tests/fixtures/car_v2/spaceglenda.car",
)
.await
}

#[tokio::test]
async fn test_filestore_unixfs_dag_structure() {
use rand::{thread_rng, Rng};

let temp_dir = tempdir().unwrap();
let input_path = temp_dir.path().join("input.bin");
let temp_path = temp_dir.path().join("temp.car");

// Create test file with random data to ensure unique chunks
let mut rng = thread_rng();
let test_data = (0..512 * 1024).map(|_| rng.gen::<u8>()).collect::<Vec<_>>();

trace!("Creating test file of size: {} bytes", test_data.len());
tokio::fs::write(&input_path, &test_data).await.unwrap();

let source_file = File::open(&input_path).await.unwrap();
let output_file = File::create(&temp_path).await.unwrap();

let config = Config::balanced_unixfs(64 * 1024, 2);

let root_cid = create_filestore(source_file, output_file, config)
.await
.unwrap();
trace!("Root CID: {}", root_cid);

// Read back and verify structure
let file = File::open(&temp_path).await.unwrap();
let mut reader = crate::CarV2Reader::new(file);

reader.read_pragma().await.unwrap();
reader.read_header().await.unwrap();
reader.read_v1_header().await.unwrap();

// Track all unique blocks and statistics
let mut unique_blocks = HashSet::new();
let mut leaf_blocks = HashSet::new();
let mut parent_blocks = HashSet::new();
let mut level_sizes = Vec::new();
let mut current_level_nodes = HashSet::new();
let mut current_level = 0;

while let Ok((cid, data)) = reader.read_block().await {
unique_blocks.insert(cid);

let pb_node: PbNode = DagPbCodec::decode(&data[..]).unwrap();
let reader =
&mut quick_protobuf::BytesReader::from_bytes(&pb_node.data.clone().unwrap());
let bytes = &pb_node.data.unwrap();
let unixfs_data = Data::from_reader(reader, bytes).unwrap();

if pb_node.links.is_empty() {
leaf_blocks.insert(cid);
trace!("Found leaf node: {} (size: {})", cid, data.len());
trace!(
" Data size: {}",
unixfs_data.Data.as_ref().map_or(0, |d| d.len())
);
trace!(" Blocksizes: {:?}", unixfs_data.blocksizes);

// New level if this is first leaf
if current_level_nodes.is_empty() {
level_sizes.push(0);
current_level = level_sizes.len() - 1;
}
} else {
parent_blocks.insert(cid);

trace!(
"Found parent node: {} with {} links (size: {})",
cid,
pb_node.links.len(),
data.len()
);
trace!(" Total filesize: {:?}", unixfs_data.filesize);
trace!(" Blocksizes: {:?}", unixfs_data.blocksizes);

for link in &pb_node.links {
trace!(" -> Link to: {} (size: {:?})", link.cid, link.size);
}

// Track level changes
if !current_level_nodes.is_empty()
&& current_level_nodes
.iter()
.any(|n| pb_node.links.iter().any(|l| l.cid == *n))
{
level_sizes.push(0);
current_level = level_sizes.len() - 1;
current_level_nodes.clear();
}
}

level_sizes[current_level] += 1;
current_level_nodes.insert(cid);
}

// Verify structure
assert!(!leaf_blocks.is_empty(), "No leaf nodes found");
assert!(!parent_blocks.is_empty(), "No parent nodes found");
assert_eq!(
unique_blocks.len(),
leaf_blocks.len() + parent_blocks.len(),
"Block count mismatch"
);
}
}
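For the reviewer question about the widened bounds: a compact way to see why `Send + 'static` appears is that any future which owns the source and may be moved between worker threads (for example via `tokio::spawn` on the multi-threaded runtime) needs exactly those bounds. A minimal sketch, not taken from the PR:

```rust
use tokio::io::{AsyncRead, AsyncReadExt};

// Sketch only: the bounds on `Src` mirror what `tokio::spawn` demands of anything
// captured by a spawned future - it must be movable across threads (`Send`) and
// must not borrow from the caller's stack (`'static`).
fn spawn_import<Src>(mut source: Src) -> tokio::task::JoinHandle<std::io::Result<usize>>
where
    Src: AsyncRead + Unpin + Send + 'static,
{
    tokio::spawn(async move {
        let mut buf = vec![0u8; 64 * 1024];
        let mut total = 0;
        loop {
            let n = source.read(&mut buf).await?;
            if n == 0 {
                break;
            }
            total += n;
        }
        Ok(total)
    })
}
```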
110 changes: 95 additions & 15 deletions mater/lib/src/stores/mod.rs
Original file line number Diff line number Diff line change
@@ -6,42 +6,122 @@ pub use blockstore::Blockstore;
pub use file::FileBlockstore;
pub use filestore::create_filestore;

/// The default block size, as defined in
/// [boxo](https://github.com/ipfs/boxo/blob/f4fe8997dcbeb39b3a4842d8f08b34739bfd84a4/chunker/parse.go#L13).
pub(crate) const DEFAULT_BLOCK_SIZE: usize = 1024 * 256;
/// The default chunk size for balanced trees (256 KiB)
/// Reference: https://github.com/ipfs/boxo/blob/f4fe8997dcbeb39b3a4842d8f08b34739bfd84a4/chunker/parse.go#L13
pub const DEFAULT_CHUNK_SIZE: usize = 256 * 1024;

/// The default tree width, also called links per block, as defined in
/// [boxo](https://github.com/ipfs/boxo/blob/625ba769263c2beeec934836f54bbd6624db945a/ipld/unixfs/importer/helpers/helpers.go#L16-L30).
pub(crate) const DEFAULT_TREE_WIDTH: usize = 174;
/// The default number of children per parent node in balanced trees.
/// This value comes from the go-ipfs implementation and provides a good balance
/// between tree depth and width for most use cases.
pub const DEFAULT_TREE_WIDTH: usize = 174;

/// Store configuration options.
/// Store configuration options for controlling how data is stored and structured.
#[derive(Debug, Clone)]
pub enum Config {
/// The store should use the balanced tree layout,
/// generating byte chunks of `chunk_size` and
/// generating parent nodes every `tree_width` nodes.
/// Creates a balanced tree structure by generating fixed-size chunks
/// and arranging them into a tree with a specified width.
/// By default, content is wrapped in UnixFS format for IPFS compatibility.
Balanced {
/// The size of the byte chunks.
/// Size of each chunk in bytes. Defaults to 256 KiB.
/// Larger chunks reduce tree depth but increase minimum storage unit size.
chunk_size: usize,
/// The number of children per parent node.

/// Maximum number of children per parent node. Defaults to 174.
/// This affects tree shape and traversal performance.
tree_width: usize,

/// If true, store content directly without UnixFS metadata.
/// More space efficient but loses IPFS compatibility features.
/// Default is false (uses UnixFS wrapping).
raw_mode: bool,
},
}

impl Config {
/// Create a new [`Config::Balanced`].
pub fn balanced(chunk_size: usize, tree_width: usize) -> Self {
/// Creates a new balanced tree configuration with the specified parameters.
///
/// # Arguments
/// * `chunk_size` - Size of each data chunk in bytes
/// * `tree_width` - Maximum number of children per parent node
/// * `raw_mode` - Whether to store content directly without UnixFS wrapping
pub fn balanced(chunk_size: usize, tree_width: usize, raw_mode: bool) -> Self {
Self::Balanced {
chunk_size,
tree_width,
raw_mode,
}
}

/// Creates a new balanced tree configuration with UnixFS wrapping (recommended).
pub fn balanced_unixfs(chunk_size: usize, tree_width: usize) -> Self {
Self::balanced(chunk_size, tree_width, false)
}

/// Creates a new balanced tree configuration with raw storage.
pub fn balanced_raw(chunk_size: usize, tree_width: usize) -> Self {
Self::balanced(chunk_size, tree_width, true)
}
}

impl Default for Config {
fn default() -> Self {
Self::Balanced {
chunk_size: DEFAULT_BLOCK_SIZE,
chunk_size: DEFAULT_CHUNK_SIZE,
tree_width: DEFAULT_TREE_WIDTH,
raw_mode: false, // Default to UnixFS wrapping for IPFS compatibility
}
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_default_config() {
let config = Config::default();
match config {
Config::Balanced {
chunk_size,
tree_width,
raw_mode,
} => {
assert_eq!(chunk_size, DEFAULT_CHUNK_SIZE);
assert_eq!(tree_width, DEFAULT_TREE_WIDTH);
assert!(!raw_mode);
}
}
}

#[test]
fn test_config_builders() {
let chunk_size = 1024;
let tree_width = 10;

let unixfs = Config::balanced_unixfs(chunk_size, tree_width);
match unixfs {
Config::Balanced {
chunk_size: cs,
tree_width: tw,
raw_mode,
} => {
assert_eq!(cs, chunk_size);
assert_eq!(tw, tree_width);
assert!(!raw_mode);
}
}

let raw = Config::balanced_raw(chunk_size, tree_width);
match raw {
Config::Balanced {
chunk_size: cs,
tree_width: tw,
raw_mode,
} => {
assert_eq!(cs, chunk_size);
assert_eq!(tw, tree_width);
assert!(raw_mode);
}
}
}
}
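For library users, the common cases reduce to picking one of the constructors above; a short sketch using the re-exported defaults:

```rust
use mater::{Config, DEFAULT_CHUNK_SIZE, DEFAULT_TREE_WIDTH};

fn pick_config(raw: bool) -> Config {
    if raw {
        // Raw storage: smaller output, no UnixFS metadata.
        Config::balanced_raw(DEFAULT_CHUNK_SIZE, DEFAULT_TREE_WIDTH)
    } else {
        // Equivalent to Config::default(): UnixFS wrapping for IPFS compatibility.
        Config::balanced_unixfs(DEFAULT_CHUNK_SIZE, DEFAULT_TREE_WIDTH)
    }
}
```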
736 changes: 201 additions & 535 deletions mater/lib/src/unixfs/mod.rs

Large diffs are not rendered by default.

111 changes: 95 additions & 16 deletions mater/lib/src/v2/reader.rs
@@ -1,13 +1,17 @@
use ipld_core::cid::Cid;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt, AsyncWriteExt, BufReader};
use std::collections::HashSet;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt, AsyncWriteExt};

use super::index::read_index;
use crate::multicodec::DAG_PB_CODE;
use crate::{
v1::BlockMetadata,
v2::{index::Index, Characteristics, Header, PRAGMA},
Error,
};

use ipld_core::codec::Codec;
use ipld_dagpb::DagPbCodec;
use ipld_dagpb::PbNode;
/// Low-level CARv2 reader.
pub struct Reader<R> {
reader: R,
@@ -22,7 +26,7 @@ impl<R> Reader<R> {

impl<R> Reader<R>
where
R: AsyncRead + Unpin,
R: AsyncRead + Unpin + AsyncSeek,
Member: Why is Seek needed?
Contributor Author: Required for DAG traversal in extract_content to follow block links, me thinks

{
/// Takes in a CID and checks that the contents in the reader matches this CID
pub async fn verify_cid(&mut self, contents_cid: Cid) -> Result<(), Error> {
@@ -63,21 +67,48 @@ where
{
self.read_pragma().await?;
let header = self.read_header().await?;
let _v1_header = self.read_v1_header().await?;
let v1_header = self.read_v1_header().await?;
let mut written = 0;

while let Ok((_cid, contents)) = self.read_block().await {
// CAR file contents is empty
if contents.len() == 0 {
break;
}
// Keep track of root CID and position
let root_cid = v1_header.roots.first().ok_or(Error::EmptyRootsError)?;
let data_end = header.data_offset + header.data_size;

// Track what we've processed and need to process
let mut processed: HashSet<Cid> = HashSet::new();
let mut to_process = vec![*root_cid];

while !to_process.is_empty() {
let position = self.get_inner_mut().stream_position().await?;
let data_end = header.data_offset + header.data_size;
// Add the `written != 0` clause for files that are less than a single block.
if position >= data_end && written != 0 {
break;
}
written += output_file.write(&contents).await?;

if let Ok((cid, contents)) = self.read_block().await {
if contents.len() == 0 {
Contributor: I remember when implementing this that this break condition caused the output file to miss some data. Did you check that the output file contains all the data?
Contributor Author: Fixed with proper DAG traversal. Tested with byte-by-byte comparisons in test_filestore_roundtrip.

break;
}

// Write the block data
written += output_file.write(&contents).await?;

// If it's a DAG-PB node, queue up its children
if cid.codec() == DAG_PB_CODE && !processed.contains(&cid) {
let reader = std::io::BufReader::new(&contents[..]);
if let Ok(node) = DagPbCodec::decode(reader) {
let pb_node: PbNode = node;
to_process.extend(
pb_node
.links
.iter()
.map(|link| link.cid)
.filter(|cid| !processed.contains(cid)),
);
}
}

processed.insert(cid);
}
}

Ok(())
@@ -164,9 +195,11 @@ where
}

/// Function verifies that a given CID matches the CID for the CAR file in the given reader
pub async fn verify_cid<R: AsyncRead + Unpin>(reader: R, contents_cid: Cid) -> Result<(), Error> {
let mut reader = Reader::new(BufReader::new(reader));

pub async fn verify_cid<R>(reader: R, contents_cid: Cid) -> Result<(), Error>
where
R: AsyncRead + AsyncSeek + Unpin,
{
let mut reader = Reader::new(reader);
reader.verify_cid(contents_cid).await
}

@@ -175,11 +208,13 @@ mod tests {
use std::{io::Cursor, path::PathBuf, str::FromStr};

use ipld_core::cid::Cid;
use ipld_core::codec::Codec;
use ipld_dagpb::{DagPbCodec, PbNode};
use sha2::Sha256;
use tokio::{fs::File, io::AsyncSeekExt};

use crate::{
multicodec::{generate_multihash, RAW_CODE, SHA_256_CODE},
multicodec::{generate_multihash, DAG_PB_CODE, RAW_CODE, SHA_256_CODE},
v2::{index::Index, reader::Reader},
verify_cid, Error,
};
@@ -438,4 +473,48 @@ mod tests {
assert_eq!(fst[0].entries.len(), 4);
}
}

#[tokio::test]
async fn test_dag_pb_links() {
let file = File::open("tests/fixtures/car_v2/spaceglenda.car")
.await
.unwrap();
let mut reader = Reader::new(file);

reader.read_pragma().await.unwrap();
reader.read_header().await.unwrap();

let mut found_dag_pb = false;
let mut total_links = 0;

while let Ok((cid, data)) = reader.read_block().await {
if cid.codec() == DAG_PB_CODE {
found_dag_pb = true;
let reader = std::io::BufReader::new(&data[..]);

match DagPbCodec::decode(reader) {
Ok(node) => {
let pb_node: PbNode = node;
if !pb_node.links.is_empty() {
total_links += pb_node.links.len();
}

// Verify each link
for link in pb_node.links {
assert!(
!link.cid.to_string().is_empty(),
"Link should have valid CID"
);
}
}
Err(err) => {
panic!("Failed to decode DAG-PB node: {}", err);
}
}
}
}

assert!(found_dag_pb, "No DAG-PB nodes found in test file");
assert!(total_links > 0, "No links found in DAG-PB nodes");
}
}
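The extraction change above follows dag-pb links instead of reading blocks linearly. The link-following step in isolation, mirroring the decode calls used in the diff (the helper name is illustrative):

```rust
use std::collections::HashSet;

use ipld_core::{cid::Cid, codec::Codec};
use ipld_dagpb::{DagPbCodec, PbNode};

// Decode a dag-pb block and return the child CIDs that have not been visited yet.
fn unvisited_children(block: &[u8], visited: &HashSet<Cid>) -> Vec<Cid> {
    match DagPbCodec::decode(std::io::BufReader::new(block)) {
        Ok(node) => {
            let node: PbNode = node;
            node.links
                .iter()
                .map(|link| link.cid)
                .filter(|cid| !visited.contains(cid))
                .collect()
        }
        Err(_) => Vec::new(), // not a valid dag-pb node; nothing to queue
    }
}
```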
4 changes: 3 additions & 1 deletion mater/lib/src/v2/writer.rs
@@ -100,6 +100,7 @@ where
mod tests {
use std::{collections::BTreeMap, io::Cursor};

use crate::Config;
use ipld_core::cid::Cid;
use sha2::Sha256;
use tokio::{
@@ -245,7 +246,8 @@ mod tests {
.unwrap();
// https://github.com/ipfs/boxo/blob/f4fe8997dcbeb39b3a4842d8f08b34739bfd84a4/chunker/parse.go#L13
let file_chunker = ReaderStream::with_capacity(file, 1024 * 256);
let nodes = stream_balanced_tree(file_chunker, 11)
let config = Config::balanced(1024 * 256, 11, false);
let nodes = stream_balanced_tree(file_chunker, 11, &config)
.collect::<Result<Vec<_>, _>>()
.await
.unwrap();
63 changes: 46 additions & 17 deletions storage-provider/server/src/storage.rs
@@ -10,7 +10,9 @@ use axum::{
};
use futures::{TryFutureExt, TryStreamExt};
use mater::Cid;
use polka_storage_provider_common::commp::{commp, CommPError};
use mater::create_filestore;
use polka_storage_provider_common::commp::commp;
use polka_storage_provider_common::commp::{calculate_piece_commitment, CommPError};
use primitives::{commitment::piece::PaddedPieceSize, proofs::RegisteredPoStProof};
use tokio::{
fs::{self, File},
@@ -23,6 +25,8 @@ use tokio_util::{
use tower_http::trace::TraceLayer;
use uuid::Uuid;

use mater::{DEFAULT_CHUNK_SIZE, DEFAULT_TREE_WIDTH};

#[cfg(feature = "delia")]
mod delia_imports {
pub use axum::{http::Method, response::Json, routing::post};
@@ -115,8 +119,13 @@ fn configure_router(state: Arc<StorageServerState>) -> Router {

#[cfg(not(feature = "delia"))]
fn config_non_delia(state: Arc<StorageServerState>) -> Router {
// Type annotation required to satisfy Send bounds needed for UnixFS processing
// across async operations and thread boundaries
Router::new()
.route("/upload/:cid", put(upload))
.route(
"/upload/:cid",
put(upload as fn(State<Arc<StorageServerState>>, Path<String>, Request<Body>) -> _),
)
.route("/download/:cid", get(download))
.with_state(state)
.layer(
@@ -160,9 +169,9 @@ fn configure_router(state: Arc<StorageServerState>) -> Router {
/// ```
#[tracing::instrument(skip_all, fields(cid))]
async fn upload(
ref s @ State(ref state): State<Arc<StorageServerState>>,
State(state): State<Arc<StorageServerState>>,
Collaborator: This change seems arbitrary
Contributor Author: simpler, more idiomatic pattern for handling Axum state; the old way was over-complicated imo

Path(cid): Path<String>,
request: Request,
request: Request<Body>,
) -> Result<String, (StatusCode, String)> {
let deal_cid = cid::Cid::from_str(&cid).map_err(|err| {
tracing::error!(cid, "failed to parse cid");
@@ -193,20 +202,25 @@ async fn upload(

// Branching needed here since the resulting `StreamReader`s don't have the same type
let file_cid = if request.headers().contains_key("Content-Type") {
// Handle multipart forms
let mut multipart = Multipart::from_request(request, &s)
// Handle the multipart data
let mut multipart = Multipart::from_request(request, &state)
.await
.map_err(|err| (StatusCode::BAD_REQUEST, err.to_string()))?;
let Some(field) = multipart

// Get the field data
let field_bytes = multipart
.next_field()
.map_err(|err| (StatusCode::BAD_REQUEST, err.to_string()))
.await?
else {
return Err((StatusCode::BAD_REQUEST, "empty request".to_string()));
};
.await
.map_err(|err| (StatusCode::BAD_REQUEST, err.to_string()))?
.ok_or_else(|| (StatusCode::BAD_REQUEST, "empty request".to_string()))?
.bytes()
.await
.map_err(|err| (StatusCode::BAD_REQUEST, err.to_string()))?;

// Create reader from the field data
let reader = std::io::Cursor::new(field_bytes);

let field_reader = StreamReader::new(field.map_err(std::io::Error::other));
stream_contents_to_car(state.car_piece_storage_dir.clone().as_ref(), field_reader)
stream_contents_to_car(state.car_piece_storage_dir.clone().as_ref(), reader)
.await
.map_err(|err| {
tracing::error!(%err, "failed to store file into CAR archive");
@@ -366,13 +380,21 @@ fn content_path(folder: &std::path::Path, cid: Cid) -> (String, PathBuf) {
(name, path)
}

/// Reads bytes from the source and writes them to a CAR file.
/// Converts a source stream into a CARv2 file and writes it to an output stream.
///
/// Send + 'static bounds are required because the UnixFS processing involves:
/// - Async stream processing that may cross thread boundaries
/// - State management for DAG construction and deduplication
/// - Block tracking that must be thread-safe
///
/// The expanded trait bounds ensure that all data can be safely moved between
/// threads during async operations.
async fn stream_contents_to_car<R>(
folder: &std::path::Path,
source: R,
) -> Result<Cid, Box<dyn std::error::Error>>
where
R: AsyncRead + Unpin,
R: AsyncRead + Unpin + Send + 'static,
{
// Temp file which will be used to store the CAR file content. The temp
// director has a randomized name and is created in the same folder as the
@@ -384,7 +406,14 @@ where
// Stream the body from source to the temp file.
let file = File::create(&temp_file_path).await?;
let writer = BufWriter::new(file);
let cid = mater::create_filestore(source, writer, mater::Config::default()).await?;

let config = mater::Config::Balanced {
chunk_size: DEFAULT_CHUNK_SIZE,
tree_width: DEFAULT_TREE_WIDTH,
raw_mode: false, // Default to UnixFS
};

let cid = create_filestore(source, writer, config).await?;
tracing::trace!("finished writing the CAR archive");

// If the file is successfully written, we can now move it to the final