From 06542e19daf0a82f117c0814ee9f152122806611 Mon Sep 17 00:00:00 2001 From: nichmor Date: Tue, 11 Jun 2024 18:02:21 +0300 Subject: [PATCH 1/4] feat: refactor archive type --- .../rattler_package_streaming/src/archive.rs | 53 +++++++++++++++++ crates/rattler_package_streaming/src/lib.rs | 1 + crates/rattler_package_streaming/src/read.rs | 59 +++++++++++++++++++ 3 files changed, 113 insertions(+) create mode 100644 crates/rattler_package_streaming/src/archive.rs diff --git a/crates/rattler_package_streaming/src/archive.rs b/crates/rattler_package_streaming/src/archive.rs new file mode 100644 index 000000000..bdaf04ed0 --- /dev/null +++ b/crates/rattler_package_streaming/src/archive.rs @@ -0,0 +1,53 @@ +//! Functions that enable extracting or streaming a Conda package for objects that implement the + +use std::path::{Path, PathBuf}; + +use rattler_conda_types::package::ArchiveType; + +use crate::read::{folder_from_conda, folder_from_tar_bz2}; +/// Test +pub struct Archive { + /// Test + pub archive_type: ArchiveType, + /// Test + pub location: PathBuf, +} + +impl Archive { + /// Test + pub fn new(archive_type: ArchiveType, location: PathBuf) -> Self { + Archive { + archive_type, + location, + } + } + + /// Test + pub fn extract_a_folder( + &self, + folder_to_extract: &Path, + destination: &Path, + ) -> Result<(), std::io::Error> { + match self.archive_type { + ArchiveType::TarBz2 => { + folder_from_tar_bz2(&self.location, folder_to_extract, destination) + } + ArchiveType::Conda => folder_from_conda(&self.location, folder_to_extract, destination), + } + } +} + +impl TryFrom for Archive { + type Error = std::io::Error; + + fn try_from(path: PathBuf) -> Result { + let archive_type = ArchiveType::try_from(path.as_path()).ok_or(std::io::Error::new( + std::io::ErrorKind::NotFound, + "package does not point to valid archive", + ))?; + Ok(Archive { + archive_type, + location: path, + }) + } +} diff --git a/crates/rattler_package_streaming/src/lib.rs 
b/crates/rattler_package_streaming/src/lib.rs index 7b6c11d61..3a636d46f 100644 --- a/crates/rattler_package_streaming/src/lib.rs +++ b/crates/rattler_package_streaming/src/lib.rs @@ -10,6 +10,7 @@ use rattler_digest::{Md5Hash, Sha256Hash}; #[cfg(feature = "reqwest")] use rattler_networking::Redact; +pub mod archive; pub mod read; pub mod seek; diff --git a/crates/rattler_package_streaming/src/read.rs b/crates/rattler_package_streaming/src/read.rs index e532868ab..64155472a 100644 --- a/crates/rattler_package_streaming/src/read.rs +++ b/crates/rattler_package_streaming/src/read.rs @@ -1,6 +1,8 @@ //! Functions that enable extracting or streaming a Conda package for objects that implement the //! [`std::io::Read`] trait. +use crate::{read, seek}; + use super::{ExtractError, ExtractResult}; use std::mem::ManuallyDrop; use std::{ffi::OsStr, io::Read, path::Path}; @@ -86,3 +88,60 @@ pub fn extract_conda(reader: impl Read, destination: &Path) -> Result Result<(), std::io::Error> { + let reader = std::fs::File::open(archive_path)?; + let mut archive = read::stream_tar_bz2(reader); + + for entry in archive.entries()? { + let mut entry = entry?; + let path = entry.path()?; + if let Ok(stripped_path) = path.strip_prefix(find_path) { + let dest_file = dest_folder.join(stripped_path); + if let Some(parent_folder) = dest_file.parent() { + if !parent_folder.exists() { + std::fs::create_dir_all(parent_folder)?; + } + } + let mut dest_file = std::fs::File::create(dest_file)?; + std::io::copy(&mut entry, &mut dest_file)?; + } + } + Ok(()) +} + +/// Extracts a folder from a conda archive. +pub fn folder_from_conda( + archive_path: &Path, + find_path: &Path, + dest_folder: &Path, +) -> Result<(), std::io::Error> { + let reader = std::fs::File::open(archive_path)?; + + let mut archive = if find_path.starts_with("info") { + seek::stream_conda_info(reader).expect("Could not open conda file") + } else { + todo!("Not implemented yet"); + }; + + for entry in archive.entries()? 
{ + let mut entry = entry?; + let path = entry.path()?; + if let Ok(stripped_path) = path.strip_prefix(find_path) { + let dest_file = dest_folder.join(stripped_path); + if let Some(parent_folder) = dest_file.parent() { + if !parent_folder.exists() { + std::fs::create_dir_all(parent_folder)?; + } + } + let mut dest_file = std::fs::File::create(dest_file)?; + std::io::copy(&mut entry, &mut dest_file)?; + } + } + Ok(()) +} From 2995cb9d5421de2730750de6b21cd9b368e75566 Mon Sep 17 00:00:00 2001 From: nichmor Date: Wed, 12 Jun 2024 17:24:36 +0300 Subject: [PATCH 2/4] misc: refactor into fs and archive.rs and add tests --- .../rattler_package_streaming/src/archive.rs | 183 ++++++++++++++++-- crates/rattler_package_streaming/src/fs.rs | 82 ++++++++ crates/rattler_package_streaming/src/read.rs | 59 ------ 3 files changed, 245 insertions(+), 79 deletions(-) diff --git a/crates/rattler_package_streaming/src/archive.rs b/crates/rattler_package_streaming/src/archive.rs index bdaf04ed0..ef95908d7 100644 --- a/crates/rattler_package_streaming/src/archive.rs +++ b/crates/rattler_package_streaming/src/archive.rs @@ -1,28 +1,21 @@ -//! Functions that enable extracting or streaming a Conda package for objects that implement the +//! This module provides the ability to extract a specified directory from a `.tar.bz2` or `.conda` archive. use std::path::{Path, PathBuf}; use rattler_conda_types::package::ArchiveType; -use crate::read::{folder_from_conda, folder_from_tar_bz2}; -/// Test -pub struct Archive { - /// Test +use crate::fs::{extract_directory_from_conda, extract_directory_from_tar_bz2}; + +/// A struct representing an archive file located on disk. +pub struct LocalArchive { + /// The type of the archive (`.tar.bz2` or `.conda`). pub archive_type: ArchiveType, - /// Test + /// Location of the archive file on disk.
pub location: PathBuf, } -impl Archive { - /// Test - pub fn new(archive_type: ArchiveType, location: PathBuf) -> Self { - Archive { - archive_type, - location, - } - } - - /// Test +impl LocalArchive { + /// Extracts the directory `folder_to_extract` from the archive into `destination`. pub fn extract_a_folder( &self, folder_to_extract: &Path, @@ -30,14 +23,22 @@ impl Archive { ) -> Result<(), std::io::Error> { match self.archive_type { ArchiveType::TarBz2 => { - folder_from_tar_bz2(&self.location, folder_to_extract, destination) + extract_directory_from_tar_bz2(&self.location, folder_to_extract, destination) + } + ArchiveType::Conda => { + extract_directory_from_conda(&self.location, folder_to_extract, destination) } - ArchiveType::Conda => folder_from_conda(&self.location, folder_to_extract, destination), } } + + /// Tries to convert the specified path into a `LocalArchive`. + /// Returns an error if the path does not point to a valid archive (`.tar.bz2` or `.conda`). + pub fn try_from_path(path: PathBuf) -> Result { + Self::try_from(path) + } } -impl TryFrom for Archive { +impl TryFrom for LocalArchive { type Error = std::io::Error; fn try_from(path: PathBuf) -> Result { @@ -45,9 +46,151 @@ impl TryFrom for Archive { std::io::ErrorKind::NotFound, "package does not point to valid archive", ))?; - Ok(Archive { + Ok(LocalArchive { archive_type, location: path, }) } } + +#[cfg(test)] +mod tests { + + use tempfile::{tempdir, TempDir}; + + + + use crate::write::{write_conda_package, write_tar_bz2_package, CompressionLevel}; + + use super::*; + use std::fs::{self, File}; + use std::io::{Read}; + + fn create_tar_bz2_archive_with_folder() -> (TempDir, PathBuf) { + let temp_dir = tempdir().unwrap(); + let archive_path = temp_dir.path().join("archive.tar.bz2"); + let archive = File::create(&archive_path).unwrap(); + + // Create info/meta.yaml and info/recipe/recipe.yaml + let info_meta_path = temp_dir.path().join("info").join("meta.yaml"); +
fs::create_dir_all(info_meta_path.parent().unwrap()).unwrap(); + fs::write(&info_meta_path, b"meta: data").unwrap(); + + let info_recipe_path = temp_dir + .path() + .join("info") + .join("recipe") + .join("recipe.yaml"); + fs::create_dir_all(info_recipe_path.parent().unwrap()).unwrap(); + fs::write(&info_recipe_path, b"its_recipe_yaml: yes").unwrap(); + + // Create tar.bz2 archive + write_tar_bz2_package( + archive, + temp_dir.path(), + vec![info_meta_path, info_recipe_path].as_slice(), + CompressionLevel::default(), + None, + None, + ) + .unwrap(); + (temp_dir, archive_path) + } + + fn create_conda_archive_with_folder() -> (TempDir, PathBuf) { + let temp_dir = tempdir().unwrap(); + let archive_path = temp_dir.path().join("archive.conda"); + let archive = File::create(&archive_path).unwrap(); + + // Create info/meta.yaml and info/recipe/recipe.yaml + let info_meta_path = temp_dir.path().join("info").join("meta.yaml"); + fs::create_dir_all(info_meta_path.parent().unwrap()).unwrap(); + fs::write(&info_meta_path, b"meta: data").unwrap(); + + let info_recipe_path = temp_dir + .path() + .join("info") + .join("recipe") + .join("recipe.yaml"); + fs::create_dir_all(info_recipe_path.parent().unwrap()).unwrap(); + fs::write(&info_recipe_path, b"its_recipe_yaml: yes").unwrap(); + + let paths = vec![info_meta_path, info_recipe_path]; + + write_conda_package( + archive, + temp_dir.path(), + paths.as_slice(), + CompressionLevel::default(), + None, + "test-package", + None, + None, + ) + .unwrap(); + + (temp_dir, archive_path) + } + + #[test] + fn test_local_archive_from_tar_bz() { + let location = PathBuf::from("/path/to/archive.tar.bz2"); + LocalArchive::try_from_path(location.clone()) + .expect("Archive should be created of tar bz type"); + } + + #[test] + fn test_local_archive_from_conda() { + let location = PathBuf::from("/path/to/conda_archive.conda"); + LocalArchive::try_from_path(location.clone()) + .expect("Archive should be created of conda type"); + } + + #[test] + fn 
test_extract_from_tar_bz2() { + // Create a tar.bz2 archive with a folder containing one file + let (_tmp, archive_path) = create_tar_bz2_archive_with_folder(); + + let archive = LocalArchive::try_from_path(archive_path.clone()).unwrap(); + let folder_to_extract = Path::new("info/recipe"); + let destination = tempdir().unwrap().path().to_path_buf().join("extract_to"); + + // Extract the folder + archive + .extract_a_folder(folder_to_extract, &destination) + .unwrap(); + + // Verify the extraction + let extracted_file_path = destination.join("recipe.yaml"); + assert!(extracted_file_path.exists()); + + let mut extracted_file = File::open(&extracted_file_path).unwrap(); + let mut content = Vec::default(); + extracted_file.read_to_end(&mut content).unwrap(); + assert_eq!(content, b"its_recipe_yaml: yes"); + } + + #[test] + fn test_extract_from_conda() { + // Create a conda archive with a folder containing one file + let (_tmp, archive_path) = create_conda_archive_with_folder(); + + let archive = LocalArchive::try_from_path(archive_path.clone()).unwrap(); + let folder_to_extract = Path::new("info/recipe"); + let destination = tempdir().unwrap().path().to_path_buf().join("extract_to"); + + // Extract the folder + archive + .extract_a_folder(folder_to_extract, &destination) + .unwrap(); + + // Verify the extraction + let extracted_file_path = destination.join("recipe.yaml"); + assert!(extracted_file_path.exists()); + + let mut extracted_file = File::open(&extracted_file_path).unwrap(); + let mut content = Vec::default(); + extracted_file.read_to_end(&mut content).unwrap(); + assert_eq!(content, b"its_recipe_yaml: yes"); + } +} diff --git a/crates/rattler_package_streaming/src/fs.rs b/crates/rattler_package_streaming/src/fs.rs index b7c5278a4..89fddee8f 100644 --- a/crates/rattler_package_streaming/src/fs.rs +++ b/crates/rattler_package_streaming/src/fs.rs @@ -1,8 +1,10 @@ //! Functions to extracting or stream a Conda package from a file on disk.
+use crate::{read, seek}; use crate::{ExtractError, ExtractResult}; use rattler_conda_types::package::ArchiveType; use std::fs::File; +use std::io::BufReader; use std::path::Path; /// Extracts the contents a `.tar.bz2` package archive at the specified path to a directory. @@ -52,3 +54,83 @@ pub fn extract(archive: &Path, destination: &Path) -> Result extract_conda(archive, destination), } } + +/// Extracts a specified directory from a `.tar.bz2` archive into a destination folder. +/// +/// ```rust,no_run +/// # use std::path::Path; +/// use rattler_package_streaming::fs::extract_directory_from_tar_bz2; +/// let _ = extract_directory_from_tar_bz2(Path::new("archive/location"), Path::new("directory_to_extract"), Path::new("destination/directory")); +/// ``` +pub fn extract_directory_from_tar_bz2( + archive_path: &Path, + directory_to_extract: &Path, + dest_directory: &Path, +) -> Result<(), std::io::Error> { + let reader = std::fs::File::open(archive_path)?; + eprintln!("File openeded"); + let mut archive = read::stream_tar_bz2(reader); + + for entry in archive.entries()? { + let mut entry = entry?; + let path = entry.path()?; + eprintln!("Entry path is {:?}", path); + + if let Ok(stripped_path) = path.strip_prefix(directory_to_extract) { + eprintln!("Stripped path {:?}", stripped_path); + let dest_file = dest_directory.join(stripped_path); + if let Some(parent_folder) = dest_file.parent() { + eprintln!("Parent folder is {:?}", parent_folder); + if !parent_folder.exists() { + eprintln!("Creating parent folder"); + std::fs::create_dir_all(parent_folder)?; + eprintln!("Parent folder created"); + } + } + eprintln!("Created "); + eprintln!("Creating file {:?} ", dest_file); + let mut dest_file = std::fs::File::create(dest_file)?; + eprintln!("Copying"); + std::io::copy(&mut entry, &mut dest_file)?; + } + } + Ok(()) +} + +/// Extracts a specified directory from a `.conda` archive into a destination folder.
+/// +/// ```rust,no_run +/// # use std::path::Path; +/// use rattler_package_streaming::fs::extract_directory_from_conda; +/// let _ = extract_directory_from_conda(Path::new("archive/location"), Path::new("directory_to_extract"), Path::new("destination/directory")); +/// ``` +pub fn extract_directory_from_conda( + archive_path: &Path, + directory_to_extract: &Path, + dest_directory: &Path, +) -> Result<(), std::io::Error> { + let reader = std::fs::File::open(archive_path)?; + let buf_reader = BufReader::new(reader); + + let mut archive = if directory_to_extract.starts_with("info") { + seek::stream_conda_info(buf_reader).expect("Could not open conda file") + } else { + todo!("Not implemented yet"); + }; + + for entry in archive.entries()? { + let mut entry = entry?; + let path = entry.path()?; + if let Ok(stripped_path) = path.strip_prefix(directory_to_extract) { + let dest_file = dest_directory.join(stripped_path); + if let Some(parent_folder) = dest_file.parent() { + if !parent_folder.exists() { + std::fs::create_dir_all(parent_folder)?; + } + } + let mut dest_file = std::fs::File::create(dest_file)?; + std::io::copy(&mut entry, &mut dest_file)?; + } + } + Ok(()) +} diff --git a/crates/rattler_package_streaming/src/read.rs b/crates/rattler_package_streaming/src/read.rs index 64155472a..e532868ab 100644 --- a/crates/rattler_package_streaming/src/read.rs +++ b/crates/rattler_package_streaming/src/read.rs @@ -1,8 +1,6 @@ //! Functions that enable extracting or streaming a Conda package for objects that implement the //! [`std::io::Read`] trait. -use crate::{read, seek}; - use super::{ExtractError, ExtractResult}; use std::mem::ManuallyDrop; use std::{ffi::OsStr, io::Read, path::Path}; @@ -88,60 +86,3 @@ pub fn extract_conda(reader: impl Read, destination: &Path) -> Result Result<(), std::io::Error> { - let reader = std::fs::File::open(archive_path)?; - let mut archive = read::stream_tar_bz2(reader); - - for entry in archive.entries()?
{ - let mut entry = entry?; - let path = entry.path()?; - if let Ok(stripped_path) = path.strip_prefix(find_path) { - let dest_file = dest_folder.join(stripped_path); - if let Some(parent_folder) = dest_file.parent() { - if !parent_folder.exists() { - std::fs::create_dir_all(parent_folder)?; - } - } - let mut dest_file = std::fs::File::create(dest_file)?; - std::io::copy(&mut entry, &mut dest_file)?; - } - } - Ok(()) -} - -/// Extracts a folder from a conda archive. -pub fn folder_from_conda( - archive_path: &Path, - find_path: &Path, - dest_folder: &Path, -) -> Result<(), std::io::Error> { - let reader = std::fs::File::open(archive_path)?; - - let mut archive = if find_path.starts_with("info") { - seek::stream_conda_info(reader).expect("Could not open conda file") - } else { - todo!("Not implemented yet"); - }; - - for entry in archive.entries()? { - let mut entry = entry?; - let path = entry.path()?; - if let Ok(stripped_path) = path.strip_prefix(find_path) { - let dest_file = dest_folder.join(stripped_path); - if let Some(parent_folder) = dest_file.parent() { - if !parent_folder.exists() { - std::fs::create_dir_all(parent_folder)?; - } - } - let mut dest_file = std::fs::File::create(dest_file)?; - std::io::copy(&mut entry, &mut dest_file)?; - } - } - Ok(()) -} From bb0a234e3ed26ed06a3f74652980c7522ba620c6 Mon Sep 17 00:00:00 2001 From: nichmor Date: Wed, 12 Jun 2024 17:27:30 +0300 Subject: [PATCH 3/4] misc: refactor into fs and archive.rs and add tests --- crates/rattler_package_streaming/src/archive.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crates/rattler_package_streaming/src/archive.rs b/crates/rattler_package_streaming/src/archive.rs index ef95908d7..0f4a0755d 100644 --- a/crates/rattler_package_streaming/src/archive.rs +++ b/crates/rattler_package_streaming/src/archive.rs @@ -55,16 +55,14 @@ impl TryFrom for LocalArchive { #[cfg(test)] mod tests { - + use tempfile::{tempdir, TempDir}; - - use 
crate::write::{write_conda_package, write_tar_bz2_package, CompressionLevel}; use super::*; use std::fs::{self, File}; - use std::io::{Read}; + use std::io::Read; fn create_tar_bz2_archive_with_folder() -> (TempDir, PathBuf) { let temp_dir = tempdir().unwrap(); From c445c7ccbd9f3a862bc9bbbae7627d779c467223 Mon Sep 17 00:00:00 2001 From: nichmor Date: Wed, 12 Jun 2024 17:29:07 +0300 Subject: [PATCH 4/4] misc: refactor into fs and archive.rs and add tests --- crates/rattler_package_streaming/src/fs.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/crates/rattler_package_streaming/src/fs.rs b/crates/rattler_package_streaming/src/fs.rs index 89fddee8f..c6f9eca2a 100644 --- a/crates/rattler_package_streaming/src/fs.rs +++ b/crates/rattler_package_streaming/src/fs.rs @@ -68,29 +68,20 @@ pub fn extract_directory_from_tar_bz2( dest_directory: &Path, ) -> Result<(), std::io::Error> { let reader = std::fs::File::open(archive_path)?; - eprintln!("File openeded"); let mut archive = read::stream_tar_bz2(reader); for entry in archive.entries()? { let mut entry = entry?; let path = entry.path()?; - eprintln!("Entry path is {:?}", path); if let Ok(stripped_path) = path.strip_prefix(directory_to_extract) { - eprintln!("Stripped path {:?}", stripped_path); let dest_file = dest_directory.join(stripped_path); if let Some(parent_folder) = dest_file.parent() { - eprintln!("Parent folder is {:?}", parent_folder); if !parent_folder.exists() { - eprintln!("Creating parent folder"); std::fs::create_dir_all(parent_folder)?; - eprintln!("Parent folder created"); } } - eprintln!("Created "); - eprintln!("Creating file {:?} ", dest_file); let mut dest_file = std::fs::File::create(dest_file)?; - eprintln!("Copying"); std::io::copy(&mut entry, &mut dest_file)?; } }