diff --git a/NAMESPACE b/NAMESPACE
index c255a14..8e8c023 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -98,6 +98,7 @@ export(six_bucket_change_user)
 export(six_bucket_delete)
 export(six_bucket_permissions)
 export(six_bucket_remove_user)
+export(six_bucket_upload)
 export(six_file_upload)
 export(six_user_create)
 export(six_user_creds)
@@ -131,8 +132,12 @@ importFrom(dplyr,rowwise)
 importFrom(dplyr,select)
 importFrom(dplyr,starts_with)
 importFrom(dplyr,ungroup)
+importFrom(fs,dir_ls)
 importFrom(fs,file_exists)
 importFrom(fs,fs_bytes)
+importFrom(fs,is_dir)
+importFrom(fs,path_join)
+importFrom(fs,path_split)
 importFrom(glue,glue)
 importFrom(jsonlite,fromJSON)
 importFrom(jsonlite,toJSON)
@@ -158,6 +163,7 @@ importFrom(purrr,safely)
 importFrom(rlang,":=")
 importFrom(rlang,abort)
 importFrom(rlang,has_name)
+importFrom(rlang,is_character)
 importFrom(rlang,is_empty)
 importFrom(rlang,is_na)
 importFrom(s3fs,s3_file_system)
diff --git a/R/bucket.R b/R/bucket.R
index dbf9b66..60d4747 100644
--- a/R/bucket.R
+++ b/R/bucket.R
@@ -261,6 +261,156 @@ aws_bucket_upload <- function(
   s3_path(bucket)
 }
 
+#' Get the bucket name from a remote path
+#'
+#' @param x (character) a remote path; only the first element is used
+#' @return the first path component of `x`, used as the bucket name
+#' @noRd
+bucket_name <- function(x) {
+  first(fs::path_split(first(x))[[1]])
+}
+
+#' Get file path starting at a certain path component
+#' @importFrom fs path_join path_split
+#' @keywords internal
+#' @examplesIf interactive()
+#' path_from(path = "Rtmpxsqth0/apples/mcintosh/orange.csv", from = "apples")
+path_from <- function(path, from) {
+  parts <- fs::path_split(path)[[1]]
+  # match() gives only the first occurrence, so a component name that
+  # repeats in `path` cannot hand a length > 1 operand to `:`
+  start <- match(from, parts)
+  if (is.na(start)) {
+    abort(glue("'{from}' is not a component of '{path}'"))
+  }
+  fs::path_join(parts[start:length(parts)])
+}
+
+#' Expand files and directories into a table of keys and file paths
+#'
+#' @importFrom fs is_dir dir_ls
+#' @importFrom purrr list_rbind
+#' @param path (character) file and/or directory paths
+#' @return tibble with character columns `key` and `path`, one row per
+#' file; always has both columns, even when no files are found
+#' @noRd
+explode_file_paths <- function(path) {
+  empty <- tibble(key = character(), path = character())
+  paths <- map(path, \(p) {
+    if (!is_dir(p)) {
+      return(tibble(key = basename(p), path = as.character(p)))
+    }
+    files <- dir_ls(p, recurse = TRUE, type = "file")
+    if (length(files) == 0) {
+      return(empty)
+    }
+    # keys are relative to the parent of `p` so they start with the
+    # directory's own name, however often that name occurs in the path
+    tibble(
+      key = as.character(fs::path_rel(files, start = fs::path_dir(p))),
+      path = as.character(files)
+    )
+  })
+  list_rbind(c(list(empty), paths))
+}
+
+#' Magically upload a mix of files and directories into a bucket
+#'
+#' @export
+#' @param path (character) one or more file paths to add to
+#' the `bucket`. required. can include directories or files
+#' @param remote (character/scalar) a character string to use to upload
+#' files in `path`. the first component of the path will be used as the
+#' bucket name. any subsequent path components will be used as a
+#' key prefix for all objects created in the bucket
+#' @inheritParams aws_file_copy
+#' @param ... named params passed on to
+#' [put_object](https://www.paws-r-sdk.com/docs/s3_put_object/)
+#' @section What is magical:
+#' - Exits early if folder or files do not exist
+#' - Creates the bucket if it does not exist
+#' - Adds files to the bucket at the top level with key as the file name
+#' - Adds directories to the bucket, reconstructing the exact directory
+#' structure in the S3 bucket
+#' @family buckets
+#' @family magicians
+#' @return (character) a vector of remote s3 paths where your
+#' files are located
+#' @examplesIf interactive()
+#' # single file, single remote path
+#' bucket1 <- random_string("bucket")
+#' demo_rds_file <- file.path(system.file(), "Meta/demo.rds")
+#' six_bucket_upload(path = demo_rds_file, remote = bucket1)
+#'
+#' ## a file and a directory - with a single remote path
+#' bucket2 <- random_string("bucket")
+#' library(fs)
+#' tdir <- path(path_temp(), "mytmp")
+#' dir_create(tdir)
+#' purrr::map(letters, \(l) file_create(path(tdir, l)))
+#' dir_tree(tdir)
+#' six_bucket_upload(path = c(demo_rds_file, tdir), remote = bucket2)
+#'
+#' ## a directory with nested dirs - with a single remote path
+#' bucket3 <- random_string("bucket")
+#' library(fs)
+#' tdir <- path(path_temp(), "apples")
+#' dir_create(tdir)
+#' dir_create(path(tdir, "mcintosh"))
+#' dir_create(path(tdir, "pink-lady"))
+#' cat("Some text in a readme", file = path(tdir, "README.md"))
+#' write.csv(Orange, file = path(tdir, "mcintosh", "orange.csv"))
+#' write.csv(iris, file = path(tdir, "pink-lady", "iris.csv"))
+#' dir_tree(tdir)
+#' six_bucket_upload(path = tdir, remote = path(bucket3, "fruit/basket"))
+#'
+#' # cleanup
+#' six_bucket_delete(bucket1, force = TRUE)
+#' six_bucket_delete(bucket2, force = TRUE)
+#' six_bucket_delete(bucket3, force = TRUE)
+six_bucket_upload <- function(path, remote, force = FALSE, ...) {
+  stop_if_not(is_character(path), "{.strong path} must be character")
+  stop_if_not(is_character(remote), "{.strong remote} must be character")
+  stop_if_not(length(remote) == 1, "{.strong remote} must be length 1")
+
+  # `files` (rather than reusing `path`) holds the key/path table so the
+  # local name never shadows the fs::path() helper called further down
+  files <- explode_file_paths(path)
+  stop_if_not(
+    all(file_exists(files$path)),
+    "one or more of {.strong path} don't exist"
+  )
+
+  bucket <- bucket_name(remote)
+  bucket_create_if_not(bucket, force)
+  if (!aws_bucket_exists(bucket)) {
+    cli_warning("bucket {.strong {bucket}} not created; exiting")
+    return(invisible())
+  }
+
+  # if remote has more than bucket name, use folder for keys
+  remote_parts <- path_split(remote)[[1]]
+  if (length(remote_parts) > 1) {
+    key_prefix <- path_join(remote_parts[-1])
+    cli_info("using key prefix {.strong {key_prefix}}")
+    # namespaced: fs::path is not among the importFrom(fs, ...) entries in
+    # NAMESPACE, so a bare path() only resolves when fs happens to be attached
+    files$key <- as.character(fs::path(key_prefix, files$key))
+  }
+
+  # one put_object call per file; walk2 iterates the columns directly
+  # instead of coercing the tibble to a matrix with apply()
+  purrr::walk2(files$key, files$path, \(key, file) {
+    con_s3()$put_object(
+      Bucket = bucket,
+      Key = key,
+      Body = file,
+      ...
+    )
+  })
+  s3_path(bucket, files$key)
+}
+
 #' List objects in an S3 bucket
 #'
 #' @export
diff --git a/R/sixtyfour-package.R b/R/sixtyfour-package.R
index 1ef4842..b57e0fb 100644
--- a/R/sixtyfour-package.R
+++ b/R/sixtyfour-package.R
@@ -8,7 +8,7 @@
 #' @importFrom glue glue
 #' @importFrom jsonlite toJSON fromJSON
 #' @importFrom curl curl_fetch_memory
-#' @importFrom rlang abort is_na
+#' @importFrom rlang abort is_na is_character
 ## usethis namespace: end
 NULL
 
diff --git a/R/utils.R b/R/utils.R
index 8e506b4..c503ee5 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -32,6 +32,12 @@ yesno <- function(msg, .envir = parent.frame()) {
   utils::menu(qs[rand]) != which(rand == 1)
 }
 
+#' Get the first element of a vector
+#' @keywords internal
+#' @param x a vector
+#' @return the first element of the vector
+first <- function(x) x[1]
+
 #' Get the last element of a vector
 #' @keywords internal
 #' @param x a vector
diff --git a/_pkgdown.yml b/_pkgdown.yml
index f700624..1c0bdaa 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -21,6 +21,7 @@ reference:
       - six_bucket_permissions
       - six_bucket_delete
       - six_file_upload
+      - six_bucket_upload
       - six_user_creds
       - group_policies
   - title: paws and s3fs clients
diff --git a/man/aws_bucket_create.Rd b/man/aws_bucket_create.Rd
index 2bb41a6..6aff201 100644
--- a/man/aws_bucket_create.Rd
+++ b/man/aws_bucket_create.Rd
@@ -35,6 +35,7 @@ Other buckets:
 \code{\link{aws_bucket_tree}()},
 \code{\link{aws_bucket_upload}()},
 \code{\link{aws_buckets}()},
-\code{\link{six_bucket_delete}()}
+\code{\link{six_bucket_delete}()},
+\code{\link{six_bucket_upload}()}
 }
 \concept{buckets}
diff --git a/man/aws_bucket_delete.Rd b/man/aws_bucket_delete.Rd
index d7428d4..6305e42 100644
--- a/man/aws_bucket_delete.Rd
+++ b/man/aws_bucket_delete.Rd
@@ -46,6 +46,7 @@ Other buckets:
 \code{\link{aws_bucket_tree}()},
 \code{\link{aws_bucket_upload}()},
 \code{\link{aws_buckets}()},
-\code{\link{six_bucket_delete}()}
+\code{\link{six_bucket_delete}()},
+\code{\link{six_bucket_upload}()} } \concept{buckets} diff --git a/man/aws_bucket_download.Rd b/man/aws_bucket_download.Rd index 43214cf..921c723 100644 --- a/man/aws_bucket_download.Rd +++ b/man/aws_bucket_download.Rd @@ -44,6 +44,7 @@ Other buckets: \code{\link{aws_bucket_tree}()}, \code{\link{aws_bucket_upload}()}, \code{\link{aws_buckets}()}, -\code{\link{six_bucket_delete}()} +\code{\link{six_bucket_delete}()}, +\code{\link{six_bucket_upload}()} } \concept{buckets} diff --git a/man/aws_bucket_exists.Rd b/man/aws_bucket_exists.Rd index 23e3ef1..805e240 100644 --- a/man/aws_bucket_exists.Rd +++ b/man/aws_bucket_exists.Rd @@ -36,6 +36,7 @@ Other buckets: \code{\link{aws_bucket_tree}()}, \code{\link{aws_bucket_upload}()}, \code{\link{aws_buckets}()}, -\code{\link{six_bucket_delete}()} +\code{\link{six_bucket_delete}()}, +\code{\link{six_bucket_upload}()} } \concept{buckets} diff --git a/man/aws_bucket_list_objects.Rd b/man/aws_bucket_list_objects.Rd index f4b2b17..dc7a414 100644 --- a/man/aws_bucket_list_objects.Rd +++ b/man/aws_bucket_list_objects.Rd @@ -50,6 +50,7 @@ Other buckets: \code{\link{aws_bucket_tree}()}, \code{\link{aws_bucket_upload}()}, \code{\link{aws_buckets}()}, -\code{\link{six_bucket_delete}()} +\code{\link{six_bucket_delete}()}, +\code{\link{six_bucket_upload}()} } \concept{buckets} diff --git a/man/aws_bucket_tree.Rd b/man/aws_bucket_tree.Rd index cc26ed6..8a00051 100644 --- a/man/aws_bucket_tree.Rd +++ b/man/aws_bucket_tree.Rd @@ -57,6 +57,7 @@ Other buckets: \code{\link{aws_bucket_list_objects}()}, \code{\link{aws_bucket_upload}()}, \code{\link{aws_buckets}()}, -\code{\link{six_bucket_delete}()} +\code{\link{six_bucket_delete}()}, +\code{\link{six_bucket_upload}()} } \concept{buckets} diff --git a/man/aws_bucket_upload.Rd b/man/aws_bucket_upload.Rd index 495b92a..cde06d4 100644 --- a/man/aws_bucket_upload.Rd +++ b/man/aws_bucket_upload.Rd @@ -69,6 +69,7 @@ Other buckets: \code{\link{aws_bucket_list_objects}()}, 
\code{\link{aws_bucket_tree}()}, \code{\link{aws_buckets}()}, -\code{\link{six_bucket_delete}()} +\code{\link{six_bucket_delete}()}, +\code{\link{six_bucket_upload}()} } \concept{buckets} diff --git a/man/aws_buckets.Rd b/man/aws_buckets.Rd index 7c518ba..b9692e6 100644 --- a/man/aws_buckets.Rd +++ b/man/aws_buckets.Rd @@ -48,6 +48,7 @@ Other buckets: \code{\link{aws_bucket_list_objects}()}, \code{\link{aws_bucket_tree}()}, \code{\link{aws_bucket_upload}()}, -\code{\link{six_bucket_delete}()} +\code{\link{six_bucket_delete}()}, +\code{\link{six_bucket_upload}()} } \concept{buckets} diff --git a/man/first.Rd b/man/first.Rd new file mode 100644 index 0000000..d0d17d9 --- /dev/null +++ b/man/first.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{first} +\alias{first} +\title{Get the first element of a vector} +\usage{ +first(x) +} +\arguments{ +\item{x}{a vector} +} +\value{ +the first element of the vector +} +\description{ +Get the first element of a vector +} +\keyword{internal} diff --git a/man/path_from.Rd b/man/path_from.Rd new file mode 100644 index 0000000..2e7ed40 --- /dev/null +++ b/man/path_from.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bucket.R +\name{path_from} +\alias{path_from} +\title{Get file path starting at a certain path component} +\usage{ +path_from(path, from) +} +\description{ +Get file path starting at a certain path component +} +\examples{ +\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +path_from(path = "Rtmpxsqth0/apples/mcintosh/orange.csv", from = "apples") +\dontshow{\}) # examplesIf} +} +\keyword{internal} diff --git a/man/six_admin_setup.Rd b/man/six_admin_setup.Rd index 09a673b..a661a74 100644 --- a/man/six_admin_setup.Rd +++ b/man/six_admin_setup.Rd @@ -30,6 +30,7 @@ AWS account setup for administrators \seealso{ Other magicians: 
\code{\link{six_bucket_delete}()}, +\code{\link{six_bucket_upload}()}, \code{\link{six_file_upload}()}, \code{\link{six_user_create}()}, \code{\link{six_user_delete}()} diff --git a/man/six_bucket_delete.Rd b/man/six_bucket_delete.Rd index 5e9952f..ee33407 100644 --- a/man/six_bucket_delete.Rd +++ b/man/six_bucket_delete.Rd @@ -71,10 +71,12 @@ Other buckets: \code{\link{aws_bucket_list_objects}()}, \code{\link{aws_bucket_tree}()}, \code{\link{aws_bucket_upload}()}, -\code{\link{aws_buckets}()} +\code{\link{aws_buckets}()}, +\code{\link{six_bucket_upload}()} Other magicians: \code{\link{six_admin_setup}()}, +\code{\link{six_bucket_upload}()}, \code{\link{six_file_upload}()}, \code{\link{six_user_create}()}, \code{\link{six_user_delete}()} diff --git a/man/six_bucket_upload.Rd b/man/six_bucket_upload.Rd new file mode 100644 index 0000000..cc6e17d --- /dev/null +++ b/man/six_bucket_upload.Rd @@ -0,0 +1,98 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bucket.R +\name{six_bucket_upload} +\alias{six_bucket_upload} +\title{Magically upload a mix of files and directories into a bucket} +\usage{ +six_bucket_upload(path, remote, force = FALSE, ...) +} +\arguments{ +\item{path}{(character) one or more file paths to add to +the \code{bucket}. required. can include directories or files} + +\item{remote}{(character/scalar) a character string to use to upload +files in \code{path}. the first component of the path will be used as the +bucket name. any subsequent path components will be used as a +key prefix for all objects created in the bucket} + +\item{force}{(logical) force bucket creation without going through +the prompt. default: \code{FALSE}. 
Should only be set to \code{TRUE} when +required for non-interactive use.} + +\item{...}{named params passed on to +\href{https://www.paws-r-sdk.com/docs/s3_put_object/}{put_object}} +} +\value{ +(character) a vector of remote s3 paths where your +files are located +} +\description{ +Magically upload a mix of files and directories into a bucket +} +\section{What is magical}{ + +\itemize{ +\item Exits early if folder or files do not exist +\item Creates the bucket if it does not exist +\item Adds files to the bucket at the top level with key as the file name +\item Adds directories to the bucket, reconstructing the exact directory +structure in the S3 bucket +} +} + +\examples{ +\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# single file, single remote path +bucket1 <- random_string("bucket") +demo_rds_file <- file.path(system.file(), "Meta/demo.rds") +six_bucket_upload(path = demo_rds_file, remote = bucket1) + +## a file and a directory - with a single remote path +bucket2 <- random_string("bucket") +library(fs) +tdir <- path(path_temp(), "mytmp") +dir_create(tdir) +purrr::map(letters, \(l) file_create(path(tdir, l))) +dir_tree(tdir) +six_bucket_upload(path = c(demo_rds_file, tdir), remote = bucket2) + +## a directory with nested dirs - with a single remote path +bucket3 <- random_string("bucket") +library(fs) +tdir <- path(path_temp(), "apples") +dir_create(tdir) +dir_create(path(tdir, "mcintosh")) +dir_create(path(tdir, "pink-lady")) +cat("Some text in a readme", file = path(tdir, "README.md")) +write.csv(Orange, file = path(tdir, "mcintosh", "orange.csv")) +write.csv(iris, file = path(tdir, "pink-lady", "iris.csv")) +dir_tree(tdir) +six_bucket_upload(path = tdir, remote = path(bucket3, "fruit/basket")) + +# cleanup +six_bucket_delete(bucket1, force = TRUE) +six_bucket_delete(bucket2, force = TRUE) +six_bucket_delete(bucket3, force = TRUE) +\dontshow{\}) # examplesIf} +} +\seealso{ +Other buckets: 
+\code{\link{aws_bucket_create}()},
+\code{\link{aws_bucket_delete}()},
+\code{\link{aws_bucket_download}()},
+\code{\link{aws_bucket_exists}()},
+\code{\link{aws_bucket_list_objects}()},
+\code{\link{aws_bucket_tree}()},
+\code{\link{aws_bucket_upload}()},
+\code{\link{aws_buckets}()},
+\code{\link{six_bucket_delete}()}
+
+Other magicians:
+\code{\link{six_admin_setup}()},
+\code{\link{six_bucket_delete}()},
+\code{\link{six_file_upload}()},
+\code{\link{six_user_create}()},
+\code{\link{six_user_delete}()}
+}
+\concept{buckets}
+\concept{magicians}
diff --git a/man/six_file_upload.Rd b/man/six_file_upload.Rd
index 4d6a11c..8f9788f 100644
--- a/man/six_file_upload.Rd
+++ b/man/six_file_upload.Rd
@@ -78,6 +78,7 @@ Other files:
 Other magicians:
 \code{\link{six_admin_setup}()},
 \code{\link{six_bucket_delete}()},
+\code{\link{six_bucket_upload}()},
 \code{\link{six_user_create}()},
 \code{\link{six_user_delete}()}
 }
diff --git a/man/six_user_create.Rd b/man/six_user_create.Rd
index baaf494..44d8562 100644
--- a/man/six_user_create.Rd
+++ b/man/six_user_create.Rd
@@ -61,6 +61,7 @@ Other users:
 Other magicians:
 \code{\link{six_admin_setup}()},
 \code{\link{six_bucket_delete}()},
+\code{\link{six_bucket_upload}()},
 \code{\link{six_file_upload}()},
 \code{\link{six_user_delete}()}
 }
diff --git a/man/six_user_delete.Rd b/man/six_user_delete.Rd
index 87ab92a..1c0aacb 100644
--- a/man/six_user_delete.Rd
+++ b/man/six_user_delete.Rd
@@ -51,6 +51,7 @@ Other users:
 Other magicians:
 \code{\link{six_admin_setup}()},
 \code{\link{six_bucket_delete}()},
+\code{\link{six_bucket_upload}()},
 \code{\link{six_file_upload}()},
 \code{\link{six_user_create}()}
 }
diff --git a/tests/testthat/test-buckets.R b/tests/testthat/test-buckets.R
index 330fd82..f571248 100644
--- a/tests/testthat/test-buckets.R
+++ b/tests/testthat/test-buckets.R
@@ -132,6 +132,84 @@ test_that("aws_bucket_tree", {
 
   bucket_delete(bucket, force = TRUE)
 })
+test_that("six_bucket_upload, single file", {
+  bucket <- random_string("bucket")
+  demo_rds_file <- file.path(system.file(), "Meta/demo.rds")
+  res <- six_bucket_upload(path = demo_rds_file, remote = bucket, force = TRUE)
+  objs <- aws_bucket_list_objects(bucket)
+
+  expect_type(res, "character")
+  expect_length(res, 1)
+  expect_match(res, bucket) # bucket is in each s3 path
+  expect_equal(NROW(objs), 1)
+  expect_equal(objs$key, basename(demo_rds_file))
+})
+
+test_that("six_bucket_upload, mixed inputs (file and dir)", {
+  bucket <- random_string("bucket")
+  library(fs)
+  demo_rds_file <- file.path(system.file(), "Meta/demo.rds")
+  tdir <- path(path_temp(), "mytmp")
+  dir_create(tdir)
+  purrr::map(letters, \(l) file_create(path(tdir, l)))
+  res <- suppressMessages(
+    six_bucket_upload(
+      path = c(demo_rds_file, tdir), remote = bucket,
+      force = TRUE
+    )
+  )
+  objs <- aws_bucket_list_objects(bucket)
+
+  expect_type(res, "character")
+  expect_length(res, 27)
+  expect_match(res, bucket) # bucket is in each s3 path
+  expect_equal(NROW(objs), 27)
+})
+
+test_that("six_bucket_upload, remote includes key prefix change", {
+  bucket <- random_string("bucket")
+  library(fs)
+  tdir <- path(path_temp(), "atmp")
+  demo_rds_file <- file.path(system.file(), "Meta/demo.rds")
+  dir_create(tdir)
+  purrr::map(letters, \(l) file_create(path(tdir, l)))
+  res <- suppressMessages(
+    six_bucket_upload(
+      path = c(demo_rds_file, tdir), remote = path(bucket, "some/dir"),
+      force = TRUE
+    )
+  )
+  objs <- aws_bucket_list_objects(bucket)
+
+  expect_type(res, "character")
+  expect_length(res, 27)
+  expect_match(res, bucket) # bucket is in each s3 path
+  expect_match(res, "some/dir") # key prefix is in each s3 path
+  expect_equal(NROW(objs), 27)
+})
+
+test_that("six_bucket_upload error behavior", {
+  expect_error(
+    six_bucket_upload(path = letters, remote = c("z", "z")),
+    "must be length 1"
+  )
+  expect_error(
+    six_bucket_upload(path = 5, remote = ""),
+    "must be character"
+  )
+  expect_error(
+    six_bucket_upload(path = "", remote = 5),
+    "must be character"
+  )
+  expect_error(
+    six_bucket_upload(path = "", remote = ""),
+    "don't exist"
+  )
+  expect_error(
+    six_bucket_upload(path = "a-file-that-does-not-exist.txt", remote = ""),
+    "don't exist"
+  )
+})
 
 # cleanup
 buckets_empty()