diff --git a/DESCRIPTION b/DESCRIPTION index 725be2e..212c5cf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,6 +20,7 @@ Imports: AzureGraph (>= 1.3.1), utils, parallel, + tools, curl, httr, jsonlite, @@ -32,6 +33,7 @@ Suggests: rmarkdown, testthat, blastula, - emayili + emayili, + readr Roxygen: list(markdown=TRUE, r6=FALSE) RoxygenNote: 7.2.1 diff --git a/NEWS.md b/NEWS.md index a831591..0224548 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,10 +2,12 @@ ## OneDrive/SharePoint -- Fix broken functionality for shared items in OneDrive/Sharepoint. In particular, this should allow using the MS365 backend with the pins package (#149/#129). -- The `list_shared_items`/`list_shared_files` method for drives now always returns a list of drive item objects, rather than a data frame. If the `info` argument is supplied with a value other than "items", a warning is issued. +- Fix broken functionality for shared items in OneDrive/Sharepoint. In particular, this should allow using the MS365 backend with the pins package (#149, #129). +- The `list_shared_items()`/`list_shared_files()` method for drives now always returns a list of drive item objects, rather than a data frame. If the `info` argument is supplied with a value other than "items", a warning is issued. - Add folder upload and download functionality for `ms_drive_item$upload()` and `download()`. Subfolders can also be transferred recursively, and optionally in parallel. There are also corresponding `ms_drive$upload_folder()` and `download_folder()` methods. -- Add the ability to use object IDs instead of file/folder paths in `ms_drive` methods, including getting, uploading and downloading. This can be useful since the object ID is immutable, whereas file paths can be changed. See `?ms_drive` for more details. +- Add convenience methods for saving and loading datasets and R objects: `save_dataframe()`, `save_rds()`, `save_rdata()`, `load_dataframe()`, `load_rds()`, and `load_rdata()`. 
See `?ms_drive_item` and `?ms_drive` for more details. +- Add ability to upload and download via connections/raw vectors instead of files. You can specify the source to `ms_drive_item$upload()` to be a raw or text connection. Similarly, if the destination for `ms_drive_item$download()` is NULL, the downloaded data is returned as a raw vector. +- Add the ability to use object IDs instead of file/folder paths in `ms_drive` methods, including getting, uploading and downloading. This can be useful since the object ID is immutable, whereas file paths can change, e.g. if the file is moved or renamed. See `?ms_drive` for more details. ## Outlook diff --git a/R/ms_drive.R b/R/ms_drive.R index 118150a..be02f2b 100644 --- a/R/ms_drive.R +++ b/R/ms_drive.R @@ -27,6 +27,12 @@ #' - `get_item_properties(path, itemid)`: Get the properties (metadata) for a file or folder. #' - `set_item_properties(path, itemid, ...)`: Set the properties for a file or folder. #' - `list_shared_items(...), list_shared_files(...)`: List the drive items shared with you. See 'Shared items' below. +#' - `load_dataframe(path, itemid, ...)`: Download a delimited file and return its contents as a data frame. See 'Saving and loading data' below. +#' - `load_rds(path, itemid)`: Download a .rds file and return the saved object. +#' - `load_rdata(path, itemid, envir)`: Load a .RData or .Rda file into the specified environment. +#' - `save_dataframe(df, file, ...)`: Save a dataframe to a delimited file. +#' - `save_rds(object, file)`: Save an R object to a .rds file. +#' - `save_rdata(..., file)`: Save the specified objects to a .RData file. #' #' @section Initialization: #' Creating new objects of this class should be done via the `get_drive` methods of the [`ms_graph`], [`az_user`] or [`ms_site`] classes. Calling the `new()` method for this class only constructs the R object; it does not call the Microsoft Graph API to retrieve or create the actual drive. 
@@ -61,6 +67,15 @@ #' #' `create_folder` creates a folder with the specified path. Trying to create an already existing folder is an error. #' +#' @section Saving and loading data: +#' The following methods are provided to simplify the task of loading and saving datasets and R objects. They call down to the corresponding methods for the `ms_drive_item` class. The `load_*` methods allow specifying the file to be loaded by either a path or item ID. +#' - `load_dataframe` downloads a delimited file and returns its contents as a data frame. The delimiter can be specified with the `delim` argument; if omitted, this is "," if the file extension is .csv, ";" if the file extension is .csv2, and a tab otherwise. If the readr package is installed, the `readr::read_delim` function is used to parse the file, otherwise `utils::read.delim` is used. You can supply other arguments to the parsing function via the `...` argument. +#' - `save_dataframe` is the inverse of `load_dataframe`: it uploads the given data frame to a folder item. Specify the delimiter with the `delim` argument. The `readr::write_delim` function is used to serialise the data if that package is installed, and `utils::write.table` otherwise. +#' - `load_rds` downloads a .rds file and returns its contents as an R object. It is analogous to the base `readRDS` function but for OneDrive/SharePoint drive items. +#' - `save_rds` uploads a given R object as a .rds file, analogously to `saveRDS`. +#' - `load_rdata` downloads a .RData or .Rda file and loads its contents into the given environment. It is analogous to the base `load` function but for OneDrive/SharePoint drive items. +#' - `save_rdata` uploads the given R objects as a .RData file, analogously to `save`. +#' #' @section Shared items: #' The `list_shared_items` method shows the files and folders that have been shared with you. This is a named list of drive items, that you can use to access the shared files/folders. 
The arguments are: #' - `allow_external`: Whether to include items that were shared from outside tenants. The default is FALSE. @@ -119,6 +134,15 @@ #' obj2 <- drv$get_item(itemid=id) #' obj$properties$id == obj2$properties$id # TRUE #' +#' # saving and loading data +#' drv$save_dataframe(iris, "path/to/iris.csv") +#' iris2 <- drv$load_dataframe("path/to/iris.csv") +#' identical(iris, iris2) # TRUE +#' +#' drv$save_rds(iris, "path/to/iris.rds") +#' iris3 <- drv$load_rds("path/to/iris.rds") +#' identical(iris, iris3) # TRUE +#' #' # accessing shared files #' shared_df <- drv$list_shared_items() #' shared_df$remoteItem[[1]]$open() @@ -230,6 +254,42 @@ public=list( out }, + load_dataframe=function(path=NULL, itemid=NULL, ...) + { + self$get_item(path, itemid)$load_dataframe(...) + }, + + load_rdata=function(path=NULL, itemid=NULL, envir=parent.frame()) + { + self$get_item(path, itemid)$load_rdata(envir=envir) + }, + + load_rds=function(path=NULL, itemid=NULL) + { + self$get_item(path, itemid)$load_rds() + }, + + save_dataframe=function(df, file, ...) + { + folder <- dirname(file) + if(folder == ".") folder <- "/" + self$get_item(folder)$save_dataframe(df, basename(file), ...) + }, + + save_rdata=function(..., file, envir=parent.frame()) + { + folder <- dirname(file) + if(folder == ".") folder <- "/" + self$get_item(folder)$save_rdata(..., file=basename(file), envir=envir) + }, + + save_rds=function(object, file) + { + folder <- dirname(file) + if(folder == ".") folder <- "/" + self$get_item(folder)$save_rds(object, file=basename(file)) + }, + print=function(...) { personal <- self$properties$driveType == "personal" diff --git a/R/ms_drive_item.R b/R/ms_drive_item.R index 5d52460..bfaee75 100644 --- a/R/ms_drive_item.R +++ b/R/ms_drive_item.R @@ -24,6 +24,12 @@ #' - `get_parent_folder()`: Get the parent folder for this item, as a drive item object. Returns the root folder for the root. Not supported for remote items. 
#' - `get_path()`: Get the absolute path for this item, as a character string. Not supported for remote items. #' - `is_folder()`: Information function, returns TRUE if this item is a folder. +#' - `load_dataframe(delim=NULL, ...)`: Download a delimited file and return its contents as a data frame. See 'Saving and loading data' below. +#' - `load_rds()`: Download a .rds file and return the saved object. +#' - `load_rdata(envir)`: Load a .RData or .Rda file into the specified environment. +#' - `save_dataframe(df, file, delim=",", ...)` Save a dataframe to a delimited file. +#' - `save_rds(object, file)`: Save an R object to a .rds file. +#' - `save_rdata(..., file)`: Save the specified objects to a .RData file. #' #' @section Initialization: #' Creating new objects of this class should be done via the `get_item` method of the [`ms_drive`] class. Calling the `new()` method for this class only constructs the R object; it does not call the Microsoft Graph API to retrieve or create the actual item. @@ -42,11 +48,11 @@ #' #' `list_files` is a synonym for `list_items`. #' -#' `download` downloads the item to the local machine. If this is a file, it is downloaded; if this is a folder, all its files are downloaded. If the `recursive` argument is TRUE and the item is a folder, all subfolders will also be downloaded recursively. +#' `download` downloads the item to the local machine. If this is a file, it is downloaded; in this case, the `dest` argument can be the path to the destination file, or NULL to return the downloaded content in a raw vector. If the item is a folder, all its files are downloaded, including subfolders if the `recursive` argument is TRUE. #' -#' `upload` uploads a file or folder from the local machine into the folder item. If this is a folder, and the `recursive` argument iS TRUE, all subfolders are also uploaded. The uploading is done in blocks of 32MB by default; you can change this by setting the `blocksize` argument. 
For technical reasons, the block size [must be a multiple of 320KB](https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession?view=graph-rest-1.0#upload-bytes-to-the-upload-session). +#' `upload` uploads a file or folder from the local machine into the folder item. The `src` argument can be the path to the source file, a [rawConnection] or a [textConnection] object. If `src` is a folder, all its files are uploaded, including subfolders if the `recursive` argument is TRUE. An `ms_drive_item` object is returned invisibly. #' -#' `upload` returns an `ms_drive_item` object invisibly if a file was uploaded, or NULL if a folder was uploaded. +#' Uploading is done in blocks of 32MB by default; you can change this by setting the `blocksize` argument. For technical reasons, the block size [must be a multiple of 320KB](https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession?view=graph-rest-1.0#upload-bytes-to-the-upload-session). #' #' Uploading and downloading folders can be done in parallel, which can result in substantial speedup when transferring a large number of small files. This is controlled by the `parallel` argument to `upload` and `download`, which can have the following values: #' - TRUE: A cluster with 5 workers is created @@ -67,6 +73,15 @@ #' #' This method returns a URL to access the item, for `type="view"` or "`type=edit"`. For `type="embed"`, it returns a list with components `webUrl` containing the URL, and `webHtml` containing a HTML fragment to embed the link in an IFRAME. The default is a viewable link, expiring in 7 days. #' +#' @section Saving and loading data: +#' The following methods are provided to simplify the task of loading and saving datasets and R objects. +#' - `load_dataframe` downloads a delimited file and returns its contents as a data frame. 
The delimiter can be specified with the `delim` argument; if omitted, this is "," if the file extension is .csv, ";" if the file extension is .csv2, and a tab otherwise. If the readr package is installed, the `readr::read_delim` function is used to parse the file, otherwise `utils::read.delim` is used. You can supply other arguments to the parsing function via the `...` argument. +#' - `save_dataframe` is the inverse of `load_dataframe`: it uploads the given data frame to a folder item. Specify the delimiter with the `delim` argument. The `readr::write_delim` function is used to serialise the data if that package is installed, and `utils::write.table` otherwise. +#' - `load_rds` downloads a .rds file and returns its contents as an R object. It is analogous to the base `readRDS` function but for OneDrive/SharePoint drive items. +#' - `save_rds` uploads a given R object as a .rds file, analogously to `saveRDS`. +#' - `load_rdata` downloads a .RData or .Rda file and loads its contents into the given environment. It is analogous to the base `load` function but for OneDrive/SharePoint drive items. +#' - `save_rdata` uploads the given R objects as a .RData file, analogously to `save`. +#' #' @section List methods: #' All `list_*` methods have `filter` and `n` arguments to limit the number of results. The former should be an [OData expression](https://docs.microsoft.com/en-us/graph/query-parameters#filter-parameter) as a string to filter the result set on. The latter should be a number setting the maximum number of (filtered) results to return. The default values are `filter=NULL` and `n=Inf`. If `n=NULL`, the `ms_graph_pager` iterator object is returned instead to allow manual iteration over the results. 
#' @@ -108,6 +123,16 @@ #' # delete the file (will ask for confirmation first) #' myfile$delete() #' +#' # saving and loading data +#' myfolder <- mydrv$get_item("myfolder") +#' myfolder$save_dataframe(iris, "iris.csv") +#' iris2 <- myfolder$get_item("iris.csv")$load_dataframe() +#' identical(iris, iris2) # TRUE +#' +#' myfolder$save_rds(iris, "iris.rds") +#' iris3 <- myfolder$get_item("iris.rds")$load_rds() +#' identical(iris, iris3) # TRUE +#' #' } #' @format An R6 object of class `ms_drive_item`, inheriting from `ms_object`. #' @export @@ -275,7 +300,7 @@ public=list( private$assert_is_folder() # check if uploading a folder - if(dir.exists(src)) + if(is.character(src) && dir.exists(src)) { files <- dir(src, all.files=TRUE, no..=TRUE, recursive=recursive, full.names=FALSE) @@ -316,7 +341,7 @@ public=list( } else stop("Unknown value for 'parallel' argument", call.=FALSE) - invisible(NULL) + invisible(self$get_item(dest)) } else private$upload_file(src, dest, blocksize) }, @@ -328,6 +353,9 @@ public=list( children <- self$list_items() isdir <- children$isdir + if(!is.character(dest)) + stop("Must supply a destination folder", call.=FALSE) + dest <- normalizePath(dest, mustWork=FALSE) dir.create(dest, showWarnings=FALSE) @@ -373,6 +401,71 @@ public=list( else private$download_file(dest, overwrite) }, + load_dataframe=function(delim=NULL, ...) + { + private$assert_is_file() + ext <- tolower(tools::file_ext(self$properties$name)) + if(is.null(delim)) + { + delim <- if(ext == "csv") "," else if(ext == "csv2") ";" else "\t" + } + dat <- self$download(NULL) + if(requireNamespace("readr", quietly=TRUE)) + { + con <- rawConnection(dat, "r") + on.exit(try(close(con), silent=TRUE)) + readr::read_delim(con, delim=delim, ...) # forward parser arguments, as the fallback branch does + } + else utils::read.delim(text=rawToChar(dat), sep=delim, ...)
+ }, + + load_rdata=function(envir=parent.frame()) + { + private$assert_is_file() + private$assert_file_extension_is("rdata", "rda") + con <- rawConnection(self$download(NULL), open="rb") + invisible(tryCatch(load(con, envir=envir), finally=close(con))) # release the connection even if load() fails + }, + + load_rds=function() + { + private$assert_is_file() + private$assert_file_extension_is("rds") + rds <- self$download(NULL) + unserialize(memDecompress(rds)) + }, + + save_dataframe=function(df, file, delim=",", ...) + { + private$assert_is_folder() + conn <- rawConnection(raw(0), open="r+b") + if(requireNamespace("readr", quietly=TRUE)) + readr::write_delim(df, conn, delim=delim, ...) + else utils::write.table(df, conn, sep=delim, ...) + seek(conn, 0) + self$upload(conn, file) + }, + + save_rdata=function(..., file, envir=parent.frame()) + { + private$assert_is_folder() + # save to a temporary file as saving to a connection disables compression + tmpsave <- tempfile(fileext=".rdata") + on.exit(unlink(tmpsave)) + save(..., file=tmpsave, envir=envir) + self$upload(tmpsave, file) + }, + + save_rds=function(object, file) + { + private$assert_is_folder() + # save to a temporary file to avoid dealing with memCompress/memDecompress hassles + tmpsave <- tempfile(fileext=".rds") + on.exit(unlink(tmpsave)) + saveRDS(object, tmpsave) + self$upload(tmpsave, file) + }, + get_path=function() { private$assert_is_not_remote() @@ -401,8 +494,8 @@ private=list( upload_file=function(src, dest, blocksize) { - con <- file(src, open="rb") - on.exit(close(con)) + src <- normalize_src(src) + on.exit(close(src$con)) fullpath <- private$make_absolute_path(dest) # possible fullpath formats -> string to append: @@ -415,14 +508,14 @@ private=list( else paste0(fullpath, "/createUploadSession") upload_dest <- call_graph_endpoint(self$token, op, http_verb="POST")$uploadUrl - size <- file.size(src) + size <- src$size next_blockstart <- 0 next_blockend <- size - 1 repeat { next_blocksize <- min(next_blockend - next_blockstart + 1, blocksize) - seek(con, next_blockstart) - body <-
readBin(con, "raw", next_blocksize) + seek(src$con, next_blockstart) + body <- readBin(src$con, "raw", next_blocksize) thisblock <- length(body) if(thisblock == 0) break @@ -447,15 +540,22 @@ private=list( download_file=function(dest, overwrite) { private$assert_is_file() - res <- self$do_operation("content", config=httr::write_disk(dest, overwrite=overwrite), - http_status_handler="pass") + + # TODO: make less hacky + config <- if(is.character(dest)) + httr::write_disk(dest, overwrite=overwrite) + else list() + + res <- self$do_operation("content", config=config, http_status_handler="pass") if(httr::status_code(res) >= 300) { - on.exit(file.remove(dest)) + if(is.character(dest)) + on.exit(file.remove(dest)) httr::stop_for_status(res, paste0("complete operation. Message:\n", sub("\\.$", "", error_message(httr::content(res))))) } - invisible(NULL) + + if(is.character(dest)) invisible(NULL) else httr::content(res, as="raw") }, # dest = . or '' --> this item @@ -534,6 +634,13 @@ private=list( { if(!is.null(self$properties$remoteItem)) stop("This method is not applicable for a remote item", call.=FALSE) + }, + + assert_file_extension_is=function(...) + { + ext <- tolower(tools::file_ext(self$properties$name)) + if(!(ext %in% unlist(list(...)))) + stop("Not an allowed file type", call.=FALSE) } )) diff --git a/R/normalize_src.R b/R/normalize_src.R new file mode 100644 index 0000000..ba0e12f --- /dev/null +++ b/R/normalize_src.R @@ -0,0 +1,39 @@ +normalize_src <- function(src) +{ + UseMethod("normalize_src") +} + + +normalize_src.character <- function(src) +{ + con <- file(src, open="rb") + size <- file.size(src) + list(con=con, size=size) +} + + +normalize_src.textConnection <- function(src) +{ + # convert to raw connection + src <- charToRaw(paste0(readLines(src), collapse="\n")) + size <- length(src) + con <- rawConnection(src) + list(con=con, size=size) +} + + +normalize_src.rawConnection <- function(src) +{ + seek(src, 0) # rewind (upload reads from offset 0), then read through the data to get object size (!)
+ size <- 0 + repeat + { + x <- readBin(src, "raw", n=1e6) + if(length(x) == 0) + break + size <- size + length(x) + } + seek(src, 0) # reposition connection after reading + list(con=src, size=size) +} + diff --git a/man/ms_drive.Rd b/man/ms_drive.Rd index 390945c..eba1211 100644 --- a/man/ms_drive.Rd +++ b/man/ms_drive.Rd @@ -41,6 +41,12 @@ Class representing a personal OneDrive or SharePoint document library. \item \code{get_item_properties(path, itemid)}: Get the properties (metadata) for a file or folder. \item \code{set_item_properties(path, itemid, ...)}: Set the properties for a file or folder. \item \verb{list_shared_items(...), list_shared_files(...)}: List the drive items shared with you. See 'Shared items' below. +\item \code{load_dataframe(path, itemid, ...)}: Download a delimited file and return its contents as a data frame. See 'Saving and loading data' below. +\item \code{load_rds(path, itemid)}: Download a .rds file and return the saved object. +\item \code{load_rdata(path, itemid)}: Load a .RData or .Rda file into the specified environment. +\item \code{save_dataframe(df, file, ...)} Save a dataframe to a delimited file. +\item \code{save_rds(object, file)}: Save an R object to a .rds file. +\item \code{save_rdata(..., file)}: Save the specified objects to a .RData file. } } @@ -83,6 +89,19 @@ Transferring files in parallel can result in substantial speedup for a large num \code{create_folder} creates a folder with the specified path. Trying to create an already existing folder is an error. } +\section{Saving and loading data}{ + +The following methods are provided to simplify the task of loading and saving datasets and R objects. They call down to the corresponding methods for the \code{ms_drive_item} class. The `load_*`` methods allow specifying the file to be loaded by either a path or item ID. +\itemize{ +\item \code{load_dataframe} downloads a delimited file and returns its contents as a data frame. 
The delimiter can be specified with the \code{delim} argument; if omitted, this is "," if the file extension is .csv, ";" if the file extension is .csv2, and a tab otherwise. If the readr package is installed, the \code{readr::read_delim} function is used to parse the file, otherwise \code{utils::read.delim} is used. You can supply other arguments to the parsing function via the \code{...} argument. +\item \code{save_dataframe} is the inverse of \code{load_dataframe}: it uploads the given data frame to a folder item. Specify the delimiter with the \code{delim} argument. The \code{readr::write_delim} function is used to serialise the data if that package is installed, and \code{utils::write.table} otherwise. +\item \code{load_rds} downloads a .rds file and returns its contents as an R object. It is analogous to the base \code{readRDS} function but for OneDrive/SharePoint drive items. +\item \code{save_rds} uploads a given R object as a .rds file, analogously to \code{saveRDS}. +\item \code{load_rdata} downloads a .RData or .Rda file and loads its contents into the given environment. It is analogous to the base \code{load} function but for OneDrive/SharePoint drive items. +\item \code{save_rdata} uploads the given R objects as a .RData file, analogously to \code{save}. +} +} + \section{Shared items}{ The \code{list_shared_items} method shows the files and folders that have been shared with you. This is a named list of drive items, that you can use to access the shared files/folders. 
The arguments are: @@ -142,6 +161,15 @@ id <- obj$properties$id obj2 <- drv$get_item(itemid=id) obj$properties$id == obj2$properties$id # TRUE +# saving and loading data +drv$save_dataframe(iris, "path/to/iris.csv") +iris2 <- drv$load_dataframe("path/to/iris.csv") +identical(iris, iris2) # TRUE + +drv$save_rds(iris, "path/to/iris.rds") +iris3 <- drv$load_rds("path/to/iris.rds") +identical(iris, iris3) # TRUE + # accessing shared files shared_df <- drv$list_shared_items() shared_df$remoteItem[[1]]$open() diff --git a/man/ms_drive_item.Rd b/man/ms_drive_item.Rd index ea4caef..fa706e7 100644 --- a/man/ms_drive_item.Rd +++ b/man/ms_drive_item.Rd @@ -38,6 +38,12 @@ Class representing an item (file or folder) in a OneDrive or SharePoint document \item \code{get_parent_folder()}: Get the parent folder for this item, as a drive item object. Returns the root folder for the root. Not supported for remote items. \item \code{get_path()}: Get the absolute path for this item, as a character string. Not supported for remote items. \item \code{is_folder()}: Information function, returns TRUE if this item is a folder. +\item \code{load_dataframe(delim=NULL, ...)}: Download a delimited file and return its contents as a data frame. See 'Saving and loading data' below. +\item \code{load_rds()}: Download a .rds file and return the saved object. +\item \code{load_rdata(envir)}: Load a .RData or .Rda file into the specified environment. +\item \code{save_dataframe(df, file, delim=",", ...)} Save a dataframe to a delimited file. +\item \code{save_rds(object, file)}: Save an R object to a .rds file. +\item \code{save_rdata(..., file)}: Save the specified objects to a .RData file. } } @@ -63,11 +69,11 @@ This class exposes methods for carrying out common operations on files and folde \code{list_files} is a synonym for \code{list_items}. -\code{download} downloads the item to the local machine. If this is a file, it is downloaded; if this is a folder, all its files are downloaded. 
If the \code{recursive} argument is TRUE and the item is a folder, all subfolders will also be downloaded recursively. +\code{download} downloads the item to the local machine. If this is a file, it is downloaded; in this case, the \code{dest} argument can be the path to the destination file, or NULL to return the downloaded content in a raw vector. If the item is a folder, all its files are downloaded, including subfolders if the \code{recursive} argument is TRUE. -\code{upload} uploads a file or folder from the local machine into the folder item. If this is a folder, and the \code{recursive} argument iS TRUE, all subfolders are also uploaded. The uploading is done in blocks of 32MB by default; you can change this by setting the \code{blocksize} argument. For technical reasons, the block size \href{https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession?view=graph-rest-1.0#upload-bytes-to-the-upload-session}{must be a multiple of 320KB}. +\code{upload} uploads a file or folder from the local machine into the folder item. The \code{src} argument can be the path to the source file, a \link{rawConnection} or a \link{textConnection} object. If \code{src} is a folder, all its files are uploaded, including subfolders if the \code{recursive} argument iS TRUE. An \code{ms_drive_item} object is returned invisibly. -\code{upload} returns an \code{ms_drive_item} object invisibly if a file was uploaded, or NULL if a folder was uploaded. +Uploading is done in blocks of 32MB by default; you can change this by setting the \code{blocksize} argument. For technical reasons, the block size \href{https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession?view=graph-rest-1.0#upload-bytes-to-the-upload-session}{must be a multiple of 320KB}. Uploading and downloading folders can be done in parallel, which can result in substantial speedup when transferring a large number of small files. 
This is controlled by the \code{parallel} argument to \code{upload} and \code{download}, which can have the following values: \itemize{ @@ -93,6 +99,19 @@ Uploading and downloading folders can be done in parallel, which can result in s This method returns a URL to access the item, for \code{type="view"} or "\verb{type=edit"}. For \code{type="embed"}, it returns a list with components \code{webUrl} containing the URL, and \code{webHtml} containing a HTML fragment to embed the link in an IFRAME. The default is a viewable link, expiring in 7 days. } +\section{Saving and loading data}{ + +The following methods are provided to simplify the task of loading and saving datasets and R objects. +\itemize{ +\item \code{load_dataframe} downloads a delimited file and returns its contents as a data frame. The delimiter can be specified with the \code{delim} argument; if omitted, this is "," if the file extension is .csv, ";" if the file extension is .csv2, and a tab otherwise. If the readr package is installed, the \code{readr::read_delim} function is used to parse the file, otherwise \code{utils::read.delim} is used. You can supply other arguments to the parsing function via the \code{...} argument. +\item \code{save_dataframe} is the inverse of \code{load_dataframe}: it uploads the given data frame to a folder item. Specify the delimiter with the \code{delim} argument. The \code{readr::write_delim} function is used to serialise the data if that package is installed, and \code{utils::write.table} otherwise. +\item \code{load_rds} downloads a .rds file and returns its contents as an R object. It is analogous to the base \code{readRDS} function but for OneDrive/SharePoint drive items. +\item \code{save_rds} uploads a given R object as a .rds file, analogously to \code{saveRDS}. +\item \code{load_rdata} downloads a .RData or .Rda file and loads its contents into the given environment. It is analogous to the base \code{load} function but for OneDrive/SharePoint drive items. 
+\item \code{save_rdata} uploads the given R objects as a .RData file, analogously to \code{save}. +} +} + \section{List methods}{ All \verb{list_*} methods have \code{filter} and \code{n} arguments to limit the number of results. The former should be an \href{https://docs.microsoft.com/en-us/graph/query-parameters#filter-parameter}{OData expression} as a string to filter the result set on. The latter should be a number setting the maximum number of (filtered) results to return. The default values are \code{filter=NULL} and \code{n=Inf}. If \code{n=NULL}, the \code{ms_graph_pager} iterator object is returned instead to allow manual iteration over the results. @@ -131,6 +150,16 @@ myfile$create_share_link(password="Use-strong-passwords!") # delete the file (will ask for confirmation first) myfile$delete() +# saving and loading data +myfolder <- mydrv$get_item("myfolder") +myfolder$save_dataframe(iris, "iris.csv") +iris2 <- myfolder$get_item("iris.csv")$load_dataframe() +identical(iris, iris2) # TRUE + +myfolder$save_rds(iris, "iris.rds") +iris3 <- myfolder$get_item("iris.rds")$load_rds() +identical(iris, iris3) # TRUE + } } \seealso{ diff --git a/tests/testthat/test01a_onedrive_conntransfer.R b/tests/testthat/test01a_onedrive_conntransfer.R new file mode 100644 index 0000000..5686954 --- /dev/null +++ b/tests/testthat/test01a_onedrive_conntransfer.R @@ -0,0 +1,77 @@ +tenant <- "consumers" +app <- Sys.getenv("AZ_TEST_NATIVE_APP_ID") + +if(app == "") + skip("OneDrive tests skipped: Microsoft Graph credentials not set") + +if(!interactive()) + skip("OneDrive tests skipped: must be in interactive session") + +tok <- get_test_token(tenant, app, c("Files.ReadWrite.All", "User.Read")) +if(is.null(tok)) + skip("OneDrive tests skipped: unable to login to consumers tenant") + +drv <- try(call_graph_endpoint(tok, "me/drive"), silent=TRUE) +if(inherits(drv, "try-error")) + skip("OneDrive tests skipped: service not available") + +opt_use_itemid <- 
options(microsoft365r_use_itemid_in_path=TRUE) +od <- ms_drive$new(tok, tenant, drv) +folder <- od$create_folder(make_name()) + +test_that("OneDrive file transfer extras work", +{ + expect_is(od, "ms_drive") + + src <- "../resources/file.json" + img <- "../resources/logo_small.jpg" + + # upload raw connection + r <- readBin(img, what="raw", n=file.size(img)) + rcon <- rawConnection(r) + expect_silent(folder$upload(rcon, "raw.jpg")) + + # upload text connection + tcon <- textConnection(readLines(src)) + expect_silent(folder$upload(tcon, "text.json")) + + # download raw vector + expect_silent(rret <- folder$get_item("raw.jpg")$download(NULL)) + expect_type(rret, "raw") + expect_identical(r, rret) + + expect_silent(tret <- folder$get_item("text.json")$download(NULL)) + expect_type(tret, "raw") + expect_identical(rawToChar(tret), paste0(readLines(src), collapse="\n")) +}) + + +test_that("OneDrive load/save methods work", +{ + fname <- folder$properties$name + + name1 <- file.path(fname, "iris.csv") + expect_silent(od$save_dataframe(iris, name1)) + ir1 <- od$load_dataframe(name1) + expect_s3_class(ir1, "data.frame") + expect_identical(dim(ir1), dim(iris)) + + name2 <- file.path(fname, "iris.rds") + expect_silent(od$save_rds(iris, name2)) + ir2 <- od$load_rds(name2) + expect_s3_class(ir2, "data.frame") + expect_identical(dim(ir2), dim(iris)) + + name3 <- file.path(fname, "iris.rdata") + ir3 <- iris + expect_silent(od$save_rdata(ir3, file=name3)) + rm(ir3) + od$load_rdata(name3) + expect_identical(ir3, iris) +}) + + +teardown({ + options(opt_use_itemid) + folder$delete(confirm=FALSE) +})