File transfer enhancements #160

Merged
merged 6 commits on May 25, 2023
4 changes: 3 additions & 1 deletion DESCRIPTION
@@ -20,6 +20,7 @@ Imports:
AzureGraph (>= 1.3.1),
utils,
parallel,
tools,
curl,
httr,
jsonlite,
@@ -32,6 +33,7 @@ Suggests:
rmarkdown,
testthat,
blastula,
emayili
emayili,
readr
Roxygen: list(markdown=TRUE, r6=FALSE)
RoxygenNote: 7.2.1
8 changes: 5 additions & 3 deletions NEWS.md
@@ -2,10 +2,12 @@

## OneDrive/SharePoint

- Fix broken functionality for shared items in OneDrive/Sharepoint. In particular, this should allow using the MS365 backend with the pins package (#149/#129).
- The `list_shared_items`/`list_shared_files` method for drives now always returns a list of drive item objects, rather than a data frame. If the `info` argument is supplied with a value other than "items", a warning is issued.
- Fix broken functionality for shared items in OneDrive/Sharepoint. In particular, this should allow using the MS365 backend with the pins package (#149, #129).
- The `list_shared_items()`/`list_shared_files()` method for drives now always returns a list of drive item objects, rather than a data frame. If the `info` argument is supplied with a value other than "items", a warning is issued.
- Add folder upload and download functionality for `ms_drive_item$upload()` and `download()`. Subfolders can also be transferred recursively, and optionally in parallel. There are also corresponding `ms_drive$upload_folder()` and `download_folder()` methods.
- Add the ability to use object IDs instead of file/folder paths in `ms_drive` methods, including getting, uploading and downloading. This can be useful since the object ID is immutable, whereas file paths can be changed. See `?ms_drive` for more details.
- Add convenience methods for saving and loading datasets and R objects: `save_dataframe()`, `save_rds()`, `save_rdata()`, `load_dataframe()`, `load_rds()`, and `load_rdata()`. See `?ms_drive_item` and `?ms_drive` for more details.
- Add ability to upload and download via connections/raw vectors instead of files. You can specify the source to `ms_drive_item$upload()` to be a raw or text connection. Similarly, if the destination for `ms_drive_item$download()` is NULL, the downloaded data is returned as a raw vector.
- Add the ability to use object IDs instead of file/folder paths in `ms_drive` methods, including getting, uploading and downloading. This can be useful since the object ID is immutable, whereas file paths can change, eg if the file is moved or renamed. See `?ms_drive` for more details.
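As a quick sketch of the connection/raw-vector transfer described in the entry above (hypothetical object names; `folder` is assumed to be an existing `ms_drive_item` folder object):

# upload the contents of a text connection as a file on the drive
con <- textConnection(c("hello", "world"))
folder$upload(con, "hello.txt")

# read the file back into memory as a raw vector instead of saving it to disk
raw_bytes <- folder$get_item("hello.txt")$download(NULL)
cat(rawToChar(raw_bytes))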

## Outlook

60 changes: 60 additions & 0 deletions R/ms_drive.R
@@ -27,6 +27,12 @@
#' - `get_item_properties(path, itemid)`: Get the properties (metadata) for a file or folder.
#' - `set_item_properties(path, itemid, ...)`: Set the properties for a file or folder.
#' - `list_shared_items(...), list_shared_files(...)`: List the drive items shared with you. See 'Shared items' below.
#' - `load_dataframe(path, itemid, ...)`: Download a delimited file and return its contents as a data frame. See 'Saving and loading data' below.
#' - `load_rds(path, itemid)`: Download a .rds file and return the saved object.
#' - `load_rdata(path, itemid)`: Load a .RData or .Rda file into the specified environment.
#' - `save_dataframe(df, file, ...)`: Save a data frame to a delimited file.
#' - `save_rds(object, file)`: Save an R object to a .rds file.
#' - `save_rdata(..., file)`: Save the specified objects to a .RData file.
#'
#' @section Initialization:
#' Creating new objects of this class should be done via the `get_drive` methods of the [`ms_graph`], [`az_user`] or [`ms_site`] classes. Calling the `new()` method for this class only constructs the R object; it does not call the Microsoft Graph API to retrieve or create the actual drive.
@@ -61,6 +67,15 @@
#'
#' `create_folder` creates a folder with the specified path. Trying to create an already existing folder is an error.
#'
#' @section Saving and loading data:
#' The following methods are provided to simplify the task of loading and saving datasets and R objects. They call down to the corresponding methods for the `ms_drive_item` class. The `load_*` methods allow specifying the file to be loaded by either a path or item ID.
#' - `load_dataframe` downloads a delimited file and returns its contents as a data frame. The delimiter can be specified with the `delim` argument; if omitted, this is "," if the file extension is .csv, ";" if the file extension is .csv2, and a tab otherwise. If the readr package is installed, the `readr::read_delim` function is used to parse the file, otherwise `utils::read.delim` is used. You can supply other arguments to the parsing function via the `...` argument.
#' - `save_dataframe` is the inverse of `load_dataframe`: it uploads the given data frame to a folder item. Specify the delimiter with the `delim` argument. The `readr::write_delim` function is used to serialise the data if that package is installed, and `utils::write.table` otherwise.
#' - `load_rds` downloads a .rds file and returns its contents as an R object. It is analogous to the base `readRDS` function but for OneDrive/SharePoint drive items.
#' - `save_rds` uploads a given R object as a .rds file, analogously to `saveRDS`.
#' - `load_rdata` downloads a .RData or .Rda file and loads its contents into the given environment. It is analogous to the base `load` function but for OneDrive/SharePoint drive items.
#' - `save_rdata` uploads the given R objects as a .RData file, analogously to `save`.
#'
#' @section Shared items:
#' The `list_shared_items` method shows the files and folders that have been shared with you. This is a named list of drive items that you can use to access the shared files/folders. The arguments are:
#' - `allow_external`: Whether to include items that were shared from outside tenants. The default is FALSE.
@@ -119,6 +134,15 @@
#' obj2 <- drv$get_item(itemid=id)
#' obj$properties$id == obj2$properties$id # TRUE
#'
#' # saving and loading data
#' drv$save_dataframe(iris, "path/to/iris.csv")
#' iris2 <- drv$load_dataframe("path/to/iris.csv")
#' identical(iris, iris2) # TRUE
#'
#' drv$save_rds(iris, "path/to/iris.rds")
#' iris3 <- drv$load_rds("path/to/iris.rds")
#' identical(iris, iris3) # TRUE
#'
#' # accessing shared files
#' shared_df <- drv$list_shared_items()
#' shared_df$remoteItem[[1]]$open()
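A further hypothetical sketch of `load_dataframe`: the `delim` argument and any extra parser arguments are passed through via `...` to the parsing function, so with readr installed something like the following should work (`id` is assumed to hold a drive item ID, as in the example above):

# read a tab-delimited file, passing n_max through to readr::read_delim
dat <- drv$load_dataframe("path/to/data.tsv", delim="\t", n_max=1000)

# the file can also be specified by item ID rather than by path
dat2 <- drv$load_dataframe(itemid=id, delim=";")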
@@ -230,6 +254,42 @@ public=list(
out
},

load_dataframe=function(path=NULL, itemid=NULL, ...)
{
self$get_item(path, itemid)$load_dataframe(...)
},

load_rdata=function(path=NULL, itemid=NULL, envir=parent.frame())
{
self$get_item(path, itemid)$load_rdata(envir=envir)
},

load_rds=function(path=NULL, itemid=NULL)
{
self$get_item(path, itemid)$load_rds()
},

save_dataframe=function(df, file, ...)
{
folder <- dirname(file)
if(folder == ".") folder <- "/"
self$get_item(folder)$save_dataframe(df, basename(file), ...)
},

save_rdata=function(..., file, envir=parent.frame())
{
folder <- dirname(file)
if(folder == ".") folder <- "/"
self$get_item(folder)$save_rdata(..., file=basename(file), envir=envir)
},

save_rds=function(object, file)
{
folder <- dirname(file)
if(folder == ".") folder <- "/"
self$get_item(folder)$save_rds(object, file=basename(file))
},

print=function(...)
{
personal <- self$properties$driveType == "personal"
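The new `save_rdata`/`load_rdata` methods above split the supplied path into folder and filename, so a round trip might look like this (a sketch only; `drv` is assumed to be an `ms_drive` object and the remote folder to exist already):

x <- rnorm(10)
y <- month.name
drv$save_rdata(x, y, file="myfolder/objects.RData")

# load into a separate environment rather than the global one
e <- new.env()
drv$load_rdata("myfolder/objects.RData", envir=e)
ls(e)  # "x" "y"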
135 changes: 121 additions & 14 deletions R/ms_drive_item.R
@@ -24,6 +24,12 @@
#' - `get_parent_folder()`: Get the parent folder for this item, as a drive item object. Returns the root folder for the root. Not supported for remote items.
#' - `get_path()`: Get the absolute path for this item, as a character string. Not supported for remote items.
#' - `is_folder()`: Information function, returns TRUE if this item is a folder.
#' - `load_dataframe(delim=NULL, ...)`: Download a delimited file and return its contents as a data frame. See 'Saving and loading data' below.
#' - `load_rds()`: Download a .rds file and return the saved object.
#' - `load_rdata(envir)`: Load a .RData or .Rda file into the specified environment.
#' - `save_dataframe(df, file, delim=",", ...)`: Save a data frame to a delimited file.
#' - `save_rds(object, file)`: Save an R object to a .rds file.
#' - `save_rdata(..., file)`: Save the specified objects to a .RData file.
#'
#' @section Initialization:
#' Creating new objects of this class should be done via the `get_item` method of the [`ms_drive`] class. Calling the `new()` method for this class only constructs the R object; it does not call the Microsoft Graph API to retrieve or create the actual item.
@@ -42,11 +48,11 @@
#'
#' `list_files` is a synonym for `list_items`.
#'
#' `download` downloads the item to the local machine. If this is a file, it is downloaded; if this is a folder, all its files are downloaded. If the `recursive` argument is TRUE and the item is a folder, all subfolders will also be downloaded recursively.
#' `download` downloads the item to the local machine. If this is a file, it is downloaded; in this case, the `dest` argument can be the path to the destination file, or NULL to return the downloaded content in a raw vector. If the item is a folder, all its files are downloaded, including subfolders if the `recursive` argument is TRUE.
#'
#' `upload` uploads a file or folder from the local machine into the folder item. If this is a folder, and the `recursive` argument is TRUE, all subfolders are also uploaded. The uploading is done in blocks of 32MB by default; you can change this by setting the `blocksize` argument. For technical reasons, the block size [must be a multiple of 320KB](https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession?view=graph-rest-1.0#upload-bytes-to-the-upload-session).
#' `upload` uploads a file or folder from the local machine into the folder item. The `src` argument can be the path to the source file, a [rawConnection] or a [textConnection] object. If `src` is a folder, all its files are uploaded, including subfolders if the `recursive` argument is TRUE. An `ms_drive_item` object is returned invisibly.
#'
#' `upload` returns an `ms_drive_item` object invisibly if a file was uploaded, or NULL if a folder was uploaded.
#' Uploading is done in blocks of 32MB by default; you can change this by setting the `blocksize` argument. For technical reasons, the block size [must be a multiple of 320KB](https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession?view=graph-rest-1.0#upload-bytes-to-the-upload-session).
#'
#' Uploading and downloading folders can be done in parallel, which can result in substantial speedup when transferring a large number of small files. This is controlled by the `parallel` argument to `upload` and `download`, which can have the following values:
#' - TRUE: A cluster with 5 workers is created
@@ -67,6 +73,15 @@
#'
#' This method returns a URL to access the item, for `type="view"` or `type="edit"`. For `type="embed"`, it returns a list with components `webUrl` containing the URL, and `webHtml` containing an HTML fragment to embed the link in an IFRAME. The default is a viewable link, expiring in 7 days.
#'
#' @section Saving and loading data:
#' The following methods are provided to simplify the task of loading and saving datasets and R objects.
#' - `load_dataframe` downloads a delimited file and returns its contents as a data frame. The delimiter can be specified with the `delim` argument; if omitted, this is "," if the file extension is .csv, ";" if the file extension is .csv2, and a tab otherwise. If the readr package is installed, the `readr::read_delim` function is used to parse the file, otherwise `utils::read.delim` is used. You can supply other arguments to the parsing function via the `...` argument.
#' - `save_dataframe` is the inverse of `load_dataframe`: it uploads the given data frame to a folder item. Specify the delimiter with the `delim` argument. The `readr::write_delim` function is used to serialise the data if that package is installed, and `utils::write.table` otherwise.
#' - `load_rds` downloads a .rds file and returns its contents as an R object. It is analogous to the base `readRDS` function but for OneDrive/SharePoint drive items.
#' - `save_rds` uploads a given R object as a .rds file, analogously to `saveRDS`.
#' - `load_rdata` downloads a .RData or .Rda file and loads its contents into the given environment. It is analogous to the base `load` function but for OneDrive/SharePoint drive items.
#' - `save_rdata` uploads the given R objects as a .RData file, analogously to `save`.
#'
#' @section List methods:
#' All `list_*` methods have `filter` and `n` arguments to limit the number of results. The former should be an [OData expression](https://docs.microsoft.com/en-us/graph/query-parameters#filter-parameter) as a string to filter the result set on. The latter should be a number setting the maximum number of (filtered) results to return. The default values are `filter=NULL` and `n=Inf`. If `n=NULL`, the `ms_graph_pager` iterator object is returned instead to allow manual iteration over the results.
#'
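As a sketch of the .rds methods documented above, any R object can be round-tripped, not just data frames (`myfolder` is assumed to be an `ms_drive_item` folder object):

# store a fitted model as a .rds file in the folder, then restore it
fit <- lm(mpg ~ wt, data=mtcars)
myfolder$save_rds(fit, "fit.rds")
fit2 <- myfolder$get_item("fit.rds")$load_rds()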
@@ -108,6 +123,16 @@
#' # delete the file (will ask for confirmation first)
#' myfile$delete()
#'
#' # saving and loading data
#' myfolder <- mydrv$get_item("myfolder")
#' myfolder$save_dataframe(iris, "iris.csv")
#' iris2 <- myfolder$get_item("iris.csv")$load_dataframe()
#' identical(iris, iris2) # TRUE
#'
#' myfolder$save_rds(iris, "iris.rds")
#' iris3 <- myfolder$get_item("iris.rds")$load_rds()
#' identical(iris, iris3) # TRUE
#'
#' }
#' @format An R6 object of class `ms_drive_item`, inheriting from `ms_object`.
#' @export
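A hypothetical sketch of the folder-transfer and block-size options described in the operations documentation above (`myfolder` is again assumed to be an `ms_drive_item` folder object):

# download a folder and its subfolders, using a parallel cluster of workers
# to speed up the transfer of many small files
myfolder$download("local_copy", recursive=TRUE, parallel=TRUE)

# upload a single large file with a non-default block size
# (the block size must be a multiple of 320 KB)
myfolder$upload("big_file.zip", "big_file.zip", blocksize=10 * 320 * 1024)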
@@ -275,7 +300,7 @@ public=list(
private$assert_is_folder()

# check if uploading a folder
if(dir.exists(src))
if(is.character(src) && dir.exists(src))
{
files <- dir(src, all.files=TRUE, no..=TRUE, recursive=recursive, full.names=FALSE)

@@ -316,7 +341,7 @@ public=list(
}
else stop("Unknown value for 'parallel' argument", call.=FALSE)

invisible(NULL)
invisible(self$get_item(dest))
}
else private$upload_file(src, dest, blocksize)
},
@@ -328,6 +353,9 @@ public=list(
children <- self$list_items()
isdir <- children$isdir

if(!is.character(dest))
stop("Must supply a destination folder", call.=FALSE)

dest <- normalizePath(dest, mustWork=FALSE)
dir.create(dest, showWarnings=FALSE)

@@ -373,6 +401,71 @@
else private$download_file(dest, overwrite)
},

load_dataframe=function(delim=NULL, ...)
{
private$assert_is_file()
ext <- tolower(tools::file_ext(self$properties$name))
if(is.null(delim))
{
delim <- if(ext == "csv") "," else if(ext == "csv2") ";" else "\t"
}
dat <- self$download(NULL)
if(requireNamespace("readr"))
{
con <- rawConnection(dat, "r")
on.exit(try(close(con), silent=TRUE))
readr::read_delim(con, delim=delim, ...)
}
else utils::read.delim(text=rawToChar(dat), sep=delim, ...)
},

load_rdata=function(envir=parent.frame())
{
private$assert_is_file()
private$assert_file_extension_is("rdata", "rda")
rdata <- self$download(NULL)
load(rawConnection(rdata, open="rb"), envir=envir)
},

load_rds=function()
{
private$assert_is_file()
private$assert_file_extension_is("rds")
rds <- self$download(NULL)
unserialize(memDecompress(rds))
},

save_dataframe=function(df, file, delim=",", ...)
{
private$assert_is_folder()
conn <- rawConnection(raw(0), open="r+b")
if(requireNamespace("readr"))
readr::write_delim(df, conn, delim=delim, ...)
else utils::write.table(df, conn, sep=delim, ...)
seek(conn, 0)
self$upload(conn, file)
},

save_rdata=function(..., file, envir=parent.frame())
{
private$assert_is_folder()
# save to a temporary file as saving to a connection disables compression
tmpsave <- tempfile(fileext=".rdata")
on.exit(unlink(tmpsave))
save(..., file=tmpsave, envir=envir)
self$upload(tmpsave, file)
},

save_rds=function(object, file)
{
private$assert_is_folder()
# save to a temporary file to avoid dealing with memCompress/memDecompress hassles
tmpsave <- tempfile(fileext=".rdata")
on.exit(unlink(tmpsave))
saveRDS(object, tmpsave)
self$upload(tmpsave, file)
},

get_path=function()
{
private$assert_is_not_remote()
@@ -401,8 +494,8 @@ private=list(

upload_file=function(src, dest, blocksize)
{
con <- file(src, open="rb")
on.exit(close(con))
src <- normalize_src(src)
on.exit(close(src$con))

fullpath <- private$make_absolute_path(dest)
# possible fullpath formats -> string to append:
Expand All @@ -415,14 +508,14 @@ private=list(
else paste0(fullpath, "/createUploadSession")
upload_dest <- call_graph_endpoint(self$token, op, http_verb="POST")$uploadUrl

size <- file.size(src)
size <- src$size
next_blockstart <- 0
next_blockend <- size - 1
repeat
{
next_blocksize <- min(next_blockend - next_blockstart + 1, blocksize)
seek(con, next_blockstart)
body <- readBin(con, "raw", next_blocksize)
seek(src$con, next_blockstart)
body <- readBin(src$con, "raw", next_blocksize)
thisblock <- length(body)
if(thisblock == 0)
break
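For context, after reading each block the loop sends it to the upload session URL with a Content-Range header identifying which bytes of the file it covers. Conceptually (a sketch of the Graph upload-session protocol, not the package's exact call):

# each block covers bytes next_blockstart..(next_blockstart + thisblock - 1) of the file
range_hdr <- sprintf("bytes %.0f-%.0f/%.0f",
    next_blockstart, next_blockstart + thisblock - 1, size)
httr::PUT(upload_dest, body=body, httr::add_headers(`Content-Range`=range_hdr))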
download_file=function(dest, overwrite)
{
private$assert_is_file()
res <- self$do_operation("content", config=httr::write_disk(dest, overwrite=overwrite),
http_status_handler="pass")

# TODO: make less hacky
config <- if(is.character(dest))
httr::write_disk(dest, overwrite=overwrite)
else list()

res <- self$do_operation("content", config=config, http_status_handler="pass")
if(httr::status_code(res) >= 300)
{
on.exit(file.remove(dest))
if(is.character(dest))
on.exit(file.remove(dest))
httr::stop_for_status(res, paste0("complete operation. Message:\n",
sub("\\.$", "", error_message(httr::content(res)))))
}
invisible(NULL)

if(is.character(dest)) invisible(NULL) else httr::content(res, as="raw")
},

# dest = . or '' --> this item
@@ -534,6 +634,13 @@
{
if(!is.null(self$properties$remoteItem))
stop("This method is not applicable for a remote item", call.=FALSE)
},

assert_file_extension_is=function(...)
{
ext <- tolower(tools::file_ext(self$properties$name))
if(!(ext %in% unlist(list(...))))
stop("Not an allowed file type")
}
))

Expand Down
39 changes: 39 additions & 0 deletions R/normalize_src.R
@@ -0,0 +1,39 @@
normalize_src <- function(src)
{
UseMethod("normalize_src")
}


normalize_src.character <- function(src)
{
con <- file(src, open="rb")
size <- file.size(src)
list(con=con, size=size)
}


normalize_src.textConnection <- function(src)
{
# convert to raw connection
src <- charToRaw(paste0(readLines(src), collapse="\n"))
size <- length(src)
con <- rawConnection(src)
list(con=con, size=size)
}


normalize_src.rawConnection <- function(src)
{
# need to read the data to get object size (!)
size <- 0
repeat
{
x <- readBin(src, "raw", n=1e6)
if(length(x) == 0)
break
size <- size + length(x)
}
seek(src, 0) # reposition connection after reading
list(con=src, size=size)
}
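A short sketch of how this internal (unexported) helper normalises the different source types that `upload` accepts, assuming a file named DESCRIPTION exists in the working directory; the returned connections are left open for the caller, which `upload_file` closes via `on.exit`:

s1 <- normalize_src("DESCRIPTION")                    # file path: opened as a binary connection
s2 <- normalize_src(textConnection(c("a,b", "1,2")))  # text connection: converted to a raw connection
s3 <- normalize_src(rawConnection(as.raw(1:100)))     # raw connection: size found by reading, then rewound
c(s1$size, s2$size, s3$size)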
