#8 more database work

- new files for db stuff, separate files for redshift, rds and wait fxns - add rds functions - modify wait functions to use common plumbing with generator for each of redshift, rds - now importing RMariaDB
getwilds · Dec 8, 2023 · e43dddb · e43dddb
1 parent 25969cc
commit e43dddb
Showing 19 changed files with 462 additions and 74 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -30,6 +30,7 @@ Suggests:
     roxyglobals,
     DBI,
     RPostgres,
+    RMariaDB,
     testthat (>= 3.0.0),
     vcr (>= 0.6.0),
     withr

diff --git a/NAMESPACE b/NAMESPACE
@@ -12,7 +12,10 @@ export(aws_bucket_tree)
 export(aws_bucket_upload)
 export(aws_buckets)
 export(aws_db_cluster_status)
+export(aws_db_instance_status)
+export(aws_db_rds_client)
 export(aws_db_rds_con)
+export(aws_db_rds_create)
 export(aws_db_redshift_client)
 export(aws_db_redshift_con)
 export(aws_db_redshift_create)
@@ -59,6 +62,7 @@ importFrom(lubridate,as_datetime)
 importFrom(magrittr,"%>%")
 importFrom(paws,costexplorer)
 importFrom(paws,iam)
+importFrom(paws,rds)
 importFrom(paws,redshift)
 importFrom(paws,s3)
 importFrom(purrr,flatten)

diff --git a/R/database-rds.R b/R/database-rds.R
@@ -0,0 +1,155 @@
+#' Get a database connection to Amazon RDS
+#'
+#' Only MariaDB is supported for now: the connection is always opened with
+#' the `RMariaDB::MariaDB()` driver.
+#'
+#' @export
+#' @inheritParams aws_db_redshift_con
+#' @details If `id` is supplied, `host`, `port` and `dbname` are looked up
+#' from the RDS instance with that identifier and replace any values passed
+#' in for those arguments.
+#' @note RDS supports: Aurora (both PostgreSQL and MySQL compatible),
+#' PostgreSQL, MariaDB, MySQL, Oracle, MS SQL Server
+#' @examples \dontrun{
+#' library(DBI)
+#' library(RMariaDB)
+#'
+#' con_rds <- aws_db_rds_con("<define all params here>")
+#' con_rds
+#'
+#' dbListTables(con_rds)
+#' dbWriteTable(con_rds, "mtcars", mtcars)
+#' dbListTables(con_rds)
+#' dbReadTable(con_rds, "mtcars")
+#'
+#' library(dplyr)
+#' tbl(con_rds, "mtcars")
+#' }
+aws_db_rds_con <- function(user, pwd, id = NULL, host = NULL, port = NULL,
+                           dbname = NULL, ...) {
+  check_for_pkg("DBI")
+  check_for_pkg("RMariaDB")
+
+  stopifnot("user is required" = !missing(user))
+  stopifnot("pwd is required" = !missing(pwd))
+
+  if (!is.null(id)) {
+    # instance_con_info() reads through env64$rds, so make sure the client
+    # has been created before doing the lookup
+    aws_db_rds_client()
+    con_info <- instance_con_info(id)
+    host <- con_info$host
+    port <- con_info$port
+    dbname <- con_info$dbname
+  }
+  if (any(vapply(list(host, port, dbname), is.null, logical(1)))) {
+    stop("`host`, `port`, and `dbname` can not be NULL", call. = FALSE)
+  }
+
+  DBI::dbConnect(
+    RMariaDB::MariaDB(),
+    host = host,
+    port = port,
+    dbname = dbname,
+    user = user,
+    password = pwd,
+    ...
+  )
+}
+
+#' Create an RDS instance
+#'
+#' @export
+#' @importFrom paws rds
+#' @param id (character) required. instance identifier. The identifier for
+#' this DB instance. This parameter is stored as a lowercase string.
+#' Constraints: must contain from 1 to 63 letters, numbers, or hyphens; first
+#' character must be a letter; can't end with a hyphen or contain two
+#' consecutive hyphens.
+#' @param class (character) required. The compute and memory capacity of the
+#' DB instance, for example `db.m5.large`.
+#' @param user (character) User name associated with the admin user account for
+#' the instance that is being created.
+#' @param pwd (character) Password associated with the admin user account for
+#' the instance that is being created.
+#' @param dbname (character) The name of the first database to be created when
+#' the instance is created. default: "dev". additional databases can be
+#' created within the instance
+#' @param engine (character) The engine to use. default: "mariadb"
+#' @param storage (numeric) The amount of storage in gibibytes (GiB) to
+#' allocate for the DB instance. default: 20
+#' @param storage_encrypted (logical) Whether the DB instance is encrypted.
+#' default: `TRUE`
+#' @param security_group_ids (character) VPC security group identifiers; one
+#' or more. If none are supplied, you should go into your AWS RDS
+#' dashboard and add the appropriate VPC security group.
+#' @param wait (logical) wait for instance to initialize? default: `TRUE`. If
+#' you don't wait (`FALSE`) then there are many operations you can not do
+#' until the instance is available. If `wait=FALSE` use
+#' `aws_db_instance_status()` to check on the instance status.
+#' @param ... named parameters passed on to
+#' [create_db_instance](https://www.paws-r-sdk.com/docs/rds_create_db_instance/)
+#' @note See above link to `create_db_instance` docs for details on
+#' requirements for each parameter
+#' @return a list with methods for interfacing with RDS;
+#' see <https://www.paws-r-sdk.com/docs/rds/>
+aws_db_rds_create <-
+  function(id, class, user, pwd, dbname = "dev",
+           engine = "mariadb", storage = 20,
+           storage_encrypted = TRUE, security_group_ids = NULL,
+           wait = TRUE, ...) {
+    # aws_db_rds_client() returns the client stored in env64$rds, creating
+    # it first if needed
+    rds <- aws_db_rds_client()
+    rds$create_db_instance(
+      DBName = dbname, DBInstanceIdentifier = id,
+      Engine = engine, DBInstanceClass = class,
+      AllocatedStorage = storage,
+      MasterUsername = user, MasterUserPassword = pwd,
+      VpcSecurityGroupIds = security_group_ids,
+      StorageEncrypted = storage_encrypted,
+      ...
+    )
+    if (wait) {
+      # polls aws_db_instance_status() until the instance is "available"
+      wait_for_instance(id)
+    }
+    rds
+  }
+
+#' Get the `paws` RDS client
+#' @export
+#' @note the client is kept in the package environment: an existing client
+#' is reused, and a new one is created only when none exists yet
+#' @return a list with methods for interfacing with RDS;
+#' see <https://www.paws-r-sdk.com/docs/rds/>
+aws_db_rds_client <- function() {
+  if (is.null(env64$rds)) {
+    env64$rds <- paws::rds()
+  }
+  env64$rds
+}
+
+#' Get information for all RDS instances
+#' @return a list of instance details as returned by the `paws`
+#' `describe_db_instances` operation; the instances themselves are in the
+#' `DBInstances` element
+#' @keywords internal
+instance_details <- function() {
+  # go through aws_db_rds_client() rather than env64$rds directly so this
+  # also works when no client has been created yet in this session
+  aws_db_rds_client()$describe_db_instances()
+}
+
+#' Get connection information for a single RDS instance
+#' @inheritParams aws_db_rds_create
+#' @return a named list with `host`, `port` and `dbname` for the instance
+#' whose identifier matches `id`; errors if no such instance is found
+#' @keywords internal
+instance_con_info <- function(id) {
+  deets <- instance_details()$DBInstances
+  matched <- Filter(function(x) x$DBInstanceIdentifier == id, deets)
+  # fail with a clear message instead of "subscript out of bounds"
+  if (length(matched) == 0) {
+    stop("instance id '", id, "' not found", call. = FALSE)
+  }
+  z <- matched[[1]]
+  list(host = z$Endpoint$Address, port = z$Endpoint$Port, dbname = z$DBName)
+}
+
+#' Get the status of an RDS instance
+#' @export
+#' @inheritParams aws_db_rds_create
+#' @return (character) the status of the instance, e.g., "creating",
+#' "available"; "not found" (with a warning) if no instance matches `id`
+#' @examples \dontrun{
+#' aws_db_instance_status(id = "thedbinstance")
+#' }
+aws_db_instance_status <- function(id) {
+  all_instances <- instance_details()$DBInstances
+  has_id <- function(x) x$DBInstanceIdentifier == id
+  matched <- Filter(has_id, all_instances)
+  if (length(matched) > 0) {
+    return(matched[[1]]$DBInstanceStatus)
+  }
+  warning(glue::glue("instance id '{id}' not found"))
+  "not found"
+}
diff --git a/R/database.R → R/database-redshift.R b/R/database.R → R/database-redshift.R
@@ -29,9 +29,6 @@
 #'
 #' library(dplyr)
 #' tbl(con_rshift, "mtcars")
-#'
-#' con_rds <- aws_db_rds_con()
-#' con_rds
 #' }
 aws_db_redshift_con <- function(user, pwd, id = NULL, host = NULL, port = NULL,
                                 dbname = NULL, ...) {
@@ -62,25 +59,11 @@ aws_db_redshift_con <- function(user, pwd, id = NULL, host = NULL, port = NULL,
   )
 }
 
-#' Get a database connection to Amazon RDS
-#'
-#' @export
-#' @param ... named parameters passed on to
-#' [DBI::dbConnect](https://dbi.r-dbi.org/reference/dbconnect)
-#' @note RDS supports: Aurora (both PostgreSQL and MySQL compatible),
-#' PostgreSQL, MariaDB, MySQL, Orace, MS SQL Server
-aws_db_rds_con <- function(...) {
-  stop("not working yet, check back in later", call. = FALSE)
-  check_for_pkg("DBI")
-  check_for_pkg("RPostgres")
-  DBI::dbConnect(RPostgres::Postgres(), ...)
-}
-
 #' Create a Redshift cluster
 #'
 #' @export
 #' @importFrom paws redshift
-#' @param id (characteR) Cluster identifier. Use this identifier to refer to
+#' @param id (character) Cluster identifier. Use this identifier to refer to
 #' the cluster for any subsequent cluster operations such as deleting or
 #' modifying. The identifier also appears in the Amazon Redshift console.
 #' Must be unique for all clusters within a Amazon Web Services account.
@@ -114,7 +97,7 @@ aws_db_redshift_create <-
   function(id, user, pwd, dbname = "dev", cluster_type = "multi-node",
            node_type = "dc2.large", number_nodes = 2,
            security_group_ids = NULL, wait = TRUE, ...) {
-    env64$redshift <- paws::redshift()
+    aws_db_redshift_client()
     env64$redshift$create_cluster(
       DBName = dbname, ClusterIdentifier = id,
       ClusterType = cluster_type, NodeType = node_type,
@@ -124,7 +107,7 @@ aws_db_redshift_create <-
       ...
     )
     if (wait) {
-      wait_until_cluster_available(id)
+      wait_for_cluster(id)
     }
     return(env64$redshift)
   }
@@ -134,7 +117,7 @@ aws_db_redshift_create <-
 #' @return a list with methods for interfacing with Redshift;
 #' see <https://www.paws-r-sdk.com/docs/redshift/>
 aws_db_redshift_client <- function() {
-  env64$redshift <- paws::redshift()
+  if (is.null(env64$redshift)) env64$redshift <- paws::redshift()
   return(env64$redshift)
 }
 
@@ -173,31 +156,3 @@ aws_db_cluster_status <- function(id) {
   }
   cluster[[1]]$ClusterStatus
 }
-
-#' Wait until a cluster has status "available"
-#' @importFrom cli cli_progress_bar cli_progress_update pb_spin
-#' @inheritParams aws_db_redshift_create
-#' @param sleep (integer/numeric) number of seconds to wait between
-#' checks of the cluster status (i.e., http requests)
-#' @return nothing, exits if there's an error, or if the while
-#' loop completes
-#' @keywords internal
-#' @examples \dontrun{
-#' wait_until_cluster_available(id = "scotts-test-cluster-456")
-#' }
-wait_until_cluster_available <- function(id, sleep = 3) {
-  options(cli.spinner = "simpleDots")
-  on.exit(options(cli.spinner = NULL), add = TRUE)
-  msg <- "Redshift cluster initializing" # nolint
-  cli::cli_progress_bar(format = "{cli::pb_spin} {msg}")
-  is_not_available <- TRUE
-  while (is_not_available) {
-    status <- aws_db_cluster_status(id)
-    if (status == "not found") break
-    cli::cli_progress_update()
-    Sys.sleep(sleep)
-    if (status == "available") {
-      is_not_available <- FALSE
-    }
-  }
-}
diff --git a/R/wait.R b/R/wait.R
@@ -0,0 +1,53 @@
+# wait fxn generator
+#
+# Builds a polling function around a status checker.
+#
+# fun: function taking an `id` and returning a single character status,
+#   e.g. `aws_db_cluster_status` or `aws_db_instance_status`
+# message: text shown next to the spinner while waiting
+#
+# The returned function takes `id`, `sleep` (seconds between status checks)
+# and `status_target` (status to wait for). It returns nothing; it stops
+# polling as soon as the status is `status_target` or "not found".
+wait_until <- function(fun, message) {
+  function(id, sleep = 2, status_target = "available") {
+    # keep the previous option value so it is restored on exit, rather than
+    # wiping out a spinner the user had configured
+    old_opts <- options(cli.spinner = "simpleDots")
+    on.exit(options(old_opts), add = TRUE)
+    cli::cli_progress_bar(format = "{cli::pb_spin} {message}") # nolint
+    repeat {
+      status <- fun(id)
+      # check before sleeping so we don't wait an extra interval once the
+      # target status has been reached (or the resource can't be found)
+      if (status %in% c("not found", status_target)) break
+      cli::cli_progress_update()
+      Sys.sleep(sleep)
+    }
+  }
+}
+
+#' Wait for a Redshift cluster to have a certain status
+#'
+#' Polls `aws_db_cluster_status()` until it returns `status_target` or
+#' "not found".
+#'
+#' @importFrom cli cli_progress_bar cli_progress_update pb_spin
+#' @inheritParams aws_db_redshift_create
+#' @param fun (function) a function to check status of something;
+#' must return a single character status string (it is compared against
+#' `status_target`), e.g., `aws_db_cluster_status` or
+#' `aws_db_instance_status`
+#' @param sleep (integer/numeric) number of seconds to wait between
+#' checks of the cluster status (i.e., http requests)
+#' @param status_target (character) status to wait for. default: "available"
+#' @return nothing, exits if there's an error, or if the while
+#' loop completes
+#' @keywords internal
+#' @examples \dontrun{
+#' wait_for_cluster(id = "scotts-test-cluster-456")
+#' }
+wait_for_cluster <- wait_until(
+  fun = aws_db_cluster_status,
+  message = "Redshift cluster initializing"
+)
+
+#' Wait for an RDS instance to have a certain status
+#'
+#' Polls `aws_db_instance_status()` until it returns `status_target` or
+#' "not found".
+#'
+#' @inheritParams wait_for_cluster
+#' @return nothing, exits if there's an error, or if the while
+#' loop completes
+#' @keywords internal
+#' @examples \dontrun{
+#' wait_for_instance(id = "thedbinstance")
+#' }
+wait_for_instance <- wait_until(
+  fun = aws_db_instance_status,
+  message = "RDS instance initializing"
+)
diff --git a/man/aws_db_cluster_status.Rd b/man/aws_db_cluster_status.Rd
diff --git a/man/aws_db_instance_status.Rd b/man/aws_db_instance_status.Rd
diff --git a/man/aws_db_rds_client.Rd b/man/aws_db_rds_client.Rd
diff --git a/man/aws_db_rds_con.Rd b/man/aws_db_rds_con.Rd
diff --git a/man/aws_db_rds_create.Rd b/man/aws_db_rds_create.Rd
diff --git a/man/aws_db_redshift_client.Rd b/man/aws_db_redshift_client.Rd
diff --git a/man/aws_db_redshift_con.Rd b/man/aws_db_redshift_con.Rd
diff --git a/man/aws_db_redshift_create.Rd b/man/aws_db_redshift_create.Rd
diff --git a/man/cluster_con_info.Rd b/man/cluster_con_info.Rd
diff --git a/man/cluster_details.Rd b/man/cluster_details.Rd
diff --git a/man/instance_con_info.Rd b/man/instance_con_info.Rd
diff --git a/man/instance_details.Rd b/man/instance_details.Rd
diff --git a/man/wait_for_cluster.Rd b/man/wait_for_cluster.Rd
diff --git a/man/wait_until_cluster_available.Rd → man/wait_for_instance.Rd b/man/wait_until_cluster_available.Rd → man/wait_for_instance.Rd