Skip to content

Commit

Permalink
#8 more database work
Browse files Browse the repository at this point in the history
- new files for db stuff, separate files for redshift, rds and wait fxns
- add rds functions
- modify wait functions to use common plumbing with generator for each of redshift, rds
- now importing RMariaDB
sckott committed Dec 8, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 25969cc commit e43dddb
Showing 19 changed files with 462 additions and 74 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -30,6 +30,7 @@ Suggests:
roxyglobals,
DBI,
RPostgres,
RMariaDB,
testthat (>= 3.0.0),
vcr (>= 0.6.0),
withr
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -12,7 +12,10 @@ export(aws_bucket_tree)
export(aws_bucket_upload)
export(aws_buckets)
export(aws_db_cluster_status)
export(aws_db_instance_status)
export(aws_db_rds_client)
export(aws_db_rds_con)
export(aws_db_rds_create)
export(aws_db_redshift_client)
export(aws_db_redshift_con)
export(aws_db_redshift_create)
@@ -59,6 +62,7 @@ importFrom(lubridate,as_datetime)
importFrom(magrittr,"%>%")
importFrom(paws,costexplorer)
importFrom(paws,iam)
importFrom(paws,rds)
importFrom(paws,redshift)
importFrom(paws,s3)
importFrom(purrr,flatten)
155 changes: 155 additions & 0 deletions R/database-rds.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#' Connect to a database hosted on Amazon RDS
#'
#' Only MariaDB is supported for now.
#'
#' @export
#' @inheritParams aws_db_redshift_con
#' @details `user` and `pwd` are always required. When `id` is supplied,
#' `host`, `port` and `dbname` are looked up from the RDS instance with that
#' identifier and replace any values passed in. Otherwise all three of
#' `host`, `port` and `dbname` must be given.
#' @note RDS supports: Aurora (both PostgreSQL and MySQL compatible),
#' PostgreSQL, MariaDB, MySQL, Oracle, MS SQL Server
#' @return the connection object returned by `DBI::dbConnect()`
#' @examples \dontrun{
#' library(DBI)
#' library(RMariaDB)
#'
#' con_rds <- aws_db_rds_con("<define all params here>")
#' con_rds
#'
#' dbListTables(con_rds)
#' dbWriteTable(con_rds, "mtcars", mtcars)
#' dbListTables(con_rds)
#' dbReadTable(con_rds, "mtcars")
#'
#' library(dplyr)
#' tbl(con_rds, "mtcars")
#' }
aws_db_rds_con <- function(user, pwd, id = NULL, host = NULL, port = NULL,
                           dbname = NULL, ...) {
  check_for_pkg("DBI")
  check_for_pkg("RMariaDB")

  # credentials have no defaults and can not be looked up from AWS
  stopifnot(
    "user is required" = !missing(user),
    "pwd is required" = !missing(pwd)
  )

  # an instance id takes precedence over explicitly passed connection details
  if (!is.null(id)) {
    looked_up <- instance_con_info(id)
    host <- looked_up$host
    port <- looked_up$port
    dbname <- looked_up$dbname
  }

  if (is.null(host) || is.null(port) || is.null(dbname)) {
    stop("`host`, `port`, and `dbname` can not be NULL", call. = FALSE)
  }

  DBI::dbConnect(
    RMariaDB::MariaDB(),
    host = host,
    port = port,
    dbname = dbname,
    user = user,
    password = pwd,
    ...
  )
}

#' Create an RDS instance
#'
#' @export
#' @importFrom paws rds
#' @param id (character) required. instance identifier. The identifier for
#' this DB instance. This parameter is stored as a lowercase string.
#' Constraints: must contain from 1 to 63 letters, numbers, or hyphens; first
#' character must be a letter; can't end with a hyphen or contain two
#' consecutive hyphens.
#' @param class (character) required. The compute and memory capacity of the
#' DB instance, for example `db.m5.large`.
#' @param user (character) required. User name associated with the admin
#' user account for the instance that is being created.
#' @param pwd (character) required. Password associated with the admin user
#' account for the instance that is being created.
#' @param dbname (character) The name of the first database to be created when
#' the instance is created. default: "dev". additional databases can be
#' created within the instance
#' @param engine (character) The engine to use. default: "mariadb"
#' @param storage (integer/numeric) The amount of storage in gibibytes (GiB)
#' to allocate for the DB instance. default: 20
#' @param storage_encrypted (logical) Whether the DB instance is encrypted.
#' default: `TRUE`
#' @param security_group_ids (character) VPC security group identifiers; one
#' or more. If none are supplied, you should go into your AWS RDS
#' dashboard and add the appropriate VPC security group.
#' @param wait (logical) wait for the instance to initialize? default: `TRUE`.
#' If you don't wait (`FALSE`) then there's many operations you can not do
#' until the instance is available. If `wait=FALSE` use
#' `aws_db_instance_status()` to check on the instance status.
#' @param ... named parameters passed on to
#' [create_db_instance](https://www.paws-r-sdk.com/docs/rds_create_db_instance/)
#' @note See above link to `create_db_instance` docs for details on
#' requirements for each parameter
#' @return a list with methods for interfacing with RDS;
#' see <https://www.paws-r-sdk.com/docs/rds/>
aws_db_rds_create <-
  function(id, class, user, pwd, dbname = "dev",
           engine = "mariadb", storage = 20,
           storage_encrypted = TRUE, security_group_ids = NULL,
           wait = TRUE, ...) {
    # use the client we are handed rather than reaching into the package
    # environment a second time
    client <- aws_db_rds_client()
    client$create_db_instance(
      DBName = dbname, DBInstanceIdentifier = id,
      Engine = engine, DBInstanceClass = class,
      AllocatedStorage = storage,
      MasterUsername = user, MasterUserPassword = pwd,
      VpcSecurityGroupIds = security_group_ids,
      StorageEncrypted = storage_encrypted,
      ...
    )
    if (wait) {
      wait_for_instance(id)
    }
    client
  }

#' Get the `paws` RDS client
#' @export
#' @note the client is cached in `env64$rds`: an existing client is reused,
#' and a new one is created (and cached) only when none is found
#' @return a list with methods for interfacing with RDS;
#' see <https://www.paws-r-sdk.com/docs/rds/>
aws_db_rds_client <- function() {
  cached <- env64$rds
  if (!is.null(cached)) {
    return(cached)
  }
  env64$rds <- paws::rds()
  env64$rds
}

#' Get information for all RDS instances
#'
#' Goes through [aws_db_rds_client()] so the RDS client is created on first
#' use; calling this in a fresh session no longer fails on a missing client.
#'
#' @return a list of instance details, as returned by the `paws`
#' `describe_db_instances` method; the instances are in the `DBInstances`
#' element
#' @keywords internal
instance_details <- function() {
  # NOTE(review): only the first page of results is requested here; confirm
  # whether accounts with many instances need `Marker` pagination
  aws_db_rds_client()$describe_db_instances()
}

#' Get connection information for a single RDS instance
#'
#' @inheritParams aws_db_rds_create
#' @return a named list with elements `host`, `port` and `dbname` for the
#' instance whose identifier matches `id`. Errors if no instance matches.
#' @keywords internal
instance_con_info <- function(id) {
  deets <- instance_details()$DBInstances
  # instance identifiers are stored as lowercase strings, so match
  # case-insensitively
  target <- tolower(id)
  matches <- Filter(
    function(x) isTRUE(tolower(x$DBInstanceIdentifier) == target),
    deets
  )
  # fail with a clear message instead of "subscript out of bounds"
  if (length(matches) == 0) {
    stop("instance id '", id, "' not found", call. = FALSE)
  }
  z <- matches[[1]]
  list(host = z$Endpoint$Address, port = z$Endpoint$Port, dbname = z$DBName)
}

#' Get instance status
#' @export
#' @inheritParams aws_db_rds_create
#' @details The identifier is matched case-insensitively because instance
#' identifiers are stored as lowercase strings.
#' @return (character) the status of the instance, e.g., "creating",
#' "available", "not found". A warning is raised when the instance is
#' not found.
#' @examples \dontrun{
#' aws_db_instance_status(id = "thedbinstance")
#' }
aws_db_instance_status <- function(id) {
  deets <- instance_details()$DBInstances
  # a mixed-case `id` would otherwise never match the stored lowercase
  # identifier and be reported as "not found"
  target <- tolower(id)
  instance <- Filter(
    function(x) isTRUE(tolower(x$DBInstanceIdentifier) == target),
    deets
  )
  if (length(instance) == 0) {
    warning(glue::glue("instance id '{id}' not found"))
    return("not found")
  }
  instance[[1]]$DBInstanceStatus
}
53 changes: 4 additions & 49 deletions R/database.R → R/database-redshift.R
Original file line number Diff line number Diff line change
@@ -29,9 +29,6 @@
#'
#' library(dplyr)
#' tbl(con_rshift, "mtcars")
#'
#' con_rds <- aws_db_rds_con()
#' con_rds
#' }
aws_db_redshift_con <- function(user, pwd, id = NULL, host = NULL, port = NULL,
dbname = NULL, ...) {
@@ -62,25 +59,11 @@ aws_db_redshift_con <- function(user, pwd, id = NULL, host = NULL, port = NULL,
)
}

#' Get a database connection to Amazon RDS
#'
#' Placeholder implementation: the function always errors on its first line.
#'
#' @export
#' @param ... named parameters passed on to
#' [DBI::dbConnect](https://dbi.r-dbi.org/reference/dbconnect)
#' @note RDS supports: Aurora (both PostgreSQL and MySQL compatible),
#' PostgreSQL, MariaDB, MySQL, Oracle, MS SQL Server
aws_db_rds_con <- function(...) {
# unconditional stop(): every statement below is currently unreachable
stop("not working yet, check back in later", call. = FALSE)
check_for_pkg("DBI")
check_for_pkg("RPostgres")
DBI::dbConnect(RPostgres::Postgres(), ...)
}

#' Create a Redshift cluster
#'
#' @export
#' @importFrom paws redshift
#' @param id (characteR) Cluster identifier. Use this identifier to refer to
#' @param id (character) Cluster identifier. Use this identifier to refer to
#' the cluster for any subsequent cluster operations such as deleting or
#' modifying. The identifier also appears in the Amazon Redshift console.
#' Must be unique for all clusters within a Amazon Web Services account.
@@ -114,7 +97,7 @@ aws_db_redshift_create <-
function(id, user, pwd, dbname = "dev", cluster_type = "multi-node",
node_type = "dc2.large", number_nodes = 2,
security_group_ids = NULL, wait = TRUE, ...) {
env64$redshift <- paws::redshift()
aws_db_redshift_client()
env64$redshift$create_cluster(
DBName = dbname, ClusterIdentifier = id,
ClusterType = cluster_type, NodeType = node_type,
@@ -124,7 +107,7 @@ aws_db_redshift_create <-
...
)
if (wait) {
wait_until_cluster_available(id)
wait_for_cluster(id)
}
return(env64$redshift)
}
@@ -134,7 +117,7 @@ aws_db_redshift_create <-
#' @return a list with methods for interfacing with Redshift;
#' see <https://www.paws-r-sdk.com/docs/redshift/>
aws_db_redshift_client <- function() {
env64$redshift <- paws::redshift()
if (is.null(env64$redshift)) env64$redshift <- paws::redshift()
return(env64$redshift)
}

@@ -173,31 +156,3 @@ aws_db_cluster_status <- function(id) {
}
cluster[[1]]$ClusterStatus
}

#' Wait until a cluster has status "available"
#'
#' Polls `aws_db_cluster_status()` in a loop, showing a `cli` spinner, and
#' sleeping `sleep` seconds between checks.
#'
#' @importFrom cli cli_progress_bar cli_progress_update pb_spin
#' @inheritParams aws_db_redshift_create
#' @param sleep (integer/numeric) number of seconds to wait between
#' checks of the cluster status (i.e., http requests)
#' @return nothing, exits if there's an error, or if the while
#' loop completes; the loop ends when the status is "available" or
#' "not found"
#' @keywords internal
#' @examples \dontrun{
#' wait_until_cluster_available(id = "scotts-test-cluster-456")
#' }
wait_until_cluster_available <- function(id, sleep = 3) {
# NOTE(review): on exit the option is set to NULL rather than restored to
# whatever value it had before this call
options(cli.spinner = "simpleDots")
on.exit(options(cli.spinner = NULL), add = TRUE)
# `msg` is only referenced from inside the progress bar format string
msg <- "Redshift cluster initializing" # nolint
cli::cli_progress_bar(format = "{cli::pb_spin} {msg}")
is_not_available <- TRUE
while (is_not_available) {
status <- aws_db_cluster_status(id)
# stop polling when the cluster can not be found
if (status == "not found") break
cli::cli_progress_update()
# NOTE(review): the sleep also runs on the iteration where the status is
# already "available", because the check comes after it
Sys.sleep(sleep)
if (status == "available") {
is_not_available <- FALSE
}
}
}
53 changes: 53 additions & 0 deletions R/wait.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Wait function generator.
#
# Returns a function(id, sleep = 2, status_target = "available") that polls
# `fun(id)` until it returns `status_target` or "not found", showing a cli
# spinner labelled with `message` and sleeping `sleep` seconds between polls.
#
# fun: function taking an id and returning a single status string, e.g.
#   `aws_db_cluster_status` or `aws_db_instance_status`
# message: text shown next to the spinner
wait_until <- function(fun, message) {
  function(id, sleep = 2, status_target = "available") {
    # options() returns the previous values, so the caller's own
    # `cli.spinner` setting is restored on exit instead of being cleared
    old_opts <- options(cli.spinner = "simpleDots")
    on.exit(options(old_opts), add = TRUE)
    # `message` is looked up from the enclosing wait_until() call when cli
    # renders the format string
    cli::cli_progress_bar(format = "{cli::pb_spin} {message}") # nolint
    repeat {
      status <- fun(id)
      # finish straight away once the resource is gone or has reached the
      # target status; no need to sleep one more interval first
      if (status == "not found" || status == status_target) break
      cli::cli_progress_update()
      Sys.sleep(sleep)
    }
  }
}

#' Wait for a Redshift cluster to have a certain status
#'
#' Built with the `wait_until()` generator: the cluster status is checked
#' with `aws_db_cluster_status()` while a spinner is shown.
#'
#' @importFrom cli cli_progress_bar cli_progress_update pb_spin
#' @inheritParams aws_db_redshift_create
#' @param sleep (integer/numeric) number of seconds to wait between
#' checks of the cluster status (i.e., http requests)
#' @param status_target (character) status to wait for. default: "available"
#' @return nothing, exits if there's an error, or if the while
#' loop completes
#' @keywords internal
#' @examples \dontrun{
#' wait_for_cluster(id = "scotts-test-cluster-456")
#' }
wait_for_cluster <- wait_until(
  fun = aws_db_cluster_status,
  message = "Redshift cluster initializing"
)

#' Wait for an RDS instance to have a certain status
#'
#' Built with the `wait_until()` generator: the instance status is checked
#' with `aws_db_instance_status()` while a spinner is shown.
#'
#' @inheritParams wait_for_cluster
#' @return nothing, exits if there's an error, or if the while
#' loop completes
#' @keywords internal
#' @examples \dontrun{
#' wait_for_instance(id = "scotts-test-instance-456")
#' }
wait_for_instance <- wait_until(
  fun = aws_db_instance_status,
  message = "RDS instance initializing"
)
4 changes: 2 additions & 2 deletions man/aws_db_cluster_status.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions man/aws_db_instance_status.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions man/aws_db_rds_client.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 38 additions & 5 deletions man/aws_db_rds_con.Rd
71 changes: 71 additions & 0 deletions man/aws_db_rds_create.Rd
2 changes: 1 addition & 1 deletion man/aws_db_redshift_client.Rd
5 changes: 1 addition & 4 deletions man/aws_db_redshift_con.Rd
4 changes: 2 additions & 2 deletions man/aws_db_redshift_create.Rd
4 changes: 2 additions & 2 deletions man/cluster_con_info.Rd
2 changes: 1 addition & 1 deletion man/cluster_details.Rd
21 changes: 21 additions & 0 deletions man/instance_con_info.Rd
15 changes: 15 additions & 0 deletions man/instance_details.Rd
36 changes: 36 additions & 0 deletions man/wait_for_cluster.Rd
18 changes: 10 additions & 8 deletions man/wait_until_cluster_available.Rd → man/wait_for_instance.Rd

0 comments on commit e43dddb

Please sign in to comment.