From 08dd087747d8d4a811fc40e2252db7d6ceeec8df Mon Sep 17 00:00:00 2001
From: Philip Chase
Date: Fri, 21 Jul 2023 19:37:48 -0400
Subject: [PATCH 1/2] Add erase_move_project_field_log_records.R

---
Reviewer notes (this section is ignored by git am):
 * Skip the delete/join step when no project has 'Move project field' rows.
   map2_dfr() over empty input returns a tibble with no columns, so the
   left_join() on project_id would raise an error on every run that has
   nothing to clean.
 * Use TRUE instead of T in report/render_report.R.
 * Indentation and blank lines were reconstructed from a whitespace-collapsed
   copy of this patch. Verify the report/render_report.R hunk against the
   target file before applying.

 etl/erase_move_project_field_log_records.R | 141 +++++++++++++++++++++
 report/render_report.R                     |  53 ++++-----
 2 files changed, 162 insertions(+), 32 deletions(-)
 create mode 100644 etl/erase_move_project_field_log_records.R

diff --git a/etl/erase_move_project_field_log_records.R b/etl/erase_move_project_field_log_records.R
new file mode 100644
index 0000000..3c45917
--- /dev/null
+++ b/etl/erase_move_project_field_log_records.R
@@ -0,0 +1,141 @@
+library(tidyverse)
+library(lubridate)
+library(dotenv)
+library(redcapcustodian) # devtools::install_github("ctsit/redcapcustodian")
+library(DBI)
+library(RMariaDB)
+
+script_name <- "erase_move_project_field_log_records"
+
+connect_to_redcap_with_schema <- function(schema = NULL) {
+  if (is.null(schema)) {
+    schema <- Sys.getenv("REDCAP_DB_NAME")
+  }
+
+  if (Sys.getenv("REDCAP_DB_PORT") == "") {
+    port <- "3306"
+  } else {
+    port <- Sys.getenv("REDCAP_DB_PORT")
+  }
+  conn <- DBI::dbConnect(
+    RMariaDB::MariaDB(),
+    dbname = schema,
+    host = Sys.getenv("REDCAP_DB_HOST"),
+    user = Sys.getenv("REDCAP_DB_USER"),
+    password = Sys.getenv("REDCAP_DB_PASSWORD"),
+    port = port
+  )
+}
+
+# Get the stats of the log_event_table
+is_conn <- connect_to_redcap_with_schema(schema = "INFORMATION_SCHEMA")
+log_table_data <- tbl(is_conn, "TABLES") %>%
+  filter(TABLE_NAME %in% !!redcapcustodian::log_event_tables) %>%
+  collect() %>%
+  janitor::clean_names() %>%
+  select(
+    log_event_table = table_name,
+    log_event_data_length = data_length
+  )
+
+# Count one project's 'Move project field' log rows and sum the length of
+# their sql_log values, timing the query.
+measure_move_project_fields_in_one_project <- function(project_id, log_event_table, conn) {
+  sql <- paste(
+    "select project_id, sum(length(sql_log)) as sql_log_size, count(*) as n from",
+    log_event_table,
+    "where project_id =",
+    project_id,
+    "and description = 'Move project field'"
+  )
+  start <- Sys.time()
+  query_result <- DBI::dbGetQuery(conn, sql)
+  end <- Sys.time()
+
+  result <-
+    query_result %>%
+    mutate(
+      log_event_table = log_event_table,
+      elapsed_time_to_query = round(end - start)
+    ) %>%
+    rename(rows = n)
+
+  return(result)
+}
+
+# Delete one project's 'Move project field' log rows, reporting how many rows
+# were deleted and how long the delete took.
+delete_move_project_fields_in_one_project <- function(project_id, log_event_table, conn) {
+  sql <- paste(
+    "delete from",
+    log_event_table,
+    "where project_id =",
+    project_id,
+    "and description = 'Move project field'"
+  )
+  start <- Sys.time()
+  rows_deleted <- DBI::dbExecute(conn, sql)
+  end <- Sys.time()
+
+  result <-
+    tibble(rows_deleted) %>%
+    mutate(
+      project_id = project_id,
+      elapsed_time_to_delete = round(end - start)
+    ) %>%
+    select(-rows_deleted, everything(), rows_deleted)
+
+  return(result)
+}
+
+rc_conn <- connect_to_redcap_db()
+redcap_projects <-
+  tbl(rc_conn, "redcap_projects") %>%
+  select(project_id, log_event_table) %>%
+  collect()
+
+# measure the projects of interest
+size_by_project <- map2_dfr(
+  redcap_projects$project_id,
+  redcap_projects$log_event_table,
+  measure_move_project_fields_in_one_project,
+  rc_conn
+)
+
+projects_to_clean <- size_by_project %>%
+  filter(rows > 0) %>%
+  left_join(log_table_data, by = "log_event_table")
+
+# When nothing needs cleaning, map2_dfr() returns a tibble with no columns and
+# the join on project_id would fail, so record an empty result instead.
+if (nrow(projects_to_clean) > 0) {
+  deletions <- map2_dfr(
+    projects_to_clean$project_id,
+    projects_to_clean$log_event_table,
+    delete_move_project_fields_in_one_project,
+    rc_conn
+  ) %>%
+    left_join(projects_to_clean, by = "project_id") %>%
+    select(
+      project_id,
+      log_event_table,
+      rows,
+      rows_deleted,
+      sql_log_size,
+      log_event_data_length,
+      elapsed_time_to_query,
+      elapsed_time_to_delete
+    ) %>%
+    mutate(across(starts_with("elapsed"), as.numeric))
+} else {
+  deletions <- tibble()
+}
+
+activity_log <- lst(
+  deletions
+)
+
+log_job_success(jsonlite::toJSON(activity_log))
+
+dbDisconnect(rc_conn)
+dbDisconnect(is_conn)
diff --git a/report/render_report.R b/report/render_report.R
index 643dae0..2ae113c 100644
--- a/report/render_report.R
+++ b/report/render_report.R
@@ -5,49 +5,38 @@ library(lubridate)
 library(rmarkdown)
 library(sendmailR)
 library(redcapcustodian)
-library(argparse)
 
 init_etl("render_report")
 
-parser <- ArgumentParser()
-parser$add_argument("script_name", nargs=1, help="Script to be run")
+if (!dir.exists("output")){
+  dir.create("output")
+}
+
 if (!interactive()) {
-  args <- parser$parse_args()
-  script_name <- args$script_name
-  if(!fs::file_exists(script_name)) {
-    stop(sprintf("Specified file, %s, does not exist", script_name))
-  }
+  args <- commandArgs(trailingOnly = TRUE)
+  script_name <- word(args, 2, sep = "=")
 } else {
-  script_name <- "dummy.qmd"
-  stop(sprintf("Specified file, %s, does not exist", script_name))
+  script_name <- "sample_report.Rmd"
 }
 
 report_name <- word(script_name, 1, sep = "\\.")
-report_type <- word(script_name, 2, sep = "\\.")
 
 script_run_time <- set_script_run_time()
 
-output_file <-
-  paste0(str_replace(report_name, ".*/", ""),
-         "_",
-         format(script_run_time, "%Y%m%d%H%M%S"),
-         if_else(report_type == "qmd", ".pdf", "")
-         )
-
-if (report_type == "qmd") {
-  quarto::quarto_render(
-    script_name,
-    output_file = output_file,
-    output_format = "pdf"
-  )
-} else {
-  render(
-    script_name,
-    output_file = output_file
-  )
-}
-output_file_extension <- word(output_file, 2 , sep = "\\.")
-attachment_object <- mime_part(output_file, output_file)
+output_file <- here::here(
+  "output",
+  paste0(report_name,
+         "_",
+         format(script_run_time, "%Y%m%d%H%M%S"))
+)
+
+full_path_to_output_file <- render(
+  here::here("report", script_name),
+  output_file = output_file
+)
+
+output_file_extension <- word(full_path_to_output_file, 2 , sep = "\\.")
+attachment_object <- mime_part(full_path_to_output_file, basename(full_path_to_output_file))
 
 email_subject <- paste(report_name, "|", script_run_time)
 body <- "Please see the attached report."
From 505f869bf8f28c251976fe5a9bc49a9ab5a766d1 Mon Sep 17 00:00:00 2001
From: Philip Chase
Date: Fri, 21 Jul 2023 20:15:29 -0400
Subject: [PATCH 2/2] Update erase_move_project_field_log_records

Call init_etl().
---
 etl/erase_move_project_field_log_records.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etl/erase_move_project_field_log_records.R b/etl/erase_move_project_field_log_records.R
index 3c45917..be634ee 100644
--- a/etl/erase_move_project_field_log_records.R
+++ b/etl/erase_move_project_field_log_records.R
@@ -5,7 +5,7 @@ library(redcapcustodian) # devtools::install_github("ctsit/redcapcustodian")
 library(DBI)
 library(RMariaDB)
 
-script_name <- "erase_move_project_field_log_records"
+init_etl("erase_move_project_field_log_records")
 
 connect_to_redcap_with_schema <- function(schema = NULL) {
   if (is.null(schema)) {