From b37c01f6bb0392dc2e3277766406f0b44ef262fb Mon Sep 17 00:00:00 2001 From: Cole Beck Date: Fri, 12 Jul 2024 17:13:59 -0500 Subject: [PATCH 1/4] support missing cbox in missingSummary --- DESCRIPTION | 2 +- NEWS | 4 ++ R/missingSummary.R | 91 +++++++++++++++++++++++++++------------------- 3 files changed, 59 insertions(+), 38 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 19e52f52..9837e0db 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: redcapAPI Type: Package Title: Interface to 'REDCap' -Version: 2.9.4 +Version: 2.9.5 Authors@R: c( person("Benjamin", "Nutter", email = "benjamin.nutter@gmail.com", role = c("ctb", "aut")), diff --git a/NEWS b/NEWS index ffcd6240..38e8c679 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,10 @@ A future release of version 3.0.0 will introduce several breaking changes! * The `exportProjectInfo` and `exportBundle` functions are being discontinued. Their functionality is replaced by caching values on the connection object. * The `cleanseMetaData` function is being discontinued. +## 2.9.5 + +* Update `missingSummary` to support "missing checkbox" under special branching scenario. + ## 2.9.4 * Minor code refactoring diff --git a/R/missingSummary.R b/R/missingSummary.R index 98ac2878..7dfc1a1d 100644 --- a/R/missingSummary.R +++ b/R/missingSummary.R @@ -163,58 +163,75 @@ missingSummary_offline <- function(records, meta_data, logic){ records <- records_orig - + ignore_fld <- c(REDCAP_SYSTEM_FIELDS, meta_data$field_name[1]) + rec_name <- names(records) + # Remove checkbox suffixes. This allows logic to be matched to the field. + # defined in constants.R + rec_name_cb <- sub(REGEX_CHECKBOX_FIELD_NAME, "\\1", rec_name, perl = TRUE) + rec_name_clean <- sub("___[[:print:]]", "", rec_name) + # get each field's metadata row number + md_row <- match(rec_name_clean, meta_data$field_name) + # identify checkbox groups + is_cb <- !is.na(md_row) & meta_data[md_row, "field_type"] == "checkbox" + cb_grp <- unique(rec_name_cb[is_cb]) + cb_grp_id <- match(rec_name_cb, cb_grp, nomatch = 0) + # get the name of the form on which the field is saved + form_comp <- paste0(meta_data[md_row, "form_name"], "_complete") + # is this always true? rec_name_cb == rec_name_clean + for (i in seq_along(records)){ # Actual field name - this_field <- names(records)[i] - # Remove checkbox suffixes. This allows logic to be matched to the field. - this_field_base <- sub(REGEX_CHECKBOX_FIELD_NAME, #defined in constants.R - "\\1", this_field, perl = TRUE) + this_field <- rec_name[i] # get the logic expression for this iteration of the loop - this_logic <- logic[[this_field_base]] - + this_logic <- logic[[rec_name_cb[i]]] + tmp_form <- form_comp[i] + # We are only going to look at fields that are informative as missing. # we skip fixed fields (see unexported) and the ID variable. - if (!this_field %in% c(REDCAP_SYSTEM_FIELDS, - meta_data$field_name[1]) & - !is.null(this_logic)){ - - # get the name of the form on which the field is saved - tmp_form <- meta_data$form_name[meta_data$field_name == - sub("___[[:print:]]", "", names(records)[i])] - tmp_form <- paste0(tmp_form, "_complete") - + if (!this_field %in% ignore_fld & !is.null(this_logic)) { # NOTE: in the result, TRUE means the value is missing # FALSE means the value is non-missing - if (tmp_form == "_complete"){ - # If we are here, we did not find a matching form name. We will - # assume variables not on a form are always non-missing. - records[[i]] <- rep(FALSE, nrow(records)) - } - else if (!tmp_form %in% names(records)){ - # If we are here, we are evaluating a `[form]_complete` field. - # We just want to know if it is missing or not. - records[[i]] <- is.na(records[[i]]) + if (tmp_form == "NA_complete") { + # I doubt these scenarios are possible + # this_logic would be NULL and skipped + if(grepl('_complete$', this_field)) { + # If we are here, we are evaluating a `[form]_complete` field. + # We just want to know if it is missing or not. + records[[i]] <- is.na(records[[i]]) + } else { + # If we are here, we did not find a matching form name. We will + # assume variables not on a form are always non-missing. + records[[i]] <- rep(FALSE, nrow(records)) + } + } else if (!tmp_form %in% rec_name){ + # can this scenario occur? + # no `[form]_complete` to evaluate, check missingness of value + records[[i]] <- is.na(records[[i]]) } else if (!is.expression(this_logic)) { # If we are here, there is not branching logic. # If the `[form]_complete` field is missing, we return FALSE # If the `[form]_complete` is non-missing, we return the missingness of the value - records[[i]] <- ifelse(test = is.na(records_orig[[tmp_form]]), - yes = FALSE, - no = is.na(records_orig[[i]])) - } - else - { + records[[i]] <- !is.na(records_orig[[tmp_form]]) & is.na(records_orig[[i]]) + } else { # Here we have branching logic. # If the `[form]_complete` field is missing, we return FALSE # If the `[form]_complete` is non-missing: # The branching logic is satisfied: return the missingness of the value - # The branchign logic is not satisfied: return FALSE - records[[i]] <- ifelse(test = is.na(records_orig[[tmp_form]]), - yes = FALSE, - no = ifelse(test = with(records_orig, eval(this_logic)), - yes = is.na(records_orig[[i]]), - no = FALSE)) + # The branching logic is not satisfied: return FALSE + ans <- !is.na(records_orig[[tmp_form]]) & eval(this_logic, records_orig) + ans_ix <- which(ans) + if(length(ans_ix) > 0L) { + if(is_cb[i]) { + # special case for checkbox group + # examine all columns + cb_dat <- records_orig[ans_ix, cb_grp_id == cb_grp_id[i], drop = FALSE] + # return TRUE if nothing is checked + records[[i]] <- rowSums(cb_dat != "0", na.rm = TRUE) == 0 + } else { + ans[ans_ix] <- is.na(records_orig[[i]][ans_ix]) + } + } + records[[i]] <- ans } } } From 238575916ed267ffaf7e6d2c98bf3ef799917040 Mon Sep 17 00:00:00 2001 From: Cole Beck Date: Mon, 15 Jul 2024 13:51:35 -0500 Subject: [PATCH 2/4] drop redundant cond cbox columns in summary --- R/missingSummary.R | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/R/missingSummary.R b/R/missingSummary.R index 7dfc1a1d..9a66c29f 100644 --- a/R/missingSummary.R +++ b/R/missingSummary.R @@ -175,6 +175,8 @@ missingSummary_offline <- function(records, is_cb <- !is.na(md_row) & meta_data[md_row, "field_type"] == "checkbox" cb_grp <- unique(rec_name_cb[is_cb]) cb_grp_id <- match(rec_name_cb, cb_grp, nomatch = 0) + cb_logic_grp <- vector('list', length(cb_grp)) + names(cb_logic_grp) <- cb_grp # get the name of the form on which the field is saved form_comp <- paste0(meta_data[md_row, "form_name"], "_complete") # is this always true? rec_name_cb == rec_name_clean @@ -224,9 +226,14 @@ missingSummary_offline <- function(records, if(is_cb[i]) { # special case for checkbox group # examine all columns - cb_dat <- records_orig[ans_ix, cb_grp_id == cb_grp_id[i], drop = FALSE] - # return TRUE if nothing is checked - records[[i]] <- rowSums(cb_dat != "0", na.rm = TRUE) == 0 + cb_grp_name <- cb_grp[cb_grp_id[i]] + if(is.null(cb_logic_grp[[cb_grp_name]]$missing)) { + cb_dat <- records_orig[ans_ix, cb_grp_id == cb_grp_id[i], drop = FALSE] + # return TRUE if nothing is checked + cb_no_checks <- rowSums(cb_dat != "0", na.rm = TRUE) == 0 + cb_logic_grp[[cb_grp_name]]$missing <- unname(cb_no_checks) + } + records[[i]] <- cb_logic_grp[[cb_grp_name]]$missing } else { ans[ans_ix] <- is.na(records_orig[[i]][ans_ix]) } @@ -235,6 +242,18 @@ missingSummary_offline <- function(records, } } } + # only post-logic checkbox groups will have length; remove others + cb_logic_grp <- cb_logic_grp[lengths(cb_logic_grp) > 0L] + # remove redundant records and rename + if(length(cb_logic_grp) > 0L) { + coi <- lapply(names(cb_logic_grp), function(i) which(match(i, cb_grp) == cb_grp_id)) + noi <- lapply(coi, function(i) rec_name[i]) + cnoi <- vapply(coi, function(i) rec_name_cb[i][1], character(1)) + c_rm <- unlist(lapply(coi, function(i) i[-1])) + records <- records[,-c_rm] + c_rename <- match(vapply(noi, `[`, character(1), 1), names(records)) + names(records)[c_rename] <- cnoi + } records } From 4e1e5bf8615f85bc7a26c507df7c8a06ab4efc52 Mon Sep 17 00:00:00 2001 From: Cole Beck Date: Mon, 15 Jul 2024 15:11:46 -0500 Subject: [PATCH 3/4] fix checkbox error and add test --- R/missingSummary.R | 2 +- tests/testthat/test-356-missingSummary.R | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/R/missingSummary.R b/R/missingSummary.R index 9a66c29f..933e9772 100644 --- a/R/missingSummary.R +++ b/R/missingSummary.R @@ -233,7 +233,7 @@ missingSummary_offline <- function(records, cb_no_checks <- rowSums(cb_dat != "0", na.rm = TRUE) == 0 cb_logic_grp[[cb_grp_name]]$missing <- unname(cb_no_checks) } - records[[i]] <- cb_logic_grp[[cb_grp_name]]$missing + ans[ans_ix] <- cb_logic_grp[[cb_grp_name]]$missing } else { ans[ans_ix] <- is.na(records_orig[[i]][ans_ix]) } diff --git a/tests/testthat/test-356-missingSummary.R b/tests/testthat/test-356-missingSummary.R index 9ea09feb..fdecaabe 100644 --- a/tests/testthat/test-356-missingSummary.R +++ b/tests/testthat/test-356-missingSummary.R @@ -120,3 +120,25 @@ test_that( forms = "branching_logic"))) } ) + +# Test internal function for checkbox ------------------------------- + +md <- data.frame( + field_name = c('record_id','cb1','cb2','cb3'), + form_name = 'test', + field_type = c('text','checkbox','checkbox','checkbox'), + select_choices_or_calculations = c(NA, '1, yes | 2, no', '1, yes | 2, no', '1, yes | 2, no') +) +logic <- list(record_id = NA, cb1 = NA, cb2 = expression(TRUE), cb3 = expression(TRUE)) +r <- data.frame(record_id = 1:2, cb1___1 = 0, cb1___2 = 0:1, cb2___1 = 0, cb2___2 = 0:1, cb3___1 = 0, cb3___2 = 0:1, test_complete = 2) +o <- data.frame(record_id = 1:2, cb1___1 = FALSE, cb1___2 = FALSE, cb2 = c(TRUE, FALSE), cb3 = c(TRUE, FALSE), test_complete = FALSE) + +test_that( + "Missing values in checkbox groups are correctly identified around branching logic", + { + local_reproducible_output(width = 200) + expect_identical( + redcapAPI:::.missingSummary_isMissingInField(r, md, logic), o + ) + } +) From 96cb79b951e654342cf571f1fa23627be9b04dda Mon Sep 17 00:00:00 2001 From: Cole Beck Date: Mon, 15 Jul 2024 15:15:29 -0500 Subject: [PATCH 4/4] _complete keeps value, perhaps incorrectly? --- tests/testthat/test-356-missingSummary.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-356-missingSummary.R b/tests/testthat/test-356-missingSummary.R index fdecaabe..85f8e8f0 100644 --- a/tests/testthat/test-356-missingSummary.R +++ b/tests/testthat/test-356-missingSummary.R @@ -131,7 +131,7 @@ md <- data.frame( ) logic <- list(record_id = NA, cb1 = NA, cb2 = expression(TRUE), cb3 = expression(TRUE)) r <- data.frame(record_id = 1:2, cb1___1 = 0, cb1___2 = 0:1, cb2___1 = 0, cb2___2 = 0:1, cb3___1 = 0, cb3___2 = 0:1, test_complete = 2) -o <- data.frame(record_id = 1:2, cb1___1 = FALSE, cb1___2 = FALSE, cb2 = c(TRUE, FALSE), cb3 = c(TRUE, FALSE), test_complete = FALSE) +o <- data.frame(record_id = 1:2, cb1___1 = FALSE, cb1___2 = FALSE, cb2 = c(TRUE, FALSE), cb3 = c(TRUE, FALSE), test_complete = 2) test_that( "Missing values in checkbox groups are correctly identified around branching logic",