diff --git a/DESCRIPTION b/DESCRIPTION index 4fbc607..90df3bb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: romic Type: Package Title: R for High-Dimensional Omic Data -Version: 1.2.3 +Version: 1.2.4 Authors@R: c( person( given = "Sean", diff --git a/R/filters.R b/R/filters.R index 334182e..bc52963 100644 --- a/R/filters.R +++ b/R/filters.R @@ -141,7 +141,7 @@ filter_tomic <- function( dplyr::filter(type %in% c("feature_primary_key", "sample_primary_key")) %>% dplyr::pull(variable) - updated_filtered_table <- anti_join( + updated_filtered_table <- dplyr::anti_join( triple_omic[[filter_table]], updated_filtered_table, by = join_keys diff --git a/man/filter_tomic.Rd b/man/filter_tomic.Rd index 38ddc63..3ab3468 100644 --- a/man/filter_tomic.Rd +++ b/man/filter_tomic.Rd @@ -9,7 +9,8 @@ filter_tomic( filter_type, filter_table, filter_value, - filter_variable = NULL + filter_variable = NULL, + invert = FALSE ) } \arguments{ @@ -20,7 +21,7 @@ filter_tomic( filter_value} \item{range}{filter filter_variable to using the range (i.e., lower and upper limit) provided in filter_value} - \item{apply}{a quosure as a \code{filter_value} to a table of interest} + \item{quo}{a quosure as a \code{filter_value} to a table of interest} }} \item{filter_table}{table where the filter should be applied} @@ -28,6 +29,9 @@ filter_tomic( \item{filter_value}{values to filter based on} \item{filter_variable}{variable to apply the filter to} + +\item{invert}{If FALSE (default) entities will be retained; if TRUE, they +will be removed.} } \value{ A \code{tomic} object where a subset of features, samples or diff --git a/tests/testthat/_snaps/filters.md b/tests/testthat/_snaps/filters.md index ba9f2bb..f8d9d6f 100644 --- a/tests/testthat/_snaps/filters.md +++ b/tests/testthat/_snaps/filters.md @@ -1,4 +1,4 @@ -# Try all of the filters +# Try all of the filters [plain] Code . @@ -15,6 +15,438 @@ filter_variable = "bar", filter_value = "biological process unknown") Condition Error in `filter_tomic()`: - ! bar is not a valid value for "filter_type", - valid values are all variables within the "features" table: name, BP, MF, systematic_name + ! bar is is not a valid value for `filter_type`, valid values are all variables within the features table: name, BP, MF, and systematic_name + +--- + + Code + filter_tomic(brauer_2008_triple, filter_type = "quo", filter_table = "features", + filter_variable = "bar", filter_value = rlang::quo(BP == + "biological process unknown")) + Message + ! `filter_variable` was provided when `filter_type` is quo + only a filter_value should be passed. filter_variable will be ignored + Output + $features + # A tibble: 110 x 4 + name BP MF systematic_name + + 1 YOL029C biological process unknown molecular function unknown YOL029C + 2 YHR036W biological process unknown molecular function unknown YHR036W + 3 YAL046C biological process unknown molecular function unknown YAL046C + 4 YHR151C biological process unknown molecular function unknown YHR151C + 5 YKL027W biological process unknown molecular function unknown YKL027W + 6 YBR220C biological process unknown molecular function unknown YBR220C + 7 YLR057W biological process unknown molecular function unknown YLR057W + 8 YDR239C biological process unknown molecular function unknown YDR239C + 9 KKQ8 biological process unknown protein kinase activity YKL168C + 10 UIP5 biological process unknown molecular function unknown YKR044W + # i 100 more rows + + $samples + # A tibble: 36 x 3 + sample nutrient DR + + 1 G0.05 G 0.05 + 2 G0.1 G 0.1 + 3 G0.15 G 0.15 + 4 G0.2 G 0.2 + 5 G0.25 G 0.25 + 6 G0.3 G 0.3 + 7 N0.05 N 0.05 + 8 N0.1 N 0.1 + 9 N0.15 N 0.15 + 10 N0.2 N 0.2 + # i 26 more rows + + $measurements + # A tibble: 3,960 x 3 + name sample expression + + 1 YOL029C G0.05 -0.22 + 2 YHR036W G0.05 -0.91 + 3 YAL046C G0.05 0.05 + 4 YHR151C G0.05 -0.53 + 5 YKL027W G0.05 -0.52 + 6 YBR220C G0.05 -1.06 + 7 YLR057W G0.05 -0.42 + 8 YDR239C G0.05 -0.55 + 9 KKQ8 G0.05 -0.6 + 10 UIP5 G0.05 -0.56 + # i 3,950 more rows + + $design + $design$features + # A tibble: 4 x 2 + variable type + + 1 name feature_primary_key + 2 systematic_name character + 3 BP character + 4 MF character + + $design$samples + # A tibble: 3 x 2 + variable type + + 1 sample sample_primary_key + 2 nutrient character + 3 DR numeric + + $design$measurements + # A tibble: 3 x 2 + variable type + + 1 name feature_primary_key + 2 sample sample_primary_key + 3 expression numeric + + $design$feature_pk + [1] "name" + + $design$sample_pk + [1] "sample" + + + attr(,"class") + [1] "triple_omic" "tomic" "general" + +# Try all of the filters [ansi] + + Code + . + Output + # A tibble: 1 x 1 + BP +  + 1 biological process unknown + +--- + + Code + filter_tomic(brauer_2008_triple, filter_type = "category", filter_table = "features", + filter_variable = "bar", filter_value = "biological process unknown") + Condition + Error in `filter_tomic()`: + ! bar is is not a valid value for `filter_type`, valid values are all variables within the features table: name, BP, MF, and systematic_name + +--- + + Code + filter_tomic(brauer_2008_triple, filter_type = "quo", filter_table = "features", + filter_variable = "bar", filter_value = rlang::quo(BP == + "biological process unknown")) + Message + ! `filter_variable` was provided when `filter_type` is quo + only a filter_value should be passed. filter_variable will be ignored + Output + $features + # A tibble: 110 x 4 + name BP MF systematic_name +     +  1 YOL029C biological process unknown molecular function unknown YOL029C +  2 YHR036W biological process unknown molecular function unknown YHR036W +  3 YAL046C biological process unknown molecular function unknown YAL046C +  4 YHR151C biological process unknown molecular function unknown YHR151C +  5 YKL027W biological process unknown molecular function unknown YKL027W +  6 YBR220C biological process unknown molecular function unknown YBR220C +  7 YLR057W biological process unknown molecular function unknown YLR057W +  8 YDR239C biological process unknown molecular function unknown YDR239C +  9 KKQ8 biological process unknown protein kinase activity YKL168C + 10 UIP5 biological process unknown molecular function unknown YKR044W + # i 100 more rows + + $samples + # A tibble: 36 x 3 + sample nutrient DR +    +  1 G0.05 G 0.05 +  2 G0.1 G 0.1 +  3 G0.15 G 0.15 +  4 G0.2 G 0.2 +  5 G0.25 G 0.25 +  6 G0.3 G 0.3 +  7 N0.05 N 0.05 +  8 N0.1 N 0.1 +  9 N0.15 N 0.15 + 10 N0.2 N 0.2 + # i 26 more rows + + $measurements + # A tibble: 3,960 x 3 + name sample expression +    +  1 YOL029C G0.05 -0.22 +  2 YHR036W G0.05 -0.91 +  3 YAL046C G0.05 0.05 +  4 YHR151C G0.05 -0.53 +  5 YKL027W G0.05 -0.52 +  6 YBR220C G0.05 -1.06 +  7 YLR057W G0.05 -0.42 +  8 YDR239C G0.05 -0.55 +  9 KKQ8 G0.05 -0.6 + 10 UIP5 G0.05 -0.56 + # i 3,950 more rows + + $design + $design$features + # A tibble: 4 x 2 + variable type +   + 1 name feature_primary_key + 2 systematic_name character + 3 BP character + 4 MF character + + $design$samples + # A tibble: 3 x 2 + variable type +   + 1 sample sample_primary_key + 2 nutrient character + 3 DR numeric + + $design$measurements + # A tibble: 3 x 2 + variable type +   + 1 name feature_primary_key + 2 sample sample_primary_key + 3 expression numeric + + $design$feature_pk + [1] "name" + + $design$sample_pk + [1] "sample" + + + attr(,"class") + [1] "triple_omic" "tomic" "general" + +# Try all of the filters [unicode] + + Code + . + Output + # A tibble: 1 × 1 + BP + + 1 biological process unknown + +--- + + Code + filter_tomic(brauer_2008_triple, filter_type = "category", filter_table = "features", + filter_variable = "bar", filter_value = "biological process unknown") + Condition + Error in `filter_tomic()`: + ! bar is is not a valid value for `filter_type`, valid values are all variables within the features table: name, BP, MF, and systematic_name + +--- + + Code + filter_tomic(brauer_2008_triple, filter_type = "quo", filter_table = "features", + filter_variable = "bar", filter_value = rlang::quo(BP == + "biological process unknown")) + Message + ! `filter_variable` was provided when `filter_type` is quo + only a filter_value should be passed. filter_variable will be ignored + Output + $features + # A tibble: 110 × 4 + name BP MF systematic_name + + 1 YOL029C biological process unknown molecular function unknown YOL029C + 2 YHR036W biological process unknown molecular function unknown YHR036W + 3 YAL046C biological process unknown molecular function unknown YAL046C + 4 YHR151C biological process unknown molecular function unknown YHR151C + 5 YKL027W biological process unknown molecular function unknown YKL027W + 6 YBR220C biological process unknown molecular function unknown YBR220C + 7 YLR057W biological process unknown molecular function unknown YLR057W + 8 YDR239C biological process unknown molecular function unknown YDR239C + 9 KKQ8 biological process unknown protein kinase activity YKL168C + 10 UIP5 biological process unknown molecular function unknown YKR044W + # ℹ 100 more rows + + $samples + # A tibble: 36 × 3 + sample nutrient DR + + 1 G0.05 G 0.05 + 2 G0.1 G 0.1 + 3 G0.15 G 0.15 + 4 G0.2 G 0.2 + 5 G0.25 G 0.25 + 6 G0.3 G 0.3 + 7 N0.05 N 0.05 + 8 N0.1 N 0.1 + 9 N0.15 N 0.15 + 10 N0.2 N 0.2 + # ℹ 26 more rows + + $measurements + # A tibble: 3,960 × 3 + name sample expression + + 1 YOL029C G0.05 -0.22 + 2 YHR036W G0.05 -0.91 + 3 YAL046C G0.05 0.05 + 4 YHR151C G0.05 -0.53 + 5 YKL027W G0.05 -0.52 + 6 YBR220C G0.05 -1.06 + 7 YLR057W G0.05 -0.42 + 8 YDR239C G0.05 -0.55 + 9 KKQ8 G0.05 -0.6 + 10 UIP5 G0.05 -0.56 + # ℹ 3,950 more rows + + $design + $design$features + # A tibble: 4 × 2 + variable type + + 1 name feature_primary_key + 2 systematic_name character + 3 BP character + 4 MF character + + $design$samples + # A tibble: 3 × 2 + variable type + + 1 sample sample_primary_key + 2 nutrient character + 3 DR numeric + + $design$measurements + # A tibble: 3 × 2 + variable type + + 1 name feature_primary_key + 2 sample sample_primary_key + 3 expression numeric + + $design$feature_pk + [1] "name" + + $design$sample_pk + [1] "sample" + + + attr(,"class") + [1] "triple_omic" "tomic" "general" + +# Try all of the filters [fancy] + + Code + . + Output + # A tibble: 1 × 1 + BP +  + 1 biological process unknown + +--- + + Code + filter_tomic(brauer_2008_triple, filter_type = "category", filter_table = "features", + filter_variable = "bar", filter_value = "biological process unknown") + Condition + Error in `filter_tomic()`: + ! bar is is not a valid value for `filter_type`, valid values are all variables within the features table: name, BP, MF, and systematic_name + +--- + + Code + filter_tomic(brauer_2008_triple, filter_type = "quo", filter_table = "features", + filter_variable = "bar", filter_value = rlang::quo(BP == + "biological process unknown")) + Message + ! `filter_variable` was provided when `filter_type` is quo + only a filter_value should be passed. filter_variable will be ignored + Output + $features + # A tibble: 110 × 4 + name BP MF systematic_name +     +  1 YOL029C biological process unknown molecular function unknown YOL029C +  2 YHR036W biological process unknown molecular function unknown YHR036W +  3 YAL046C biological process unknown molecular function unknown YAL046C +  4 YHR151C biological process unknown molecular function unknown YHR151C +  5 YKL027W biological process unknown molecular function unknown YKL027W +  6 YBR220C biological process unknown molecular function unknown YBR220C +  7 YLR057W biological process unknown molecular function unknown YLR057W +  8 YDR239C biological process unknown molecular function unknown YDR239C +  9 KKQ8 biological process unknown protein kinase activity YKL168C + 10 UIP5 biological process unknown molecular function unknown YKR044W + # ℹ 100 more rows + + $samples + # A tibble: 36 × 3 + sample nutrient DR +    +  1 G0.05 G 0.05 +  2 G0.1 G 0.1 +  3 G0.15 G 0.15 +  4 G0.2 G 0.2 +  5 G0.25 G 0.25 +  6 G0.3 G 0.3 +  7 N0.05 N 0.05 +  8 N0.1 N 0.1 +  9 N0.15 N 0.15 + 10 N0.2 N 0.2 + # ℹ 26 more rows + + $measurements + # A tibble: 3,960 × 3 + name sample expression +    +  1 YOL029C G0.05 -0.22 +  2 YHR036W G0.05 -0.91 +  3 YAL046C G0.05 0.05 +  4 YHR151C G0.05 -0.53 +  5 YKL027W G0.05 -0.52 +  6 YBR220C G0.05 -1.06 +  7 YLR057W G0.05 -0.42 +  8 YDR239C G0.05 -0.55 +  9 KKQ8 G0.05 -0.6 + 10 UIP5 G0.05 -0.56 + # ℹ 3,950 more rows + + $design + $design$features + # A tibble: 4 × 2 + variable type +   + 1 name feature_primary_key + 2 systematic_name character + 3 BP character + 4 MF character + + $design$samples + # A tibble: 3 × 2 + variable type +   + 1 sample sample_primary_key + 2 nutrient character + 3 DR numeric + + $design$measurements + # A tibble: 3 × 2 + variable type +   + 1 name feature_primary_key + 2 sample sample_primary_key + 3 expression numeric + + $design$feature_pk + [1] "name" + + $design$sample_pk + [1] "sample" + + + attr(,"class") + [1] "triple_omic" "tomic" "general" diff --git a/tests/testthat/test-filters.R b/tests/testthat/test-filters.R index adc1332..b619911 100644 --- a/tests/testthat/test-filters.R +++ b/tests/testthat/test-filters.R @@ -1,4 +1,4 @@ -test_that("Try all of the filters", { +cli::test_that_cli("Try all of the filters", { filter_tomic( brauer_2008_triple, @@ -53,15 +53,29 @@ test_that("Try all of the filters", { error = TRUE ) - expect_warning( + expect_snapshot( filter_tomic( brauer_2008_triple, filter_type = "quo", filter_table = "features", filter_variable = "bar", filter_value = rlang::quo(BP == "biological process unknown") - ), - regexp = "filter_type is quo" + ) ) }) + +test_that("Validate that filtering works with invert = TRUE", { + + retained_samples <- filter_tomic( + brauer_2008_triple, + filter_type = "category", + filter_table = "samples", + filter_variable = "nutrient", + filter_value = c("U", "L"), + invert = TRUE + )$samples + + expect_equal(nrow(retained_samples), 24) + expect_equal(unique(retained_samples$nutrient), c("G", "N", "P", "S")) +})