-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #50 from VEuPathDB/add-differential-expression
Add differential expression
- Loading branch information
Showing
16 changed files
with
1,119 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
|
||
check_comparator <- function(object) { | ||
|
||
variable <- object@variable | ||
groupA <- object@groupA | ||
groupB <- object@groupB | ||
|
||
errors <- character() | ||
|
||
# Check that the variable has a reasonable variable spec | ||
if (is.na(variable@variableSpec@variableId)) { | ||
msg <- "Comparator variable needs a variable id" | ||
errors <- c(errors, msg) | ||
} | ||
|
||
# Check that groups exist | ||
if (!length(groupA) || !length(groupA)) { | ||
msg <- "Both groupA and groupB must be defined" | ||
errors <- c(errors, msg) | ||
} | ||
|
||
if (identical(variable@dataShape@value, "CONTINUOUS")) { | ||
## Checks for continuous variables | ||
|
||
# Err if variable is continuous but either group is missing a binStart or binEnd | ||
if (!all(unlist(lapply(groupA, function(bin) {return(!!length(bin@binStart))})))) { | ||
msg <- "All groupA bins must have a binStart" | ||
errors <- c(errors, msg) | ||
} | ||
if (!all(unlist(lapply(groupA, function(bin) {return(!!length(bin@binEnd))})))) { | ||
msg <- "All groupA bins must have a binEnd" | ||
errors <- c(errors, msg) | ||
} | ||
if (!all(unlist(lapply(groupB, function(bin) {return(!!length(bin@binStart))})))) { | ||
msg <- "All groupB bins must have a binStart" | ||
errors <- c(errors, msg) | ||
} | ||
if (!all(unlist(lapply(groupB, function(bin) {return(!!length(bin@binEnd))})))) { | ||
msg <- "All groupB bins must have a binEnd" | ||
errors <- c(errors, msg) | ||
} | ||
} else { | ||
## Checks for non-continuous variables | ||
|
||
# Ensure no values are duplicated between group A and group B | ||
groupAValues <- getGroupLabels(object, "groupA") | ||
groupBValues <- getGroupLabels(object, "groupB") | ||
|
||
if (!!length(intersect(groupAValues, groupBValues))) { | ||
msg <- "groupA and groupB cannot share members" | ||
errors <- c(errors, msg) | ||
} | ||
|
||
} | ||
|
||
return(if (length(errors) == 0) TRUE else errors) | ||
} | ||
|
||
#' Comparator | ||
#' | ||
#' A class for representing a variable that will be used to compare samples between two groups. The variable's | ||
#' values will be used to split samples into groups. | ||
#' | ||
#' @slot variable A VariableMetadata | ||
#' @slot groupA BinList | ||
#' @slot groupB BinList | ||
#' @name Comparator-class | ||
#' @rdname Comparator-class | ||
#' @include class-VariableMetadata.R | ||
#' @export | ||
Comparator <- setClass("Comparator", representation( | ||
variable = 'VariableMetadata', | ||
groupA = 'BinList', | ||
groupB = 'BinList' | ||
), prototype = prototype( | ||
variable = new("VariableMetadata"), | ||
groupA = new("BinList"), | ||
groupB = new("BinList") | ||
), validity = check_comparator) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
check_count_data_collection <- function(object) { | ||
errors <- character() | ||
df <- object@data | ||
record_id_col <- object@recordIdColumn | ||
ancestor_id_cols <- object@ancestorIdColumns | ||
all_id_cols <- c(record_id_col, ancestor_id_cols) | ||
|
||
|
||
allDataColsNumeric <- all(unlist(lapply(df[, !(names(df) %in% c(record_id_col, ancestor_id_cols))], is.numeric))) | ||
if (inherits(df, 'data.table')) allDataColsNumeric <- all(unlist(lapply(df[, !(names(df) %in% c(record_id_col, ancestor_id_cols)), with=F], is.numeric))) | ||
if (!allDataColsNumeric) { | ||
msg <- paste("All columns except the ID columns must be numeric.") | ||
errors <- c(errors, msg) | ||
} | ||
|
||
numeric_data <- df[, !(names(df) %in% all_id_cols)] | ||
if (inherits(df, 'data.table')) numeric_data <- df[, !(names(df) %in% all_id_cols), with=F] | ||
|
||
if (!identical(numeric_data, round(numeric_data))) { | ||
msg <- "Count data must be integer numbers." | ||
errors <- c(errors, msg) | ||
} | ||
|
||
if (any(df < 0, na.rm=TRUE)) { | ||
msg <- paste("Count data cannot contain negative values.") | ||
errors <- c(errors, msg) | ||
} | ||
|
||
|
||
return(if (length(errors) == 0) TRUE else errors) | ||
} | ||
|
||
#' Count Data | ||
#' | ||
#' A class for working with count data, including microbial or genetic assays. | ||
#' | ||
#' @slot data A data.frame of integer abundance counts with genes (species, etc.) as columns and samples as rows | ||
#' @slot sampleMetadata A data.frame of metadata about the samples with samples as rows and metadata variables as columns | ||
#' @slot recordIdColumn The name of the column containing IDs for the samples. All other columns will be treated as abundance values. | ||
#' @slot ancestorIdColumns A character vector of column names representing parent entities of the recordIdColumn. | ||
#' @slot imputeZero A logical indicating whether NA/ null values should be replaced with zeros. | ||
#' @name CountDataCollection-class | ||
#' @rdname CountDataCollection-class | ||
#' @include class-CollectionWithMetadata.R | ||
#' @export | ||
CountDataCollection <- setClass("CountDataCollection", contains = "CollectionWithMetadata", validity = check_count_data_collection) |
Oops, something went wrong.