Skip to content

Commit

Permalink
Merge pull request #50 from VEuPathDB/add-differential-expression
Browse files Browse the repository at this point in the history
Add differential expression
  • Loading branch information
asizemore authored Dec 5, 2024
2 parents 42792cd + fd7d49c commit d443d99
Show file tree
Hide file tree
Showing 16 changed files with 1,119 additions and 1 deletion.
4 changes: 4 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,20 @@ Collate:
'class-SampleMetadata.R'
'class-CollectionWithMetadata.R'
'class-VariableMetadata.R'
'class-Comparator.R'
'class-ComputeResult.R'
'class-CorrelationResult.R'
'class-CountDataCollection.R'
'class-Megastudy.R'
'class-Range.R'
'class-Statistic.R'
'data.R'
'method-correlation.R'
'method-differentialExpression.R'
'methods-Bin.R'
'methods-CollectionWithMetadata.R'
'methods-Collections.R'
'methods-Comparator.R'
'methods-VariableMetadata.R'
'methods-ComputeResult.R'
'methods-Megastudy.R'
Expand Down
13 changes: 13 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@ export(BinList)
export(Collection)
export(CollectionWithMetadata)
export(Collections)
export(Comparator)
export(ComputeResult)
export(CorrelationResult)
export(CountDataCollection)
export(DataShape)
export(DataType)
export(DifferentialExpressionResult)
export(Megastudy)
export(PlotReference)
export(Range)
Expand All @@ -40,6 +43,7 @@ export(cut_interval)
export(cut_number)
export(cut_width)
export(data_frame)
export(differentialExpression)
export(findAllColNames)
export(findAncestorIdColumns)
export(findColNamesByPredicate)
Expand Down Expand Up @@ -73,6 +77,7 @@ export(getDataFromSource)
export(getDataTable)
export(getDiscretizedBins)
export(getEntityId)
export(getGroupLabels)
export(getHasStudyDependentVocabulary)
export(getIdColumns)
export(getMetadataVariableNames)
Expand Down Expand Up @@ -118,10 +123,13 @@ exportClasses(Bin)
exportClasses(BinList)
exportClasses(Collection)
exportClasses(CollectionWithMetadata)
exportClasses(Comparator)
exportClasses(ComputeResult)
exportClasses(CorrelationResult)
exportClasses(CountDataCollection)
exportClasses(DataShape)
exportClasses(DataType)
exportClasses(DifferentialExpressionResult)
exportClasses(Megastudy)
exportClasses(PlotReference)
exportClasses(Range)
Expand All @@ -136,6 +144,7 @@ exportClasses(VariableMetadataList)
exportClasses(VariableSpec)
exportClasses(VariableSpecList)
exportMethods(as.numeric)
exportMethods(differentialExpression)
exportMethods(findAllColNames)
exportMethods(findColNamesByPredicate)
exportMethods(findColNamesFromPlotRef)
Expand All @@ -155,6 +164,7 @@ exportMethods(getColName)
exportMethods(getDTWithImputedZeroes)
exportMethods(getDataTable)
exportMethods(getEntityId)
exportMethods(getGroupLabels)
exportMethods(getHasStudyDependentVocabulary)
exportMethods(getStudyIdColumnName)
exportMethods(getVariableSpec)
Expand All @@ -167,6 +177,7 @@ exportMethods(whichValuesInBinList)
exportMethods(writeData)
exportMethods(writeMeta)
exportMethods(writeStatistics)
import(DESeq2)
import(data.table)
importFrom(Hmisc,rcorr)
importFrom(S4Vectors,SimpleList)
Expand All @@ -175,6 +186,8 @@ importFrom(SpiecEasi,sparcc)
importFrom(SpiecEasi,sparccboot)
importFrom(digest,digest)
importFrom(microbenchmark,microbenchmark)
importFrom(purrr,discard)
importFrom(purrr,map)
importFrom(purrr,map_lgl)
importFrom(purrr,none)
importFrom(stringi,stri_detect_regex)
79 changes: 79 additions & 0 deletions R/class-Comparator.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@

# Validity function for the Comparator class.
#
# Collects every problem found with the comparator and returns them together,
# following the S4 validity convention.
#
# object: the Comparator being validated. Reads the `variable`, `groupA` and
#   `groupB` slots.
# Returns TRUE when no problems are found, otherwise a character vector with
#   one message per problem.
check_comparator <- function(object) {

  variable <- object@variable
  groupA <- object@groupA
  groupB <- object@groupB

  errors <- character()

  # Check that the variable has a reasonable variable spec
  if (is.na(variable@variableSpec@variableId)) {
    errors <- c(errors, "Comparator variable needs a variable id")
  }

  # Check that both groups exist. Each group is tested on its own: an empty
  # groupB must be reported even when groupA is populated.
  if (length(groupA) == 0 || length(groupB) == 0) {
    errors <- c(errors, "Both groupA and groupB must be defined")
  }

  if (identical(variable@dataShape@value, "CONTINUOUS")) {
    ## Checks for continuous variables

    # Every bin in either group needs both a binStart and a binEnd. Messages
    # are emitted in the order groupA start, groupA end, groupB start, groupB end.
    bin_groups <- list(groupA = groupA, groupB = groupB)
    for (group_name in names(bin_groups)) {
      for (slot_name in c("binStart", "binEnd")) {
        has_value <- unlist(lapply(bin_groups[[group_name]], function(bin) {
          length(methods::slot(bin, slot_name)) > 0
        }))
        # all(NULL) is TRUE, so an empty group adds no per-bin message here;
        # the emptiness itself is reported by the check above.
        if (!all(has_value)) {
          errors <- c(
            errors,
            paste0("All ", group_name, " bins must have a ", slot_name)
          )
        }
      }
    }
  } else {
    ## Checks for non-continuous variables

    # Ensure no values are duplicated between group A and group B
    groupAValues <- getGroupLabels(object, "groupA")
    groupBValues <- getGroupLabels(object, "groupB")

    if (length(intersect(groupAValues, groupBValues)) > 0) {
      errors <- c(errors, "groupA and groupB cannot share members")
    }
  }

  if (length(errors) == 0) TRUE else errors
}

#' Comparator
#'
#' An S4 class describing a variable used to compare samples between two
#' groups. The values of the variable determine which samples fall into
#' each group. Objects are validated by `check_comparator`.
#'
#' @slot variable A VariableMetadata describing the variable to split on
#' @slot groupA A BinList defining the first group
#' @slot groupB A BinList defining the second group
#' @name Comparator-class
#' @rdname Comparator-class
#' @include class-VariableMetadata.R
#' @export
Comparator <- setClass(
  "Comparator",
  slots = c(
    variable = "VariableMetadata",
    groupA = "BinList",
    groupB = "BinList"
  ),
  prototype = prototype(
    variable = new("VariableMetadata"),
    groupA = new("BinList"),
    groupB = new("BinList")
  ),
  validity = check_comparator
)
46 changes: 46 additions & 0 deletions R/class-CountDataCollection.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Validity function for the CountDataCollection class.
#
# Every column of `object@data` that is not named in `recordIdColumn` or
# `ancestorIdColumns` is treated as count data and must be numeric,
# whole-valued and non-negative. NA values are ignored by the whole-number
# and negativity checks.
#
# object: the CountDataCollection being validated. Reads the `data`,
#   `recordIdColumn` and `ancestorIdColumns` slots.
# Returns TRUE when no problems are found, otherwise a character vector with
#   one message per problem.
check_count_data_collection <- function(object) {
  errors <- character()
  df <- object@data
  all_id_cols <- c(object@recordIdColumn, object@ancestorIdColumns)

  # Select the count columns once. data.table needs `with = FALSE` for column
  # selection by logical vector; a plain data.frame needs `drop = FALSE` so a
  # single count column is not collapsed to a bare vector.
  is_count_col <- !(names(df) %in% all_id_cols)
  count_data <- if (inherits(df, "data.table")) {
    df[, is_count_col, with = FALSE]
  } else {
    df[, is_count_col, drop = FALSE]
  }

  col_is_numeric <- vapply(count_data, is.numeric, logical(1))
  if (!all(col_is_numeric)) {
    errors <- c(errors, "All columns except the ID columns must be numeric.")
  }

  # Value checks run only on the numeric count columns. Non-numeric columns
  # are already reported above, and calling round() on them would throw
  # instead of producing a validity message. as.list() is used because
  # single-bracket indexing with a logical vector selects rows on a data.table.
  numeric_cols <- as.list(count_data)[col_is_numeric]

  all_whole <- all(vapply(
    numeric_cols,
    function(col) all(col == round(col), na.rm = TRUE),
    logical(1)
  ))
  if (!all_whole) {
    errors <- c(errors, "Count data must be integer numbers.")
  }

  # ID columns are deliberately excluded: comparing a character ID with 0 is
  # a string comparison and can falsely flag IDs that sort before "0".
  any_negative <- any(vapply(
    numeric_cols,
    function(col) any(col < 0, na.rm = TRUE),
    logical(1)
  ))
  if (any_negative) {
    errors <- c(errors, "Count data cannot contain negative values.")
  }

  if (length(errors) == 0) TRUE else errors
}

#' Count Data
#'
#' An S4 class for count data, such as microbial or genetic assays. It extends
#' CollectionWithMetadata and is validated by `check_count_data_collection`.
#'
#' @slot data A data.frame of integer abundance counts, with samples as rows and genes (species, etc.) as columns
#' @slot sampleMetadata A data.frame of sample metadata, with samples as rows and metadata variables as columns
#' @slot recordIdColumn The name of the column holding the sample IDs. Every other column is treated as abundance values.
#' @slot ancestorIdColumns A character vector of column names representing parent entities of the recordIdColumn.
#' @slot imputeZero A logical indicating whether NA/ null values should be replaced with zeros.
#' @name CountDataCollection-class
#' @rdname CountDataCollection-class
#' @include class-CollectionWithMetadata.R
#' @export
CountDataCollection <- setClass(
  "CountDataCollection",
  contains = "CollectionWithMetadata",
  validity = check_count_data_collection
)
Loading

0 comments on commit d443d99

Please sign in to comment.