Skip to content

Commit

Permalink
Merge pull request #46 from VEuPathDB/get-arbitrary-vars
Browse files Browse the repository at this point in the history
Get arbitrary vars
  • Loading branch information
d-callan authored Apr 23, 2024
2 parents 26d33c8 + d6fe72c commit c79944c
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 8 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ export(getColName)
export(getCollectionData)
export(getCollectionName)
export(getCollectionNames)
export(getCollectionVariableNames)
export(getCollectionsList)
export(getDTWithImputedZeroes)
export(getDataFromSource)
Expand All @@ -83,6 +84,7 @@ export(getVariableSpec)
export(getVariableSpecColumnName)
export(is.POSIXct)
export(is.error)
export(isOneToManyWithAncestor)
export(logWithTime)
export(matchArg)
export(merge)
Expand Down
2 changes: 1 addition & 1 deletion R/class-CollectionWithMetadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ check_collection_with_metadata <- function(object) {
#' @include class-SampleMetadata.R
CollectionWithMetadata <- setClass("CollectionWithMetadata",
contains = "Collection",
representation = representation(
slots = list(
sampleMetadata = 'SampleMetadata'
),
validity = check_collection_with_metadata)
8 changes: 4 additions & 4 deletions R/method-correlation.R
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ function(
data1 <- pruneFeatures(data1, predicateFactory('variance', varianceThreshold), verbose)
data1 <- pruneFeatures(data1, predicateFactory('sd', stdDevThreshold), verbose)

values <- getCollectionData(data1, FALSE, FALSE, verbose)
values <- getCollectionData(data1, variableNames = NULL, ignoreImputeZero = FALSE, includeIds = FALSE, verbose = verbose)
if (metadataIsFirst) {
corrResult <- correlation(
getSampleMetadata(data1, TRUE, FALSE),
Expand Down Expand Up @@ -413,7 +413,7 @@ function(
data <- pruneFeatures(data, predicateFactory('variance', varianceThreshold), verbose)
data <- pruneFeatures(data, predicateFactory('sd', stdDevThreshold), verbose)

values <- getCollectionData(data, FALSE, FALSE, verbose)
values <- getCollectionData(data, variableNames = NULL, ignoreImputeZero = FALSE, includeIds = FALSE, verbose = verbose)
corrResult <- correlation(values, NULL, method = method, format = 'data.table', verbose = verbose)

veupathUtils::logWithTime(
Expand Down Expand Up @@ -499,8 +499,8 @@ function(
data2 <- pruneFeatures(data2, predicateFactory('variance', varianceThreshold), verbose)
data2 <- pruneFeatures(data2, predicateFactory('sd', stdDevThreshold), verbose)

values1 <- getCollectionData(data1, FALSE, TRUE, verbose)
values2 <- getCollectionData(data2, FALSE, TRUE, verbose)
values1 <- getCollectionData(data1, variableNames = NULL, ignoreImputeZero = FALSE, includeIds = TRUE, verbose = verbose)
values2 <- getCollectionData(data2, variableNames = NULL, ignoreImputeZero = FALSE, includeIds = TRUE, verbose = verbose)

veupathUtils::logWithTime(
paste(
Expand Down
74 changes: 71 additions & 3 deletions R/methods-Collections.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,32 +32,69 @@ setGeneric("getCollectionNames", function(object) standardGeneric("getCollection
#' @aliases getCollectionNames,Collections-method
setMethod("getCollectionNames", "Collections", function(object) return(sapply(object, name)))

#' Get Collection Variable Names
#'
#' Get the names of the variables in the Collection
#' @param object A Collection
#' @param ... Additional arguments
#' @return A character vector of variable names
#' @export
#' @rdname getCollectionVariableNames
setGeneric("getCollectionVariableNames", function(object, ...) standardGeneric("getCollectionVariableNames"))

#' @rdname getCollectionVariableNames
#' @aliases getCollectionVariableNames,Collection-method
setMethod("getCollectionVariableNames", "Collection", function(object) {
allIdColumns <- getIdColumns(object)

return(colnames(object@data)[!(colnames(object@data) %in% allIdColumns)])
})

#' Get data.table of values from Collection
#'
#' Returns a data.table of collection values, respecting the
#' `imputeZero` slot.
#'
#' @param object Collection
#' @param variableNames A character vector representing the name of the variables to return.
#' If NULL, returns all variables
#' @param ignoreImputeZero boolean indicating whether we should respect the imputeZero slot
#' @param includeIds boolean indicating whether we should include recordIdColumn and ancestorIdColumns
#' @param verbose boolean indicating if timed logging is desired
#' @return data.table of values
#' @rdname getCollectionData
#' @export
setGeneric("getCollectionData",
function(object, ignoreImputeZero = c(FALSE, TRUE), includeIds = c(TRUE, FALSE), verbose = c(TRUE, FALSE)) standardGeneric("getCollectionData"),
function(
object,
variableNames = NULL,
ignoreImputeZero = c(FALSE, TRUE),
includeIds = c(TRUE, FALSE),
verbose = c(TRUE, FALSE)
) standardGeneric("getCollectionData"),
signature = c("object")
)

#' @rdname getCollectionData
#' @aliases getCollectionData,Collection-method
setMethod("getCollectionData", signature("Collection"), function(object, ignoreImputeZero = c(FALSE, TRUE), includeIds = c(TRUE, FALSE), verbose = c(TRUE, FALSE)) {
setMethod("getCollectionData", signature("Collection"),
function(
object,
variableNames = NULL,
ignoreImputeZero = c(FALSE, TRUE),
includeIds = c(TRUE, FALSE),
verbose = c(TRUE, FALSE)
) {
ignoreImputeZero <- veupathUtils::matchArg(ignoreImputeZero)
includeIds <- veupathUtils::matchArg(includeIds)
verbose <- veupathUtils::matchArg(verbose)

dt <- object@data
allIdColumns <- getIdColumns(object)
if (is.null(variableNames)) {
dt <- object@data
} else {
dt <- object@data[, c(allIdColumns, variableNames), with = FALSE]
}

# Check that incoming dt meets requirements
if (!inherits(dt, 'data.table')) {
Expand Down Expand Up @@ -119,4 +156,35 @@ setMethod("pruneFeatures", signature("Collection"), function(object, predicate,

validObject(object)
return(object)
})

#' Is One to Many With Ancestor
#'
#' Determines if the collection is one-to-many with its ancestor(s).
#' Importantly, if there are no ancestors, this function returns FALSE.
#' @param object Collection
#' @param verbose boolean indicating if timed logging is desired
#' @return boolean
#' @rdname isOneToManyWithAncestor
#' @export
setGeneric("isOneToManyWithAncestor",
function(object, verbose = c(TRUE, FALSE)) standardGeneric("isOneToManyWithAncestor"),
signature = c("object")
)

#' @rdname isOneToManyWithAncestor
#' @aliases isOneToManyWithAncestor,Collection-method
setMethod("isOneToManyWithAncestor", signature("Collection"), function(object, verbose = c(TRUE, FALSE)) {
ancestorIdColumns <- object@ancestorIdColumns

if (length(ancestorIdColumns) == 0) return(FALSE)

# if count of unique ancestors are the same as ancestors, then its one-to-one
uniqueAncestors <- unique(object@data[, ..ancestorIdColumns])

if (nrow(uniqueAncestors) == nrow(object@data)) {
return(FALSE)
} else {
return(TRUE)
}
})
5 changes: 5 additions & 0 deletions man/getCollectionData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/getCollectionVariableNames.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions man/isOneToManyWithAncestor.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit c79944c

Please sign in to comment.