Skip to content

Commit

Permalink
Merge pull request #48 from VEuPathDB/soften-correlations-errors
Browse files Browse the repository at this point in the history
warning rather than error for non-cont vars in correlation inputs
  • Loading branch information
d-callan authored Apr 30, 2024
2 parents e49db12 + 35b6ab7 commit 0767353
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 3 deletions.
27 changes: 24 additions & 3 deletions R/method-correlation.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,22 @@ function(
}

# Check that all values are numeric
if (!identical(veupathUtils::findNumericCols(data1), names(data1))) { stop("All columns in data1 must be numeric.")}
if (!identical(veupathUtils::findNumericCols(data2), names(data2))) { stop("All columns in data2 must be numeric.")}
if (!identical(veupathUtils::findNumericCols(data1), names(data1))) {
warning("All columns in data1 are not numeric. Only numeric columns will be used.")
keepCols <- veupathUtils::findNumericCols(data1)
if (length(keepCols) == 0) {
stop("No numeric columns found in data1.")
}
data1 <- data1[, ..keepCols]
}
if (!identical(veupathUtils::findNumericCols(data2), names(data2))) {
warning("All columns in data2 are not numeric. Only numeric columns will be used.")
keepCols <- veupathUtils::findNumericCols(data2)
if (length(keepCols) == 0) {
stop("No numeric columns found in data2.")
}
data2 <- data2[, ..keepCols]
}


## Compute correlation
Expand Down Expand Up @@ -175,7 +189,14 @@ function(
verbose <- veupathUtils::matchArg(verbose)

# Check that all values are numeric
if (!identical(veupathUtils::findNumericCols(data1), names(data1))) { stop("All columns in data1 must be numeric.")}
if (!identical(veupathUtils::findNumericCols(data1), names(data1))) {
warning("All columns in data1 are not numeric. Only numeric columns will be used.")
keepCols <- veupathUtils::findNumericCols(data1)
if (length(keepCols) == 0) {
stop("No numeric columns found in data1.")
}
data1 <- data1[, ..keepCols]
}

## Compute correlation
# rownames and colnames should be the same in this case
Expand Down
43 changes: 43 additions & 0 deletions tests/testthat/test-correlation.R
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,49 @@ test_that("correlation fails with improper inputs", {
expect_error(corrleation(data, verbose=F))
})

# Regression test: correlating a collection against sample metadata that
# contains both categorical and continuous variables should complete and
# return statistics rather than fail.
test_that("correlation succeeds w a mix of cat and cont metadata", {
  nSamples <- 200

  # Three continuous variables keyed by sample id.
  df <- data.table::data.table(
    "entity.SampleID" = 1:nSamples,
    "entity.cont1" = rnorm(nSamples),
    "entity.cont2" = rnorm(nSamples),
    "entity.cont3" = rnorm(nSamples)
  )

  # Scale and round the continuous values to make them look like "counts",
  # then re-attach the sample id column that was dropped for the arithmetic.
  counts <- round(df[, -c("entity.SampleID")] * 1000)
  counts[, entity.SampleID := df$entity.SampleID]

  # Metadata deliberately mixes four categorical (character) columns with
  # three continuous ones.
  # `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
  sampleMetadata <- SampleMetadata(
    data = data.frame(list(
      "entity.SampleID" = df[["entity.SampleID"]],
      "entity.binA" = sample(c("binA_a", "binA_b"), nSamples, replace = TRUE),
      "entity.cat2" = sample(c("cat2_a", "cat2_b"), nSamples, replace = TRUE),
      "entity.cat3" = sample(paste0("cat3_", letters[1:3]), nSamples, replace = TRUE),
      "entity.cat4" = sample(paste0("cat4_", letters[1:4]), nSamples, replace = TRUE),
      "entity.cont1" = rnorm(nSamples),
      "entity.cont2" = rnorm(nSamples),
      "entity.cont3" = rnorm(nSamples)
    )),
    recordIdColumn = "entity.SampleID"
  )

  data <- CollectionWithMetadata(
    name = 'testing',
    data = counts,
    sampleMetadata = sampleMetadata,
    recordIdColumn = 'entity.SampleID'
  )

  # NOTE(review): this looks redundant with the `sampleMetadata` argument
  # passed to the constructor above; kept as-is. Confirm before removing.
  data@sampleMetadata <- sampleMetadata

  result <- correlation(
    data,
    method = 'pearson',
    proportionNonZeroThreshold = 0,
    verbose = FALSE
  )

  # Check stats (all correlation outputs)
  statsData <- result@statistics@statistics
  expect_s3_class(statsData, 'data.frame')
  expect_equal(names(statsData), c('data1', 'data2', 'correlationCoef', 'pValue'))
  # Should be number of variables in df1 * number of variables in df2:
  # 3 count columns x 3 continuous metadata columns = 9. This presumes the
  # four categorical metadata columns are excluded from the correlation.
  expect_equal(nrow(statsData), 9)
  expect_true(all(!is.na(statsData)))
})

test_that("toJSON works as expected for the CorrelationResult class", {

nSamples <- 200
Expand Down

0 comments on commit 0767353

Please sign in to comment.