Skip to content

Commit

Permalink
Merge branch 'issue-24' into develop
Browse files Browse the repository at this point in the history
* issue-24:
  Increment version number to 0.0.0.9026
  Added (commented) test for BAPS data (#24)
  Renamed global environment, fixed assignments (#24)
  Fixes to `laskeMuutokset()` (#24)
  Syntax fix to `indMix()` (#24)
  Retranslated `computeDiffInCounts()` (#24)
  Changed default `npops` to 3 (#24)
  `laskeMuutokset()` reads `npop` from parent functions (#24)
  Using a dedicated environment for globals (#24)
  Retranslated `computePopulationLogml()` (#24)
  Added/renamed BAPS example data (#24)
  • Loading branch information
wleoncio committed Apr 11, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
2 parents 9b8da3a + e615bb8 commit 2d5d93a
Showing 30 changed files with 194 additions and 183 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rBAPS
Title: Bayesian Analysis of Population Structure
Version: 0.0.0.9025
Version: 0.0.0.9026
Date: 2020-11-09
Authors@R:
c(
2 changes: 1 addition & 1 deletion R/addToSummary.R
Original file line number Diff line number Diff line change
@@ -7,7 +7,7 @@ addToSummary <- function(logml, partitionSummary, worstIndex) {
apu <- matlab2r::find(abs(partitionSummary[, 2] - logml) < 1e-5)
if (isempty(apu)) {
# Nyt l�ydetty partitio ei ole viel� kirjattuna summaryyn.
npops <- length(unique(PARTITION))
npops <- length(unique(globals$PARTITION))
partitionSummary[worstIndex, 1] <- npops
partitionSummary[worstIndex, 2] <- logml
added <- 1
6 changes: 3 additions & 3 deletions R/checkLogml.R
Original file line number Diff line number Diff line change
@@ -5,12 +5,12 @@ checkLogml <- function(priorTerm, adjprior, cliques, separators) {
# global SEPCOUNTS
# global PARTITION

npops <- length(unique(PARTITION))
npops <- length(unique(globals$PARTITION))
cliqcounts <- computeCounts(cliques, separators, npops)$cliqcounts
sepcounts <- computeCounts(cliques, separators, npops)$sepcounts

CLIQCOUNTS <- cliqcounts
SEPCOUNTS <- sepcounts
globals$CLIQCOUNTS <- cliqcounts
globals$SEPCOUNTS <- sepcounts

logml <- computeLogml(adjprior, priorTerm)$logml
spatialPrior <- computeLogml(adjprior, priorTerm)$spatialPrior
16 changes: 8 additions & 8 deletions R/computeAllFreqs2.R
Original file line number Diff line number Diff line change
@@ -3,12 +3,12 @@
#' j 1/noalle(j) verran.
#' @param noalle noalle
computeAllFreqs2 <- function(noalle) {
COUNTS <- ifelse(isGlobalEmpty(COUNTS), vector(), COUNTS)
SUMCOUNTS <- ifelse(isGlobalEmpty(SUMCOUNTS), vector(), COUNTS)
max_noalle <- size(COUNTS, 1)
nloci <- size(COUNTS, 2)
npops <- size(COUNTS, 3)
sumCounts <- SUMCOUNTS + ones(size(SUMCOUNTS))
globals$COUNTS <- ifelse(isGlobalEmpty(globals$COUNTS), vector(), globals$COUNTS)
globals$COUNTS <- ifelse(isGlobalEmpty(globals$COUNTS), vector(), globals$COUNTS)
max_noalle <- size(globals$COUNTS, 1)
nloci <- size(globals$COUNTS, 2)
npops <- size(globals$COUNTS, 3)
sumCounts <- globals$COUNTS + ones(size(globals$COUNTS))
sumCounts <- reshape(t(sumCounts), c(1, nloci, npops))
sumCounts <- repmat(sumCounts, c(max_noalle, 1, 1))

@@ -20,9 +20,9 @@ computeAllFreqs2 <- function(noalle) {
}
prioriAlleelit <- repmat(prioriAlleelit, c(1, 1, npops))
counts <- ifelse(
test = isGlobalEmpty(COUNTS),
test = isGlobalEmpty(globals$COUNTS),
yes = prioriAlleelit,
no = COUNTS + prioriAlleelit
no = globals$COUNTS + prioriAlleelit
)
allFreqs <- counts / drop(sumCounts)
return(allFreqs)
12 changes: 3 additions & 9 deletions R/computeDiffInCounts.R
Original file line number Diff line number Diff line change
@@ -4,20 +4,14 @@ computeDiffInCounts <- function(rows, max_noalle, nloci, data) {
# % riveill� rows. rows pit�� olla vaakavektori.

diffInCounts <- zeros(max_noalle, nloci)
for (i in seq_len(nrow(data))) {
for (i in rows) { # yep, just one iteration
row <- data[i, ]
notEmpty <- as.matrix(matlab2r::find(row >= 0))

if (length(notEmpty) > 0) {
diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] <-
diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] + 1
element <- row[notEmpty] + (notEmpty - 1) * max_noalle
diffInCounts[element] <- diffInCounts[element] + 1
}
}
diffInCounts <- matrix(
data = diffInCounts[!is.na(diffInCounts)],
nrow = max_noalle,
ncol = nloci,
byrow = TRUE
)
return(diffInCounts)
}
6 changes: 3 additions & 3 deletions R/computeLogml.R
Original file line number Diff line number Diff line change
@@ -14,14 +14,14 @@ computeLogml <- function(counts, sumcounts, noalle, data, rowsFromInd) {
logml <- sum(
sum(
sum(
GAMMA_LN[
globals$GAMMA_LN[
counts + 1 +
repmat(rowsInG * (adjnoalle - 1), c(1, 1, npops))
]
)
)
) -
npops * sum(sum(GAMMA_LN[1, adjnoalle])) -
sum(sum(GAMMA_LN[sumcounts + 1, 1]))
npops * sum(sum(globals$GAMMA_LN[1, adjnoalle])) -
sum(sum(globals$GAMMA_LN[sumcounts + 1, 1]))
return(logml)
}
6 changes: 3 additions & 3 deletions R/computePersonalAllFreqs.R
Original file line number Diff line number Diff line change
@@ -8,11 +8,11 @@
#' @param allFreqs allFreqs
#' @param rowsFromInd rowsFromInd
computePersonalAllFreqs <- function(ind, data, allFreqs, rowsFromInd) {
if (isGlobalEmpty(COUNTS)) {
if (isGlobalEmpty(globals$COUNTS)) {
nloci <- npops <- 1
} else {
nloci <- ifelse(is.na(dim(COUNTS)[2]), 1, dim(COUNTS)[2])
npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3])
nloci <- ifelse(is.na(dim(globals$COUNTS)[2]), 1, dim(globals$COUNTS)[2])
npops <- ifelse(is.na(dim(globals$COUNTS)[3]), 1, dim(globals$COUNTS)[3])
}

rows <- as.matrix(t(data))[computeRows(rowsFromInd, ind, 1), , drop = FALSE]
44 changes: 11 additions & 33 deletions R/computePopulationLogml.R
Original file line number Diff line number Diff line change
@@ -1,43 +1,21 @@
computePopulationLogml <- function(pops, adjprior, priorTerm) {
# Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset

# ======================================================== #
# Limiting COUNTS size #
# ======================================================== #
if (!is.null(adjprior)) {
nr <- seq_len(nrow(adjprior))
nc <- seq_len(ncol(adjprior))
COUNTS <- COUNTS[nr, nc, pops, drop = FALSE]
} else {
COUNTS <- NA
}
nr <- seq_len(nrow(adjprior))
nc <- seq_len(ncol(adjprior))

x <- size(COUNTS, 1)
y <- size(COUNTS, 2)
x <- size(globals$COUNTS, 1)
y <- size(globals$COUNTS, 2)
z <- length(pops)

# ======================================================== #
# Computation #
# ======================================================== #
term1 <- NULL
if (!is.null(adjprior)) {
isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2
term1 <- squeeze(
sum(
sum(
reshape(
lgamma(
repmat(adjprior, c(1, 1, length(pops))) + COUNTS[nr, nc, pops, drop = !isarray]
),
c(x, y, z)
),
1
),
2
)
)
}
if (is.null(priorTerm)) priorTerm <- 0
popLogml <- term1 - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm
return(popLogml)
rep_adj <- repmat(adjprior, c(1, 1, z))
gamma_rep_counts <- matlab2r::gammaln(rep_adj + globals$COUNTS[, , pops])
gamma_sum_counts <- rowSums(matlab2r::gammaln(1 + globals$SUMCOUNTS[pops, , drop = FALSE]))
gamma_rep_counts_sum <- colSums(colSums(reshape(gamma_rep_counts, c(x, y, z))))
gamma_rep_counts_reshaped <- squeeze(gamma_rep_counts_sum)
popLogml <- gamma_rep_counts_reshaped - gamma_sum_counts - priorTerm
return(popLogml[, , drop = FALSE])
}
2 changes: 1 addition & 1 deletion R/fiksaaPartitioYksiloTasolle.R
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@ fiksaaPartitioYksiloTasolle <- function(rows, rowsFromInd) {
kaikkiRivit <- rows[ind, 1]:rows[ind, 2]
for (riviNumero in seq(rowsFromInd, length(kaikkiRivit), rowsFromInd)) {
rivi <- kaikkiRivit[riviNumero]
partitio2[rivi / rowsFromInd] <- PARTITION[ind]
partitio2[rivi / rowsFromInd] <- globals$PARTITION[ind]
}
}
global_env <- as.environment(1L)
2 changes: 1 addition & 1 deletion R/findEmptyPop.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
findEmptyPop <- function(npops) {
# % Palauttaa ensimm�isen tyhj�n populaation indeksin. Jos tyhji�
# % populaatioita ei ole, palauttaa -1:n.
pops <- t(unique(PARTITION))
pops <- t(unique(globals$PARTITION))
if (length(pops) == npops) {
emptyPop <- -1
} else {
12 changes: 6 additions & 6 deletions R/getPopDistancesByKL.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
getPopDistancesByKL <- function(adjprior) {
# Laskee populaatioille etהisyydet
# kהyttהen KL-divergenssi?
COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), ]
maxnoalle <- size(COUNTS, 1)
nloci <- size(COUNTS, 2)
npops <- size(COUNTS, 3)
globals$COUNTS <- globals$COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), ]
maxnoalle <- size(globals$COUNTS, 1)
nloci <- size(globals$COUNTS, 2)
npops <- size(globals$COUNTS, 3)
distances <- zeros(choose(npops, 2), 1)

d <- zeros(maxnoalle, nloci, npops)
@@ -16,8 +16,8 @@ getPopDistancesByKL <- function(adjprior) {

prior[1, nollia] <- 1
for (pop1 in 1:npops) {
d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / repmat(
sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, ncol(prior))
d[, , pop1] <- (squeeze(globals$COUNTS[, , pop1]) + prior) / repmat(
sum(squeeze(globals$COUNTS[, , pop1]) + prior), c(maxnoalle, ncol(prior))
)
}
pointer <- 1
22 changes: 12 additions & 10 deletions R/globals.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
COUNTS <- array(0, dim = c(100, 100, 100))
SUMCOUNTS <- array(0, dim = c(100, 100))
PARTITION <- array(1, dim = 100)
POP_LOGML <- array(1, dim = 100)
LOGDIFF <- array(1, dim = c(100, 100))
# If handling globas break, try other ideas from
# https://stackoverflow.com/a/65252740/1169233
globals <- new.env(parent = emptyenv())

utils::globalVariables(
c("PARTITION", "COUNTS", "SUMCOUNTS", "LOGDIFF", "POP_LOGML", "GAMMA_LN")
)
assign("COUNTS", array(0, dim = c(0, 0, 0)), envir = globals)
assign("SUMCOUNTS", array(0, dim = c(0, 0)), envir = globals)
assign("PARTITION", array(1, dim = 0), envir = globals)
assign("POP_LOGML", array(1, dim = 0), envir = globals)
assign("LOGDIFF", array(1, dim = c(0, 0)), envir = globals)
assign("CLOQCOUNTS", array(0, dim = c(0, 0)), envir = globals)
assign("SEPCOUNTS", array(0, dim = c(0, 0)), envir = globals)
assign("GAMMA_LN", array(0, dim = c(0, 0)), envir = globals)
# If handling globas break, try other ideas from
# https://stackoverflow.com/a/65252740/1169233 and
# https://stackoverflow.com/questions/12598242/
2 changes: 1 addition & 1 deletion R/greedyMix.R
Original file line number Diff line number Diff line change
@@ -24,7 +24,7 @@
#' greedyMix(data, "baps")
#' } # TEMP: unwrap once #24 is resolved
greedyMix <- function(
data, format = gsub("^.*\\.", "", data), partitionCompare = NULL, npops = 1L,
data, format = gsub("^.*\\.", "", data), partitionCompare = NULL, npops = 3L,
counts = NULL, sumcounts = NULL, max_iter = 100L, alleleCodes = NULL,
inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE
) {
8 changes: 4 additions & 4 deletions R/greedyPopMix.R
Original file line number Diff line number Diff line change
@@ -89,11 +89,11 @@ greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE
rows <- popnames_rowsFromInd$rows
rm(popnames_rowsFromInd)
}
groupPartition <- PARTITION
groupPartition <- globals$PARTITION
fiksaaPartitioYksiloTasolle(rows, rowsFromInd)
c$PARTITION <- PARTITION
c$COUNTS <- COUNTS
c$SUMCOUNTS <- SUMCOUNTS
c$PARTITION <- globals$PARTITION
c$COUNTS <- globals$COUNTS
c$SUMCOUNTS <- globals$SUMCOUNTS
c$alleleCodes <- alleleCodes
c$adjprior <- adjprior
c$rowsFromInd <- rowsFromInd
25 changes: 13 additions & 12 deletions R/indMix.R
Original file line number Diff line number Diff line change
@@ -4,7 +4,6 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
# Input npops is not used if called by greedyMix or greedyPopMix.

logml <- 1
clearGlobalVars()

noalle <- c$noalle
rows <- c$rows
@@ -94,16 +93,16 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
counts <- sumcounts_counts_logml$counts
logml <- sumcounts_counts_logml$logml

PARTITION <- zeros(ninds, 1)
assign("PARTITION", zeros(ninds, 1), globals)
for (i in seq_len(ninds)) {
apu <- rows[i]
PARTITION[i] <- initialPartition[apu[1]]
globals$PARTITION[i] <- initialPartition[apu[1]]
}

COUNTS <- counts
SUMCOUNTS <- sumcounts
POP_LOGML <- computePopulationLogml(seq_len(npops), adjprior, priorTerm)
LOGDIFF <- repmat(-Inf, c(ninds, npops))
assign("COUNTS", counts, globals)
assign("SUMCOUNTS", sumcounts, globals)
assign("POP_LOGML", computePopulationLogml(seq_len(npops), adjprior, priorTerm), globals)
assign("LOGDIFF", matrix(-Inf, nrow = ninds, ncol = npops), globals)

# PARHAAN MIXTURE-PARTITION ETSIMINEN
nRoundTypes <- 7
@@ -147,10 +146,10 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
muutosNyt <- 0

for (ind in inds) {
i1 <- PARTITION[ind]
i1 <- globals$PARTITION[ind]
muutokset_diffInCounts <- greedyMix_muutokset$new()
muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset(
ind, rows, data, adjprior, priorTerm
ind, rows, data, adjprior, priorTerm, npops
)
muutokset <- muutokset_diffInCounts$muutokset
diffInCounts <- muutokset_diffInCounts$diffInCounts
@@ -196,7 +195,9 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
} else if (round == 2) { # Populaation yhdist�minen toiseen.
maxMuutos <- 0
for (pop in seq_len(npops)) {
muutokset_diffInCounts <- greedyMix_muutokset$new()
muutokset_diffInCounts <- greedyMix_muutokset$new
# FIXME: wrong input
browser() # TEMP. Tip: browserText()
muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset2(
pop, rows, data, adjprior, priorTerm
)
@@ -278,7 +279,7 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
diffInCounts <- computeDiffInCounts(
t(rivit), size(COUNTS, 1), size(COUNTS, 2), data
)
i1 <- PARTITION(muuttuvat[1])
i1 <- PARTITION[muuttuvat[1]]
updateGlobalVariables3(
muuttuvat, diffInCounts, adjprior, priorTerm, i2
)
@@ -513,7 +514,7 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
}
if (muutoksia == 0) {
if (vaihe <= 4) {
vaihe <= vaihe + 1
vaihe <- vaihe + 1
} else if (vaihe == 5) {
ready <- 1
}
2 changes: 1 addition & 1 deletion R/isGlobalEmpty.R
Original file line number Diff line number Diff line change
@@ -6,5 +6,5 @@
#' @importFrom stats sd
#' @author Waldir Leoncio
isGlobalEmpty <- function(g) {
return(sum(g) == 0 & sd(g) == 0)
return(sum(g) == 0 && is.na(sd(g)))
}
Loading

0 comments on commit 2d5d93a

Please sign in to comment.