Simulation study.R

# Reads data saved from running HACSim and 
# sets the name for the output file.
setwd("/Users/jarrettphillips/desktop/HACSim Simulation Study Paper/Supplemental Information/p = 0.95/hypothetical species")
myvals <- read.csv("data-90.txt", header = FALSE)
output_file_name <- "Select-90-10k.txt"

# set.seed(1234)

# set up probabilities for haplotype generation in the population
# probs <- envr$probs
# probs <- c(rep(0.1, 10))
probs <- c(0.9, rep(0.1/9, 9))
# probs <- c(0.45, 0.45, rep(0.1/8, 8))
# probs <- c(rep(0.3, 3), rep(0.10/7, 7))

# Sets the number of haplotypes in the species
Hstar <- 10
# Hstar <- envr$Hstar

# set the population size
pop.size <- 10000

# number of replications
num.reps <- 50

# generate the population according to the probabilities set earlier
population <- sample(1:Hstar, size = pop.size, replace = TRUE, prob = probs)

# get all unique values of N* generated by HACSim
actualVals <- myvals[,1]
uniqueVals <- unique(actualVals)
uniqueVals <- sort(uniqueVals)


# go through the N* values generated by HACSim
for (x in uniqueVals) {
  totalHaps <- vector()
  for (y in 1:num.reps) {
    # get N* unique samples from the population
    sampled <- sample(1:pop.size, x, replace = FALSE) # individuals sampled
    hapsFound <- vector()
    # find what the haplotypes of the samples are
    for (z in 1:length(sampled)) {
      hapsFound[z] <- population[sampled[z]]
    }
    # find the total number of haplotypes found from sampling the population
    hapsFound <- unique(hapsFound)
    totalHaps[y] <- length(hapsFound)
  }
  # output N* value and average of the proportion of how many haplotypes were 
  # found from sampling that many individuals from the population
  cat(x, file = output_file_name, append = TRUE)
  cat(", ", file = output_file_name, append = TRUE)
  cat(mean(totalHaps)/Hstar, file = output_file_name, append = TRUE)
  cat(", ", file = output_file_name, append = TRUE)
  write((sd(totalHaps/Hstar)/sqrt(num.reps)), file = output_file_name, append = TRUE)
}