Skip to content

Commit

Permalink
Some updates to the format (including SRA removal)
Browse files Browse the repository at this point in the history
  • Loading branch information
Colin J. Carlson committed May 30, 2021
1 parent 0dd62a2 commit 8654517
Show file tree
Hide file tree
Showing 42 changed files with 19,152 additions and 39,997 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,5 @@ Intermediate/Formatted/GenbankFormatted.csv
Intermediate/Formatted/GenbankFormatted.csv
Intermediate/Formatted/VIRIONUnprocessed.csv
Intermediate/Formatted/GenbankFormatted.csv
wc5/alt.bil
wc5/alt.hdr
5 changes: 3 additions & 2 deletions Code/03_Merge clean files.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ library(tidyverse); library(magrittr); library(vroom)

gb <- vroom("Intermediate/Formatted/GenbankFormatted.csv.gz")
clo <- read_csv("Intermediate/Formatted/CloverFormatted.csv")
sra <- read_csv("Intermediate/Formatted/SRAFormatted.csv")
#sra <- read_csv("Intermediate/Formatted/SRAFormatted.csv")
pred <- read_csv("Intermediate/Formatted/PredictFormatted.csv")
globi <- read_csv("Intermediate/Formatted/GLOBIFormatted.csv")

# Coerce a numeric NCBIAccession column to character before the source tables
# are combined. is.numeric() is used instead of comparing class() to a string:
# it also covers integer columns and always yields a single TRUE/FALSE, whereas
# class() can return several values and make the if() condition invalid.
if (is.numeric(clo$NCBIAccession)) {
  clo %<>% mutate(NCBIAccession = as.character(NCBIAccession))
}

virion <- bind_rows(clo, pred, gb, sra, globi)
# virion <- bind_rows(clo, pred, gb, sra, globi)
virion <- bind_rows(clo, pred, gb, globi)

vroom_write(virion, "Intermediate/Formatted/VIRIONUnprocessed.csv.gz")

12 changes: 11 additions & 1 deletion Code/04_High level VIRION checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,18 @@ virion %<>%

ictv <- read_csv("Source/ICTV Master Species List 2019.v1.csv")

virion %<>% mutate(ICTVRatified = (Virus %in% str_to_lower(ictv$Species)))
virion %<>% mutate(ICTVRatified = (Virus %in% str_to_lower(ictv$Species))) %>%
relocate(ICTVRatified, .after = VirusNCBIResolved)

#### SOME TEMPORARY SCAFFOLDING: May 30, 2021
#### From the SRA patch

virion %<>% mutate(HostFlagID = replace_na(HostFlagID, "FALSE"))
virion %<>% select(-c(HostSynonyms, VirusFlagContaminant))

####

virion %<>% distinct()
virion %<>% mutate(across(everything(), ~replace_na(.x, '')))

vroom_write(virion, "Virion/Virion.csv.gz")
22 changes: 14 additions & 8 deletions Code/05_Dissolve_VIRION.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,18 @@ fixer <- function(x) {toString(unique(unlist(x)))}
virion %<>% filter(!is.na(HostTaxID),
!is.na(VirusTaxID))

# virion %>%
# select(HostTaxID, Host, HostGenus, HostFamily, HostOrder, HostClass, HostSynonyms, HostNCBIResolved) %>%
# group_by_at(vars(-c("HostSynonyms"))) %>%
# summarise_at(vars(c("HostSynonyms")), ~list(.x)) %>%
# arrange(Host) %>%
# mutate(HostSynonyms = sapply(HostSynonyms, fixer)) -> host.tax

# Temporary no-synonyms code:
virion %>%
select(HostTaxID, Host, HostGenus, HostFamily, HostOrder, HostClass, HostSynonyms, HostNCBIResolved) %>%
group_by_at(vars(-c("HostSynonyms"))) %>%
summarise_at(vars(c("HostSynonyms")), ~list(.x)) %>%
arrange(Host) %>%
mutate(HostSynonyms = sapply(HostSynonyms, fixer)) -> host.tax
select(HostTaxID, Host, HostGenus, HostFamily, HostOrder, HostClass, HostNCBIResolved) %>%
distinct() %>%
arrange(Host) -> host.tax

virion %>%
select(VirusTaxID, Virus, VirusGenus, VirusFamily, VirusOrder, VirusClass, VirusNCBIResolved, ICTVRatified) %>%
Expand All @@ -32,7 +38,7 @@ write_csv(host.tax, "Virion/TaxonomyHost.csv")
write_csv(virus.tax, "Virion/TaxonomyVirus.csv")

virion %<>%
select(-c(Host, HostNCBIResolved, HostGenus, HostFamily, HostOrder, HostClass, HostSynonyms,
select(-c(Host, HostNCBIResolved, HostGenus, HostFamily, HostOrder, HostClass,
Virus, VirusNCBIResolved, VirusGenus, VirusFamily, VirusOrder, VirusClass, ICTVRatified)) #

# Organize the sampling information into an ID-linked column
Expand All @@ -51,7 +57,7 @@ virion %>%
virion %>%
select(AssocID,
DetectionMethod, DetectionOriginal,
HostFlagID, VirusFlagContaminant,
HostFlagID,
NCBIAccession) -> detection

virion %>%
Expand All @@ -70,7 +76,7 @@ virion %<>%
PMID,
DetectionMethod, DetectionOriginal,
HostOriginal, VirusOriginal,
HostFlagID, VirusFlagContaminant,
HostFlagID,
NCBIAccession,
PublicationYear,
ReleaseYear, ReleaseMonth, ReleaseDay,
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
99 changes: 99 additions & 0 deletions Code/Code_Dev/X_Geographic coverage.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@

# Geographic coverage of VIRION host-virus associations (setup + mammals).
#
# Reads the VIRION table, counts the distinct viruses recorded for each mammal
# host, attaches those counts to the IUCN mammal range polygons, and rasterizes
# the polygons into two layers: map.num.m and map.sum.m.

# NOTE(review): the working directory and the Dropbox paths below are specific
# to one machine; adjust them before running the script anywhere else.
setwd("~/Github/virion")
library(tidyverse)
library(vroom)
library(sf)
library(fasterize)

# Code/04_High level VIRION checks.R writes this file as
# "Virion/Virion.csv.gz"; the same capitalisation is used here so the read
# also succeeds on case-sensitive file systems.
vir <- vroom("Virion/Virion.csv.gz")
iucn <- st_read(dsn = 'C:/Users/cjcar/Dropbox/CurrentIUCN',
                layer = 'MAMMALS')

# One row per mammal host with the number of distinct viruses recorded for it.
nvir <- vir %>%
  filter(HostClass == "mammalia") %>%
  select(Host, Virus) %>%
  distinct() %>%
  group_by(Host) %>%
  summarize(NVirus = n_distinct(Virus))

# Lower-case the IUCN binomials before joining on Host (presumably VIRION host
# names are stored in lower case -- confirm). Ranges with no matching host get
# NVirus = NA after the left join and are dropped by the filter.
iucn <- iucn %>%
  mutate(binomial = tolower(binomial)) %>%
  left_join(nvir, by = c('binomial' = 'Host')) %>%
  filter(NVirus > 0)

# WorldClim altitude at resolution 5; used as the raster that fasterize() is
# given as its grid. Later sections of this script reuse `r`.
r <- raster::getData("worldclim", var = "alt", res = 5) # Make a blank raster

# Per grid cell: number of range polygons covering it, and the sum of NVirus
# over those polygons.
map.num.m <- fasterize(iucn, r, field = NULL, fun = 'count')
map.sum.m <- fasterize(iucn, r, field = "NVirus", fun = 'sum')

############

# Reptiles: same workflow as the mammal section, using the IUCN REPTILES layer.
# Produces map.num.r and map.sum.r. Note that `iucn` and `nvir` are overwritten.
iucn <- st_read(dsn = 'C:/Users/cjcar/Dropbox/CurrentIUCN',
                layer = 'REPTILES')

# One row per reptile host (class lepidosauria or order testudines) with the
# number of distinct viruses recorded for it.
nvir <- vir %>%
  filter(HostClass == "lepidosauria" | HostOrder == "testudines") %>%
  select(Host, Virus) %>%
  distinct() %>%
  group_by(Host) %>%
  summarize(NVirus = n_distinct(Virus))

# Ranges with no matching host get NVirus = NA after the left join and are
# dropped by the filter.
iucn <- iucn %>%
  mutate(binomial = tolower(binomial)) %>%
  left_join(nvir, by = c('binomial' = 'Host')) %>%
  filter(NVirus > 0)

# The raster `r` created for the mammal maps is reused here (as the bird
# section does) instead of fetching the identical WorldClim layer again.
map.num.r <- fasterize(iucn, r, field = NULL, fun = 'count')
map.sum.r <- fasterize(iucn, r, field = "NVirus", fun = 'sum')

############

# Amphibians: same workflow as the mammal section, using the IUCN AMPHIBIANS
# layer. Produces map.num.a and map.sum.a. `iucn` and `nvir` are overwritten.
iucn <- st_read(dsn = 'C:/Users/cjcar/Dropbox/CurrentIUCN',
                layer = 'AMPHIBIANS')

# One row per amphibian host with the number of distinct viruses recorded.
nvir <- vir %>%
  filter(HostClass == "amphibia") %>%
  select(Host, Virus) %>%
  distinct() %>%
  group_by(Host) %>%
  summarize(NVirus = n_distinct(Virus))

# Ranges with no matching host get NVirus = NA after the left join and are
# dropped by the filter.
iucn <- iucn %>%
  mutate(binomial = tolower(binomial)) %>%
  left_join(nvir, by = c('binomial' = 'Host')) %>%
  filter(NVirus > 0)

# The raster `r` created for the mammal maps is reused here (as the bird
# section does) instead of fetching the identical WorldClim layer again.
map.num.a <- fasterize(iucn, r, field = NULL, fun = 'count')
map.sum.a <- fasterize(iucn, r, field = "NVirus", fun = 'sum')

############

# Birds: ranges come from the BirdLife geodatabase rather than the IUCN layers,
# and species names are held in SCINAME. Produces map.num.b and map.sum.b on
# the raster `r` built earlier in the script.
library(rgdal)
bl <- st_read(
  dsn = "C:/Users/cjcar/Dropbox/Dan\'s birdlife hole/BOTW.gdb",
  layer = "All_Species"
)

# One row per bird host with the number of distinct viruses recorded for it.
nvir <- vir %>%
  filter(HostClass == "aves") %>%
  distinct(Host, Virus) %>%
  group_by(Host) %>%
  summarize(NVirus = n_distinct(Virus))

# Lower-case the names, attach the virus counts, and keep only ranges whose
# species matched a host (unmatched rows have NVirus = NA and are dropped).
bl2 <- bl %>%
  mutate(SCINAME = tolower(SCINAME)) %>%
  left_join(nvir, by = c('SCINAME' = 'Host')) %>%
  filter(NVirus > 0)

# Per grid cell: number of range polygons, and summed NVirus.
map.num.b <- fasterize(bl2, r, field = NULL, fun = 'count')
map.sum.b <- fasterize(bl2, r, field = "NVirus", fun = 'sum')

# Stack the per-taxon layers and plot them on a log(x + 1) scale.
# rasterVis is attached only at this point; the sections above call select()
# before it is loaded.
library(rasterVis)

taxon.labels <- c("Mammals", "Birds", "Reptiles", "Amphibians")

# Polygon counts per cell, one panel per taxon.
s1 <- stack(map.num.m, map.num.b, map.num.r, map.num.a)
names(s1) <- taxon.labels
levelplot(log(s1 + 1), names.attr = names(s1))

# Summed NVirus per cell, one panel per taxon.
s2 <- stack(map.sum.m, map.sum.b, map.sum.r, map.sum.a)
names(s2) <- taxon.labels
levelplot(log(s2 + 1), names.attr = names(s2))

# All eight layers together, count and sum side by side for each taxon.
# This stack is built and named but not plotted in this script.
s3 <- stack(
  map.num.m, map.sum.m,
  map.num.b, map.sum.b,
  map.num.r, map.sum.r,
  map.num.a, map.sum.a
)
names(s3) <- c(
  "Mammal (species)", "Mammal (interactions)",
  "Bird (species)", "Bird (interactions)",
  "Reptile (species)", "Reptile (interactions)",
  "Amphibian (species)", "Amphibian (interactions)"
)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file modified Intermediate/Formatted/VIRIONUnprocessed.csv.gz
Binary file not shown.
Binary file modified Virion/Detection.csv.gz
Binary file not shown.
Loading

0 comments on commit 8654517

Please sign in to comment.