Skip to content

Commit

Permalink
functions to read from live CAVE tables as default
Browse files Browse the repository at this point in the history
* make it clearer what sort of position data reroot functions are using
  • Loading branch information
alexanderbates committed Nov 26, 2024
1 parent 2e8ba0c commit 95e1474
Show file tree
Hide file tree
Showing 6 changed files with 185 additions and 35 deletions.
121 changes: 97 additions & 24 deletions R/cave-tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,13 @@ banc_nuclei <- function (rootids = NULL,
else if (!is.null(rootids)) {
rootids <- banc_ids(rootids)
nuclei <- if (length(rootids) < 200)
banc_cave_query(table = table, filter_in_dict = list(pt_root_id=rootids),
banc_cave_query(table = table,
filter_in_dict = list(pt_root_id=rootids),
...)
else
banc_cave_query(table = table, live = F, ...)
banc_cave_query(table = table,
live = TRUE,
...)
if (nrow(nuclei) == 0)
return(nuclei)
nuclei <- nuclei %>%
Expand All @@ -89,25 +92,27 @@ banc_nuclei <- function (rootids = NULL,
}
} else {
nuclei <- banc_cave_query(table = table,
filter_in_dict = list(id=nucleus_ids), ...)
filter_in_dict = list(id=nucleus_ids),
...)
nuclei %>%
dplyr::right_join(data.frame(id = as.integer64(nucleus_ids)), by = "id") %>%
dplyr::select(colnames(nuclei))
}
res
if (isTRUE(rawcoords))
res
else {
res <- res %>%
dplyr::mutate(dplyr::across(dplyr::ends_with("position"), function(x)
nat::xyzmatrix2str(banc_raw2nm(x))))
res$pt_position <- sapply(res$pt_position, paste, collapse=", ")
res <- res %>%
dplyr::rename(nucleus_id = .data$id,
nucleus_position_nm = .data$pt_position,
root_id = .data$pt_root_id) %>%
dplyr::filter(.data$valid=="t")
res$pt_position <- sapply(res$pt_position, paste, collapse=", ")
# res$pt_position_ref <- sapply(res$pt_position_ref, paste, collapse=", ")
res <- res %>%
dplyr::rename(nucleus_id = .data$id,
nucleus_position = .data$pt_position,
root_id = .data$pt_root_id) %>%
dplyr::filter(.data$valid=="t")
if (isFALSE(rawcoords)) {
# res <- res %>%
# dplyr::mutate(dplyr::across(dplyr::ends_with("position"), function(x)
# nat::xyzmatrix2str(banc_raw2nm(x))))
res$nucleus_position_nm <- apply(banc_raw2nm(res$nucleus_position),1,paste_coords)
res$nucleus_position_nm <- gsub("\\(|\\)","",res$nucleus_position_nm)
}
res
}

#' @rdname banc_cave_tables
Expand Down Expand Up @@ -159,7 +164,7 @@ get_cave_table_data <- function(table, rootids = NULL, ...){
fafbseg::flywire_cave_query(table = table,
filter_in_dict = list(pt_root_id=rootids), ...)
} else {
fafbseg::flywire_cave_query(table = table, live = F, ...)
fafbseg::flywire_cave_query(table = table, live = TRUE, ...)
}
} else {
df <- fafbseg::flywire_cave_query(table = table , ...)
Expand All @@ -168,10 +173,11 @@ get_cave_table_data <- function(table, rootids = NULL, ...){
}

# hidden
banc_cave_cell_types <- function(){
banc_cave_cell_types <- function(user_id = NULL){
banc.cell.info <- banc_cell_info(rawcoords = TRUE)
banc.cell.info$pt_position <- sapply(banc.cell.info$pt_position, paste, collapse=", ")
banc.cell.info.mod <- banc.cell.info %>%
dplyr::filter(valid == 't') %>%
dplyr::rowwise() %>%
dplyr::mutate(pt_position = paste0(pt_position,collapse=",")) %>%
dplyr::ungroup() %>%
Expand All @@ -189,10 +195,7 @@ banc_cave_cell_types <- function(){
TRUE ~ NA
)) %>%
dplyr::mutate(user_id = dplyr::case_when(
grepl("neuron identity", tag2) ~ user_id,
grepl("^DN|^AMMC|^PDN|^LH|^il|^T1|^T5|^T4|^TY4|^IN|^il|^HS|^Mi|^PS|^CB|^FB|^AL|
^FET|^bCS|SEZ-NSC-Hugin|^MDN|^OA|^PS|^ovi|giant fiber|^m-NSC|^l-NSC-ITP
|^OA|^LH|^CSD|^BDN|^AN|^AL|^AV|^AN|^MN|^SA|^Mi|^LH|^L1|^BDN|^LAL",tag) ~ user_id,
!is.na(cell_type) ~ user_id,
TRUE ~ NA
)) %>%
dplyr::mutate(cell_type = gsub("\\\n.*|\\*.*","",cell_type)) %>%
Expand Down Expand Up @@ -224,10 +227,80 @@ banc_cave_cell_types <- function(){
cell_class = paste(unique(na.omit(sort(cell_class))), collapse = ", "),
super_class = paste(unique(na.omit(sort(super_class))), collapse = ", "),
cell_type = paste(unique(na.omit(sort(cell_type))), collapse = ", "),
side = paste(unique(na.omit(sort(side))), collapse = ", ")) %>%
side = paste(unique(na.omit(sort(side))), collapse = ", "),
user_id = paste(unique(na.omit(sort(user_id))), collapse = ", ")) %>%
dplyr::ungroup() %>%
dplyr::rename(cell_id = id, root_id = pt_root_id, supervoxel_id = pt_supervoxel_id, position = pt_position) %>%
dplyr::distinct(root_id, supervoxel_id, side, super_class, cell_class, cell_type, .keep_all = TRUE) %>%
dplyr::select(cell_id, root_id, supervoxel_id, position, side, super_class, cell_class, cell_type, user_id, notes)
dplyr::select(cell_id, root_id, supervoxel_id, position, side, super_class, cell_class, cell_type, user_id,notes) %>%
dplyr::left_join(banc_users %>% dplyr::distinct(pi_lab,cave_id) %>% dplyr::mutate(cave_id=as.character(cave_id)),
by=c("user_id"="cave_id")) %>%
dplyr::rename(cell_type_source = pi_lab)
banc.cell.info.mod
}

# # # Updated cell_type_source column based on CAVE
# banc.cell.info.mod <- banc_cave_cell_types()
# banc.cell.info.mod <- subset(banc.cell.info.mod, ! user_id %in% c(355,52))
# bc.all <- banctable_query("SELECT _id, root_id, cell_type, other_names, super_class, cell_class, proofread, region, cell_type_source from banc_meta")
# bc.all$cell_type_source <- unlist(sapply(bc.all$cell_type_source ,function(x) paste(unlist(x),collapse=", ")))
# bc.ct <- bc.all %>%
# dplyr::left_join(banc.cell.info.mod %>%
# dplyr::mutate(root_id=as.character(root_id)) %>%
# dplyr::distinct(root_id, cell_type, cell_type_source),
# by = "root_id") %>%
# dplyr::mutate(
# other_names = ifelse(is.na(other_names),'',other_names),
# cell_type_source.y = gsub("Rachel Wilson Lab", "Wilson lab", cell_type_source.y),
# cell_type_source.y = ifelse(is.na(cell_type_source.y),NA,tolower(cell_type_source.y)),
# cell_type_source.x = ifelse(is.na(cell_type_source.x),NA,tolower(cell_type_source.x)),
# cell_type_source.x = ifelse(grepl("NA|na|princeton|community|CAVE|Princeton",cell_type_source.x),NA,cell_type_source.x),
# cell_type_source.x = ifelse(cell_type_source.x%in%c("","NA"),NA,cell_type_source.x),
# cell_type_source.y = ifelse(cell_type_source.y%in%c("","NA"),NA,cell_type_source.y)) %>%
# dplyr::mutate(cell_type = dplyr::case_when(
# is.na(cell_type.x) ~ cell_type.y,
# is.na(cell_type.y) ~ cell_type.x,
# TRUE ~ cell_type.x),
# ) %>%
# dplyr::rowwise() %>%
# dplyr::mutate(other_names = dplyr::case_when(
# (!is.na(cell_type.x)&!is.na(cell_type.y)) & (cell_type.y!= cell_type.x) ~ paste(sort(unique(c(unlist(strsplit(other_names,split=", ")),cell_type.y))),collapse=", "),
# TRUE ~ other_names
# )) %>%
# dplyr::mutate(
# cell_type_source.y = cell_type_source.y,
# cell_type_source.x = cell_type_source.x,
# cell_type_source = dplyr::case_when(
# is.na(cell_type_source.x) ~ cell_type_source.y,
# is.na(cell_type_source.y) ~ cell_type_source.x,
# cell_type_source.x=="NA" ~ cell_type_source.y,
# cell_type_source.y=="NA" ~ cell_type_source.x,
# cell_type_source.x=="cave"&!is.na(cell_type_source.y) ~ cell_type_source.y,
# cell_type_source.x=="community"&!is.na(cell_type_source.y) ~ cell_type_source.y,
# cell_type_source.x==""&!is.na(cell_type_source.y) ~ cell_type_source.y,
# !is.na(cell_type_source.x)&!is.na(cell_type_source.y) ~ paste(sort(unique(c(cell_type_source.x,cell_type_source.y)),
# decreasing=TRUE),
# collapse=","),
# TRUE ~ cell_type_source.x
# )) %>%
# dplyr::filter(!is.na(cell_type_source), cell_type_source!="") %>%
# dplyr::distinct(`_id`, root_id, .keep_all = TRUE) %>%
# dplyr::select(`_id`, root_id, cell_type, other_names, cell_type_source,
# super_class, cell_class, proofread, region) %>%
# dplyr::mutate(other_names = gsub("^,|^ ,|^ ","",other_names),
# cell_type_source = ifelse(cell_type_source=='151184',NA,cell_type_source))
#
# #Add cell type source labels
# bc.update <- as.data.frame(bc.ct)
# bc.update[is.na(bc.update)] <- ''
# banctable_update_rows(base='banc_meta',
# table = "banc_meta",
# df = bc.update[,c("_id","cell_type", "other_names", "cell_type_source")],
# append_allowed = FALSE,
# chunksize = 1000)






25 changes: 25 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -200,4 +200,29 @@
#' `banc_brain_neuropil.surf$RegionList`
"banc_volumes.df"

#' User information (name + CAVE ID) for active BANC users
#'
#' @name banc_users
#' @docType data
#' @description The purpose of this table is to map CAVE users IDs to names, in order to credit annotation work done in BANC CAVE.
#' This information is based on \href{https://docs.google.com/spreadsheets/d/1UFmeWr2uF9jTLVMw3bD6nM3ejM-b-HDZz6sQBPTEoZ8/edit?gid=1163959922#gid=1163959922}{google sheet}.
#'
#' @examples
#' \dontrun{
#' View(banc_users)
#' }
"banc_users"

# banc_users <- googlesheets4::read_sheet("1UFmeWr2uF9jTLVMw3bD6nM3ejM-b-HDZz6sQBPTEoZ8")
# colnames(banc_users) <- snakecase::to_snake_case(colnames(banc_users))
# banc_users <- banc_users %>%
# dplyr::select(name, pi_lab, cave_id) %>%
# dplyr::mutate(name = gsub("\\(.*","",name)) %>%
# dplyr::mutate(pi_lab = gsub("\\(.*","",pi_lab)) %>%
# dplyr::mutate(pi_lab = ifelse(is.na(pi_lab),name,pi_lab)) %>%
# dplyr::mutate(pi_lab = ifelse(pi_lab=="PI",paste0(name," lab"),pi_lab)) %>%
# dplyr::arrange(pi_lab, name) %>%
# dplyr::distinct(cave_id, .keep_all = TRUE) %>%
# dplyr::filter(!is.na(pi_lab))
# usethis::use_data(banc_users, overwrite = TRUE)

48 changes: 39 additions & 9 deletions R/l2.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ banc_read_l2skel <- function(id, OmitFailures=TRUE, dataset=NULL, ...) {
#' @param id (Optional) The `root_id` of the neuron in the `roots` data
#' frame. If NULL, it will be taken from the `x$root_id` slot.
#' @param roots A data frame containing information about root points, i.e. nuclei
#' obtained using `bancr::roots()`. This data frame is assumed to have
#' columns named `root_id` and `pt_position`, where `pt_position`
#' obtained using `bancr:::banc_roots()`. This data frame is assumed to have
#' columns named `root_id` and `root_position_nm`, where `root_position_nm`
#' specifies the 3D coordinates of the soma for each `root_id`.
#' @param estimate if \code{TRUE} and nucleus position is not in `roots`,
#' then root is estimated as a leaf node furthest outside of the brain neuropil.
Expand Down Expand Up @@ -122,9 +122,18 @@ banc_reroot.neuron <- function(x, id = NULL, roots = NULL, estimate = TRUE, ...)
if(is.null(id)){
stop("a root_id in roots must be given")
}
df <- subset(roots, roots$root_id==id & !is.na(roots$pt_position))
if("root_position_nm"%in%colnames(roots)){
if("root_position"%in%colnames(roots)){
warning("root_position_nm, converting root_position to root_position_nm")
roots$root_position_nm <- apply(banc_raw2nm(roots$root_position),1, paste_coords)
roots$root_position_nm <- gsub("\\(|\\)","",roots$root_position_nm)
}else{
stop("root_position_nm not found in roots")
}
}
df <- subset(roots, roots$root_id==id & !is.na(roots$root_position_nm))
if(nrow(df)){
soma <- nat::xyzmatrix(df$pt_position)[1,]
soma <- nat::xyzmatrix(df$root_position_nm)[1,]
x <- nat::reroot(x = x, point = c(soma))
x$tags$soma <- nat::rootpoints(x)
}else if (estimate){ # As best we can
Expand Down Expand Up @@ -161,23 +170,44 @@ banc_reroot.neuronlist <- function(x, id = NULL, roots = NULL, estimate = TRUE,
if(is.null(roots)){
roots <- banc_roots()
}
if("root_position_nm"%in%colnames(roots)){
if("root_position"%in%colnames(roots)){
warning("root_position_nm, converting root_position to root_position_nm")
roots$root_position_nm <- apply(banc_raw2nm(roots$root_position),1, paste_coords)
roots$root_position_nm <- gsub("\\(|\\)","",roots$root_position_nm)
}else{
stop("root_position_nm not found in roots")
}
}
x <- add_field_seq(x, entries=id, field="id")
nat::nlapply(x, FUN = banc_reroot.neuron, roots = roots, id = id, estimate = estimate, ...)
}

# hidden
banc_roots <- function(rawcoords = FALSE){
roots <- bancr::banc_nuclei(rawcoords = rawcoords)
roots$pt_position <- roots$nucleus_position_nm
info <- banc_cell_info(rawcoords = rawcoords)
banc_roots <- function(){
# Get roots from nuclei table
roots <- banc_nuclei(rawcoords = FALSE)
roots$root_position_nm <- roots$nucleus_position_nm
# Neurons with no nuclei are mostly sensory, their roots are usually their tracked point
info <- banc_cell_info(rawcoords = FALSE)
info$root_id <-info$pt_root_id
xyz <- nat::xyzmatrix(info$pt_position)
p <- nat::pointsinside(xyz,surf=bancr::banc_brain_neuropil.surf)
info <- info[!p,]
roots <- rbind(roots[,c("root_id","pt_position")],info[,c("root_id","pt_position")])
info$root_position_nm <- info$pt_position
# Compile
roots <- rbind(roots[,c("root_id","root_position_nm")],
info[,c("root_id","root_position_nm")])
roots$root_position <- apply(banc_nm2raw(roots$root_position_nm),1, paste_coords)
roots$root_position <- gsub("\\(|\\)","",roots$root_position)
roots
}

# hidden
paste_coords <- function (xyz, sep = ", ", brackets = TRUE) {
paste0(ifelse(brackets, "(", NULL), paste(xyz, sep = sep,collapse = sep), ifelse(brackets, ")", NULL))
}

# hidden
add_field_seq <- function (x, entries, field = "id", ...) {
x = nat::as.neuronlist(x)
Expand Down
Binary file added data/banc_users.rda
Binary file not shown.
4 changes: 2 additions & 2 deletions man/banc_reroot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/banc_users.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 95e1474

Please sign in to comment.