diff --git a/DESCRIPTION b/DESCRIPTION
index 585ceab..28cf8c4 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -33,6 +33,7 @@ Imports:
     rgl,
     bit64,
     httr,
+    httr2,
     jsonlite,
     pbapply,
     dplyr,
diff --git a/NAMESPACE b/NAMESPACE
index cf3554e..3d6ecf8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -76,6 +76,7 @@ export(banc_xyz2id)
 export(bancsee)
 export(banctable_append_rows)
 export(banctable_login)
+export(banctable_move_to_bigdata)
 export(banctable_query)
 export(banctable_set_token)
 export(banctable_update_rows)
diff --git a/R/banc-table.R b/R/banc-table.R
index a2f3177..4555cc9 100644
--- a/R/banc-table.R
+++ b/R/banc-table.R
@@ -41,6 +41,9 @@
 #' @param append_allowed Logical. Whether rows without row identifiers can be appended.
 #' @param chunksize To split large requests into smaller ones with max this many rows.
 #' @param token_name The name of the token in your .Renviron file, should be \code{BANCTABLE_TOKEN}.
+#' @param where Optional SQL-like where clause selecting the rows to move; set to `NULL` to move all rows.
+#' @param bigdata Logical. If `TRUE`, new rows are added to the big data archive rather than the 'normal' seatable.
+#' @param invert Logical. Whether to send the specified rows (`where`) to big data storage (`FALSE`, the default) or move them from storage back to the 'normal' table (`TRUE`).
 #' @param ... Additional arguments passed to pbsapply which might include cl=2 to specify a number of parallel jobs to run.
 #'
 #' @return a \code{data.frame} of results. There should be 0 rows if no rows
@@ -279,10 +282,95 @@ banctable_base_impl <- function (base_name = "banc_meta",
   base
 }
 
+#' @export
+#' @rdname banctable_query
+banctable_move_to_bigdata <- function(table = "banc_meta",
+                                      base = "banc_meta",
+                                      url = "https://cloud.seatable.io/",
+                                      workspace_id = "57832",
+                                      token_name = "BANCTABLE_TOKEN",
+                                      where = "`region` = 'optic'",
+                                      invert = FALSE){
+
+  # get base
+  ac <- banctable_login(token_name=token_name)
+  base <- banctable_base_impl(table = table,
+                              base_name = base,
+                              url = url,
+                              workspace_id = workspace_id)
+  base_uuid <- base$dtable_uuid
+  token <- base$jwt_token
+
+  # Remove any protocol prefix if present
+  server <- gsub("^https?://", "", base$server_url)
+  server <- gsub("/$", "", server)
+
+  # Construct the URL
+  if(invert){
+    movement <- "unarchive"
+  }else{
+    movement <- "archive-view"
+  }
+  endpoint <- sprintf("https://%s/api-gateway/api/v2/dtables/%s/%s/", server, base_uuid, movement)
+
+  # Prepare the request body
+  body <- list(table_name = table)
+
+  # Add where clause if provided
+  if (!is.null(where)) {
+    body$where <- where
+  }
+
+  # Make the request
+  response <- httr2::request(endpoint) %>%
+    httr2::req_headers(
+      "Authorization" = sprintf("Bearer %s", token),
+      "Accept" = "application/json",
+      "Content-Type" = "application/json"
+    ) %>%
+    httr2::req_body_json(body) %>%
+    httr2::req_error(is_error = function(resp) FALSE) %>% # This allows us to handle errors manually
+    httr2::req_perform()
+
+  # Check for successful response
+  if (httr2::resp_status(response) != 200) {
+    # Try to get error message from response body as a single string
+    error_msg <- tryCatch({
+      if (httr2::resp_content_type(response) == "application/json") {
+        paste(unlist(httr2::resp_body_json(response)), collapse = " ")
+      } else {
+        # If not JSON, get the raw text
+        httr2::resp_body_string(response)
+      }
+    }, error = function(e) {
+      "Could not parse error message"
+    })
+    stop(error_msg)
+  }
+
+  # Return the response invisibly
+  invisible(response)
+}
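+
+# ## example usage in R (illustrative sketch, not run): assumes a valid
+# ## BANCTABLE_TOKEN in your .Renviron; the where clauses and `df` are placeholders.
+# # send optic-lobe rows to big data (archive) storage
+# banctable_move_to_bigdata(table = "banc_meta", where = "`region` = 'optic'")
+# # bring the same rows back into the 'normal' table
+# banctable_move_to_bigdata(table = "banc_meta", where = "`region` = 'optic'", invert = TRUE)
+# # append a data.frame of new rows straight into the big data store
+# banctable_append_rows(df, table = "banc_meta", bigdata = TRUE)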
{ +# "accept": "application/json", +# "content-type": "application/json", +# "authorization": "Bearer MY_TOKEN" +# } +# body = { +# "table_name": "banc_meta", +# "where": "`cell_class` = 'glia'" +# } +# response = requests.post(url, headers=headers, json=body) +# print(response.text) + #' @export #' @rdname banctable_query banctable_append_rows <- function (df, table, + bigdata = FALSE, base = NULL, chunksize = 1000L, workspace_id = "57832", @@ -306,7 +394,11 @@ banctable_append_rows <- function (df, return(all(oks)) } pyl = fafbseg:::df2appendpayload(df) - res = base$batch_append_rows(table_name = table, rows_data = pyl) + if(bigdata){ + res = base$batch_append_rows(table_name = table, rows_data = pyl) + }else{ + res = base$big_data_insert_rows(table_name = table, rows_data = pyl) + } ok = isTRUE(all.equal(res[["inserted_row_count"]], nx)) return(ok) } @@ -438,7 +530,10 @@ banctable_updateids <- function(){ dplyr::select(-pt_root_id,-pt_position) # Update root IDs directly where needed - bc.new <- banc_updateids(bc.new, root.column = "root_id", supervoxel.column = "supervoxel_id") + bc.new <- banc_updateids(bc.new, + root.column = "root_id", + supervoxel.column = "supervoxel_id", + position.column = "position") # Make sure supervoxel and root position information that is missing, is filled in bc.new <- bc.new %>% diff --git a/R/ids.R b/R/ids.R index a3b505e..39f2371 100644 --- a/R/ids.R +++ b/R/ids.R @@ -187,7 +187,8 @@ banc_updateids <- function(x, if(sum(no.sp)){ cat('determining missing supervoxel_ids ...\n') x[no.sp,][[supervoxel.column]] <- unname(pbapply::pbsapply(x[no.sp,][[position.column]], function(row){ - tryCatch(banc_xyz2id(row,rawcoords = TRUE, root = FALSE, ...), error = function(e) NA) + tryCatch(quiet_function(banc_xyz2id(row,rawcoords = TRUE, root = FALSE, ...)), + error = function(e) NA) })) } } @@ -214,20 +215,23 @@ banc_updateids <- function(x, bad <- is.na(update)|update=="0" update <- update[!bad] if(length(update)) x[old,][[root.column]][!bad] <- update - old[old][!bad] <- TRUE + old[!bad] <- FALSE } old[is.na(old)] <- TRUE # update based on position if(any(c("position","pt_position")%in%colnames(x)) && sum(old)){ cat('updating root_ids with a position ...\n') - update <- unname(pbapply::pbsapply(x[old,][[position.column]], banc_xyz2id, rawcoords = TRUE, root = TRUE, ...)) + update <- unname(pbapply::pbsapply(x[old,][[position.column]], function(row){ + tryCatch(quiet_function(banc_xyz2id(row,rawcoords = TRUE, root = TRUE, ...)), + error = function(e) NA) + })) bad <- is.na(update)|update=="0" update <- update[!bad] if(length(update)) x[old,][[root.column]][!bad] <- update old[!bad] <- FALSE } - old[is.na(old)] <- FALSE + old[is.na(old)] <- TRUE # update based on root Ids if(root.column%in%colnames(x) && sum(old)){ diff --git a/man/banc_latestid.Rd b/man/banc_latestid.Rd index efcb89d..fb38b71 100644 --- a/man/banc_latestid.Rd +++ b/man/banc_latestid.Rd @@ -17,6 +17,8 @@ banc_updateids( x, root.column = "root_id", supervoxel.column = "supervoxel_id", + position.column = "position", + serial = FALSE, ... 
 )
 }
@@ -43,6 +45,10 @@ Else a vector of \code{BANC} root IDs.}
 \item{root.column}{when \code{x} is a \code{data.frame}, the \code{root_id} column you wish to update}
 
 \item{supervoxel.column}{when \code{x} is a \code{data.frame}, the \code{supervoxel_id} column you wish to use to update \code{root.column}}
+
+\item{position.column}{when \code{x} is a \code{data.frame}, the \code{position} column with xyz values you wish to use to update \code{supervoxel.column}}
+
+\item{serial}{if \code{TRUE} and \code{x} is a vector, calls \code{banc_updateids} on each ID in sequence to buffer against connection failures. Slower.}
 }
 \description{
 Find the latest id for a banc root id
diff --git a/man/banctable_query.Rd b/man/banctable_query.Rd
index 4aedd26..b34ad2e 100644
--- a/man/banctable_query.Rd
+++ b/man/banctable_query.Rd
@@ -5,6 +5,7 @@
 \alias{banctable_set_token}
 \alias{banctable_login}
 \alias{banctable_update_rows}
+\alias{banctable_move_to_bigdata}
 \alias{banctable_append_rows}
 \title{Read and write to the seatable for draft BANC annotations}
 \usage{
@@ -42,9 +43,20 @@ banctable_update_rows(
   ...
 )
 
+banctable_move_to_bigdata(
+  table = "banc_meta",
+  base = "banc_meta",
+  url = "https://cloud.seatable.io/",
+  workspace_id = "57832",
+  token_name = "BANCTABLE_TOKEN",
+  where = "`region` = 'optic'",
+  invert = FALSE
+)
+
 banctable_append_rows(
   df,
   table,
+  bigdata = FALSE,
   base = NULL,
   chunksize = 1000L,
   workspace_id = "57832",
@@ -91,6 +103,12 @@ column that can identify each row in the remote table.}
 \item{chunksize}{To split large requests into smaller ones with max this many rows.}
 
 \item{...}{Additional arguments passed to pbsapply which might include cl=2 to specify a number of parallel jobs to run.}
+
+\item{where}{Optional SQL-like where clause selecting the rows to move; set to \code{NULL} to move all rows.}
+
+\item{invert}{Logical. Whether to send the specified rows (\code{where}) to big data storage (\code{FALSE}, the default) or move them from storage back to the 'normal' table (\code{TRUE}).}
+
+\item{bigdata}{Logical. If \code{TRUE}, new rows are added to the big data archive rather than the 'normal' seatable.}
 \value{
 a \code{data.frame} of results. There should be 0 rows if no rows