diff --git a/R/browse-trend.R b/R/browse-trend.R new file mode 100644 index 0000000..100a52e --- /dev/null +++ b/R/browse-trend.R @@ -0,0 +1,42 @@ +# https://statecancerprofiles.cancer.gov/historicaltrend/index.php?0&9953&999&7599&136&071&48&2&0&0&1&1&1&1#results +# +# req_url_query(https://statecancerprofiles.cancer.gov/historicaltrend/index.php?0&9953&999&7599&136&071&48&2&0&0&1&1&1&1&6) %>% +# req_perform() +# +# browseURL(paste0(https://statecancerprofiles.cancer.gov/historicaltrend/index.php?0&9953&999&7599&136&071&48&2&0&0&1&1&1&1#results)) +# +# +# +# req <- "https://statecancerprofiles.cancer.gov/historicaltrend/index.php" +# +# api_arguments <- "0&9953&999&7599&136&071&48&2&0&0&1&1&1&1&6" +# +# resp <- req %>% +# req_url_query( +# stateFIPS = fips_scp(area), +# areatype = tolower(areatype), +# topic = "crowd", +# demo = handle_crowding(crowding), +# race = handle_race(race), +# type = "manyareacensus", +# sortVariableName = "value", +# sortOrder = "default", +# output = 1 +# ) %>% +# req_perform() +# +# +# test_ur_fail <- "https://statecancerprofiles.cancer.gov/historicaltrend/index.php?0&9953&999&7599&136&071&48&2&0&0&1&1&1&1&6" +# req <- "https://statecancerprofiles.cancer.gov/historicaltrend/data.php/historicaltrend.csv?0&9953&999&7599&136&071&07&2&0&0&1&1&1&1&6" +# req <- request("https://statecancerprofiles.cancer.gov/historicaltrend/index.php?0&9953&999&7599&136&071&48&2&0&0&1&1&1&1&6") +# +# resp <- req %>% +# req_perform() %>% +# resp_body_string() +# +# +# if (httr2::resp_content_type(resp) != "text/csv") { +# cli_abort("Invalid input, please check documentation for valid arguments.") +# } + + \ No newline at end of file diff --git a/R/demo-crowding.R b/R/demo-crowding.R index 7790bb7..8402460 100644 --- a/R/demo-crowding.R +++ b/R/demo-crowding.R @@ -69,6 +69,8 @@ demo_crowding <- function(area, areatype, crowding, race) { ) %>% req_perform() + resp_url <- resp$url + resp <- process_resp(resp, "demographics") resp$data <- resp$data %>% @@ -80,5 +82,5 @@ demo_crowding <- function(area, areatype, crowding, race) { )) %>% mutate(across(c("Percent", "Households"), \(x) as.numeric(x))) - process_metadata(resp) + process_metadata(resp, "demographics", resp_url) } diff --git a/R/demo-education.R b/R/demo-education.R index 931769a..f1ab15e 100644 --- a/R/demo-education.R +++ b/R/demo-education.R @@ -88,6 +88,8 @@ demo_education <- function(area, areatype, education, sex = NULL, race = NULL) { resp <- resp %>% req_perform() + + resp_url <- resp$url resp <- process_resp(resp, "demographics") @@ -100,6 +102,6 @@ demo_education <- function(area, areatype, education, sex = NULL, race = NULL) { )) %>% mutate(across(c("Percent", "Households"), \(x) as.numeric(x))) - process_metadata(resp) + process_metadata(resp, "demographics", resp_url) } diff --git a/R/demo-population.R b/R/demo-population.R index c30dc2d..c115e95 100644 --- a/R/demo-population.R +++ b/R/demo-population.R @@ -114,7 +114,7 @@ demo_population <- function(area, areatype, population, race = NULL, sex = NULL) resp <- process_resp(resp, "demographics") - resp %>% + resp$data %>% setNames(c( get_area(areatype), "Percent", @@ -122,4 +122,6 @@ demo_population <- function(area, areatype, population, race = NULL, sex = NULL) "Rank" )) %>% mutate(across(c("Percent", "People"), \(x) as.numeric(x))) + + process_metadata(resp) } diff --git a/R/demo-svi.R b/R/demo-svi.R index 1d3b1e9..c9dcb43 100644 --- a/R/demo-svi.R +++ b/R/demo-svi.R @@ -56,7 +56,9 @@ demo_svi <- function(area, svi) { resp <- process_resp(resp, "demographics") - resp %>% + resp$data %>% setNames(c("County", "FIPS", "Score")) %>% mutate(across(c("Score"), \(x) as.numeric(x))) + + process_metadata(resp) } diff --git a/R/get-metadata.R b/R/get-metadata.R index e93e546..e66b84d 100644 --- a/R/get-metadata.R +++ b/R/get-metadata.R @@ -1,5 +1,17 @@ - -#helper function to remove key from values +#' Extract Values +#' +#' This function finds a string value that contains a key and removes the +#' key from the value +#' +#' @param key A String value +#' @param resp_metadaa A list of strings of metadata +#' +#' @returns A string value without the key +#' +#' @examples +#' \dontrun{ +#' extract_values("Sorted by", resp_metadata) +#' } extract_values <- function(key, resp_metadata) { values <- resp_metadata[grep(key, resp_metadata)] values <- gsub(paste0("^\\s*", key, ":?\\s*"), "", values) @@ -9,62 +21,240 @@ extract_values <- function(key, resp_metadata) { #' Custom print function #' #' This custom print function processes the -#' metadata output for a response object +#' metadata output for easier readability #' #' @param x #' #' @export print.cancerprof_metadata <- function(x, ...) { - - cat("\033[38;5;246m# Data Report: \033[39m", "\n") - cat(paste(x$data_report, '"\n', sep = "", collapse = " "), "\n") - cat("\033[38;5;246m# Sorted By: \033[39m", "\n") - cat(x$sortedby, "\n") - cat("\n") + cli_h1("Metadata") + cli_text("\n") + + cli_div(theme = list( + span.cancerprof_class = list(color = "darkgray"))) - cat("\033[38;5;246m# Created By: \033[39m", "\n") - cat(x$createdby, "\n") - cat("\n") + data_topic <- attributes(x)$data_topic - cat("\033[38;5;246m# Data Sources: \033[39m", "\n") - cat(x$data_sources, "\n") - cat("\n") + # do some conditionals to filter data topic + if (data_topic == "demographics" || data_topic == "risks") { + cli_text("{.cancerprof_class # Data Report:}") + for (i in seq_along(x$data_report)) { + cli_text(x$data_report[i], "\n") + } + cli_text("\n") + + cli_text("{.cancerprof_class # Sorted By:}") + cli_text(x$sortedby, "\n") + cli_text("\n") + + cli_text("{.cancerprof_class # Created By:}") + cli_text(x$createdby, "\n") + cli_text("\n") + + if (!is.null(x$data_sources) && length(x$data_sources) > 0) { + cli_text("{.cancerprof_class # Data Sources:}", "\n") + cli_text(x$data_sources, "\n") + cli_text("\n") + } + + if (!is.null(x$data_dictionary) && length(x$data_dictionary) > 0) { + cli_text("{.cancerprof_class # Data Dictionary:}", "\n") + cli_text(x$data_dictionary, "\n") + cli_text("\n") + } + + if (!is.null(x$data_limitations) && length(x$data_limitations) > 0) { + cli_text("{.cancerprof_class # Data Limitations:}", "\n") + cli_text(x$data_limitations, "\n") + cli_text("\n") + } + + if (!is.null(x$name_change) && length(x$name_change) > 0) { + cli_text("{.cancerprof_class # Name Change:}", "\n") + cli_text(x$name_change, "\n") + cli_text("\n") + } + + if (!is.null(x$additional_notes) && length(x$additional_notes) > 0) { + cli_text("{.cancerprof_class # Additional Notes:}", "\n") + cli_text(x$additional_notes, "\n") + } + + } else if (data_topic == "incidence" || data_topic == "mortality") { + cli_text("{.cancerprof_class # Data Report:}") + + for (i in seq_along(x$data_report)) { + cli_text(x$data_report[i], "\n") + } + cli_text("\n") + + cli_text("{.cancerprof_class # Sorted By:}") + cli_text(x$sortedby, "\n") + cli_text("\n") + + cli_text("{.cancerprof_class # Created By:}") + cli_text(x$createdby, "\n") + cli_text("\n") + + if (!is.null(x$trend) && length(x$x$trend) > 0) { + cli_text("{.cancerprof_class # Trend:}", "\n") + cli_text(x$x$trend, "\n") + cli_text("\n") + } + + if (!is.null(x$trend_note) && length(x$trend_note) > 0) { + cli_text("{.cancerprof_class # Trend Note:}", "\n") + cli_text(x$trend_note, "\n") + cli_text("\n") + } + + if (!is.null(x$rate_note) && length(x$rate_note) > 0) { + cli_text("{.cancerprof_class # Rate Note:}", "\n") + cli_text(x$rate_note, "\n") + cli_text("\n") + } + + if (!is.null(x$stage_note) && length(x$stage_note) > 0) { + cli_text("{.cancerprof_class # Stage Note:}", "\n") + cli_text(x$stage_note, "\n") + cli_text("\n") + } + + if (!is.null(x$rank_note) && length(x$rank_note) > 0) { + cli_text("{.cancerprof_class # Rank Note:}", "\n") + cli_text(x$rank_note, "\n") + cli_text("\n") + } + + if (!is.null(x$data_not_available) && length(x$data_not_available) > 0) { + cli_text("{.cancerprof_class # Data not available:}", "\n") + cli_text(x$data_not_available, "\n") + cli_text("\n") + } + + if (!is.null(x$data_sources) && length(x$data_sources) > 0) { + cli_text("{.cancerprof_class # Data sources:}", "\n") + cli_text(x$data_sources, "\n") + cli_text("\n") + } + + if (!is.null(x$data_limitations) && length(x$data_limitations) > 0) { + cli_text("{.cancerprof_class # Data limitations:}", "\n") + cli_text(x$data_limitations, "\n") + cli_text("\n") + } + + if (!is.null(x$additional_notes) && length(x$additional_notes) > 0) { + cli_text("{.cancerprof_class # Additional Notes:}", "\n") + cli_text(x$additional_notes, "\n") + } + } - cat("\033[38;5;246m# Data Dictionary: \033[39m", "\n") - cat(x$data_dictionary, "\n") - cat("\n") - cat("\033[38;5;246m# Data Limitations: \033[39m", "\n") - cat(x$data_limitations, "\n") invisible(x) } - +#' Get Metadata +#' +#' This function assigns a list of metadata components and returns a string of +#' processed metadata that is easily readable +#' +#' @param input_tbl A tibble object +#' +#' @returns a string of metadata and an invisible metadata object as a list +#' of strings +#' +#' @examples +#' \dontrun{ +#' process_metadata(resp) +#' } get_metadata <- function(input_tbl) { + resp_metadata <- attr(input_tbl, "metadata") resp_metadata <- gsub("\\\"", "", resp_metadata) - - data_report <- c(resp_metadata[1], resp_metadata[2], resp_metadata[3], resp_metadata[4]) - sortedby <- extract_values("Sorted by", resp_metadata) - createdby <- extract_values("Created by", resp_metadata) - data_sources <- extract_values("Source", resp_metadata) - data_dictionary <- extract_values("For more information about", resp_metadata) - data_limitations <- extract_values("Data for", resp_metadata) - - - demo_metadata_list <- list( - data_report = data_report, - sortedby = sortedby, - createdby = createdby, - data_sources = data_sources, - data_dictionary = data_dictionary, - data_limitations = data_limitations - ) - class(demo_metadata_list) <- c("cancerprof_metadata", class(demo_metadata_list)) + #check data topic + data_topic <- attributes(input_tbl)$data_topic + + # do some conditionals to filter data topic + if (data_topic == "demographics" || data_topic == "risks") { + data_report <- c(resp_metadata[1], resp_metadata[2], resp_metadata[3], resp_metadata[4]) + sortedby <- extract_values("Sorted by", resp_metadata) + createdby <- extract_values("Created by", resp_metadata) + data_sources <- extract_values("Source", resp_metadata) + data_dictionary <- resp_metadata[grep("For more information", resp_metadata)] + data_limitations <- resp_metadata[grep("Data for", resp_metadata)] + + name_change <- extract_values("Name Change:", resp_metadata) + + exclude_keywords <- c("Sorted by", "Created by", "Source", "For more information", "Data for", "Name Change") + + additional_notes <- resp_metadata[!grepl(paste(exclude_keywords, collapse = "|"), resp_metadata, ignore.case = TRUE)] + additional_notes <- additional_notes[!additional_notes %in% data_report] + + output_metadata_list <- list( + data_report = data_report, + sortedby = sortedby, + createdby = createdby, + data_sources = data_sources, + data_dictionary = data_dictionary, + data_limitations = data_limitations, + additional_notes = additional_notes + ) + + } else if (data_topic == "incidence" || data_topic == "mortality") { + data_report <- c(resp_metadata[1], resp_metadata[2], resp_metadata[3]) + sortedby <- extract_values("Sorted by", resp_metadata) + createdby <- extract_values("Created by", resp_metadata) + trend <- extract_values("^ ", resp_metadata) + trend_note <- extract_values("trend note", resp_metadata) + rate_note <- extract_values("rate note", resp_metadata) + stage_note <- extract_values("Stage ", resp_metadata) + rank_note <- extract_values("rank note", resp_metadata) + data_not_available <- resp_metadata[grep("Data not available", resp_metadata)] + data_sources <- extract_values("Source:", resp_metadata) + data_limitations <- resp_metadata[grep("Data for", resp_metadata)] + + exclude_keywords <- c("Sorted by", "Created by", "^ ", "trend note", + "rate note", "Stage ", "rank note", + "Data not available", "Source", "Data for") + + additional_notes <- resp_metadata[!grepl(paste(exclude_keywords, collapse = "|"), resp_metadata, ignore.case = TRUE)] + additional_notes <- additional_notes[!additional_notes %in% data_report] + + output_metadata_list <- list( + data_report = data_report, + sortedby = sortedby, + createdby = createdby, + trend = trend, + trend_note = trend_note, + rate_note = rate_note, + stage_note = stage_note, + rank_note = rank_note, + data_not_available = data_not_available, + data_sources = data_sources, + data_limitations = data_limitations, + additional_notes = additional_notes + ) + + } else { + cli_abort("Incorrect data topic argument, please ensure that it is correct.") + } + + #add attribute to list + attr(output_metadata_list, "data_topic") <- data_topic + + #add custom print + class(output_metadata_list) <- c("cancerprof_metadata", class(output_metadata_list)) + + print.cancerprof_metadata(output_metadata_list) +} + +get_raw_metadata <- function(input_tbl) { + resp_metadata <- attr(input_tbl, "metadata") - print.cancerprof_metadata(demo_metadata_list) + return(resp_metadata) } diff --git a/R/incidence-cancer.R b/R/incidence-cancer.R index df2535e..1da161a 100644 --- a/R/incidence-cancer.R +++ b/R/incidence-cancer.R @@ -166,6 +166,8 @@ incidence_cancer <- function(area, areatype, cancer, race, sex, age, stage, year resp <- resp %>% req_perform() + + resp_url <- resp$url resp <- process_resp(resp, "incidence") @@ -179,7 +181,7 @@ incidence_cancer <- function(area, areatype, cancer, race, sex, age, stage, year ) if (stage == "all stages") { - resp %>% + resp$data <- resp$data %>% setNames(c( get_area(areatype), shared_names_to_numeric, @@ -196,7 +198,7 @@ incidence_cancer <- function(area, areatype, cancer, race, sex, age, stage, year "Trend_Upper_95%_CI" ), \(x) as.numeric(x))) } else if (stage == "late stage (regional & distant)") { - resp %>% + resp$data <- resp$data %>% setNames(c( get_area(areatype), shared_names_to_numeric, @@ -208,4 +210,6 @@ incidence_cancer <- function(area, areatype, cancer, race, sex, age, stage, year "Percentage_of_Cases_with_Late_Stage" ), \(x) as.numeric(x))) } + + process_metadata(resp, "incidence", resp_url) } diff --git a/R/mortality-cancer.R b/R/mortality-cancer.R index dbb08c4..91457d9 100644 --- a/R/mortality-cancer.R +++ b/R/mortality-cancer.R @@ -147,6 +147,8 @@ mortality_cancer <- function(area, areatype, cancer, race, sex, age, year) { resp <- resp %>% req_perform() + resp_url <- resp$url + resp <- process_resp(resp, "mortality") names_to_numeric <- c( @@ -158,7 +160,7 @@ mortality_cancer <- function(area, areatype, cancer, race, sex, age, year) { "Upper_CI_Rank" ) - resp %>% + resp$data <- resp$data %>% setNames(c( get_area(areatype), "Met Healthy People Objective of ***?", @@ -180,4 +182,6 @@ mortality_cancer <- function(area, areatype, cancer, race, sex, age, year) { "Lower_95%_CI_Trend", "Upper_95%_CI_Trend" ), \(x) as.numeric(x))) + + process_metadata(resp, "mortality", resp_url) } diff --git a/R/process-resp.R b/R/process-resp.R index 6f2a642..011f731 100644 --- a/R/process-resp.R +++ b/R/process-resp.R @@ -48,7 +48,7 @@ process_resp <- function(resp, topic) { index_first_line_break <- which(resp_lines == "")[4] index_second_line_break <- which(resp_lines == "")[5] } else { - cli_abort("Incorrect topic argument, please ensure that correct.") + cli_abort("Incorrect topic argument, please ensure that it is correct.") } resp <- resp_lines[ @@ -82,10 +82,7 @@ process_resp <- function(resp, topic) { mutate_all(\(x) na_if(x, "data not available")) %>% mutate_all(\(x) na_if(x, "*")) %>% as_tibble() - - #store metadata - - #return dataframe AND metadata + resp_metadata <- c( resp_lines[1: (index_first_line_break - 1)], resp_lines[(index_second_line_break + 1): line_length] ) diff --git a/R/process_metadata.R b/R/process_metadata.R index d813967..c33ac82 100644 --- a/R/process_metadata.R +++ b/R/process_metadata.R @@ -1,18 +1,25 @@ #' Custom print function #' -#' This custom print function processes the -#' metadata output for a response object +#' This custom print function edits the comment on the +#' metadata tibble output for a response object #' #' @param x #' #' @export print.cancerprof_tbl <- function(x, ...) { - #cat("Metadata:", "\n") - # we actually need to figure out how to use pillar here - cat("\033[38;5;246m# Access metadata with `get_metadata()`\033[39m", "\n") - # for (i in seq_along(attr(x, "metadata"))) { - # cat(names(attr(x, "metadata"))[i], attr(x, "metadata")[[i]], "\n") - # } + original_url <- attributes(x)$url + modified_url <- gsub("&output=1", "#results", original_url) + + cli_div(theme = list( + span.cancerprof_class = list(color = "darkgray"))) + + cli_par() + cli_text( + "{.href [# Click to view this query on State Cancer Profiles](", modified_url, ")}" + ) + cli_text("{.cancerprof_class # Access metadata with `get_metadata()`}") + cli_end() + NextMethod(x, ...) } @@ -29,14 +36,19 @@ print.cancerprof_tbl <- function(x, ...) { #' \dontrun{ #' process_metadata(resp) #' } -process_metadata <- function(resp) { +process_metadata <- function(resp, data_topic, resp_url) { resp_data <- resp$data resp_metadata <- resp$metadata + #remove new lines + resp_metadata <- resp_metadata[!grepl("^\\s*$", resp_metadata)] + class(resp_data) <- c("cancerprof_tbl", class(resp_data)) attr(resp_data, "metadata") <- resp_metadata - #print(resp_metadata) + attr(resp_data, "data_topic") <- data_topic + + attr(resp_data, "url") <- resp_url return(resp_data) } diff --git a/R/risk-colorectal-screening.R b/R/risk-colorectal-screening.R index ceca7e4..324376d 100644 --- a/R/risk-colorectal-screening.R +++ b/R/risk-colorectal-screening.R @@ -96,11 +96,13 @@ risk_colorectal_screening <- function(screening, race = NULL, sex = NULL, area = resp <- resp %>% req_perform() + + resp_url <- resp$url resp <- process_resp(resp, "risks") if (screening %in% screening_type_1) { - resp %>% + resp$data <- resp$data %>% setNames(c( "State", "FIPS", @@ -116,7 +118,7 @@ risk_colorectal_screening <- function(screening, race = NULL, sex = NULL, area = "Number_of_Respondents" ), \(x) as.numeric(x))) } else if (screening %in% screening_type_2) { - resp %>% + resp$data <- resp$data %>% setNames(c( "County", "FIPS", @@ -130,4 +132,6 @@ risk_colorectal_screening <- function(screening, race = NULL, sex = NULL, area = "Upper_95%_CI" ), \(x) as.numeric(x))) } + + process_metadata(resp, "risks", resp_url) } diff --git a/R/risk-diet-exercise.R b/R/risk-diet-exercise.R index c2000b8..47b632c 100644 --- a/R/risk-diet-exercise.R +++ b/R/risk-diet-exercise.R @@ -60,6 +60,8 @@ risk_diet_exercise <- function(diet_exercise, race, sex) { output = 1 ) %>% req_perform() + + resp_url <- resp$url resp <- process_resp(resp, "risks") @@ -69,7 +71,7 @@ risk_diet_exercise <- function(diet_exercise, race, sex) { ) if (diet_exercise %in% diet_exercise_type1) { - resp %>% + resp$data <- resp$data %>% setNames(c( "State", "FIPS", @@ -83,7 +85,7 @@ risk_diet_exercise <- function(diet_exercise, race, sex) { "Upper_95%_CI" ), \(x) as.numeric(x))) } else { - resp %>% + resp$data <- resp$data %>% setNames(c( "State", "FIPS", @@ -99,4 +101,6 @@ risk_diet_exercise <- function(diet_exercise, race, sex) { "Number_of_Respondents" ), \(x) as.numeric(x))) } + + process_metadata(resp, "risks", resp_url) } diff --git a/man/extract_values.Rd b/man/extract_values.Rd new file mode 100644 index 0000000..6fd99e3 --- /dev/null +++ b/man/extract_values.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get-metadata.R +\name{extract_values} +\alias{extract_values} +\title{Extract Values} +\usage{ +extract_values(key, resp_metadata) +} +\arguments{ +\item{key}{A String value} + +\item{resp_metadaa}{A list of strings of metadata} +} +\value{ +A string value without the key +} +\description{ +This function finds a string value that contains a key and removes the +key from the value +} +\examples{ +\dontrun{ +extract_values("Sorted by", resp_metadata) +} +} diff --git a/man/get_metadata.Rd b/man/get_metadata.Rd new file mode 100644 index 0000000..44c4666 --- /dev/null +++ b/man/get_metadata.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get-metadata.R +\name{get_metadata} +\alias{get_metadata} +\title{Get Metadata} +\usage{ +get_metadata(input_tbl) +} +\arguments{ +\item{input_tbl}{A tibble object} +} +\value{ +a string of metadata and an invisible metadata object as a list +of strings +} +\description{ +This function assigns a list of metadata components and returns a string of +processed metadata that is easily readable +} +\examples{ +\dontrun{ +process_metadata(resp) +} +} diff --git a/man/print.cancerprof_metadata.Rd b/man/print.cancerprof_metadata.Rd index 8e63907..14a098b 100644 --- a/man/print.cancerprof_metadata.Rd +++ b/man/print.cancerprof_metadata.Rd @@ -11,5 +11,5 @@ } \description{ This custom print function processes the -metadata output for a response object +metadata output for easier readability } diff --git a/man/print.cancerprof_tbl.Rd b/man/print.cancerprof_tbl.Rd index 7e0a13b..fafd5c4 100644 --- a/man/print.cancerprof_tbl.Rd +++ b/man/print.cancerprof_tbl.Rd @@ -15,6 +15,6 @@ This custom print function processes the metadata output for a response object -This custom print function processes the -metadata output for a response object +This custom print function edits the comment on the +metadata tibble output for a response object } diff --git a/man/process_metadata.Rd b/man/process_metadata.Rd index f728b58..3fb7dd8 100644 --- a/man/process_metadata.Rd +++ b/man/process_metadata.Rd @@ -4,9 +4,9 @@ \alias{process_metadata} \title{Process Metadata} \usage{ -process_metadata(resp) +process_metadata(resp, data_topic, resp_url) -process_metadata(resp) +process_metadata(resp, data_topic, resp_url) } \arguments{ \item{resp}{A response object}