-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature: Metadata #60
Comments
andrewallenbruce
added
documentation ✍️
feature 📸
feature request
consistency 🗻
adherence to standards
labels
Nov 19, 2023
Function
library(provider)
library(httr2)
library(dplyr)
#' Metadata for the CMS "Facility Affiliation Data" dataset
#'
#' Performs two requests against data.cms.gov: a one-row datastore query for
#' the distribution (requesting schema, row count and results) and a
#' metastore lookup for the dataset item. Selected parts of both responses
#' are combined into a single list.
#'
#' @return A named list with elements `title`, `description`, `uuid`,
#'   `identifier`, `distribution`, `date_issued`, `date_modified`,
#'   `datetime_modified`, `date_released`, `last_updated`, `publisher`,
#'   `format`, `landing_page`, `dictionary`, `dimensions`, `fields` and
#'   `example`.
metadata.affiliations <- function() {
  # Identifiers are named once instead of being repeated in every URL and
  # schema lookup.
  dataset_id <- "27ea-46a8"
  distribution_id <- "78125945-ea51-5ee0-b3f1-5f46292467b1"

  urlx <- paste0("https://data.cms.gov/",
                 "provider-data/api/1/datastore/query/",
                 distribution_id,
                 "?limit=1&offset=0&count=true&results=true",
                 "&schema=true&keys=true&format=json&rowIds=true")

  urly <- paste0("https://data.cms.gov/",
                 "provider-data/api/1/metastore/schemas/",
                 "dataset/items/",
                 dataset_id,
                 "?show-reference-ids=false")

  # Both endpoints are fetched and parsed the same way.
  fetch_json <- function(url) {
    httr2::request(url) |>
      httr2::req_perform() |>
      httr2::resp_body_json(check_type = FALSE,
                            simplifyVector = TRUE)
  }

  x <- fetch_json(urlx)
  y <- fetch_json(urly)

  fields <- x$schema[[distribution_id]]$fields
  released <- lubridate::ymd(y$released)

  # Fields whose descriptions are reported. A field without a description
  # yields NULL and is dropped, exactly as c() dropped it before.
  described <- c("npi",
                 "ind_pac_id",
                 "provider_last_name",
                 "provider_first_name",
                 "provider_middle_name",
                 "suff",
                 "facility_type",
                 "facility_affiliations_certification_number",
                 "facility_type_certification_number")

  descriptions <- unlist(
    lapply(described, function(nm) fields[[nm]]$description),
    use.names = FALSE
  )

  list(
    title             = y$title,
    description       = y$description,
    uuid              = dataset_id,
    identifier        = y$keyword$identifier,
    distribution      = y$distribution$identifier,
    date_issued       = lubridate::ymd(y$issued),
    date_modified     = lubridate::ymd(y$modified),
    datetime_modified = lubridate::ymd_hms(y$`%modified`),
    date_released     = released,
    last_updated      = make_interval(dplyr::tibble(date = released),
                                      start = date),
    publisher         = y$publisher$data$name,
    format            = x$query$format,
    landing_page      = y$landingPage,
    dictionary        = "https://data.cms.gov/provider-data/sites/default/files/data_dictionaries/physician/DOC_Data_Dictionary.pdf",
    # Count the schema's fields; the previous code read the `length`
    # attribute of the `record_number` field, which is not a column count.
    dimensions        = paste0(length(fields), " columns x ", x$count, " rows"),
    fields            = descriptions,
    # The sample output shows display_long() turning the returned row into a
    # two-column name/value tibble.
    example           = display_long(x$results)
  )
}

metadata.affiliations()
#> $title
#> [1] "Facility Affiliation Data"
#>
#> $description
#> [1] "This is the facility affiliations data publicly reported in the Provider Data Catalog."
#>
#> $uuid
#> [1] "27ea-46a8"
#>
#> $identifier
#> [1] "f62da856-f3e0-565a-a6e0-f6aefcafde00"
#>
#> $distribution
#> [1] "78125945-ea51-5ee0-b3f1-5f46292467b1"
#>
#> $date_issued
#> [1] "2023-08-17"
#>
#> $date_modified
#> [1] "2023-11-02"
#>
#> $datetime_modified
#> [1] "2023-11-03 00:05:23 UTC"
#>
#> $date_released
#> [1] "2023-11-16"
#>
#> $last_updated
#> # A tibble: 1 × 4
#> date interval period timelength_days
#> <date> <Interval> <Period> <dbl>
#> 1 2023-11-16 2023-11-16 UTC--2023-11-21 UTC 5d 0H 0M 0S 5
#>
#> $publisher
#> [1] "Centers for Medicare & Medicaid Services (CMS)"
#>
#> $format
#> [1] "json"
#>
#> $landing_page
#> [1] "https://data.cms.gov/provider-data/dataset/27ea-46a8"
#>
#> $dictionary
#> [1] "https://data.cms.gov/provider-data/sites/default/files/data_dictionaries/physician/DOC_Data_Dictionary.pdf"
#>
#> $dimensions
#> [1] "10 columns x 1563152 rows"
#>
#> $fields
#> [1] "NPI"
#> [2] "Ind_PAC_ID"
#> [3] "Provider Last Name"
#> [4] "Provider First Name"
#> [5] "Provider Middle Name"
#> [6] "Facility Affiliations Certification Number"
#> [7] "Facility Type Certification Number"
#>
#> $example
#> # A tibble: 10 × 2
#> name value
#> <chr> <chr>
#> 1 record_number "1"
#> 2 npi "1003000126"
#> 3 ind_pac_id "7517003643"
#> 4 provider_last_name "ENKESHAFI"
#> 5 provider_first_name "ARDALAN"
#> 6 provider_middle_name ""
#> 7 suff ""
#> 8 facility_type "Hospital"
#> 9 facility_affiliations_certification_number "490063"
#> 10 facility_type_certification_number ""

Created on 2023-11-21 with reprex v2.0.2
Code
library(pointblank)
library(provider)
# Build a pointblank "informant" (data dictionary) for the table returned by
# affiliations() and render it as a report.
affiliations(npi = "1558595660", na.rm = FALSE) |>
  create_informant(
    tbl_name = "affiliations()",
    label = "Provider <-> Facility Affiliations"
  ) |>
  info_columns(
    columns = npi,
    info = "10-digit National Provider Identifier."
  ) |>
  info_columns(
    columns = pac,
    info = "10-digit PECOS Associate Control ID."
  ) |>
  info_columns(
    columns = vars(first, middle, last, suffix),
    info = "Individual Provider's Name."
  ) |>
  # This entry previously repeated the provider-name text; it now describes
  # the facility_type column itself.
  info_columns(
    columns = facility_type,
    info = "Type of facility the provider is affiliated with."
  ) |>
  info_section(
    section_name = "Notes",
    usage = "`affiliations(parent_ccn = '670055')`",
    Source = c(
      "- From the **(provider)** package.",
      "- [CMS Affiliations API](https://data.cms.gov/provider-data/dataset/27ea-46a8)"
    )
  ) |>
  get_informant_report(
    title = "**`affiliations()`** Data Dictionary",
    size = "standard"
  )
Function
#' Metadata for one dataset in the data.cms.gov catalog
#'
#' Downloads `https://data.cms.gov/data.json`, keeps the catalog entry whose
#' `title` equals `api`, and returns one row per combination of reference
#' and distribution, with the distribution identifier extracted from its
#' access URL and an interval computed from the distribution's `modified`
#' date.
#'
#' @param api Dataset title to match exactly against the catalog's `title`.
#' @param first If `TRUE`, return only the first row of the result.
#' @return A tibble with columns `title`, `description`, `dictionary`,
#'   `methodology`, `landing_page`, `distribution`, `modified`, plus the
#'   columns added by `provider::make_interval()`.
metadata.one <- function(api,
                         first = FALSE) {
  catalog <- httr2::request("https://data.cms.gov/data.json") |>
    httr2::req_perform() |>
    httr2::resp_body_json(check_type = FALSE, simplifyVector = TRUE)

  dataset <- catalog$dataset |>
    dplyr::tibble() |>
    dplyr::select(title,
                  description,
                  describedBy,
                  distribution,
                  landingPage,
                  modified,
                  references) |>
    # `.env$api` always compares against the argument's value, so a caller
    # variable that shares a name with a catalog column cannot be masked.
    dplyr::filter(title == .env$api)

  # Distributions are expanded from the record *before* references are
  # unnested; otherwise every distribution is repeated once per reference
  # and the join below multiplies rows.
  dst <- dataset |>
    dplyr::select(title,
                  distribution) |>
    tidyr::unnest(cols = distribution, names_sep = "_") |>
    # Disabled in the original as well:
    # dplyr::filter(distribution_format == "API") |>
    dplyr::select(title,
                  distribution_title,
                  distribution_modified,
                  distribution_accessURL) |>
    # Keep only the part of the access URL between "dataset/" and "/data".
    dplyr::mutate(
      distribution_accessURL = strex::str_after_last(distribution_accessURL,
                                                     "dataset/"),
      distribution_accessURL = strex::str_before_last(distribution_accessURL,
                                                      "/data")
    ) |>
    dplyr::rename(distribution = distribution_accessURL)

  resp <- dataset |>
    dplyr::select(-distribution) |>
    tidyr::unnest(references)

  # Each reference row pairs with each distribution row of the same title,
  # so the many-to-many relationship is intentional and stated explicitly.
  results <- dplyr::left_join(resp,
                              dst,
                              by = dplyr::join_by(title),
                              relationship = "many-to-many") |>
    dplyr::select(-title) |>
    dplyr::select(title = distribution_title,
                  description,
                  dictionary = describedBy,
                  methodology = references,
                  landing_page = landingPage,
                  distribution,
                  modified = distribution_modified) |>
    dplyr::mutate(modified = lubridate::ymd(modified)) |>
    provider::make_interval(start = modified) |>
    # Distribution titles are split on " : " and only the leading part is kept.
    tidyr::separate_wider_delim(title, delim = " : ", names = c("title", NA))

  if (first) {
    results <- dplyr::slice_head(results, n = 1)
  }

  results
}

x <- metadata.one(
  api = "Medicare Fee-For-Service Public Provider Enrollment",
  first = TRUE
)

x |> dplyr::glimpse()
#> Rows: 1
#> Columns: 10
#> $ title <chr> "Medicare Fee-For-Service Public Provider Enrollment"
#> $ description <chr> "The Medicare Fee-For-Service Public Provider Enrollme…
#> $ dictionary <chr> "https://data.cms.gov/resources/medicare-fee-for-servi…
#> $ methodology <chr> "https://data.cms.gov/resources/fee-for-service-public…
#> $ landing_page <chr> "https://data.cms.gov/provider-characteristics/medicar…
#> $ distribution <chr> "2457ea29-fc82-48b0-86ec-3b0755de7515"
#> $ modified <date> 2023-10-16
#> $ interval <Interval> 2023-10-16 UTC--2023-12-03 UTC
#> $ period <Period> 1m 17d 0H 0M 0S
#> $ timelength_days <dbl> 48
# Repackage the one-row result `x` as a nested list; the update date and the
# two elapsed-time measures are grouped under `date`.
with(
  x,
  list(
    title        = title,
    description  = description,
    dictionary   = dictionary,
    methodology  = methodology,
    landing      = landing_page,
    distribution = distribution,
    date         = list(
      updated       = modified,
      length.period = period,
      length.days   = timelength_days
    )
  )
)
#> $title
#> [1] "Medicare Fee-For-Service Public Provider Enrollment"
#>
#> $description
#> [1] "The Medicare Fee-For-Service Public Provider Enrollment dataset includes information on providers who are actively approved to bill Medicare or have completed the 855O at the time the data was pulled from the Provider Enrollment, Chain, and Ownership System (PECOS). The release of this provider enrollment data is not related to other provider information releases such as Physician Compare or Data Transparency.\n\n \n\nNote: This full dataset contains more records than most spreadsheet programs can handle, which will result in an incomplete load of data. Use of a database or statistical software is required."
#>
#> $dictionary
#> [1] "https://data.cms.gov/resources/medicare-fee-for-service-public-provider-enrollment-data-dictionary"
#>
#> $methodology
#> [1] "https://data.cms.gov/resources/fee-for-service-public-provider-enrollment-methodology"
#>
#> $landing
#> [1] "https://data.cms.gov/provider-characteristics/medicare-provider-supplier-enrollment/medicare-fee-for-service-public-provider-enrollment"
#>
#> $distribution
#> [1] "2457ea29-fc82-48b0-86ec-3b0755de7515"
#>
#> $date
#> $date$updated
#> [1] "2023-10-16"
#>
#> $date$length.period
#> [1] "1m 17d 0H 0M 0S"
#>
#> $date$length.days
#> [1] 48

Created on 2023-12-03 with reprex v2.0.2
andrewallenbruce
added a commit
that referenced
this issue
Dec 4, 2023
… `metadata.viewer`, `metadata.json` (#60)
andrewallenbruce
added
metadata
and removed
documentation ✍️
feature 📸
feature request
consistency 🗻
adherence to standards
labels
Dec 6, 2023
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Metadata Wishlist
Example
Created on 2023-11-20 with reprex v2.0.2
The text was updated successfully, but these errors were encountered: