-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBDC_ChallengesDownload.qmd
213 lines (158 loc) · 6.91 KB
/
BDC_ChallengesDownload.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
---
title: "FCC-BDC Challenges Download"
author: "Christine Parker"
format: html
editor: visual
last updated: 21 Nov 2024
---
# Set up environment
See this [good reference](https://cran.r-project.org/web/packages/httr/vignettes/secrets.html) for different ways to keep things like usernames, passwords, and API tokens secure in your scripts.
```{r Load required libraries and settings}
library(httr)
library(jsonlite)
library(tidyverse)
# API base URL
base_url <- "https://broadbandmap.fcc.gov/api/public/map/downloads"

# User credentials are read from environment variables (for example set in
# ~/.Renviron) so they never have to be typed into this script.
token <- Sys.getenv("FCC_API_TOKEN")
username <- Sys.getenv("FCC_USERNAME")

# Folder the downloaded challenge files are saved to. The trailing slash is
# kept so the value can be used directly as a prefix for a file name.
challenge_download_path <- paste0(
  Sys.getenv("challenge_download_path"),
  "/Downloaded_21Nov24/"
)

# Confirm the credentials were found WITHOUT echoing the token itself:
# printing a secret puts it in the console history and in any rendered output.
# Sys.getenv() returns "" for an unset variable, which nzchar() detects.
if (!nzchar(token) || !nzchar(username)) {
  warning(
    "FCC_API_TOKEN and/or FCC_USERNAME is not set; API requests will fail.",
    call. = FALSE
  )
}
message("FCC username: ", username)
message("FCC API token loaded: ", nzchar(token))
```
# Function to get "as of" dates for challenge data
This creates a function that, when run, returns the publication ("as of") dates available for the challenge data. The API currently lists both "availability" and "challenge" data sets; this function keeps only the "challenge" dates.
```{r function to get challenge dates}
# Get the "as of" dates for which challenge data are listed by the API.
#
# Arguments:
#   username  FCC account user name, sent in the `username` request header.
#   token     FCC API token, sent in the `hash_value` request header.
#
# Returns a character vector of `as_of_date` values, one per listed entry
# whose `data_type` is "challenge" (character(0) when there are none).
# Stops with an error if the HTTP request is not successful.
get_challenge_dates <- function(username, token) {
  # Endpoint that lists every data type together with its as-of dates
  url <- "https://broadbandmap.fcc.gov/api/public/map/listAsOfDates"

  # Perform the GET request
  res <- GET(
    url,
    add_headers(
      username = username,
      hash_value = token,
      `accept` = "application/json",
      `user-agent` = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
    )
  )

  # Check the response status
  if (http_status(res)$category != "Success") {
    stop(paste("Error:", http_status(res)$message))
  }

  # Parse the JSON response
  dates <- content(res, as = "parsed", type = "application/json")

  # Keep only the "challenge" entries that carry an as_of_date.
  # identical() always yields a single TRUE/FALSE, even when `data_type` is
  # missing; `==` would yield logical(0) there and misalign Filter().
  challenge_dates <- Filter(
    function(d) identical(d$data_type, "challenge") && !is.null(d$as_of_date),
    dates$data
  )

  # vapply() guarantees a character vector, including the empty case, where
  # sapply() would return list() and break as_tibble()/rename() downstream.
  vapply(
    challenge_dates,
    function(d) as.character(d$as_of_date),
    character(1)
  )
}
```
# Function to list challenge data files for a specific date
This creates a function to download a list of all the challenge files that are available for public download.
```{r function to list data files}
# List the challenge files available for a single "as of" date.
#
# NOTE: a later chunk in this document defines another function with the same
# name (taking a table of dates), which replaces this one once it is run.
#
# Reads `base_url`, `username` and `token` from the enclosing environment.
# Returns the parsed JSON response; errors if the request is not successful.
list_challenge_files <- function(as_of_date) {
  endpoint <- paste0(base_url, "/listChallengeData/", as_of_date)
  auth_headers <- add_headers(username = username, hash_value = token)
  response <- GET(endpoint, auth_headers)

  # Turn any HTTP error status into an R error
  stop_for_status(response)

  content(response, as = "parsed", type = "application/json")
}
```
```{r get list of challenge data files}
# Collect the challenge file listings for every date in `dates`.
#
# Arguments:
#   dates  A table with a `date` column; one request is made per value.
#
# Reads `base_url`, `username` and `token` from the enclosing environment.
# A date whose request fails raises a warning and is skipped; a date with no
# files only produces a message. Returns one flat list holding the `data`
# entries of all successful responses (an empty list if there were none).
list_challenge_files <- function(dates) {
  collected <- list()

  for (as_of in dates$date) {
    message(paste("Fetching files for date:", as_of))

    # Request the file listing for this date
    endpoint <- paste0(base_url, "/listChallengeData/", as_of)
    response <- GET(
      endpoint,
      add_headers(
        username = username,
        hash_value = token,
        `accept` = "application/json",
        `user-agent` = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
      )
    )

    # Failed request: warn and move on to the next date
    status <- http_status(response)
    if (status$category != "Success") {
      warning(paste("Error fetching files for date:", as_of, "-", status$message))
      next
    }

    # Nothing listed for this date: report it and move on
    parsed <- content(response, as = "parsed", type = "application/json")
    if (is.null(parsed$data) || length(parsed$data) == 0) {
      message(paste("No files found for date:", as_of))
      next
    }

    # Add this date's entries to the end of the combined list
    collected <- append(collected, parsed$data)
  }

  collected
}
```
# Function to download a specific file
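This function does the heavy lifting: it downloads a single challenge file, identified by its `file_id`, and saves it as a `.zip` file in the download folder.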
```{r debug version download}
# Download one challenge file and save it as "<file_id>.zip".
#
# Arguments:
#   file_id        Identifier of the file, appended to the download URL.
#   download_path  Folder to save into (created if missing). Works with or
#                  without a trailing slash. Defaults to the global
#                  `challenge_download_path`.
#
# Stops with an error if the response status is not a success. On any
# failure, the file written for this request is removed so that an error
# page is never left behind under a .zip name.
download_challenge_file <- function(file_id, download_path = challenge_download_path) {
  # Ensure the download path exists
  if (!dir.exists(download_path)) {
    dir.create(download_path, recursive = TRUE)
  }

  # Build the target path with file.path() so a missing trailing slash on
  # download_path cannot glue the folder name and file name together.
  full_file_name <- file.path(
    sub("[/\\\\]+$", "", download_path),
    paste0(file_id, ".zip")
  )

  # Construct the URL
  url <- paste0("https://broadbandmap.fcc.gov/nbm/map/api/getChallengeDownloadFile/", file_id)

  # Debug: report file name and URL
  message(paste("File path:", full_file_name))
  message(paste("URL:", url))

  # write_disk() saves whatever the server returns, including error bodies,
  # so clean up the target file unless the download is confirmed good.
  succeeded <- FALSE
  on.exit(if (!succeeded) unlink(full_file_name), add = TRUE)

  # Perform the GET request
  res <- GET(
    url,
    add_headers(
      `accept` = "application/json, text/plain, */*",
      `referer` = "https://broadbandmap.fcc.gov/data-download/challenge-data",
      `user-agent` = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
    ),
    write_disk(full_file_name, overwrite = TRUE) # Save the file
  )

  # Check response status
  if (http_status(res)$category != "Success") {
    # Show the server's response for debugging before the file is removed
    message(content(res, "text", encoding = "UTF-8"))
    stop(paste("Error:", http_status(res)$message))
  }

  succeeded <- TRUE
  message(paste("Downloaded:", full_file_name))
}
```
# Download Steps
This is the part of the script where you'll put all those functions to use! If you need to change the categories of data you want to download, refer to the [API guide](https://us-fcc.app.box.com/v/bdc-public-data-api-spec) for more detail.
```{r Download the data, include=FALSE, echo=FALSE}
# Step 1: Get "as of" dates for only challenge data
dates <- get_challenge_dates(username, token) %>%
  as_tibble() %>%
  rename("date" = "value")

# Step 2: List files for the dates
challenge_files <- list_challenge_files(dates)

# Without any listed files there is no `category` column to filter on, so
# stop here with a clear message instead of an obscure "object not found".
if (length(challenge_files) == 0) {
  stop("No challenge files were listed for the available dates.", call. = FALSE)
}

# Step 3: Filter to get only the files you want - refer to API doc for specifics
# Here I'm specifically interested in the resolved fixed-broadband challenge
# data - you can choose other options - reference the API guide
chall_data_list <- challenge_files %>%
  bind_rows() %>%
  filter(category == "Fixed Challenge - Resolved")

# Step 4: Download all files
# Iterate over all file_id values in chall_data_list.
# seq_len() runs zero times when no rows match; 1:nrow() would run for 1 and 0.
for (i in seq_len(nrow(chall_data_list))) {
  # Read the id before tryCatch() so the error handler always reports the
  # file that was actually being downloaded
  file_id <- chall_data_list$file_id[i]

  tryCatch(
    download_challenge_file(file_id),
    error = function(e) {
      # Log the failure and carry on with the remaining files
      message(paste("Error downloading file_id:", file_id, "-", conditionMessage(e)))
    }
  )
}

message("All downloads attempted.")
```