Merge pull request #179 from ldecicco-USGS/master
Second shot at httr
ldecicco-USGS committed Feb 24, 2016
2 parents 83dc1c6 + c103a72 commit 65e66a6
Showing 10 changed files with 117 additions and 124 deletions.
7 changes: 4 additions & 3 deletions DESCRIPTION
@@ -1,8 +1,8 @@
Package: dataRetrieval
Type: Package
Title: Retrieval Functions for USGS and EPA Hydrologic and Water Quality Data
Version: 2.4.9
Date: 2016-02-12
Version: 2.5.0
Date: 2016-02-24
Authors@R: c( person("Robert", "Hirsch", role = c("aut"),
email = "[email protected]"),
person("Laura", "DeCicco", role = c("aut","cre"),
@@ -28,7 +28,8 @@ Depends:
R (>= 3.0)
Imports:
XML,
RCurl,
httr,
curl,
reshape2,
lubridate,
stats,
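
The Imports swap above (RCurl out; httr and curl in) is the dependency core of this PR. As a quick, hedged sanity check (nothing here is from the diff beyond the package names), the new stack can be verified from a fresh R session:

# Hedged sketch: confirm the replacement dependencies resolve and expose
# what the new code needs -- curl supplies curl_version() for the user-agent
# string, httr supplies the HTTP verbs.
stopifnot(requireNamespace("httr", quietly = TRUE),
          requireNamespace("curl", quietly = TRUE))
curl::curl_version()$version           # libcurl version string
as.character(packageVersion("httr"))   # feeds the rebuilt default_ua() below
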
15 changes: 10 additions & 5 deletions NAMESPACE
@@ -33,27 +33,32 @@ export(whatWQPsites)
export(zeroPad)
import(stats)
import(utils)
importFrom(RCurl,basicHeaderGatherer)
importFrom(RCurl,curlOptions)
importFrom(RCurl,curlVersion)
importFrom(RCurl,getBinaryURL)
importFrom(RCurl,getURI)
importFrom(XML,xmlAttrs)
importFrom(XML,xmlDoc)
importFrom(XML,xmlName)
importFrom(XML,xmlNamespaceDefinitions)
importFrom(XML,xmlParse)
importFrom(XML,xmlRoot)
importFrom(XML,xmlSize)
importFrom(XML,xmlToList)
importFrom(XML,xmlTreeParse)
importFrom(XML,xmlValue)
importFrom(XML,xpathApply)
importFrom(XML,xpathSApply)
importFrom(curl,curl_version)
importFrom(dplyr,full_join)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate_)
importFrom(dplyr,mutate_each_)
importFrom(dplyr,rbind_all)
importFrom(dplyr,select_)
importFrom(httr,GET)
importFrom(httr,content)
importFrom(httr,headers)
importFrom(httr,status_code)
importFrom(httr,stop_for_status)
importFrom(httr,user_agent)
importFrom(httr,write_disk)
importFrom(lubridate,fast_strptime)
importFrom(lubridate,parse_date_time)
importFrom(readr,col_character)
71 changes: 45 additions & 26 deletions R/getWebServiceData.R
@@ -5,9 +5,13 @@
#'
#' @param obs_url character containing the url for the retrieval
#' @param \dots additional arguments to pass to the GET request
#' @importFrom RCurl basicHeaderGatherer
#' @importFrom RCurl getURI
#' @importFrom RCurl curlVersion
#' @importFrom httr GET
#' @importFrom httr user_agent
#' @importFrom httr stop_for_status
#' @importFrom httr status_code
#' @importFrom httr headers
#' @importFrom httr content
#' @importFrom curl curl_version
#' @export
#' @return raw data from web services
#' @examples
@@ -22,36 +26,51 @@
#' }
getWebServiceData <- function(obs_url, ...){

possibleError <- tryCatch({
h <- basicHeaderGatherer()

returnedDoc <- getURI(obs_url, headerfunction = h$update,
useragent = default_ua(), ...)
}, warning = function(w) {
warning(w, "with url:", obs_url)
}, error = function(e) {
stop(e, "with url:", obs_url)
})
returnedList <- GET(obs_url, ..., user_agent(default_ua()))

headerInfo <- h$value()

if(headerInfo['status'] != "200"){
stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url)
} else {
if(grepl("No sites/data found using the selection criteria specified", returnedDoc)){
message(returnedDoc)
headerInfo['warn'] <- returnedDoc
if(status_code(returnedList) != 200){
message("For: ", obs_url,"\n")
stop_for_status(returnedList)
} else {

headerInfo <- headers(returnedList)

if(headerInfo$`content-type` == "text/tab-separated-values;charset=UTF-8"){
returnedDoc <- content(returnedList, type="text",encoding = "UTF-8")
} else if (headerInfo$`content-type` == "text/xml;charset=UTF-8"){
returnedDoc <- xmlcontent(returnedList)
} else {
returnedDoc <- content(returnedList)

if(grepl("No sites/data found using the selection criteria specified", returnedDoc)){
message(returnedDoc)
}
}


attr(returnedDoc, "headerInfo") <- headerInfo

return(returnedDoc)
}
attr(returnedDoc, "headerInfo") <- headerInfo
return(returnedDoc)
}
}

default_ua <- function() {
versions <- c(
libcurl = RCurl::curlVersion()$version,
RCurl = as.character(packageVersion("RCurl")),
libcurl = curl_version()$version,
httr = as.character(packageVersion("httr")),
dataRetrieval = as.character(packageVersion("dataRetrieval"))
)
paste0(names(versions), "/", versions, collapse = " ")
}

#' Drop-in replacement for httr content parsing, needed because httr switched from the XML package to xml2
#'
#' Reverts to the pre-1.1.0 httr behavior of returning XML-package objects
#'
#' @param response the result of httr::GET(url)
#' @keywords internal
#' @importFrom XML xmlParse
xmlcontent <- function(response){
XML::xmlTreeParse(iconv(readBin(response$content, character()), from = "UTF-8", to = "UTF-8"),
useInternalNodes=TRUE,getDTD = FALSE)
}
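
Taken together, the rewritten getWebServiceData() follows the standard httr pattern: one GET carrying a descriptive user agent, an early stop on any non-200 status, then parsing keyed off the reported content type. A condensed, self-contained sketch of that flow (the function name fetch_demo and the user-agent string are illustrative, not package code):

library(httr)

fetch_demo <- function(obs_url) {
  resp <- GET(obs_url, user_agent("dataRetrieval-demo"))

  if (status_code(resp) != 200) {
    message("For: ", obs_url)
    stop_for_status(resp)                    # HTTP error becomes an R error
  }

  ctype <- headers(resp)[["content-type"]]
  if (grepl("tab-separated-values", ctype, fixed = TRUE)) {
    content(resp, type = "text", encoding = "UTF-8")   # RDB/TSV as one string
  } else if (grepl("text/xml", ctype, fixed = TRUE)) {
    # httr 1.1.0 moved XML parsing to xml2; parsing the text ourselves with
    # the XML package (as xmlcontent() does above) keeps downstream code
    # that expects XML-package objects working.
    XML::xmlTreeParse(content(resp, type = "text", encoding = "UTF-8"),
                      asText = TRUE, useInternalNodes = TRUE, getDTD = FALSE)
  } else {
    content(resp)                            # fall back to httr's default parser
  }
}
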
5 changes: 3 additions & 2 deletions R/importRDB1.r
@@ -48,6 +48,7 @@
#' @importFrom readr read_delim
#' @importFrom readr problems
#' @importFrom readr parse_number
#' @importFrom lubridate fast_strptime
#' @examples
#' siteNumber <- "02177000"
#' startDate <- "2012-09-01"
@@ -166,7 +167,7 @@ importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){
if(all(c(paste0(i,"_dt"),paste0(i,"_tm")) %in% header.names)){
varname <- paste0(i,"_dateTime")

varval <- as.POSIXct(paste(readr.data[,paste0(i,"_dt")],readr.data[,paste0(i,"_tm")]), "%Y-%m-%d %H:%M", tz = "UTC")
varval <- fast_strptime(paste(readr.data[,paste0(i,"_dt")],readr.data[,paste0(i,"_tm")]), "%Y-%m-%d %H:%M", tz = "UTC")

if(!all(is.na(varval))){
readr.data[,varname] <- varval
@@ -198,7 +199,7 @@ importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){

if(all(c("DATE","TIME","TZCD") %in% header.names)){
varname <- "DATETIME"
varval <- as.POSIXct(paste(readr.data[,"DATE"],readr.data[,"TIME"]), "%Y-%m-%d %H%M%S", tz = "UTC")
varval <- fast_strptime(paste(readr.data[,"DATE"],readr.data[,"TIME"]), "%Y-%m-%d %H%M%S", tz = "UTC")
readr.data[,varname] <- varval
readr.data <- convertTZ(readr.data,"TZCD",varname,tz, flip.cols=TRUE)
}
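
The substantive change in importRDB1 is the date-time parser: as.POSIXct gives way to lubridate::fast_strptime, whose C-level parser skips format guessing because the RDB format is fixed and known. A hedged illustration with made-up values (note fast_strptime returns POSIXlt, which the caller can coerce):

library(lubridate)

dt <- c("2012-09-01", "2012-09-02")
tm <- c("08:15", "23:45")

# Old approach: as.POSIXct with an explicit format string
old <- as.POSIXct(paste(dt, tm), format = "%Y-%m-%d %H:%M", tz = "UTC")

# New approach: fast_strptime, cheaper on large RDB pulls
new <- fast_strptime(paste(dt, tm), "%Y-%m-%d %H:%M", tz = "UTC")

all.equal(old, as.POSIXct(new))   # TRUE: same instants, faster parse
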
39 changes: 20 additions & 19 deletions R/importWQP.R
@@ -26,9 +26,9 @@
#' @importFrom dplyr left_join
#' @importFrom lubridate parse_date_time
#' @importFrom lubridate fast_strptime
#' @importFrom RCurl basicHeaderGatherer
#' @importFrom RCurl getBinaryURL
#' @importFrom RCurl curlOptions
#' @importFrom httr GET
#' @importFrom httr user_agent
#' @importFrom httr write_disk
#' @examples
#' # These examples require an internet connection to run
#'
@@ -38,8 +38,8 @@
#'
#' rawSample <- importWQP(rawSampleURL)
#'
#' rawSampleURL_noZip <- constructWQPURL('USGS-01594440','01075', '', '', FALSE)
#' rawSample2 <- importWQP(rawSampleURL_noZip, zip=FALSE)
#' rawSampleURL_Zip <- constructWQPURL('USGS-01594440','01075', '', '', TRUE)
#' rawSample2 <- importWQP(rawSampleURL_Zip, zip=TRUE)
#'
#' STORETex <- constructWQPURL('WIDNR_WQX-10032762','Specific conductance', '', '')
#' STORETdata <- importWQP(STORETex)
@@ -59,22 +59,28 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
if(zip){
message("zip encoding access still in development")
temp <- tempfile()
options(timeout = 120)
h <- basicHeaderGatherer()
myOpts = curlOptions(verbose = FALSE,
header = FALSE,
useragent = default_ua())
temp <- paste0(temp,".zip")
doc <- GET(obs_url, user_agent(default_ua()),
write_disk(temp))

doc <- getBinaryURL(obs_url, .opts=myOpts, headerfunction = h$update)
headerInfo <- h$value()
headerInfo <- headers(doc)

} else {
doc <- getWebServiceData(obs_url)
headerInfo <- attr(doc, "headerInfo")
}

numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"])
sitesToBeReturned <- as.numeric(headerInfo["Total-Site-Count"])
numToBeReturned <- 0
sitesToBeReturned <- 0

if("total-result-count" %in% names(headerInfo)){
numToBeReturned <- as.numeric(headerInfo["total-result-count"])
}

if("total-site-count" %in% names(headerInfo)){
sitesToBeReturned <- as.numeric(headerInfo["total-site-count"])
}


totalReturned <- sum(numToBeReturned, sitesToBeReturned,na.rm = TRUE)

@@ -88,11 +94,6 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
}

if(zip){
temp <- paste0(temp,".zip")
f <- file(temp, "wb")
writeBin(doc, con = f)
close(f)

doc <- unzip(temp)
}

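
On the zip path, the RCurl getBinaryURL-plus-writeBin sequence collapses into a single GET with httr::write_disk(), which streams the response body straight to the temp file, and the header-derived counts are now read defensively, defaulting to 0 when a count header is absent. A hedged end-to-end sketch reusing the URL helper from the examples above; it assumes the WQP service returns a zipped delimited file:

library(dataRetrieval)
library(httr)

url <- constructWQPURL('USGS-01594440', '01075', '', '', TRUE)   # zip=TRUE
temp <- paste0(tempfile(), ".zip")

resp <- GET(url, user_agent("dataRetrieval-demo"), write_disk(temp))

headerInfo <- headers(resp)
n <- if ("total-result-count" %in% names(headerInfo)) {
  as.numeric(headerInfo[["total-result-count"]])
} else {
  0
}

extracted <- unzip(temp)   # path(s) to the extracted file(s), ready to import
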