Merge pull request #179 from ldecicco-USGS/master
Second shot at httr
ldecicco-USGS committed Feb 24, 2016
2 parents 83dc1c6 + c103a72 commit 65e66a6
Showing 10 changed files with 117 additions and 124 deletions.
7 changes: 4 additions & 3 deletions DESCRIPTION
@@ -1,8 +1,8 @@
Package: dataRetrieval
Type: Package
Title: Retrieval Functions for USGS and EPA Hydrologic and Water Quality Data
Version: 2.4.9
Date: 2016-02-12
Version: 2.5.0
Date: 2016-02-24
Authors@R: c( person("Robert", "Hirsch", role = c("aut"),
email = "[email protected]"),
person("Laura", "DeCicco", role = c("aut","cre"),
@@ -28,7 +28,8 @@ Depends:
R (>= 3.0)
Imports:
XML,
RCurl,
httr,
curl,
reshape2,
lubridate,
stats,
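
The Imports swap above (RCurl out; httr and curl in) is the dependency core of this PR. As a quick, hedged sanity check (nothing here is from the diff beyond the package names), the new stack can be verified from a fresh R session:

# Hedged sketch: confirm the replacement dependencies resolve and expose
# what the new code needs -- curl supplies curl_version() for the user-agent
# string, httr supplies the HTTP verbs.
stopifnot(requireNamespace("httr", quietly = TRUE),
          requireNamespace("curl", quietly = TRUE))
curl::curl_version()$version           # libcurl version string
as.character(packageVersion("httr"))   # feeds the rebuilt default_ua() below
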
15 changes: 10 additions & 5 deletions NAMESPACE
@@ -33,27 +33,32 @@ export(whatWQPsites)
export(zeroPad)
import(stats)
import(utils)
importFrom(RCurl,basicHeaderGatherer)
importFrom(RCurl,curlOptions)
importFrom(RCurl,curlVersion)
importFrom(RCurl,getBinaryURL)
importFrom(RCurl,getURI)
importFrom(XML,xmlAttrs)
importFrom(XML,xmlDoc)
importFrom(XML,xmlName)
importFrom(XML,xmlNamespaceDefinitions)
importFrom(XML,xmlParse)
importFrom(XML,xmlRoot)
importFrom(XML,xmlSize)
importFrom(XML,xmlToList)
importFrom(XML,xmlTreeParse)
importFrom(XML,xmlValue)
importFrom(XML,xpathApply)
importFrom(XML,xpathSApply)
importFrom(curl,curl_version)
importFrom(dplyr,full_join)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate_)
importFrom(dplyr,mutate_each_)
importFrom(dplyr,rbind_all)
importFrom(dplyr,select_)
importFrom(httr,GET)
importFrom(httr,content)
importFrom(httr,headers)
importFrom(httr,status_code)
importFrom(httr,stop_for_status)
importFrom(httr,user_agent)
importFrom(httr,write_disk)
importFrom(lubridate,fast_strptime)
importFrom(lubridate,parse_date_time)
importFrom(readr,col_character)
71 changes: 45 additions & 26 deletions R/getWebServiceData.R
@@ -5,9 +5,13 @@
#'
#' @param obs_url character containing the url for the retrieval
#' @param \dots additional arguments to pass to the GET request
#' @importFrom RCurl basicHeaderGatherer
#' @importFrom RCurl getURI
#' @importFrom RCurl curlVersion
#' @importFrom httr GET
#' @importFrom httr user_agent
#' @importFrom httr stop_for_status
#' @importFrom httr status_code
#' @importFrom httr headers
#' @importFrom httr content
#' @importFrom curl curl_version
#' @export
#' @return raw data from web services
#' @examples
@@ -22,36 +26,51 @@
#' }
getWebServiceData <- function(obs_url, ...){

possibleError <- tryCatch({
h <- basicHeaderGatherer()

returnedDoc <- getURI(obs_url, headerfunction = h$update,
useragent = default_ua(), ...)
}, warning = function(w) {
warning(w, "with url:", obs_url)
}, error = function(e) {
stop(e, "with url:", obs_url)
})
returnedList <- GET(obs_url, ..., user_agent(default_ua()))

headerInfo <- h$value()

if(headerInfo['status'] != "200"){
stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url)
} else {
if(grepl("No sites/data found using the selection criteria specified", returnedDoc)){
message(returnedDoc)
headerInfo['warn'] <- returnedDoc
if(status_code(returnedList) != 200){
message("For: ", obs_url,"\n")
stop_for_status(returnedList)
} else {

headerInfo <- headers(returnedList)

if(headerInfo$`content-type` == "text/tab-separated-values;charset=UTF-8"){
returnedDoc <- content(returnedList, type="text",encoding = "UTF-8")
} else if (headerInfo$`content-type` == "text/xml;charset=UTF-8"){
returnedDoc <- xmlcontent(returnedList)
} else {
returnedDoc <- content(returnedList)

if(grepl("No sites/data found using the selection criteria specified", returnedDoc)){
message(returnedDoc)
}
}


attr(returnedDoc, "headerInfo") <- headerInfo

return(returnedDoc)
}
attr(returnedDoc, "headerInfo") <- headerInfo
return(returnedDoc)
}
}

default_ua <- function() {
versions <- c(
libcurl = RCurl::curlVersion()$version,
RCurl = as.character(packageVersion("RCurl")),
libcurl = curl_version()$version,
httr = as.character(packageVersion("httr")),
dataRetrieval = as.character(packageVersion("dataRetrieval"))
)
paste0(names(versions), "/", versions, collapse = " ")
}

#' Drop-in replacement for httr content parsing, needed because httr switched from the XML package to xml2
#'
#' Reverts to the pre-1.1.0 httr behavior of returning XML-package objects
#'
#' @param response the result of httr::GET(url)
#' @keywords internal
#' @importFrom XML xmlParse
xmlcontent <- function(response){
XML::xmlTreeParse(iconv(readBin(response$content, character()), from = "UTF-8", to = "UTF-8"),
useInternalNodes=TRUE,getDTD = FALSE)
}
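
Taken together, the rewritten getWebServiceData() follows the standard httr pattern: one GET carrying a descriptive user agent, an early stop on any non-200 status, then parsing keyed off the reported content type. A condensed, self-contained sketch of that flow (the function name fetch_demo and the user-agent string are illustrative, not package code):

library(httr)

fetch_demo <- function(obs_url) {
  resp <- GET(obs_url, user_agent("dataRetrieval-demo"))

  if (status_code(resp) != 200) {
    message("For: ", obs_url)
    stop_for_status(resp)                    # HTTP error becomes an R error
  }

  ctype <- headers(resp)[["content-type"]]
  if (grepl("tab-separated-values", ctype, fixed = TRUE)) {
    content(resp, type = "text", encoding = "UTF-8")   # RDB/TSV as one string
  } else if (grepl("text/xml", ctype, fixed = TRUE)) {
    # httr 1.1.0 moved XML parsing to xml2; parsing the text ourselves with
    # the XML package (as xmlcontent() does above) keeps downstream code
    # that expects XML-package objects working.
    XML::xmlTreeParse(content(resp, type = "text", encoding = "UTF-8"),
                      asText = TRUE, useInternalNodes = TRUE, getDTD = FALSE)
  } else {
    content(resp)                            # fall back to httr's default parser
  }
}
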
5 changes: 3 additions & 2 deletions R/importRDB1.r
@@ -48,6 +48,7 @@
#' @importFrom readr read_delim
#' @importFrom readr problems
#' @importFrom readr parse_number
#' @importFrom lubridate fast_strptime
#' @examples
#' siteNumber <- "02177000"
#' startDate <- "2012-09-01"
@@ -166,7 +167,7 @@ importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){
if(all(c(paste0(i,"_dt"),paste0(i,"_tm")) %in% header.names)){
varname <- paste0(i,"_dateTime")

varval <- as.POSIXct(paste(readr.data[,paste0(i,"_dt")],readr.data[,paste0(i,"_tm")]), "%Y-%m-%d %H:%M", tz = "UTC")
varval <- fast_strptime(paste(readr.data[,paste0(i,"_dt")],readr.data[,paste0(i,"_tm")]), "%Y-%m-%d %H:%M", tz = "UTC")

if(!all(is.na(varval))){
readr.data[,varname] <- varval
@@ -198,7 +199,7 @@ importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){

if(all(c("DATE","TIME","TZCD") %in% header.names)){
varname <- "DATETIME"
varval <- as.POSIXct(paste(readr.data[,"DATE"],readr.data[,"TIME"]), "%Y-%m-%d %H%M%S", tz = "UTC")
varval <- fast_strptime(paste(readr.data[,"DATE"],readr.data[,"TIME"]), "%Y-%m-%d %H%M%S", tz = "UTC")
readr.data[,varname] <- varval
readr.data <- convertTZ(readr.data,"TZCD",varname,tz, flip.cols=TRUE)
}
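
The substantive change in importRDB1 is the date-time parser: as.POSIXct gives way to lubridate::fast_strptime, whose C-level parser skips format guessing because the RDB format is fixed and known. A hedged illustration with made-up values (note fast_strptime returns POSIXlt, which the caller can coerce):

library(lubridate)

dt <- c("2012-09-01", "2012-09-02")
tm <- c("08:15", "23:45")

# Old approach: as.POSIXct with an explicit format string
old <- as.POSIXct(paste(dt, tm), format = "%Y-%m-%d %H:%M", tz = "UTC")

# New approach: fast_strptime, cheaper on large RDB pulls
new <- fast_strptime(paste(dt, tm), "%Y-%m-%d %H:%M", tz = "UTC")

all.equal(old, as.POSIXct(new))   # TRUE: same instants, faster parse
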
39 changes: 20 additions & 19 deletions R/importWQP.R
@@ -26,9 +26,9 @@
#' @importFrom dplyr left_join
#' @importFrom lubridate parse_date_time
#' @importFrom lubridate fast_strptime
#' @importFrom RCurl basicHeaderGatherer
#' @importFrom RCurl getBinaryURL
#' @importFrom RCurl curlOptions
#' @importFrom httr GET
#' @importFrom httr user_agent
#' @importFrom httr write_disk
#' @examples
#' # These examples require an internet connection to run
#'
@@ -38,8 +38,8 @@
#'
#' rawSample <- importWQP(rawSampleURL)
#'
#' rawSampleURL_noZip <- constructWQPURL('USGS-01594440','01075', '', '', FALSE)
#' rawSample2 <- importWQP(rawSampleURL_noZip, zip=FALSE)
#' rawSampleURL_Zip <- constructWQPURL('USGS-01594440','01075', '', '', TRUE)
#' rawSample2 <- importWQP(rawSampleURL_Zip, zip=TRUE)
#'
#' STORETex <- constructWQPURL('WIDNR_WQX-10032762','Specific conductance', '', '')
#' STORETdata <- importWQP(STORETex)
@@ -59,22 +59,28 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
if(zip){
message("zip encoding access still in development")
temp <- tempfile()
options(timeout = 120)
h <- basicHeaderGatherer()
myOpts = curlOptions(verbose = FALSE,
header = FALSE,
useragent = default_ua())
temp <- paste0(temp,".zip")
doc <- GET(obs_url, user_agent(default_ua()),
write_disk(temp))

doc <- getBinaryURL(obs_url, .opts=myOpts, headerfunction = h$update)
headerInfo <- h$value()
headerInfo <- headers(doc)

} else {
doc <- getWebServiceData(obs_url)
headerInfo <- attr(doc, "headerInfo")
}

numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"])
sitesToBeReturned <- as.numeric(headerInfo["Total-Site-Count"])
numToBeReturned <- 0
sitesToBeReturned <- 0

if("total-result-count" %in% names(headerInfo)){
numToBeReturned <- as.numeric(headerInfo["total-result-count"])
}

if("total-site-count" %in% names(headerInfo)){
sitesToBeReturned <- as.numeric(headerInfo["total-site-count"])
}


totalReturned <- sum(numToBeReturned, sitesToBeReturned,na.rm = TRUE)

@@ -88,11 +94,6 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
}

if(zip){
temp <- paste0(temp,".zip")
f <- file(temp, "wb")
writeBin(doc, con = f)
close(f)

doc <- unzip(temp)
}

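
On the zip path, the RCurl getBinaryURL-plus-writeBin sequence collapses into a single GET with httr::write_disk(), which streams the response body straight to the temp file, and the header-derived counts are now read defensively, defaulting to 0 when a count header is absent. A hedged end-to-end sketch reusing the URL helper from the examples above; it assumes the WQP service returns a zipped delimited file:

library(dataRetrieval)
library(httr)

url <- constructWQPURL('USGS-01594440', '01075', '', '', TRUE)   # zip=TRUE
temp <- paste0(tempfile(), ".zip")

resp <- GET(url, user_agent("dataRetrieval-demo"), write_disk(temp))

headerInfo <- headers(resp)
n <- if ("total-result-count" %in% names(headerInfo)) {
  as.numeric(headerInfo[["total-result-count"]])
} else {
  0
}

extracted <- unzip(temp)   # path(s) to the extracted file(s), ready to import
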