From 0dc031a4d79bec856e6d477645a75e66d4234e8f Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Mon, 22 Feb 2016 15:33:59 -0600 Subject: [PATCH 1/6] Second shot at httr --- DESCRIPTION | 5 ++-- NAMESPACE | 15 ++++++---- R/getWebServiceData.R | 63 +++++++++++++++++++++++----------------- R/importWQP.R | 43 +++++++++++++++------------ R/readNWISdata.r | 1 - man/getWebServiceData.Rd | 6 +++- man/importWQP.Rd | 4 +-- 7 files changed, 80 insertions(+), 57 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0b6e5998..09814041 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dataRetrieval Type: Package Title: Retrieval Functions for USGS and EPA Hydrologic and Water Quality Data -Version: 2.4.9 +Version: 2.4.10 Date: 2016-02-12 Authors@R: c( person("Robert", "Hirsch", role = c("aut"), email = "rhirsch@usgs.gov"), @@ -28,7 +28,8 @@ Depends: R (>= 3.0) Imports: XML, - RCurl, + httr, + curl, reshape2, lubridate, stats, diff --git a/NAMESPACE b/NAMESPACE index 9d3d0d2a..9202ed07 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,11 +33,6 @@ export(whatWQPsites) export(zeroPad) import(stats) import(utils) -importFrom(RCurl,basicHeaderGatherer) -importFrom(RCurl,curlOptions) -importFrom(RCurl,curlVersion) -importFrom(RCurl,getBinaryURL) -importFrom(RCurl,getURI) importFrom(XML,xmlAttrs) importFrom(XML,xmlDoc) importFrom(XML,xmlName) @@ -49,11 +44,21 @@ importFrom(XML,xmlTreeParse) importFrom(XML,xmlValue) importFrom(XML,xpathApply) importFrom(XML,xpathSApply) +importFrom(curl,curl_version) importFrom(dplyr,left_join) importFrom(dplyr,mutate_) importFrom(dplyr,mutate_each_) importFrom(dplyr,rbind_all) importFrom(dplyr,select_) +importFrom(httr,GET) +importFrom(httr,content) +importFrom(httr,headers) +importFrom(httr,progress) +importFrom(httr,status_code) +importFrom(httr,stop_for_status) +importFrom(httr,user_agent) +importFrom(httr,verbose) +importFrom(httr,write_disk) importFrom(lubridate,fast_strptime) importFrom(lubridate,parse_date_time) importFrom(readr,col_character) diff --git a/R/getWebServiceData.R b/R/getWebServiceData.R index 68c5f99c..3494aeb7 100644 --- a/R/getWebServiceData.R +++ b/R/getWebServiceData.R @@ -4,10 +4,18 @@ #' \code{\link[RCurl]{getURI}} with more informative error messages. #' #' @param obs_url character containing the url for the retrieval +#' @param progress logical +#' @param verbose logical #' @param \dots information to pass to header request -#' @importFrom RCurl basicHeaderGatherer -#' @importFrom RCurl getURI -#' @importFrom RCurl curlVersion +#' @importFrom httr GET +#' @importFrom httr content +#' @importFrom httr user_agent +#' @importFrom httr stop_for_status +#' @importFrom httr status_code +#' @importFrom httr headers +#' @importFrom httr verbose +#' @importFrom httr progress +#' @importFrom curl curl_version #' @export #' @return raw data from web services #' @examples @@ -20,37 +28,38 @@ #' \dontrun{ #' rawData <- getWebServiceData(obs_url) #' } -getWebServiceData <- function(obs_url, ...){ +getWebServiceData <- function(obs_url, ..., progress=TRUE, verbose=TRUE){ - possibleError <- tryCatch({ - h <- basicHeaderGatherer() - - returnedDoc <- getURI(obs_url, headerfunction = h$update, - useragent = default_ua(), ...) 
- }, warning = function(w) { - warning(w, "with url:", obs_url) - }, error = function(e) { - stop(e, "with url:", obs_url) - }) + returnedList <- GET(obs_url, ..., user_agent(default_ua()), + if(progress) progress(), + if(verbose) verbose()) - headerInfo <- h$value() - - if(headerInfo['status'] != "200"){ - stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url) - } else { - if(grepl("No sites/data found using the selection criteria specified", returnedDoc)){ - message(returnedDoc) - headerInfo['warn'] <- returnedDoc + if(status_code(returnedList) != 200){ + message("For: ", obs_url,"\n") + stop_for_status(returnedList) + } else { + + headerInfo <- headers(returnedList) + + if(headerInfo$`content-type` == "text/tab-separated-values;charset=UTF-8"){ + returnedDoc <- content(returnedList, type="text",encoding = "UTF-8") + } else { + returnedDoc <- content(returnedList) + } + + if(grepl("No sites/data found using the selection criteria specified", returnedDoc)){ + message(returnedDoc) + } + attr(returnedDoc, "headerInfo") <- headerInfo + + return(returnedDoc) } - attr(returnedDoc, "headerInfo") <- headerInfo - return(returnedDoc) - } } default_ua <- function() { versions <- c( - libcurl = RCurl::curlVersion()$version, - RCurl = as.character(packageVersion("RCurl")), + libcurl = curl_version()$version, + httr = as.character(packageVersion("httr")), dataRetrieval = as.character(packageVersion("dataRetrieval")) ) paste0(names(versions), "/", versions, collapse = " ") diff --git a/R/importWQP.R b/R/importWQP.R index 3b6da483..f3d95c69 100644 --- a/R/importWQP.R +++ b/R/importWQP.R @@ -26,9 +26,12 @@ #' @importFrom dplyr left_join #' @importFrom lubridate parse_date_time #' @importFrom lubridate fast_strptime -#' @importFrom RCurl basicHeaderGatherer -#' @importFrom RCurl getBinaryURL -#' @importFrom RCurl curlOptions +#' @importFrom httr GET +#' @importFrom httr content +#' @importFrom httr user_agent +#' @importFrom httr write_disk +#' @importFrom httr verbose +#' @importFrom httr progress #' @examples #' # These examples require an internet connection to run #' @@ -38,8 +41,8 @@ #' #' rawSample <- importWQP(rawSampleURL) #' -#' rawSampleURL_noZip <- constructWQPURL('USGS-01594440','01075', '', '', FALSE) -#' rawSample2 <- importWQP(rawSampleURL_noZip, zip=FALSE) +#' rawSampleURL_Zip <- constructWQPURL('USGS-01594440','01075', '', '', TRUE) +#' rawSample2 <- importWQP(rawSampleURL_Zip, zip=TRUE) #' #' STORETex <- constructWQPURL('WIDNR_WQX-10032762','Specific conductance', '', '') #' STORETdata <- importWQP(STORETex) @@ -59,22 +62,29 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){ if(zip){ message("zip encoding access still in development") temp <- tempfile() - options(timeout = 120) - h <- basicHeaderGatherer() - myOpts = curlOptions(verbose = FALSE, - header = FALSE, - useragent = default_ua()) + temp <- paste0(temp,".zip") + doc <- GET(obs_url, user_agent(default_ua()), + progress(), + verbose(), write_disk(temp)) - doc <- getBinaryURL(obs_url, .opts=myOpts, headerfunction = h$update) - headerInfo <- h$value() + headerInfo <- headers(doc) } else { doc <- getWebServiceData(obs_url) headerInfo <- attr(doc, "headerInfo") } - numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"]) - sitesToBeReturned <- as.numeric(headerInfo["Total-Site-Count"]) + numToBeReturned <- 0 + sitesToBeReturned <- 0 + + if("total-result-count" %in% names(headerInfo)){ + numToBeReturned <- as.numeric(headerInfo["total-result-count"]) + } + + if("total-site-count" %in% 
names(headerInfo)){ + sitesToBeReturned <- as.numeric(headerInfo["total-site-count"]) + } + totalReturned <- sum(numToBeReturned, sitesToBeReturned,na.rm = TRUE) @@ -88,11 +98,6 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){ } if(zip){ - temp <- paste0(temp,".zip") - f <- file(temp, "wb") - writeBin(doc, con = f) - close(f) - doc <- unzip(temp) } diff --git a/R/readNWISdata.r b/R/readNWISdata.r index b7f38fad..d5edbe65 100644 --- a/R/readNWISdata.r +++ b/R/readNWISdata.r @@ -178,7 +178,6 @@ readNWISdata <- function(service="dv", ...){ if(length(grep("rdb",values["format"])) >0){ retval <- importRDB1(urlCall, asDateTime = TRUE, tz = tz) - # retval <- importRDB1(urlCall, asDateTime = (service == "qwdata"), tz = tz) } else { retval <- importWaterML1(urlCall, asDateTime = ("iv" == service), tz= tz) } diff --git a/man/getWebServiceData.Rd b/man/getWebServiceData.Rd index 9d049704..2fde4dbd 100644 --- a/man/getWebServiceData.Rd +++ b/man/getWebServiceData.Rd @@ -4,11 +4,15 @@ \alias{getWebServiceData} \title{Function to return data from web services} \usage{ -getWebServiceData(obs_url, ...) +getWebServiceData(obs_url, ..., progress = TRUE, verbose = TRUE) } \arguments{ \item{obs_url}{character containing the url for the retrieval} +\item{progress}{logical} + +\item{verbose}{logical} + \item{\dots}{information to pass to header request} } \value{ diff --git a/man/importWQP.Rd b/man/importWQP.Rd index 2abb0fd8..bee8a945 100644 --- a/man/importWQP.Rd +++ b/man/importWQP.Rd @@ -32,8 +32,8 @@ rawSampleURL <- constructWQPURL('USGS-01594440','01075', '', '') rawSample <- importWQP(rawSampleURL) -rawSampleURL_noZip <- constructWQPURL('USGS-01594440','01075', '', '', FALSE) -rawSample2 <- importWQP(rawSampleURL_noZip, zip=FALSE) +rawSampleURL_Zip <- constructWQPURL('USGS-01594440','01075', '', '', TRUE) +rawSample2 <- importWQP(rawSampleURL_Zip, zip=TRUE) STORETex <- constructWQPURL('WIDNR_WQX-10032762','Specific conductance', '', '') STORETdata <- importWQP(STORETex) From 6f4954b4049d86d084f5997ea9b7e9633d913ab0 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Tue, 23 Feb 2016 10:25:00 -0600 Subject: [PATCH 2/6] Change to no verbose gets tests to pass. --- R/getWebServiceData.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/getWebServiceData.R b/R/getWebServiceData.R index 3494aeb7..d4a9b955 100644 --- a/R/getWebServiceData.R +++ b/R/getWebServiceData.R @@ -28,7 +28,7 @@ #' \dontrun{ #' rawData <- getWebServiceData(obs_url) #' } -getWebServiceData <- function(obs_url, ..., progress=TRUE, verbose=TRUE){ +getWebServiceData <- function(obs_url, ..., progress=FALSE, verbose=FALSE){ returnedList <- GET(obs_url, ..., user_agent(default_ua()), if(progress) progress(), From e6cb6657f113d1dd76f6ebd96a959b5d340aa830 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Tue, 23 Feb 2016 10:36:55 -0600 Subject: [PATCH 3/6] commit the help file! 
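
Regenerating the Rd after the previous commit flipped the progress and
verbose defaults to FALSE. A minimal sketch of the resulting call pattern
(hypothetical session; the site and parameter values are reused from the
roxygen examples, and an internet connection is assumed):

    library(dataRetrieval)
    obs_url <- constructNWISURL("02177000", "00060",
                                "2012-09-01", "2012-10-01", "dv")
    # progress and verbose now default to FALSE; opt back in per call:
    rawData <- getWebServiceData(obs_url, progress = TRUE, verbose = TRUE)
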
--- man/getWebServiceData.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/getWebServiceData.Rd b/man/getWebServiceData.Rd index 2fde4dbd..53bdbe11 100644 --- a/man/getWebServiceData.Rd +++ b/man/getWebServiceData.Rd @@ -4,7 +4,7 @@ \alias{getWebServiceData} \title{Function to return data from web services} \usage{ -getWebServiceData(obs_url, ..., progress = TRUE, verbose = TRUE) +getWebServiceData(obs_url, ..., progress = FALSE, verbose = FALSE) } \arguments{ \item{obs_url}{character containing the url for the retrieval} From 1af08e2fcd11e040d10bebe7d9cf587a3605eaf1 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Wed, 24 Feb 2016 10:00:05 -0600 Subject: [PATCH 4/6] Let user figure out progress or verbose on their own. --- NAMESPACE | 2 -- R/getWebServiceData.R | 10 ++-------- R/importWQP.R | 5 +---- man/getWebServiceData.Rd | 6 +----- 4 files changed, 4 insertions(+), 19 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 9202ed07..60e58406 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -53,11 +53,9 @@ importFrom(dplyr,select_) importFrom(httr,GET) importFrom(httr,content) importFrom(httr,headers) -importFrom(httr,progress) importFrom(httr,status_code) importFrom(httr,stop_for_status) importFrom(httr,user_agent) -importFrom(httr,verbose) importFrom(httr,write_disk) importFrom(lubridate,fast_strptime) importFrom(lubridate,parse_date_time) diff --git a/R/getWebServiceData.R b/R/getWebServiceData.R index d4a9b955..2a07f242 100644 --- a/R/getWebServiceData.R +++ b/R/getWebServiceData.R @@ -4,8 +4,6 @@ #' \code{\link[RCurl]{getURI}} with more informative error messages. #' #' @param obs_url character containing the url for the retrieval -#' @param progress logical -#' @param verbose logical #' @param \dots information to pass to header request #' @importFrom httr GET #' @importFrom httr content @@ -13,8 +11,6 @@ #' @importFrom httr stop_for_status #' @importFrom httr status_code #' @importFrom httr headers -#' @importFrom httr verbose -#' @importFrom httr progress #' @importFrom curl curl_version #' @export #' @return raw data from web services @@ -28,11 +24,9 @@ #' \dontrun{ #' rawData <- getWebServiceData(obs_url) #' } -getWebServiceData <- function(obs_url, ..., progress=FALSE, verbose=FALSE){ +getWebServiceData <- function(obs_url, ...){ - returnedList <- GET(obs_url, ..., user_agent(default_ua()), - if(progress) progress(), - if(verbose) verbose()) + returnedList <- GET(obs_url, ..., user_agent(default_ua())) if(status_code(returnedList) != 200){ message("For: ", obs_url,"\n") diff --git a/R/importWQP.R b/R/importWQP.R index f3d95c69..4e60ca77 100644 --- a/R/importWQP.R +++ b/R/importWQP.R @@ -30,8 +30,6 @@ #' @importFrom httr content #' @importFrom httr user_agent #' @importFrom httr write_disk -#' @importFrom httr verbose -#' @importFrom httr progress #' @examples #' # These examples require an internet connection to run #' @@ -64,8 +62,7 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){ temp <- tempfile() temp <- paste0(temp,".zip") doc <- GET(obs_url, user_agent(default_ua()), - progress(), - verbose(), write_disk(temp)) + write_disk(temp)) headerInfo <- headers(doc) diff --git a/man/getWebServiceData.Rd b/man/getWebServiceData.Rd index 53bdbe11..9d049704 100644 --- a/man/getWebServiceData.Rd +++ b/man/getWebServiceData.Rd @@ -4,15 +4,11 @@ \alias{getWebServiceData} \title{Function to return data from web services} \usage{ -getWebServiceData(obs_url, ..., progress = FALSE, verbose = FALSE) +getWebServiceData(obs_url, ...) 
} \arguments{ \item{obs_url}{character containing the url for the retrieval} -\item{progress}{logical} - -\item{verbose}{logical} - \item{\dots}{information to pass to header request} } \value{ From 2a841424601412cbf38a3388afc6038b0d628b7a Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Wed, 24 Feb 2016 13:19:07 -0600 Subject: [PATCH 5/6] Getting checks to pass for realz --- DESCRIPTION | 4 ++-- NAMESPACE | 1 + R/getWebServiceData.R | 24 ++++++++++++++++++++---- R/importWQP.R | 1 - R/importWaterML1.r | 12 ++++-------- R/importWaterML2.r | 7 +++---- 6 files changed, 30 insertions(+), 19 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 09814041..c98779d5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: dataRetrieval Type: Package Title: Retrieval Functions for USGS and EPA Hydrologic and Water Quality Data -Version: 2.4.10 -Date: 2016-02-12 +Version: 2.5.0 +Date: 2016-02-24 Authors@R: c( person("Robert", "Hirsch", role = c("aut"), email = "rhirsch@usgs.gov"), person("Laura", "DeCicco", role = c("aut","cre"), diff --git a/NAMESPACE b/NAMESPACE index 60e58406..f184117b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,6 +37,7 @@ importFrom(XML,xmlAttrs) importFrom(XML,xmlDoc) importFrom(XML,xmlName) importFrom(XML,xmlNamespaceDefinitions) +importFrom(XML,xmlParse) importFrom(XML,xmlRoot) importFrom(XML,xmlSize) importFrom(XML,xmlToList) diff --git a/R/getWebServiceData.R b/R/getWebServiceData.R index 2a07f242..786f1b18 100644 --- a/R/getWebServiceData.R +++ b/R/getWebServiceData.R @@ -6,11 +6,11 @@ #' @param obs_url character containing the url for the retrieval #' @param \dots information to pass to header request #' @importFrom httr GET -#' @importFrom httr content #' @importFrom httr user_agent #' @importFrom httr stop_for_status #' @importFrom httr status_code #' @importFrom httr headers +#' @importFrom httr content #' @importFrom curl curl_version #' @export #' @return raw data from web services @@ -37,13 +37,17 @@ getWebServiceData <- function(obs_url, ...){ if(headerInfo$`content-type` == "text/tab-separated-values;charset=UTF-8"){ returnedDoc <- content(returnedList, type="text",encoding = "UTF-8") + } else if (headerInfo$`content-type` == "text/xml;charset=UTF-8"){ + returnedDoc <- xmlcontent(returnedList) } else { returnedDoc <- content(returnedList) + + if(grepl("No sites/data found using the selection criteria specified", returnedDoc)){ + message(returnedDoc) + } } - if(grepl("No sites/data found using the selection criteria specified", returnedDoc)){ - message(returnedDoc) - } + attr(returnedDoc, "headerInfo") <- headerInfo return(returnedDoc) @@ -57,4 +61,16 @@ default_ua <- function() { dataRetrieval = as.character(packageVersion("dataRetrieval")) ) paste0(names(versions), "/", versions, collapse = " ") +} + +#' drop in replacement for httr switching to xml2 from XML +#' +#' reverts to old parsing pre v1.1.0 for httr +#' +#' @param response the result of httr::GET(url) +#' @keywords internal +#' @importFrom XML xmlParse +xmlcontent <- function(response){ + XML::xmlTreeParse(iconv(readBin(response$content, character()), from = "UTF-8", to = "UTF-8"), + useInternalNodes=TRUE,getDTD = FALSE) } \ No newline at end of file diff --git a/R/importWQP.R b/R/importWQP.R index 4e60ca77..f1b1ed2a 100644 --- a/R/importWQP.R +++ b/R/importWQP.R @@ -27,7 +27,6 @@ #' @importFrom lubridate parse_date_time #' @importFrom lubridate fast_strptime #' @importFrom httr GET -#' @importFrom httr content #' @importFrom httr user_agent #' @importFrom httr write_disk #' @examples 
diff --git a/R/importWaterML1.r b/R/importWaterML1.r index cf5409a8..980a8848 100644 --- a/R/importWaterML1.r +++ b/R/importWaterML1.r @@ -106,12 +106,11 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ if(file.exists(obs_url)){ rawData <- obs_url + returnedDoc <- xmlTreeParse(rawData, getDTD = FALSE, useInternalNodes = TRUE) } else { - rawData <- getWebServiceData(obs_url, encoding='gzip') + returnedDoc <- getWebServiceData(obs_url, encoding='gzip') } - - returnedDoc <- xmlTreeParse(rawData, getDTD = FALSE, useInternalNodes = TRUE) - + if(tz != ""){ tz <- match.arg(tz, c("America/New_York","America/Chicago", "America/Denver","America/Los_Angeles", @@ -181,9 +180,7 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ value <- as.numeric(xpathSApply(subChunk, "ns1:value",namespaces = chunkNS, xmlValue)) if(length(value)!=0){ - -# value[value == noValue] <- NA - + attNames <- xpathSApply(subChunk, "ns1:value/@*",namespaces = chunkNS) attributeNames <- unique(names(attNames)) @@ -197,7 +194,6 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ if(length(methodDescription) > 0 && methodDescription != ""){ valueName <- paste("X",methodDescription,pCode,statCd,sep="_") } - assign(valueName,value) diff --git a/R/importWaterML2.r b/R/importWaterML2.r index cec37cba..da078a9e 100644 --- a/R/importWaterML2.r +++ b/R/importWaterML2.r @@ -51,12 +51,11 @@ importWaterML2 <- function(obs_url, asDateTime=FALSE, tz=""){ if(file.exists(obs_url)){ rawData <- obs_url + doc <- xmlTreeParse(rawData, getDTD = FALSE, useInternalNodes = TRUE) } else { - rawData <- getWebServiceData(obs_url) + doc <- getWebServiceData(obs_url) } - - doc <- xmlTreeParse(rawData, getDTD = FALSE, useInternalNodes = TRUE) - + if(tz != ""){ tz <- match.arg(tz, c("America/New_York","America/Chicago", "America/Denver","America/Los_Angeles", From c103a720c52bad0b7d6f0d8b666c2d7a0dada0ff Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Wed, 24 Feb 2016 15:30:34 -0600 Subject: [PATCH 6/6] More attempts to speed things up. 
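
The datetime handling in importWaterML1() collapses into a single
vectorized lubridate call. A minimal sketch of the idea (illustrative
timestamps; assumes only that lubridate is installed):

    library(lubridate)
    # WaterML serves timestamps at mixed precision; one parse_date_time()
    # call with exact strptime-style formats replaces the old
    # nchar()-based branching over as.POSIXct():
    x <- c("2012", "2012-09-01", "2012-09-01T06:15", "2012-09-01T06:15:30")
    parse_date_time(x, c("%Y", "%Y-%m-%d", "%Y-%m-%dT%H:%M",
                         "%Y-%m-%dT%H:%M:%S"), exact = TRUE)
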
--- NAMESPACE | 1 + R/importRDB1.r | 5 +-- R/importWaterML1.r | 78 ++++++++++++++----------------------------- man/importWaterML1.Rd | 2 +- 4 files changed, 30 insertions(+), 56 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index f184117b..eb93deae 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,6 +46,7 @@ importFrom(XML,xmlValue) importFrom(XML,xpathApply) importFrom(XML,xpathSApply) importFrom(curl,curl_version) +importFrom(dplyr,full_join) importFrom(dplyr,left_join) importFrom(dplyr,mutate_) importFrom(dplyr,mutate_each_) diff --git a/R/importRDB1.r b/R/importRDB1.r index 773bc597..bcf765a3 100644 --- a/R/importRDB1.r +++ b/R/importRDB1.r @@ -48,6 +48,7 @@ #' @importFrom readr read_delim #' @importFrom readr problems #' @importFrom readr parse_number +#' @importFrom lubridate fast_strptime #' @examples #' siteNumber <- "02177000" #' startDate <- "2012-09-01" @@ -166,7 +167,7 @@ importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){ if(all(c(paste0(i,"_dt"),paste0(i,"_tm")) %in% header.names)){ varname <- paste0(i,"_dateTime") - varval <- as.POSIXct(paste(readr.data[,paste0(i,"_dt")],readr.data[,paste0(i,"_tm")]), "%Y-%m-%d %H:%M", tz = "UTC") + varval <- fast_strptime(paste(readr.data[,paste0(i,"_dt")],readr.data[,paste0(i,"_tm")]), "%Y-%m-%d %H:%M", tz = "UTC") if(!all(is.na(varval))){ readr.data[,varname] <- varval @@ -198,7 +199,7 @@ importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){ if(all(c("DATE","TIME","TZCD") %in% header.names)){ varname <- "DATETIME" - varval <- as.POSIXct(paste(readr.data[,"DATE"],readr.data[,"TIME"]), "%Y-%m-%d %H%M%S", tz = "UTC") + varval <- fast_strptime(paste(readr.data[,"DATE"],readr.data[,"TIME"]), "%Y-%m-%d %H%M%S", tz = "UTC") readr.data[,varname] <- varval readr.data <- convertTZ(readr.data,"TZCD",varname,tz, flip.cols=TRUE) } diff --git a/R/importWaterML1.r b/R/importWaterML1.r index 980a8848..0a71af03 100644 --- a/R/importWaterML1.r +++ b/R/importWaterML1.r @@ -51,6 +51,8 @@ #' @import stats #' @importFrom reshape2 melt #' @importFrom reshape2 dcast +#' @importFrom lubridate parse_date_time +#' @importFrom dplyr full_join #' @examples #' siteNumber <- "02177000" #' startDate <- "2012-09-01" @@ -59,7 +61,7 @@ #' property <- '00060' #' obs_url <- constructNWISURL(siteNumber,property,startDate,endDate,'dv') #' \dontrun{ -#' data <- importWaterML1(obs_url) +#' data <- importWaterML1(obs_url, asDateTime=TRUE) #' #' groundWaterSite <- "431049071324301" #' startGW <- "2013-10-01" @@ -231,64 +233,32 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ if("dateTime" %in% attributeNames){ datetime <- xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS) - - numChar <- nchar(datetime) - + if(asDateTime){ - # Common options: - # YYYY numChar=4 - # YYYY-MM-DD numChar=10 - # YYYY-MM-DDTHH:MM numChar=16 - # YYYY-MM-DDTHH:MM:SS numChar=19 - # YYYY-MM-DDTHH:MM:SSZ numChar=20 - # YYYY-MM-DDTHH:MM:SS.000 numChar=23 - # YYYY-MM-DDTHH:MM:SS.000-XX:00 numChar=29 - - if(abs(max(numChar) - min(numChar)) != 0){ - warning("Mixed date types, not converted to POSIXct") - } else { - numChar <- numChar[1] - if(numChar == 4){ - datetime <- as.POSIXct(datetime, "%Y", tz = "UTC") - } else if(numChar == 10){ - datetime <- as.POSIXct(datetime, "%Y-%m-%d", tz = "UTC") - } else if(numChar == 16){ - datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M", tz = "UTC") - } else if(numChar == 19){ - datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%S", tz = "UTC") - } else if(numChar == 20){ - datetime <- 
as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%S", tz = "UTC") - } else if(numChar == 23){ - datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%OS", tz = "UTC") - } else if(numChar == 24){ - datetime <- substr(datetime,1,23) - datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%OS", tz = "UTC") - df$tz_cd <- rep(zoneAbbrievs[1], nrow(df)) - } else if(numChar == 29){ - tzOffset <- as.character(substr(datetime,24,numChar)) - - tzHours <- as.numeric(substr(tzOffset,1,3)) - - datetime <- substr(datetime,1,23) - datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%OS", tz = "UTC") - datetime <- datetime - tzHours*60*60 - df$tz_cd <- as.character(zoneAbbrievs[tzOffset]) - } + numChar <- nchar(datetime) + + datetime <- parse_date_time(datetime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M", + "%Y-%m-%dT%H:%M:%S","%Y-%m-%dT%H:%M:%OS", + "%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE) + + if(any(numChar < 20) & any(numChar > 16)){ + + offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0), + code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST",""), + stringsAsFactors = FALSE) - if(!("tz_cd" %in% names(df))){ - df$tz_cd <- zoneAbbrievs[1] - tzHours <- as.numeric(substr(names(zoneAbbrievs[1]),1,3)) - datetime <- datetime - tzHours*60*60 - } + datetime[numChar < 20 & numChar > 16] <- datetime[numChar < 20 & numChar > 16] + offsetLibrary[offsetLibrary$code == zoneAbbrievs[1],"offset"]*60*60 } - + + } else { datetime <- as.character(datetime) + numChar <- nchar(datetime) if(any(numChar) == 29){ tzOffset <- as.character(substr(datetime,24,numChar)) - df$tz_cd <- as.character(zoneAbbrievs[tzOffset]) + df$tz_cd <- as.character(zoneAbbrievs[tzOffset]) df$tz_cd[is.na(df$tz_cd)] <- zoneAbbrievs[1] } else { df$tz_cd <- zoneAbbrievs[1] @@ -315,7 +285,8 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ mergedDF <- df } else { similarNames <- intersect(names(mergedDF), names(df)) - mergedDF <- merge(mergedDF, df,by=similarNames,all=TRUE) + # mergedDF <- merge(mergedDF, df,by=similarNames,all=TRUE) + mergedDF <- full_join(mergedDF, df, by=similarNames) } } else { @@ -382,7 +353,8 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ } else { similarSites <- intersect(names(siteInformation), names(siteInfo)) - siteInformation <- merge(siteInformation, siteInfo, by=similarSites, all=TRUE) + # siteInformation <- merge(siteInformation, siteInfo, by=similarSites, all=TRUE) + siteInformation <- full_join(siteInformation, siteInfo, by=similarSites) similarVariables <- intersect(names(variableInformation),names(variableInfo)) variableInformation <- merge(variableInformation, variableInfo, by=similarVariables, all=TRUE) @@ -427,7 +399,7 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ newRows <- rbind(meltedmergedDF[indexDups, ], valDF[matchIndexes,]) - mergedDF3 <- dcast(newRows, castFormula, drop=FALSE, value.var = "value",) + mergedDF3 <- dcast(newRows, castFormula, drop=FALSE, value.var = "value") mergedDF2 <- rbind(mergedDF2, mergedDF3) mergedDF2 <- mergedDF2[order(mergedDF2$dateTime),] diff --git a/man/importWaterML1.Rd b/man/importWaterML1.Rd index 84201fad..4136d32a 100644 --- a/man/importWaterML1.Rd +++ b/man/importWaterML1.Rd @@ -56,7 +56,7 @@ offering <- '00003' property <- '00060' obs_url <- constructNWISURL(siteNumber,property,startDate,endDate,'dv') \dontrun{ -data <- importWaterML1(obs_url) +data <- importWaterML1(obs_url, asDateTime=TRUE) groundWaterSite <- "431049071324301" startGW <- "2013-10-01"
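
Taken together, the series leaves getWebServiceData() forwarding anything
supplied through '...' straight to httr::GET(), so callers pick their own
diagnostics. A minimal sketch (hypothetical session; requires an internet
connection, with values reused from the examples above):

    library(dataRetrieval)
    library(httr)
    obs_url <- constructNWISURL("02177000", "00060",
                                "2012-09-01", "2012-10-01", "dv")
    # httr config objects pass through '...' to GET(); progress() restores
    # the progress bar that is no longer shown by default:
    rawData <- getWebServiceData(obs_url, progress())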