From d928f4776dcf12da804903e4930b467535e881fb Mon Sep 17 00:00:00 2001 From: wdwatkins Date: Wed, 1 Mar 2017 16:02:40 -0600 Subject: [PATCH 1/8] split out parseML2Timeseriesin ngwmn --- NAMESPACE | 1 + R/importNGWMN_wml2.R | 82 +++++++++++++----------- tests/testthat/tests_userFriendly_fxns.R | 3 +- 3 files changed, 48 insertions(+), 38 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 7145580..591c5a7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,6 +16,7 @@ export(importWaterML1) export(importWaterML2) export(pCodeToName) export(parameterCdFile) +export(parseWaterML2Timeseries) export(readNGWMNdata) export(readNGWMNlevels) export(readNGWMNsites) diff --git a/R/importNGWMN_wml2.R b/R/importNGWMN_wml2.R index b5d2298..9a1f64d 100644 --- a/R/importNGWMN_wml2.R +++ b/R/importNGWMN_wml2.R @@ -63,44 +63,8 @@ importNGWMN_wml2 <- function(input, asDateTime=FALSE, tz=""){ mergedDF <- NULL for(t in timeSeries){ - gmlID <- xml_attr(t,"id") - TVP <- xml_find_all(t, ".//wml2:MeasurementTVP")#time-value pairs - rawTime <- xml_text(xml_find_all(TVP,".//wml2:time")) + df <- parseWaterML2Timeseries(t, asDateTime) - valueNodes <- xml_find_all(TVP,".//wml2:value") - values <- as.numeric(xml_text(valueNodes)) - nVals <- length(values) - gmlID <- rep(gmlID, nVals) - - #df of date, time, dateTime - oneCol <- rep(NA, nVals) - timeDF <- data.frame(date=oneCol, time=oneCol, dateTime=oneCol) - splitTime <- data.frame(matrix(unlist(strsplit(rawTime, "T")), nrow=nVals, byrow = TRUE), stringsAsFactors=FALSE) - if(ncol(splitTime) > 1){ #some sites only have a date - names(splitTime) <- c("date", "time") - }else{ - names(splitTime) <- "date" - splitTime <- mutate(splitTime, time = NA) - } - - timeDF <- mutate(splitTime, dateTime = NA) - logicVec <- nchar(rawTime) > 19 - timeDF$dateTime[logicVec] <- rawTime[logicVec] - if(asDateTime){ - timeDF$dateTime <- parse_date_time(timeDF$dateTime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S", - "%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE) - #^^setting tz in as.POSIXct just sets the attribute, does not convert the time! - attr(time, 'tzone') <- tz - } - - - - uom <- xml_attr(valueNodes, "uom", default = NA) - source <- xml_attr(xml_find_all(TVP, ".//wml2:source"), "title") - comment <- xml_text(xml_find_all(TVP, ".//wml2:comment")) - - df <- cbind.data.frame(source, timeDF, value=values, uom, comment, gmlID, - stringsAsFactors=FALSE) if (is.null(mergedDF)){ mergedDF <- df } else { @@ -147,3 +111,47 @@ importNGWMN_wml2 <- function(input, asDateTime=FALSE, tz=""){ } return(mergedDF) } + +#' parse the timeseries portion of a waterML2 file +#' @param input XML with only the wml2:MeasurementTimeseries node and children +#' +#' @export +parseWaterML2Timeseries <- function(input, asDateTime) { + gmlID <- xml_attr(input,"id") + TVP <- xml_find_all(input, ".//wml2:MeasurementTVP")#time-value pairs + rawTime <- xml_text(xml_find_all(TVP,".//wml2:time")) + + valueNodes <- xml_find_all(TVP,".//wml2:value") + values <- as.numeric(xml_text(valueNodes)) + nVals <- length(values) + gmlID <- rep(gmlID, nVals) + + #df of date, time, dateTime + oneCol <- rep(NA, nVals) + timeDF <- data.frame(date=oneCol, time=oneCol, dateTime=oneCol) + splitTime <- data.frame(matrix(unlist(strsplit(rawTime, "T")), nrow=nVals, byrow = TRUE), stringsAsFactors=FALSE) + if(ncol(splitTime) > 1){ #some sites only have a date + names(splitTime) <- c("date", "time") + }else{ + names(splitTime) <- "date" + splitTime <- mutate(splitTime, time = NA) + } + + timeDF <- mutate(splitTime, dateTime = NA) + logicVec <- nchar(rawTime) > 19 + timeDF$dateTime[logicVec] <- rawTime[logicVec] + if(asDateTime){ + timeDF$dateTime <- parse_date_time(timeDF$dateTime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S", + "%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE) + #^^setting tz in as.POSIXct just sets the attribute, does not convert the time! + attr(time, 'tzone') <- tz + } + + uom <- xml_attr(valueNodes, "uom", default = NA) + source <- xml_attr(xml_find_all(TVP, ".//wml2:source"), "title") + comment <- xml_text(xml_find_all(TVP, ".//wml2:comment")) + + df <- cbind.data.frame(source, timeDF, value=values, uom, comment, gmlID, + stringsAsFactors=FALSE) + return(df) +} \ No newline at end of file diff --git a/tests/testthat/tests_userFriendly_fxns.R b/tests/testthat/tests_userFriendly_fxns.R index 17fd931..890d672 100644 --- a/tests/testthat/tests_userFriendly_fxns.R +++ b/tests/testthat/tests_userFriendly_fxns.R @@ -254,7 +254,8 @@ test_that("NGWMN functions working", { #sites with colons and NAs work na_colons <- c(NA, bboxSites$site[200:212], NA, NA) - returnDF <- readNGWMNdata(service = "observation", featureID = na_colons) + returnDF <- readNGWMNdata(service = "observation", + featureID = na_colons, asDateTime = FALSE) expect_is(returnDF, "data.frame") expect_true(nrow(returnDF) > 1) expect_true(!is.null(attributes(returnDF)$siteInfo)) -- GitLab From 33f2ac4617fa967a98e523f0680415ac0d31e33c Mon Sep 17 00:00:00 2001 From: wdwatkins Date: Thu, 2 Mar 2017 16:10:56 -0600 Subject: [PATCH 2/8] got the pattern? --- R/importNGWMN_wml2.R | 29 +++++++++++++++++++++++------ R/importWaterML2.r | 4 ++++ man/parseWaterML2Timeseries.Rd | 15 +++++++++++++++ 3 files changed, 42 insertions(+), 6 deletions(-) create mode 100644 man/parseWaterML2Timeseries.Rd diff --git a/R/importNGWMN_wml2.R b/R/importNGWMN_wml2.R index 9a1f64d..64a9df4 100644 --- a/R/importNGWMN_wml2.R +++ b/R/importNGWMN_wml2.R @@ -113,18 +113,24 @@ importNGWMN_wml2 <- function(input, asDateTime=FALSE, tz=""){ } #' parse the timeseries portion of a waterML2 file -#' @param input XML with only the wml2:MeasurementTimeseries node and children #' +#' Returns data frame columns of all information with each time series measurement; +#' Anything defined as a default, is returned as an attribute of that data frame. +#' +#' @param input XML with only the wml2:MeasurementTimeseries node and children +#' @importFrom xml2 xml_attr xml_find_all xml_text +#' @importFrom dplyr mutate +#' @importFrom lubridate parse_date_time #' @export parseWaterML2Timeseries <- function(input, asDateTime) { - gmlID <- xml_attr(input,"id") + gmlID <- xml_attr(input,"id") #TODO: make this an attribute TVP <- xml_find_all(input, ".//wml2:MeasurementTVP")#time-value pairs rawTime <- xml_text(xml_find_all(TVP,".//wml2:time")) valueNodes <- xml_find_all(TVP,".//wml2:value") values <- as.numeric(xml_text(valueNodes)) nVals <- length(values) - gmlID <- rep(gmlID, nVals) + #gmlID <- rep(gmlID, nVals) #df of date, time, dateTime oneCol <- rep(NA, nVals) @@ -148,10 +154,21 @@ parseWaterML2Timeseries <- function(input, asDateTime) { } uom <- xml_attr(valueNodes, "uom", default = NA) + source <- xml_attr(xml_find_all(TVP, ".//wml2:source"), "title") comment <- xml_text(xml_find_all(TVP, ".//wml2:comment")) - - df <- cbind.data.frame(source, timeDF, value=values, uom, comment, gmlID, - stringsAsFactors=FALSE) + #TODO: other fields, then list, then df from list + tvpQuals <- xml_text(xml_find_all(TVP, ".//swe:description")) + defaultMeta <- xml_find_all(input, ".//wml2:DefaultTVPMeasurementMetadata") + defaultQuals <- xml_text(xml_find_all(defaultMeta, ".//swe:description")) + defaultUOM <- xml_attr(xml_find_all(defaultMeta, ".//wml2:uom"), "title", default = NA) + #attach defaultQuals as attributes + + df_vars <- list(source, timeDF, value = values, uom, comment) + df_use <- df_vars[sapply(df_vars, function(x){length(x) > 0 && !all(is.na(x))})] + df <- data.frame(df_use, stringsAsFactors = FALSE) + attr(df, "defaultQualifier") <- defaultQuals + attr(df, "defaultUOM") <- defaultUOM + attr(df, "gmlID") <- gmlID return(df) } \ No newline at end of file diff --git a/R/importWaterML2.r b/R/importWaterML2.r index bb4fc26..7df55da 100644 --- a/R/importWaterML2.r +++ b/R/importWaterML2.r @@ -70,6 +70,10 @@ importWaterML2 <- function(obs_url, asDateTime=FALSE, tz="UTC"){ mergedDF <- NULL for(t in timeSeries){ + + df <- parseWaterML2Timeseries(t, asDateTime) + #remove time and date columns + TVP <- xml_find_all(t, ".//wml2:MeasurementTVP")#time-value pairs time <- xml_text(xml_find_all(TVP,".//wml2:time")) if(asDateTime){ diff --git a/man/parseWaterML2Timeseries.Rd b/man/parseWaterML2Timeseries.Rd new file mode 100644 index 0000000..7562b5c --- /dev/null +++ b/man/parseWaterML2Timeseries.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/importNGWMN_wml2.R +\name{parseWaterML2Timeseries} +\alias{parseWaterML2Timeseries} +\title{parse the timeseries portion of a waterML2 file} +\usage{ +parseWaterML2Timeseries(input, asDateTime) +} +\arguments{ +\item{input}{XML with only the wml2:MeasurementTimeseries node and children} +} +\description{ +Returns data frame columns of all information with each time series measurement; +Anything defined as a default, is returned as an attribute of that data frame. +} -- GitLab From d0e3d7b1fb06963ece9df54bf183e3429562d74f Mon Sep 17 00:00:00 2001 From: wdwatkins Date: Fri, 3 Mar 2017 15:32:57 -0600 Subject: [PATCH 3/8] tests passing --- R/importNGWMN_wml2.R | 25 ++++++++++++------- R/importWaterML2.r | 44 ++++++++++------------------------ tests/testthat/tests_imports.R | 8 +++---- 3 files changed, 33 insertions(+), 44 deletions(-) diff --git a/R/importNGWMN_wml2.R b/R/importNGWMN_wml2.R index 64a9df4..1a7c0bd 100644 --- a/R/importNGWMN_wml2.R +++ b/R/importNGWMN_wml2.R @@ -130,7 +130,6 @@ parseWaterML2Timeseries <- function(input, asDateTime) { valueNodes <- xml_find_all(TVP,".//wml2:value") values <- as.numeric(xml_text(valueNodes)) nVals <- length(values) - #gmlID <- rep(gmlID, nVals) #df of date, time, dateTime oneCol <- rep(NA, nVals) @@ -145,7 +144,9 @@ parseWaterML2Timeseries <- function(input, asDateTime) { timeDF <- mutate(splitTime, dateTime = NA) logicVec <- nchar(rawTime) > 19 - timeDF$dateTime[logicVec] <- rawTime[logicVec] + if(!all(!logicVec)) { #otherwise sets it to char + timeDF$dateTime[logicVec] <- rawTime[logicVec] + } if(asDateTime){ timeDF$dateTime <- parse_date_time(timeDF$dateTime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE) @@ -157,18 +158,24 @@ parseWaterML2Timeseries <- function(input, asDateTime) { source <- xml_attr(xml_find_all(TVP, ".//wml2:source"), "title") comment <- xml_text(xml_find_all(TVP, ".//wml2:comment")) - #TODO: other fields, then list, then df from list tvpQuals <- xml_text(xml_find_all(TVP, ".//swe:description")) defaultMeta <- xml_find_all(input, ".//wml2:DefaultTVPMeasurementMetadata") defaultQuals <- xml_text(xml_find_all(defaultMeta, ".//swe:description")) defaultUOM <- xml_attr(xml_find_all(defaultMeta, ".//wml2:uom"), "title", default = NA) - #attach defaultQuals as attributes - - df_vars <- list(source, timeDF, value = values, uom, comment) + + df_vars <- list(source = source, timeDF, value = values, + uom = uom, comment = comment) df_use <- df_vars[sapply(df_vars, function(x){length(x) > 0 && !all(is.na(x))})] df <- data.frame(df_use, stringsAsFactors = FALSE) - attr(df, "defaultQualifier") <- defaultQuals - attr(df, "defaultUOM") <- defaultUOM - attr(df, "gmlID") <- gmlID + + #from the default metadata section + #append to existing attributes if they aren't empty + # attr(df, "defaultQualifier") <- defaultQuals + # attr(df, "defaultUOM") <- defaultUOM + # attr(df, "gmlID") <- gmlID + mdAttribs <- list(defaultQualifier=defaultQuals, defaultUOM=defaultUOM, + gmlID=gmlID) #all attributes must have names + mdAttribs_use <- mdAttribs[sapply(mdAttribs, function(x){length(x) > 0})] + attributes(df) <- append(attributes(df), mdAttribs_use) return(df) } \ No newline at end of file diff --git a/R/importWaterML2.r b/R/importWaterML2.r index 7df55da..248f0e4 100644 --- a/R/importWaterML2.r +++ b/R/importWaterML2.r @@ -16,7 +16,7 @@ #' @importFrom xml2 xml_find_all #' @importFrom xml2 xml_text #' @importFrom xml2 xml_attr -#' @importFrom dplyr rbind_all +#' @importFrom dplyr rbind_all select #' @importFrom lubridate parse_date_time #' @examples #' baseURL <- "https://waterservices.usgs.gov/nwis/dv/?format=waterml,2.0" @@ -72,36 +72,17 @@ importWaterML2 <- function(obs_url, asDateTime=FALSE, tz="UTC"){ for(t in timeSeries){ df <- parseWaterML2Timeseries(t, asDateTime) - #remove time and date columns - - TVP <- xml_find_all(t, ".//wml2:MeasurementTVP")#time-value pairs - time <- xml_text(xml_find_all(TVP,".//wml2:time")) - if(asDateTime){ - time <- parse_date_time(time, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S", - "%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE) - #^^setting tz in as.POSIXct just sets the attribute, does not convert the time! - attr(time, 'tzone') <- tz - } - values <- as.numeric(xml_text(xml_find_all(TVP,".//wml2:value"))) - #TODO: deal with multiple identifiers (assigning column names) - idents <- xml_text(xml_find_all(t, ".//gml:identifier")) - useIdents <- rep(idents, length(values)) - #TODO: check qualifiers in points against default, if both exist, same, etc - tvpQuals <- xml_text(xml_find_all(TVP, ".//swe:description")) - defaultPointMeta <- xml_find_all(t, ".//wml2:DefaultTVPMeasurementMetadata") - defaultQuals <- xml_text(xml_find_all(defaultPointMeta, ".//swe:description")) - - if(length(tvpQuals) == 0){ - useQuals <- rep(defaultQuals, length(values)) - }else{ - useQuals <- tvpQuals - } - if(length(useQuals) == 0){ - df <- cbind.data.frame(time, value=values, identifier=useIdents, - stringsAsFactors=FALSE) - }else{ - df <- cbind.data.frame(time, value=values, qualifier=useQuals, identifier=useIdents, - stringsAsFactors=FALSE) + #need to save attributes first, and create identifier column + saveAttribs <- attributes(df)[-(1:3)] + #remove time and date columns, add site col + df <- mutate(df, identifier = saveAttribs$gmlID, + qualifier = ifelse(is.null(saveAttribs$defaultQualifier), + NA, saveAttribs$defaultQualifier)) + if(all(is.na(df$dateTime))){ + df <- subset(df, select=-c(dateTime, time)) + #should the remaining column be changed to dateTime? + } else { + df <- subset(df, select=-c(date, time)) } if (is.null(mergedDF)){ mergedDF <- df @@ -109,6 +90,7 @@ importWaterML2 <- function(obs_url, asDateTime=FALSE, tz="UTC"){ similarNames <- intersect(colnames(mergedDF), colnames(df)) mergedDF <- full_join(mergedDF, df, by=similarNames) } + attributes(mergedDF) <- append(attributes(mergedDF), saveAttribs) } return(mergedDF) } diff --git a/tests/testthat/tests_imports.R b/tests/testthat/tests_imports.R index 34ce719..0a02a5d 100644 --- a/tests/testthat/tests_imports.R +++ b/tests/testthat/tests_imports.R @@ -178,11 +178,11 @@ test_that("importWaterML2 internal test", { test_that("importWaterML2 external test", { testthat::skip_on_cran() - url <- "http://waterservices.usgs.gov/nwis/iv/?format=waterml,2.0&sites=01646500¶meterCd=00060,00065" - data <- importWaterML2(url) + url <- "https://waterservices.usgs.gov/nwis/iv/?format=waterml,2.0&sites=01646500¶meterCd=00060,00065" + exData <- importWaterML2(url) # saveRDS(data, "rds/externalML2.rds") - expect_is(data$value, 'numeric') - expect_gt(nrow(data),0) + expect_is(exData$value, 'numeric') + expect_gt(nrow(exData),0) }) -- GitLab From 5b84ea33e84456d20d399e99dafb80ba85c4cbb1 Mon Sep 17 00:00:00 2001 From: wdwatkins Date: Fri, 3 Mar 2017 15:41:50 -0600 Subject: [PATCH 4/8] comments --- R/importNGWMN_wml2.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/R/importNGWMN_wml2.R b/R/importNGWMN_wml2.R index 1a7c0bd..4760512 100644 --- a/R/importNGWMN_wml2.R +++ b/R/importNGWMN_wml2.R @@ -170,10 +170,7 @@ parseWaterML2Timeseries <- function(input, asDateTime) { #from the default metadata section #append to existing attributes if they aren't empty - # attr(df, "defaultQualifier") <- defaultQuals - # attr(df, "defaultUOM") <- defaultUOM - # attr(df, "gmlID") <- gmlID - mdAttribs <- list(defaultQualifier=defaultQuals, defaultUOM=defaultUOM, + mdAttribs <- list(defaultQualifier=defaultQuals, defaultUOM=defaultUOM, gmlID=gmlID) #all attributes must have names mdAttribs_use <- mdAttribs[sapply(mdAttribs, function(x){length(x) > 0})] attributes(df) <- append(attributes(df), mdAttribs_use) -- GitLab From 29d5409e19b38d9761ce4cf8d46358cb6166c4c5 Mon Sep 17 00:00:00 2001 From: wdwatkins Date: Fri, 3 Mar 2017 16:40:36 -0600 Subject: [PATCH 5/8] examples that actually work --- R/readNGWMNdata.R | 26 +++++++++++--------------- man/readNGWMNdata.Rd | 10 +++++----- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/R/readNGWMNdata.R b/R/readNGWMNdata.R index e09c429..cc24c33 100644 --- a/R/readNGWMNdata.R +++ b/R/readNGWMNdata.R @@ -1,13 +1,13 @@ #' import data from the National Groundwater Monitoring Network \url{http://cida.usgs.gov/ngwmn/}. #' #' Only water level data and site locations and names are currently available through the web service. +#' @param service char Service for the request - "observation" and "featureOfInterest" are implemented. +#' @param \dots Other parameters to supply, namely \code{featureID} or \code{bbox} #' @param asDateTime logical if \code{TRUE}, will convert times to POSIXct format. Currently defaults to #' \code{FALSE} since time zone information is not included. #' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the #' datetimes to UTC (properly accounting for daylight savings times based on the data's provided time zone offset). -#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles", -#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla" -#' @param \dots Other parameters to supply, namely \code{featureID} or \code{bbox} +#' Accepts all values from \code{OlsonNames()}. #' @import utils #' @importFrom dplyr mutate #' @importFrom dplyr bind_rows @@ -18,17 +18,17 @@ #' \dontrun{ #' #one site #' site <- "USGS.430427089284901" -#' oneSite <- readNGWMNdata(featureID = site) +#' oneSite <- readNGWMNdata(featureID = site, service = "observation") #' #' #multiple sites #' sites <- c("USGS.272838082142201","USGS.404159100494601", "USGS.401216080362703") -#' multiSiteData <- readNGWMNdata(sites) +#' multiSiteData <- readNGWMNdata(featureID = sites, service = "observation") #' attributes(multiSiteData) #' #' #non-USGS site #' #accepts colon or period between agency and ID -#' site <- "MBMG:892195" -#' data <- readNGWMNdata(featureID = site) +#' site <- "MBMG:702934" +#' data <- readNGWMNdata(featureID = site, service = "featureOfInterest") #' #' #site with no data returns empty data frame #' noDataSite <- "UTGS.401544112060301" @@ -36,21 +36,17 @@ #' #' #bounding box #' bboxSites <- readNGWMNdata(service = "featureOfInterest", bbox = c(30, -99, 31, 102)) +#' #retrieve 100 sites. Set asDateTime to false since one site has an invalid date +#' bboxData <- readNGWMNdata(service = "observation", featureID = bboxSites$site[1:100], +#' asDateTime = FALSE) #' } #' -readNGWMNdata <- function(..., asDateTime = TRUE, tz = ""){ +readNGWMNdata <- function(service, ..., asDateTime = TRUE, tz = ""){ message("DISCLAIMER: NGWMN retrieval functions are still in flux, and no future behavior or output is guaranteed") dots <- convertLists(...) - if("service" %in% names(dots)){ - service <- dots$service - dots$service <- NULL - } else { - service <- "observation" - } - match.arg(service, c("observation", "featureOfInterest")) if(service == "observation"){ diff --git a/man/readNGWMNdata.Rd b/man/readNGWMNdata.Rd index 7f5d07e..f0bdb16 100644 --- a/man/readNGWMNdata.Rd +++ b/man/readNGWMNdata.Rd @@ -4,7 +4,7 @@ \alias{readNGWMNdata} \title{import data from the National Groundwater Monitoring Network \url{http://cida.usgs.gov/ngwmn/}.} \usage{ -readNGWMNdata(..., asDateTime = TRUE, tz = "") +readNGWMNdata(service, ..., asDateTime = TRUE, tz = "") } \arguments{ \item{\dots}{Other parameters to supply, namely \code{featureID} or \code{bbox}} @@ -24,17 +24,17 @@ Only water level data and site locations and names are currently available throu \dontrun{ #one site site <- "USGS.430427089284901" -oneSite <- readNGWMNdata(featureID = site) +oneSite <- readNGWMNdata(featureID = site, service = "observation") #multiple sites sites <- c("USGS.272838082142201","USGS.404159100494601", "USGS.401216080362703") -multiSiteData <- readNGWMNdata(sites) +multiSiteData <- readNGWMNdata(featureID = sites, service = "observation") attributes(multiSiteData) #non-USGS site #accepts colon or period between agency and ID -site <- "MBMG:892195" -data <- readNGWMNdata(featureID = site) +site <- "MBMG:702934" +data <- readNGWMNdata(featureID = site, service = "featureOfInterest") #site with no data returns empty data frame noDataSite <- "UTGS.401544112060301" -- GitLab From 6cf22fa43fa865c8deed15df03cec4aeb2567586 Mon Sep 17 00:00:00 2001 From: wdwatkins Date: Tue, 7 Mar 2017 11:02:06 -0600 Subject: [PATCH 6/8] use documentation tweak --- R/readNWISunit.r | 3 ++- man/readNWISuse.Rd | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/readNWISunit.r b/R/readNWISunit.r index 40453c6..b4007ce 100644 --- a/R/readNWISunit.r +++ b/R/readNWISunit.r @@ -507,7 +507,8 @@ readNWISstat <- function(siteNumbers, parameterCd, startDate = "", endDate = "", #' for every county in a state. Can be a vector of counties in the same state. #' @param years integer Years for data retrieval. Must be years ending in 0 or 5. Default is all available years. #' @param categories character categories of water use. Defaults to \code{ALL}. Specific categories must be supplied as two- -#' letter abbreviations as seen in the URL when using the NWIS water use web interface. +#' letter abbreviations as seen in the URL when using the NWIS water use web interface. Note that +#' there are different codes for national and state level data. #' @param convertType logical defaults to \code{TRUE}. If \code{TRUE}, the function will convert the data to #' numerics based on a standard algorithm. Years, months, and days (if appliccable) are also returned as numerics #' in separate columns. If convertType is false, everything is returned as a character. diff --git a/man/readNWISuse.Rd b/man/readNWISuse.Rd index 7e05b83..ade6f8b 100644 --- a/man/readNWISuse.Rd +++ b/man/readNWISuse.Rd @@ -17,7 +17,8 @@ for every county in a state. Can be a vector of counties in the same state.} \item{years}{integer Years for data retrieval. Must be years ending in 0 or 5. Default is all available years.} \item{categories}{character categories of water use. Defaults to \code{ALL}. Specific categories must be supplied as two- -letter abbreviations as seen in the URL when using the NWIS water use web interface.} +letter abbreviations as seen in the URL when using the NWIS water use web interface. Note that +there are different codes for national and state level data.} \item{convertType}{logical defaults to \code{TRUE}. If \code{TRUE}, the function will convert the data to numerics based on a standard algorithm. Years, months, and days (if appliccable) are also returned as numerics -- GitLab From e8e6c19c5efea43aece8d855a94cc74c183ad60d Mon Sep 17 00:00:00 2001 From: wdwatkins Date: Tue, 7 Mar 2017 11:02:25 -0600 Subject: [PATCH 7/8] time zone change was broken --- R/importNGWMN_wml2.R | 6 +++--- R/importWaterML2.r | 2 +- R/readNGWMNdata.R | 6 +++--- man/parseWaterML2Timeseries.Rd | 2 +- man/readNGWMNdata.Rd | 8 ++++++-- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/R/importNGWMN_wml2.R b/R/importNGWMN_wml2.R index 4760512..65e5cee 100644 --- a/R/importNGWMN_wml2.R +++ b/R/importNGWMN_wml2.R @@ -63,7 +63,7 @@ importNGWMN_wml2 <- function(input, asDateTime=FALSE, tz=""){ mergedDF <- NULL for(t in timeSeries){ - df <- parseWaterML2Timeseries(t, asDateTime) + df <- parseWaterML2Timeseries(t, asDateTime, tz) if (is.null(mergedDF)){ mergedDF <- df @@ -122,7 +122,7 @@ importNGWMN_wml2 <- function(input, asDateTime=FALSE, tz=""){ #' @importFrom dplyr mutate #' @importFrom lubridate parse_date_time #' @export -parseWaterML2Timeseries <- function(input, asDateTime) { +parseWaterML2Timeseries <- function(input, asDateTime, tz) { gmlID <- xml_attr(input,"id") #TODO: make this an attribute TVP <- xml_find_all(input, ".//wml2:MeasurementTVP")#time-value pairs rawTime <- xml_text(xml_find_all(TVP,".//wml2:time")) @@ -151,7 +151,7 @@ parseWaterML2Timeseries <- function(input, asDateTime) { timeDF$dateTime <- parse_date_time(timeDF$dateTime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE) #^^setting tz in as.POSIXct just sets the attribute, does not convert the time! - attr(time, 'tzone') <- tz + attr(timeDF$dateTime, 'tzone') <- tz } uom <- xml_attr(valueNodes, "uom", default = NA) diff --git a/R/importWaterML2.r b/R/importWaterML2.r index 248f0e4..cc1afe0 100644 --- a/R/importWaterML2.r +++ b/R/importWaterML2.r @@ -71,7 +71,7 @@ importWaterML2 <- function(obs_url, asDateTime=FALSE, tz="UTC"){ for(t in timeSeries){ - df <- parseWaterML2Timeseries(t, asDateTime) + df <- parseWaterML2Timeseries(t, asDateTime, tz) #need to save attributes first, and create identifier column saveAttribs <- attributes(df)[-(1:3)] #remove time and date columns, add site col diff --git a/R/readNGWMNdata.R b/R/readNGWMNdata.R index cc24c33..d345ed1 100644 --- a/R/readNGWMNdata.R +++ b/R/readNGWMNdata.R @@ -58,7 +58,7 @@ readNGWMNdata <- function(service, ..., asDateTime = TRUE, tz = ""){ featureID <- na.omit(gsub(":",".",dots[['featureID']])) for(f in featureID){ - obsFID <- retrieveObservation(featureID = f, asDateTime, attrs) + obsFID <- retrieveObservation(featureID = f, asDateTime, attrs, tz = tz) obsFIDattr <- saveAttrs(attrs, obsFID) obsFID <- removeAttrs(attrs, obsFID) allObs <- bind_rows(allObs, obsFID) @@ -156,12 +156,12 @@ readNGWMNsites <- function(featureID){ } -retrieveObservation <- function(featureID, asDateTime, attrs){ +retrieveObservation <- function(featureID, asDateTime, attrs, tz){ url <- drURL(base.name = "NGWMN", access = pkg.env$access, request = "GetObservation", service = "SOS", version = "2.0.0", observedProperty = "urn:ogc:def:property:OGC:GroundWaterLevel", responseFormat = "text/xml", featureOfInterest = paste("VW_GWDP_GEOSERVER", featureID, sep = ".")) - returnData <- importNGWMN_wml2(url, asDateTime) + returnData <- importNGWMN_wml2(url, asDateTime, tz = tz) if(nrow(returnData) == 0){ #need to add NA attributes, so they aren't messed up when stored as DFs attr(returnData, "gml:identifier") <- NA diff --git a/man/parseWaterML2Timeseries.Rd b/man/parseWaterML2Timeseries.Rd index 7562b5c..a290e37 100644 --- a/man/parseWaterML2Timeseries.Rd +++ b/man/parseWaterML2Timeseries.Rd @@ -4,7 +4,7 @@ \alias{parseWaterML2Timeseries} \title{parse the timeseries portion of a waterML2 file} \usage{ -parseWaterML2Timeseries(input, asDateTime) +parseWaterML2Timeseries(input, asDateTime, tz) } \arguments{ \item{input}{XML with only the wml2:MeasurementTimeseries node and children} diff --git a/man/readNGWMNdata.Rd b/man/readNGWMNdata.Rd index f0bdb16..5d672fa 100644 --- a/man/readNGWMNdata.Rd +++ b/man/readNGWMNdata.Rd @@ -7,6 +7,8 @@ readNGWMNdata(service, ..., asDateTime = TRUE, tz = "") } \arguments{ +\item{service}{char Service for the request - "observation" and "featureOfInterest" are implemented.} + \item{\dots}{Other parameters to supply, namely \code{featureID} or \code{bbox}} \item{asDateTime}{logical if \code{TRUE}, will convert times to POSIXct format. Currently defaults to @@ -14,8 +16,7 @@ readNGWMNdata(service, ..., asDateTime = TRUE, tz = "") \item{tz}{character to set timezone attribute of datetime. Default is an empty quote, which converts the datetimes to UTC (properly accounting for daylight savings times based on the data's provided time zone offset). -Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles", -"America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla"} +Accepts all values from \code{OlsonNames()}.} } \description{ Only water level data and site locations and names are currently available through the web service. @@ -42,6 +43,9 @@ noDataSite <- readNGWMNdata(featureID = noDataSite, service = "observation") #bounding box bboxSites <- readNGWMNdata(service = "featureOfInterest", bbox = c(30, -99, 31, 102)) +#retrieve 100 sites. Set asDateTime to false since one site has an invalid date +bboxData <- readNGWMNdata(service = "observation", featureID = bboxSites$site[1:100], +asDateTime = FALSE) } } -- GitLab From 57578d42fd183cbedc152f15bceaa52820fd4e72 Mon Sep 17 00:00:00 2001 From: wdwatkins Date: Tue, 7 Mar 2017 11:43:46 -0600 Subject: [PATCH 8/8] tests --- R/importNGWMN_wml2.R | 2 +- R/readNGWMNdata.R | 7 ++- man/importNGWMN_wml2.Rd | 2 +- man/readNGWMNlevels.Rd | 2 +- tests/testthat/tests_general.R | 55 ++++++++++++++++++++++++ tests/testthat/tests_userFriendly_fxns.R | 44 ------------------- 6 files changed, 61 insertions(+), 51 deletions(-) diff --git a/R/importNGWMN_wml2.R b/R/importNGWMN_wml2.R index 65e5cee..037a93e 100644 --- a/R/importNGWMN_wml2.R +++ b/R/importNGWMN_wml2.R @@ -32,7 +32,7 @@ #' #' #TODO: separate id and agency name, give also as separate dimensions -importNGWMN_wml2 <- function(input, asDateTime=FALSE, tz=""){ +importNGWMN_wml2 <- function(input, asDateTime=FALSE, tz){ if(tz != ""){ tz <- match.arg(tz, OlsonNames()) }else{tz = "UTC"} diff --git a/R/readNGWMNdata.R b/R/readNGWMNdata.R index d345ed1..e8b89b5 100644 --- a/R/readNGWMNdata.R +++ b/R/readNGWMNdata.R @@ -116,9 +116,9 @@ readNGWMNdata <- function(service, ..., asDateTime = TRUE, tz = ""){ #' noDataSite <- readNGWMNlevels(featureID = noDataSite) #' } -readNGWMNlevels <- function(featureID, asDateTime = TRUE){ +readNGWMNlevels <- function(featureID, asDateTime = TRUE, tz = ""){ data <- readNGWMNdata(featureID = featureID, service = "observation", - asDateTime = asDateTime) + asDateTime = asDateTime, tz = tz) return(data) } @@ -155,7 +155,6 @@ readNGWMNsites <- function(featureID){ return(sites) } - retrieveObservation <- function(featureID, asDateTime, attrs, tz){ url <- drURL(base.name = "NGWMN", access = pkg.env$access, request = "GetObservation", service = "SOS", version = "2.0.0", observedProperty = "urn:ogc:def:property:OGC:GroundWaterLevel", @@ -206,7 +205,7 @@ retrieveFeatureOfInterest <- function(..., asDateTime, srsName="urn:ogc:def:crs: stop("Geographical filter not specified. Please use featureID or bbox") } - siteDF <- importNGWMN_wml2(url, asDateTime) + siteDF <- importNGWMN_wml2(url, asDateTime, tz = "") attr(siteDF, "url") <- url attr(siteDF, "queryTime") <- Sys.time() return(siteDF) diff --git a/man/importNGWMN_wml2.Rd b/man/importNGWMN_wml2.Rd index ba103b1..fb8005b 100644 --- a/man/importNGWMN_wml2.Rd +++ b/man/importNGWMN_wml2.Rd @@ -4,7 +4,7 @@ \alias{importNGWMN_wml2} \title{Function to return data from the National Ground Water Monitoring Network waterML2 format} \usage{ -importNGWMN_wml2(input, asDateTime = FALSE, tz = "") +importNGWMN_wml2(input, asDateTime = FALSE, tz) } \arguments{ \item{input}{character or raw, containing the url for the retrieval or a path to the data file, or raw XML.} diff --git a/man/readNGWMNlevels.Rd b/man/readNGWMNlevels.Rd index cf7f6ac..33e04b0 100644 --- a/man/readNGWMNlevels.Rd +++ b/man/readNGWMNlevels.Rd @@ -4,7 +4,7 @@ \alias{readNGWMNlevels} \title{Retrieve groundwater levels from the National Ground Water Monitoring Network \url{http://cida.usgs.gov/ngwmn/}.} \usage{ -readNGWMNlevels(featureID, asDateTime = TRUE) +readNGWMNlevels(featureID, asDateTime = TRUE, tz = "") } \arguments{ \item{featureID}{character Vector of feature IDs formatted with agency code and site number diff --git a/tests/testthat/tests_general.R b/tests/testthat/tests_general.R index 9c7235e..0df4f97 100644 --- a/tests/testthat/tests_general.R +++ b/tests/testthat/tests_general.R @@ -217,3 +217,58 @@ test_that("readWQPdots working", { expect_true("statecode" %in% names(formArgs)) expect_false("stateCd" %in% names(formArgs)) }) + +context("NGWMN") +test_that("NGWMN functions working", { + testthat::skip_on_cran() + noDataSite <- "UTGS.401544112060301" + noDataSite <- readNGWMNlevels(featureID = noDataSite) + expect_true(is.data.frame(noDataSite)) + + #bounding box and a bigger request + bboxSites <- readNGWMNdata(service = "featureOfInterest", bbox = c(30, -99, 31, 102)) + expect_gt(nrow(bboxSites), 0) + siteInfo <- readNGWMNsites(bboxSites$site[1:100]) + expect_gt(nrow(siteInfo), 90) + + #one site + site <- "USGS.430427089284901" + oneSite <- readNGWMNlevels(featureID = site) + siteInfo <- readNGWMNsites(site) + expect_true(is.numeric(oneSite$value)) + expect_true(is.character(oneSite$site)) + expect_true(is.data.frame(siteInfo)) + expect_true(nrow(siteInfo) > 0) + expect_true(nrow(oneSite) > 0) + + #non-USGS site + data <- readNGWMNlevels(featureID = "MBMG.1388") + expect_true(nrow(data) > 1) + expect_true(is.numeric(oneSite$value)) + + #sites with colons and NAs work + + na_colons <- c(NA, bboxSites$site[200:212], NA, NA) + returnDF <- readNGWMNdata(service = "observation", + featureID = na_colons, asDateTime = FALSE) + expect_is(returnDF, "data.frame") + expect_true(nrow(returnDF) > 1) + expect_true(!is.null(attributes(returnDF)$siteInfo)) + + sites <- c("USGS:424427089494701", NA) + siteInfo <- readNGWMNsites(sites) + expect_is(siteInfo, "data.frame") + expect_true(nrow(siteInfo) == 1) + + #time zones + tzSite <- "USGS.385111104214403" + tzDataUTC <- readNGWMNlevels(tzSite, asDateTime = TRUE) + tzDataMT <- readNGWMNlevels(tzSite, asDateTime = TRUE, + tz = "US/Mountain") + expect_gt(nrow(tzDataMT), 1) + expect_gt(nrow(tzDataUTC), 1) + expect_is(tzDataUTC$dateTime, "POSIXct") + expect_is(tzDataMT$dateTime, "POSIXct") + expect_equal(attr(tzDataMT$dateTime, 'tzone'), "US/Mountain") + expect_warning(tzDataUTC$dateTime == tzDataMT$dateTime) +}) diff --git a/tests/testthat/tests_userFriendly_fxns.R b/tests/testthat/tests_userFriendly_fxns.R index 0f2cda8..d2a072a 100644 --- a/tests/testthat/tests_userFriendly_fxns.R +++ b/tests/testthat/tests_userFriendly_fxns.R @@ -224,50 +224,6 @@ test_that("state county tests",{ expect_equal(fromIDs, "Bacon County") }) -context("NGWMN") -test_that("NGWMN functions working", { - testthat::skip_on_cran() - noDataSite <- "UTGS.401544112060301" - noDataSite <- readNGWMNlevels(featureID = noDataSite) - expect_true(is.data.frame(noDataSite)) - - #bounding box and a bigger request - bboxSites <- readNGWMNdata(service = "featureOfInterest", bbox = c(30, -99, 31, 102)) - expect_gt(nrow(bboxSites), 0) - siteInfo <- readNGWMNsites(bboxSites$site[1:100]) - expect_gt(nrow(siteInfo), 90) - - #one site - site <- "USGS.430427089284901" - oneSite <- readNGWMNlevels(featureID = site) - siteInfo <- readNGWMNsites(site) - expect_true(is.numeric(oneSite$value)) - expect_true(is.character(oneSite$site)) - expect_true(is.data.frame(siteInfo)) - expect_true(nrow(siteInfo) > 0) - expect_true(nrow(oneSite) > 0) - - #non-USGS site - data <- readNGWMNlevels(featureID = "MBMG.1388") - expect_true(nrow(data) > 1) - expect_true(is.numeric(oneSite$value)) - - #sites with colons and NAs work - - na_colons <- c(NA, bboxSites$site[200:212], NA, NA) - returnDF <- readNGWMNdata(service = "observation", - featureID = na_colons, asDateTime = FALSE) - expect_is(returnDF, "data.frame") - expect_true(nrow(returnDF) > 1) - expect_true(!is.null(attributes(returnDF)$siteInfo)) - - sites <- c("USGS:424427089494701", NA) - siteInfo <- readNGWMNsites(sites) - expect_is(siteInfo, "data.frame") - expect_true(nrow(siteInfo) == 1) - -}) - context("water year column") df_test <- data.frame(site_no = as.character(1:13), -- GitLab