Commit d0e3d7b1 authored by David Watkins's avatar David Watkins
Browse files

tests passing

parent 33f2ac46
...@@ -130,7 +130,6 @@ parseWaterML2Timeseries <- function(input, asDateTime) { ...@@ -130,7 +130,6 @@ parseWaterML2Timeseries <- function(input, asDateTime) {
valueNodes <- xml_find_all(TVP,".//wml2:value") valueNodes <- xml_find_all(TVP,".//wml2:value")
values <- as.numeric(xml_text(valueNodes)) values <- as.numeric(xml_text(valueNodes))
nVals <- length(values) nVals <- length(values)
#gmlID <- rep(gmlID, nVals)
#df of date, time, dateTime #df of date, time, dateTime
oneCol <- rep(NA, nVals) oneCol <- rep(NA, nVals)
...@@ -145,7 +144,9 @@ parseWaterML2Timeseries <- function(input, asDateTime) { ...@@ -145,7 +144,9 @@ parseWaterML2Timeseries <- function(input, asDateTime) {
timeDF <- mutate(splitTime, dateTime = NA) timeDF <- mutate(splitTime, dateTime = NA)
logicVec <- nchar(rawTime) > 19 logicVec <- nchar(rawTime) > 19
timeDF$dateTime[logicVec] <- rawTime[logicVec] if(!all(!logicVec)) { #otherwise sets it to char <NA>
timeDF$dateTime[logicVec] <- rawTime[logicVec]
}
if(asDateTime){ if(asDateTime){
timeDF$dateTime <- parse_date_time(timeDF$dateTime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S", timeDF$dateTime <- parse_date_time(timeDF$dateTime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S",
"%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE) "%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE)
...@@ -157,18 +158,24 @@ parseWaterML2Timeseries <- function(input, asDateTime) { ...@@ -157,18 +158,24 @@ parseWaterML2Timeseries <- function(input, asDateTime) {
source <- xml_attr(xml_find_all(TVP, ".//wml2:source"), "title") source <- xml_attr(xml_find_all(TVP, ".//wml2:source"), "title")
comment <- xml_text(xml_find_all(TVP, ".//wml2:comment")) comment <- xml_text(xml_find_all(TVP, ".//wml2:comment"))
#TODO: other fields, then list, then df from list
tvpQuals <- xml_text(xml_find_all(TVP, ".//swe:description")) tvpQuals <- xml_text(xml_find_all(TVP, ".//swe:description"))
defaultMeta <- xml_find_all(input, ".//wml2:DefaultTVPMeasurementMetadata") defaultMeta <- xml_find_all(input, ".//wml2:DefaultTVPMeasurementMetadata")
defaultQuals <- xml_text(xml_find_all(defaultMeta, ".//swe:description")) defaultQuals <- xml_text(xml_find_all(defaultMeta, ".//swe:description"))
defaultUOM <- xml_attr(xml_find_all(defaultMeta, ".//wml2:uom"), "title", default = NA) defaultUOM <- xml_attr(xml_find_all(defaultMeta, ".//wml2:uom"), "title", default = NA)
#attach defaultQuals as attributes
df_vars <- list(source = source, timeDF, value = values,
df_vars <- list(source, timeDF, value = values, uom, comment) uom = uom, comment = comment)
df_use <- df_vars[sapply(df_vars, function(x){length(x) > 0 && !all(is.na(x))})] df_use <- df_vars[sapply(df_vars, function(x){length(x) > 0 && !all(is.na(x))})]
df <- data.frame(df_use, stringsAsFactors = FALSE) df <- data.frame(df_use, stringsAsFactors = FALSE)
attr(df, "defaultQualifier") <- defaultQuals
attr(df, "defaultUOM") <- defaultUOM #from the default metadata section
attr(df, "gmlID") <- gmlID #append to existing attributes if they aren't empty
# attr(df, "defaultQualifier") <- defaultQuals
# attr(df, "defaultUOM") <- defaultUOM
# attr(df, "gmlID") <- gmlID
mdAttribs <- list(defaultQualifier=defaultQuals, defaultUOM=defaultUOM,
gmlID=gmlID) #all attributes must have names
mdAttribs_use <- mdAttribs[sapply(mdAttribs, function(x){length(x) > 0})]
attributes(df) <- append(attributes(df), mdAttribs_use)
return(df) return(df)
} }
\ No newline at end of file
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
#' @importFrom xml2 xml_find_all #' @importFrom xml2 xml_find_all
#' @importFrom xml2 xml_text #' @importFrom xml2 xml_text
#' @importFrom xml2 xml_attr #' @importFrom xml2 xml_attr
#' @importFrom dplyr rbind_all #' @importFrom dplyr rbind_all select
#' @importFrom lubridate parse_date_time #' @importFrom lubridate parse_date_time
#' @examples #' @examples
#' baseURL <- "https://waterservices.usgs.gov/nwis/dv/?format=waterml,2.0" #' baseURL <- "https://waterservices.usgs.gov/nwis/dv/?format=waterml,2.0"
...@@ -72,36 +72,17 @@ importWaterML2 <- function(obs_url, asDateTime=FALSE, tz="UTC"){ ...@@ -72,36 +72,17 @@ importWaterML2 <- function(obs_url, asDateTime=FALSE, tz="UTC"){
for(t in timeSeries){ for(t in timeSeries){
df <- parseWaterML2Timeseries(t, asDateTime) df <- parseWaterML2Timeseries(t, asDateTime)
#remove time and date columns #need to save attributes first, and create identifier column
saveAttribs <- attributes(df)[-(1:3)]
TVP <- xml_find_all(t, ".//wml2:MeasurementTVP")#time-value pairs #remove time and date columns, add site col
time <- xml_text(xml_find_all(TVP,".//wml2:time")) df <- mutate(df, identifier = saveAttribs$gmlID,
if(asDateTime){ qualifier = ifelse(is.null(saveAttribs$defaultQualifier),
time <- parse_date_time(time, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S", NA, saveAttribs$defaultQualifier))
"%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE) if(all(is.na(df$dateTime))){
#^^setting tz in as.POSIXct just sets the attribute, does not convert the time! df <- subset(df, select=-c(dateTime, time))
attr(time, 'tzone') <- tz #should the remaining column be changed to dateTime?
} } else {
values <- as.numeric(xml_text(xml_find_all(TVP,".//wml2:value"))) df <- subset(df, select=-c(date, time))
#TODO: deal with multiple identifiers (assigning column names)
idents <- xml_text(xml_find_all(t, ".//gml:identifier"))
useIdents <- rep(idents, length(values))
#TODO: check qualifiers in points against default, if both exist, same, etc
tvpQuals <- xml_text(xml_find_all(TVP, ".//swe:description"))
defaultPointMeta <- xml_find_all(t, ".//wml2:DefaultTVPMeasurementMetadata")
defaultQuals <- xml_text(xml_find_all(defaultPointMeta, ".//swe:description"))
if(length(tvpQuals) == 0){
useQuals <- rep(defaultQuals, length(values))
}else{
useQuals <- tvpQuals
}
if(length(useQuals) == 0){
df <- cbind.data.frame(time, value=values, identifier=useIdents,
stringsAsFactors=FALSE)
}else{
df <- cbind.data.frame(time, value=values, qualifier=useQuals, identifier=useIdents,
stringsAsFactors=FALSE)
} }
if (is.null(mergedDF)){ if (is.null(mergedDF)){
mergedDF <- df mergedDF <- df
...@@ -109,6 +90,7 @@ importWaterML2 <- function(obs_url, asDateTime=FALSE, tz="UTC"){ ...@@ -109,6 +90,7 @@ importWaterML2 <- function(obs_url, asDateTime=FALSE, tz="UTC"){
similarNames <- intersect(colnames(mergedDF), colnames(df)) similarNames <- intersect(colnames(mergedDF), colnames(df))
mergedDF <- full_join(mergedDF, df, by=similarNames) mergedDF <- full_join(mergedDF, df, by=similarNames)
} }
attributes(mergedDF) <- append(attributes(mergedDF), saveAttribs)
} }
return(mergedDF) return(mergedDF)
} }
......
...@@ -178,11 +178,11 @@ test_that("importWaterML2 internal test", { ...@@ -178,11 +178,11 @@ test_that("importWaterML2 internal test", {
test_that("importWaterML2 external test", { test_that("importWaterML2 external test", {
testthat::skip_on_cran() testthat::skip_on_cran()
url <- "http://waterservices.usgs.gov/nwis/iv/?format=waterml,2.0&sites=01646500&parameterCd=00060,00065" url <- "https://waterservices.usgs.gov/nwis/iv/?format=waterml,2.0&sites=01646500&parameterCd=00060,00065"
data <- importWaterML2(url) exData <- importWaterML2(url)
# saveRDS(data, "rds/externalML2.rds") # saveRDS(data, "rds/externalML2.rds")
expect_is(data$value, 'numeric') expect_is(exData$value, 'numeric')
expect_gt(nrow(data),0) expect_gt(nrow(exData),0)
}) })
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment