Commit d0e3d7b1 authored by David Watkins's avatar David Watkins
Browse files

tests passing

parent 33f2ac46
......@@ -130,7 +130,6 @@ parseWaterML2Timeseries <- function(input, asDateTime) {
valueNodes <- xml_find_all(TVP,".//wml2:value")
values <- as.numeric(xml_text(valueNodes))
nVals <- length(values)
#gmlID <- rep(gmlID, nVals)
#df of date, time, dateTime
oneCol <- rep(NA, nVals)
......@@ -145,7 +144,9 @@ parseWaterML2Timeseries <- function(input, asDateTime) {
timeDF <- mutate(splitTime, dateTime = NA)
logicVec <- nchar(rawTime) > 19
timeDF$dateTime[logicVec] <- rawTime[logicVec]
if(!all(!logicVec)) { #otherwise sets it to char <NA>
timeDF$dateTime[logicVec] <- rawTime[logicVec]
}
if(asDateTime){
timeDF$dateTime <- parse_date_time(timeDF$dateTime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S",
"%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE)
......@@ -157,18 +158,24 @@ parseWaterML2Timeseries <- function(input, asDateTime) {
source <- xml_attr(xml_find_all(TVP, ".//wml2:source"), "title")
comment <- xml_text(xml_find_all(TVP, ".//wml2:comment"))
#TODO: other fields, then list, then df from list
tvpQuals <- xml_text(xml_find_all(TVP, ".//swe:description"))
defaultMeta <- xml_find_all(input, ".//wml2:DefaultTVPMeasurementMetadata")
defaultQuals <- xml_text(xml_find_all(defaultMeta, ".//swe:description"))
defaultUOM <- xml_attr(xml_find_all(defaultMeta, ".//wml2:uom"), "title", default = NA)
#attach defaultQuals as attributes
df_vars <- list(source, timeDF, value = values, uom, comment)
df_vars <- list(source = source, timeDF, value = values,
uom = uom, comment = comment)
df_use <- df_vars[sapply(df_vars, function(x){length(x) > 0 && !all(is.na(x))})]
df <- data.frame(df_use, stringsAsFactors = FALSE)
attr(df, "defaultQualifier") <- defaultQuals
attr(df, "defaultUOM") <- defaultUOM
attr(df, "gmlID") <- gmlID
#from the default metadata section
#append to existing attributes if they aren't empty
# attr(df, "defaultQualifier") <- defaultQuals
# attr(df, "defaultUOM") <- defaultUOM
# attr(df, "gmlID") <- gmlID
mdAttribs <- list(defaultQualifier=defaultQuals, defaultUOM=defaultUOM,
gmlID=gmlID) #all attributes must have names
mdAttribs_use <- mdAttribs[sapply(mdAttribs, function(x){length(x) > 0})]
attributes(df) <- append(attributes(df), mdAttribs_use)
return(df)
}
\ No newline at end of file
......@@ -16,7 +16,7 @@
#' @importFrom xml2 xml_find_all
#' @importFrom xml2 xml_text
#' @importFrom xml2 xml_attr
#' @importFrom dplyr rbind_all
#' @importFrom dplyr rbind_all select
#' @importFrom lubridate parse_date_time
#' @examples
#' baseURL <- "https://waterservices.usgs.gov/nwis/dv/?format=waterml,2.0"
......@@ -72,36 +72,17 @@ importWaterML2 <- function(obs_url, asDateTime=FALSE, tz="UTC"){
for(t in timeSeries){
df <- parseWaterML2Timeseries(t, asDateTime)
#remove time and date columns
TVP <- xml_find_all(t, ".//wml2:MeasurementTVP")#time-value pairs
time <- xml_text(xml_find_all(TVP,".//wml2:time"))
if(asDateTime){
time <- parse_date_time(time, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S",
"%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE)
#^^setting tz in as.POSIXct just sets the attribute, does not convert the time!
attr(time, 'tzone') <- tz
}
values <- as.numeric(xml_text(xml_find_all(TVP,".//wml2:value")))
#TODO: deal with multiple identifiers (assigning column names)
idents <- xml_text(xml_find_all(t, ".//gml:identifier"))
useIdents <- rep(idents, length(values))
#TODO: check qualifiers in points against default, if both exist, same, etc
tvpQuals <- xml_text(xml_find_all(TVP, ".//swe:description"))
defaultPointMeta <- xml_find_all(t, ".//wml2:DefaultTVPMeasurementMetadata")
defaultQuals <- xml_text(xml_find_all(defaultPointMeta, ".//swe:description"))
if(length(tvpQuals) == 0){
useQuals <- rep(defaultQuals, length(values))
}else{
useQuals <- tvpQuals
}
if(length(useQuals) == 0){
df <- cbind.data.frame(time, value=values, identifier=useIdents,
stringsAsFactors=FALSE)
}else{
df <- cbind.data.frame(time, value=values, qualifier=useQuals, identifier=useIdents,
stringsAsFactors=FALSE)
#need to save attributes first, and create identifier column
saveAttribs <- attributes(df)[-(1:3)]
#remove time and date columns, add site col
df <- mutate(df, identifier = saveAttribs$gmlID,
qualifier = ifelse(is.null(saveAttribs$defaultQualifier),
NA, saveAttribs$defaultQualifier))
if(all(is.na(df$dateTime))){
df <- subset(df, select=-c(dateTime, time))
#should the remaining column be changed to dateTime?
} else {
df <- subset(df, select=-c(date, time))
}
if (is.null(mergedDF)){
mergedDF <- df
......@@ -109,6 +90,7 @@ importWaterML2 <- function(obs_url, asDateTime=FALSE, tz="UTC"){
similarNames <- intersect(colnames(mergedDF), colnames(df))
mergedDF <- full_join(mergedDF, df, by=similarNames)
}
attributes(mergedDF) <- append(attributes(mergedDF), saveAttribs)
}
return(mergedDF)
}
......
......@@ -178,11 +178,11 @@ test_that("importWaterML2 internal test", {
test_that("importWaterML2 external test", {
testthat::skip_on_cran()
url <- "http://waterservices.usgs.gov/nwis/iv/?format=waterml,2.0&sites=01646500&parameterCd=00060,00065"
data <- importWaterML2(url)
url <- "https://waterservices.usgs.gov/nwis/iv/?format=waterml,2.0&sites=01646500&parameterCd=00060,00065"
exData <- importWaterML2(url)
# saveRDS(data, "rds/externalML2.rds")
expect_is(data$value, 'numeric')
expect_gt(nrow(data),0)
expect_is(exData$value, 'numeric')
expect_gt(nrow(exData),0)
})
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment