From d9e80b42ab5106ff36dfaf930f1327df403d1902 Mon Sep 17 00:00:00 2001
From: unknown <ldecicco@usgs.gov>
Date: Tue, 11 Nov 2014 16:15:57 -0600
Subject: [PATCH] Lots of upgrades.

---
 R/importWaterML1.r | 138 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 111 insertions(+), 27 deletions(-)

diff --git a/R/importWaterML1.r b/R/importWaterML1.r
index dd2e71fb..91ca3f79 100644
--- a/R/importWaterML1.r
+++ b/R/importWaterML1.r
@@ -21,26 +21,32 @@
 #' property <- '00060'
 #' obs_url <- constructNWISURL(siteNumber,property,startDate,endDate,'dv')
 #' data <- importWaterML1(obs_url,TRUE)
-#' urlMulti <- constructNWISURL("04085427",c("00060","00010"),
-#' startDate,endDate,'dv',statCd=c("00003","00001"))
-#' multiData <- importWaterML1(urlMulti)
+#'
 #' groundWaterSite <- "431049071324301"
 #' startGW <- "2013-10-01"
 #' endGW <- "2014-06-30"
 #' groundwaterExampleURL <- constructNWISURL(groundWaterSite, NA,
-#' startGW,endGW, service="gwlevels", format="xml")
+#' startGW,endGW, service="gwlevels")
 #' groundWater <- importWaterML1(groundwaterExampleURL)
+#'
 #' unitDataURL <- constructNWISURL(siteNumber,property,
-#' "2013-11-03","2013-11-03",'uv',format='xml')
+#' "2013-11-03","2013-11-03",'uv')
 #' unitData <- importWaterML1(unitDataURL,TRUE)
+#'
 #' filePath <- system.file("extdata", package="dataRetrieval")
 #' fileName <- "WaterML1Example.xml"
 #' fullPath <- file.path(filePath, fileName)
 #' importUserWM1 <- importWaterML1(fullPath,TRUE)
-#' siteWithTwo <- '01480015'
-#' url2 <- constructNWISURL(siteWithTwo, "00060",startDate,endDate,'dv')
-#' twoResults <- importWaterML1(url2,TRUE)
-#'
+#'
+#' # Two sites, two pcodes, one site has two data descriptors:
+#' siteNumber <- c('01480015',"04085427")
+#' obs_url <- constructNWISURL(siteNumber,c("00060","00010"),startDate,endDate,'dv')
+#' data <- importWaterML1(obs_url)
+#' data$dateTime <- as.Date(data$dateTime)
+#' data <- renameNWISColumns(data)
+#' names(attributes(data))
+#' attr(data, "url")
+#' attr(data, "disclaimer")
 importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
 
   if(url.exists(obs_url)){
@@ -77,6 +83,15 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
     doc <- xmlRoot(doc)
     ns <- xmlNamespaceDefinitions(doc, simplify = TRUE)
     
+    queryInfo <- xmlToList(xmlRoot(xmlDoc(doc[["queryInfo"]])))
+    names(queryInfo) <- make.unique(names(queryInfo))
+    
+    noteIndex <- grep("note",names(queryInfo))
+    
+    noteTitles <- as.character(lapply(queryInfo[noteIndex], function(x) x$.attrs))
+    notes <- as.character(lapply(queryInfo[noteIndex], function(x) x$text))
+    names(notes) <- noteTitles
+    
     timeSeries <- xpathApply(doc, "//ns1:timeSeries", namespaces = ns)
     
     if(0 == length(timeSeries)){
@@ -84,19 +99,24 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
       #TODO: return()
     }
     
+    attList <- list()
+    
     for (i in 1:length(timeSeries)){
     
       chunk <- xmlDoc(timeSeries[[i]])
       chunk <- xmlRoot(chunk)
       chunkNS <- xmlNamespaceDefinitions(chunk, simplify = TRUE)
-      
+      
+      uniqueName <- as.character(xpathApply(chunk, "@name", namespaces = chunkNS))
+      
       site <- as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:siteCode", namespaces = chunkNS, xmlValue))
       agency <- as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:siteCode/@agencyCode", namespaces = chunkNS))
       pCode <-as.character(xpathApply(chunk, "ns1:variable/ns1:variableCode", namespaces = chunkNS, xmlValue))
       statCd <- as.character(xpathApply(chunk, "ns1:variable/ns1:options/ns1:option/@optionCode", namespaces = chunkNS))
       noValue <- as.numeric(xpathApply(chunk, "ns1:variable/ns1:noDataValue", namespaces = chunkNS, xmlValue))
-      
+      extraSiteData <- xmlToList(xmlRoot(xmlDoc(chunk[["sourceInfo"]])))
+      extraVariableData <- xmlToList(xmlRoot(xmlDoc(chunk[["variable"]])))
+      
       valuesIndex <- as.numeric(which("values" == names(chunk)))
@@ -129,10 +149,8 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
 
           valueName <- paste("X",pCode,statCd,sep="_")
           
-          if(length(methodDescription) > 0){
-            if(methodDescription != ""){
-              valueName <- paste("X",methodDescription,pCode,statCd,sep="_")
-            }
+          if(length(methodDescription) > 0 && methodDescription != ""){
+            valueName <- paste("X",methodDescription,pCode,statCd,sep="_")
           }
@@ -167,22 +185,24 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
           }
           
          if("dateTime" %in% attributeNames){
+            
+            datetime <- xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS)
+            
+            numChar <- nchar(datetime)
+            
            if(asDateTime){
              # Common options:
-              # YYYY
-              # YYYY-MM-DD
-              # YYYY-MM-DDTHH:MM
-              # YYYY-MM-DDTHH:MM:SS
-              # YYYY-MM-DDTHH:MM:SSZ
-              # YYYY-MM-DDTHH:MM:SS.000
-              # YYYY-MM-DDTHH:MM:SS.000-XX:00
-              datetime <- xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS)
-              
-              numChar <- nchar(datetime)
-              
+              # YYYY                           numChar=4
+              # YYYY-MM-DD                     numChar=10
+              # YYYY-MM-DDTHH:MM               numChar=16
+              # YYYY-MM-DDTHH:MM:SS            numChar=19
+              # YYYY-MM-DDTHH:MM:SSZ           numChar=20
+              # YYYY-MM-DDTHH:MM:SS.000        numChar=23
+              # YYYY-MM-DDTHH:MM:SS.000-XX:00  numChar=29
+              
              if(abs(max(numChar) - min(numChar)) != 0){
-                message("Mixed date types")
+                message("Mixed date types, not converted to POSIXct")
              } else {
                numChar <- numChar[1]
                if(numChar == 4){
@@ -200,6 +220,7 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
                } else if(numChar == 24){
                  datetime <- substr(datetime,1,23)
                  datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%OS", tz = "UTC")
+                  df$tz_cd <- rep(zoneAbbrievs[1], nrow(df))
                } else if(numChar == 29){
                  
                  tzOffset <- as.character(substr(datetime,24,numChar))
@@ -210,6 +231,12 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
                  datetime <- datetime + tzHours*60*60
                  df$tz_cd <- as.character(zoneAbbrievs[tzOffset])
                }
+                
+                if(!("tz_cd" %in% names(df))){
+                  df$tz_cd <- zoneAbbrievs[1]
+                  tzHours <- as.numeric(substr(names(zoneAbbrievs[1]),1,3))
+                  datetime <- datetime + tzHours*60*60
+                }
              }
              
              if(tz != ""){
@@ -218,7 +245,16 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
              
            } else {
-              datetime <- as.character(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS))
+              
+              datetime <- as.character(datetime)
+              if(any(numChar) == 29){
+                tzOffset <- as.character(substr(datetime,24,numChar))
+                df$tz_cd <- as.character(zoneAbbrievs[tzOffset])
+                df$tz_cd[is.na(df$tz_cd)] <- zoneAbbrievs[1]
+              } else {
+                df$tz_cd <- zoneAbbrievs[1]
+              }
+              
            }
            
            df$dateTime <- datetime
@@ -237,15 +273,63 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
          
          df <- df[,columnsOrderd]
          
+          names(extraSiteData) <- make.unique(names(extraSiteData))
+          
+          sitePropertyIndex <- grep("siteProperty",names(extraSiteData))
+          
+          properties <- as.character(lapply(extraSiteData[sitePropertyIndex], function(x) x$.attrs))
+          propertyValues <- as.character(lapply(extraSiteData[sitePropertyIndex], function(x) x$text))
+          names(propertyValues) <- properties
+          
+          siteInfo <- data.frame(station_nm=extraSiteData$siteName,
+                                 site_no=extraSiteData$siteCode$text,
+                                 agency=extraSiteData$siteCode$.attrs[["agencyCode"]],
+                                 timeZoneOffset=extraSiteData$timeZoneInfo$defaultTimeZone[1],
+                                 timeZoneAbbreviation=extraSiteData$timeZoneInfo$defaultTimeZone[2],
+                                 dec_lat_va=as.numeric(extraSiteData$geoLocation$geogLocation$latitude),
+                                 dec_lon_va=as.numeric(extraSiteData$geoLocation$geogLocation$longitude),
+                                 srs=extraSiteData$geoLocation$geogLocation$.attrs[["srs"]],
+                                 stringsAsFactors=FALSE)
+          
+          siteInfo <- cbind(siteInfo, t(propertyValues))
+          
+          names(extraVariableData) <- make.unique(names(extraVariableData))
+          variableInfo <- data.frame(parameterCd=extraVariableData$variableCode$text,
+                                     parameter_nm=extraVariableData$variableName,
+                                     parameter_desc=extraVariableData$variableDescription,
+                                     valueType=extraVariableData$valueType,
+                                     param_units=extraVariableData$unit$unitCode,
+                                     noDataValue=as.numeric(extraVariableData$noDataValue),
+                                     stringsAsFactors=FALSE)
+          
          if (1 == i & valuesIndex[1] == j){
            mergedDF <- df
+            siteInformation <- siteInfo
+            variableInformation <- variableInfo
+            
          } else {
            similarNames <- intersect(names(mergedDF), names(df))
            mergedDF <- merge(mergedDF, df,by=similarNames,all=TRUE)
+            
+            similarSites <- intersect(names(siteInformation), names(siteInfo))
+            siteInformation <- merge(siteInformation, siteInfo, by=similarSites, all=TRUE)
+            
+            similarVariables <- intersect(names(variableInformation),names(variableInfo))
+            variableInformation <- merge(variableInformation, variableInfo, by=similarVariables, all=TRUE)
          }
        }
      }
+      attList[[uniqueName]] <- list(extraSiteData, extraVariableData)
+      
+    }
    
    row.names(mergedDF) <- NULL
+    attr(mergedDF, "url") <- obs_url
+    attr(mergedDF, "attributeList") <- attList
+    attr(mergedDF, "siteInfo") <- siteInformation
+    attr(mergedDF, "variableInfo") <- variableInformation
+    attr(mergedDF, "disclaimer") <- notes["disclaimer"]
+    attr(mergedDF, "queryInfo") <- queryInfo
    
    return (mergedDF)
  }
--
GitLab
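
Usage sketch (not part of the patch): one way to inspect the metadata attributes that this commit attaches to the data frame returned by importWaterML1. The attribute names come from the attr() calls added above; the site, parameter code, and dates are placeholders mirroring the roxygen example, not values from the patch.

# Sketch assuming the patched importWaterML1; site/dates are placeholders.
library(dataRetrieval)

obs_url <- constructNWISURL("01480015", "00060", "2014-01-01", "2014-06-30", 'dv')
data <- importWaterML1(obs_url, TRUE)

names(attributes(data))     # should now include url, siteInfo, variableInfo, disclaimer, queryInfo, attributeList
attr(data, "siteInfo")      # station_nm, site_no, dec_lat_va, dec_lon_va, time zone, ...
attr(data, "variableInfo")  # parameterCd, parameter_nm, param_units, noDataValue, ...
attr(data, "disclaimer")    # pulled from the queryInfo notes
attr(data, "url")           # the request that produced the data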