From 47d86ddeb65d205d6e8782bb093e784445db6699 Mon Sep 17 00:00:00 2001 From: unknown <ldecicco@usgs.gov> Date: Mon, 3 Nov 2014 16:31:10 -0600 Subject: [PATCH] Return empty dataset, changed column names based on method Description, reordered columns. --- R/importWaterML1.r | 121 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 91 insertions(+), 30 deletions(-) diff --git a/R/importWaterML1.r b/R/importWaterML1.r index 096b3fda..cffff197 100644 --- a/R/importWaterML1.r +++ b/R/importWaterML1.r @@ -20,7 +20,7 @@ #' offering <- '00003' #' property <- '00060' #' obs_url <- constructNWISURL(siteNumber,property,startDate,endDate,'dv') -#' data <- importWaterML1(obs_url) +#' data <- importWaterML1(obs_url,TRUE) #' urlMulti <- constructNWISURL("04085427",c("00060","00010"), #' startDate,endDate,'dv',statCd=c("00003","00001")) #' multiData <- importWaterML1(urlMulti) @@ -36,7 +36,11 @@ #' filePath <- system.file("extdata", package="dataRetrievaldemo") #' fileName <- "WaterML1Example.xml" #' fullPath <- file.path(filePath, fileName) -#' importUserWM1 <- importWaterML1(fullPath) +#' importUserWM1 <- importWaterML1(fullPath,TRUE) +#' siteWithTwo <- '01480015' +#' url2 <- constructNWISURL(siteWithTwo, "00060",startDate,endDate,'dv') +#' twoResults <- importWaterML1(url2,TRUE) +#' importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ if(url.exists(obs_url)){ @@ -76,7 +80,8 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ timeSeries <- xpathApply(doc, "//ns1:timeSeries", namespaces = ns) if(0 == length(timeSeries)){ - stop("No data to return for URL:", obs_url) + message("Returning an empty dataset") + #TODO: return() } for (i in 1:length(timeSeries)){ @@ -89,7 +94,9 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ agency <- as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:siteCode/@agencyCode", namespaces = chunkNS)) pCode <-as.character(xpathApply(chunk, "ns1:variable/ns1:variableCode", namespaces = chunkNS, xmlValue)) statCd <- as.character(xpathApply(chunk, "ns1:variable/ns1:options/ns1:option/@optionCode", namespaces = chunkNS)) - + noValue <- as.numeric(xpathApply(chunk, "ns1:variable/ns1:noDataValue", namespaces = chunkNS, xmlValue)) + + valuesIndex <- as.numeric(which("values" == names(chunk))) @@ -108,60 +115,101 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ value <- as.numeric(xpathSApply(subChunk, "ns1:value",namespaces = chunkNS, xmlValue)) + value[value == noValue] <- NA attNames <- xpathSApply(subChunk, "ns1:value/@*",namespaces = chunkNS) attributeNames <- unique(names(attNames)) x <- lapply(attributeNames, function(x) xpathSApply(subChunk, paste0("ns1:value/@",x),namespaces = chunkNS)) - valueName <- paste(methodID,pCode,statCd,sep="_") - valueName <- paste("X",valueName,sep="") + + methodDescription <- as.character(xpathApply(subChunk, "ns1:method/ns1:methodDescription", namespaces = chunkNS, xmlValue)) + + if(length(methodDescription) > 0 & methodDescription != ""){ + valueName <- paste("X",methodDescription,pCode,statCd,sep="_") + } else { + valueName <- paste("X",pCode,statCd,sep="_") + } + + assign(valueName,value) df <- data.frame(agency = rep(agency,length(value)), site_no = rep(site,length(value)), stringsAsFactors=FALSE) - for(k in 1:length(attributeNames)){ - attVal <- as.character(x[[k]]) - if(length(attVal) == nrow(df)){ - df$temp <- as.character(x[[k]]) + if(length(attributeNames) > 0){ + for(k in 1:length(attributeNames)){ + attVal <- as.character(x[[k]]) + if(length(attVal) == nrow(df)){ + df$temp <- as.character(x[[k]]) + + } else { + attrList <- xpathApply(subChunk, "ns1:value", namespaces = chunkNS, xmlAttrs) + df$temp <- sapply(1:nrow(df),function(x) as.character(attrList[[x]][attributeNames[k]])) + df$temp[is.na(df$temp)] <- "" + } + names(df)[which(names(df) %in% "temp")] <- attributeNames[k] - } else { - attrList <- xpathApply(subChunk, "ns1:value", namespaces = chunkNS, xmlAttrs) - df$temp <- sapply(1:nrow(df),function(x) as.character(attrList[[x]][attributeNames[k]])) - df$temp[is.na(df$temp)] <- "" } - names(df)[which(names(df) %in% "temp")] <- attributeNames[k] - } - + df <- cbind(df, get(valueName)) names(df)[length(df)] <- valueName if("qualifiers" %in% names(df)){ - qualName <- paste(methodID,pCode,statCd,"cd",sep="_") - qualName <- paste("X",qualName,sep="") + qualName <- paste(valueName,"cd",sep="_") names(df)[which(names(df) == "qualifiers")] <- qualName } if("dateTime" %in% attributeNames){ if(asDateTime){ - datetime <- as.POSIXct(strptime(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS),"%Y-%m-%dT%H:%M:%S"), tz="UTC") - tzHours <- as.numeric(substr(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS), - 24, - nchar(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS))-3)) - tzHoursOff <- substr(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS), - 24, - nchar(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS))) - tzAbbriev <- as.character(zoneAbbrievs[tzHoursOff]) - datetime <- datetime - tzHours*60*60 + # Common options: + # YYYY + # YYYY-MM-DD + # YYYY-MM-DDTHH:MM + # YYYY-MM-DDTHH:MM:SS + # YYYY-MM-DDTHH:MM:SSZ + # YYYY-MM-DDTHH:MM:SS.000 + # YYYY-MM-DDTHH:MM:SS.000-XX:00 + datetime <- xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS) + + numChar <- nchar(datetime) + + if(abs(max(numChar) - min(numChar)) != 0){ + message("Mixed date types") + } else { + numChar <- numChar[1] + if(numChar == 4){ + datetime <- as.POSIXct(datetime, "%Y", tz = "UTC") + } else if(numChar == 10){ + datetime <- as.POSIXct(datetime, "%Y-%m-%d", tz = "UTC") + } else if(numChar == 16){ + datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M", tz = "UTC") + } else if(numChar == 19){ + datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%S", tz = "UTC") + } else if(numChar == 20){ + datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%S", tz = "UTC") + } else if(numChar == 23){ + datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%OS", tz = "UTC") + } else if(numChar == 24){ + datetime <- substr(datetime,1,23) + datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%OS", tz = "UTC") + } else if(numChar == 29){ + tzHours <- as.numeric(substr(datetime,24,numChar-3)) + + datetime <- substr(datetime,1,23) + datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%OS", tz = "UTC") + datetime <- datetime - tzHours*60*60 + df$tz_cd <- as.character(zoneAbbrievs[tzHours]) + } + } if(tz != ""){ attr(datetime, "tzone") <- tz } - df$tz_cd <- tzAbbriev + } else { datetime <- as.character(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS)) @@ -170,7 +218,19 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ df$dateTime <- datetime } - + + colNames <- names(df) + + if( exists("qualName")){ + columnsOrdered <- c("agency","site_no","dateTime","tz_cd",attributeNames[attributeNames != "dateTime"],qualName,valueName) + } else { + columnsOrdered <- c("agency","site_no","dateTime","tz_cd",attributeNames[attributeNames != "dateTime"],valueName) + } + + columnsOrderd <- columnsOrdered[columnsOrdered %in% names(df)] + + df <- df[,columnsOrderd] + if (1 == i & valuesIndex[1] == j){ mergedDF <- df } else { @@ -178,6 +238,7 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ mergedDF <- merge(mergedDF, df,by=similarNames,all=TRUE) } } + } return (mergedDF) -- GitLab