diff --git a/R/getWaterML1Data.r b/R/getWaterML1Data.r index e33814f59939f0921a14c2f4efc4b7d1e7cf5558..397e4fa2b701d0c10de0855cb35e41c97ecc0caf 100644 --- a/R/getWaterML1Data.r +++ b/R/getWaterML1Data.r @@ -60,20 +60,20 @@ getWaterML1Data <- function(obs_url){ chunk <- xmlDoc(timeSeries[[i]]) chunk <- xmlRoot(chunk) chunkNS <- xmlNamespaceDefinitions(chunk, simplify = TRUE) - -# site <- as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:siteProperty[@name='hucCd']", namespaces = chunkNS, xmlValue)) + + # site <- as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:siteProperty[@name='hucCd']", namespaces = chunkNS, xmlValue)) site <- as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:siteCode", namespaces = chunkNS, xmlValue)) agency <- as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:siteCode/@agencyCode", namespaces = chunkNS)) pCode <-as.character(xpathApply(chunk, "ns1:variable/ns1:variableCode", namespaces = chunkNS, xmlValue)) statCd <- as.character(xpathApply(chunk, "ns1:variable/ns1:options/ns1:option/@optionCode", namespaces = chunkNS)) - + valuesIndex <- as.numeric(which("values" == names(chunk))) zoneAbbrievs <- c(as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:timeZoneInfo/ns1:defaultTimeZone/@zoneAbbreviation", namespaces = chunkNS)), as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:timeZoneInfo/ns1:daylightSavingsTimeZone/@zoneAbbreviation", namespaces = chunkNS))) - + names(zoneAbbrievs) <- c(as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:timeZoneInfo/ns1:defaultTimeZone/@zoneOffset", namespaces = chunkNS)), - as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:timeZoneInfo/ns1:daylightSavingsTimeZone/@zoneOffset", namespaces = chunkNS))) + as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:timeZoneInfo/ns1:daylightSavingsTimeZone/@zoneOffset", namespaces = chunkNS))) for (j in valuesIndex){ subChunk <- xmlRoot(xmlDoc(chunk[[j]])) @@ -82,67 +82,69 @@ getWaterML1Data <- function(obs_url){ methodID <- padVariable(methodID,2) - value <- as.numeric(xpathSApply(subChunk, "ns1:value",namespaces = chunkNS, xmlValue)) - datetime <- as.POSIXct(strptime(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS),"%Y-%m-%dT%H:%M:%S")) - tzHours <- substr(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS), - 24, - nchar(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS))) - if(mean(nchar(tzHours),rm.na=TRUE) == 6){ - tzAbbriev <- zoneAbbrievs[tzHours] - } else { - tzAbbriev <- rep(as.character(zoneAbbrievs[1]),length(datetime)) - } - - timeZoneLibrary <- setNames(c("America/New_York","America/New_York","America/Chicago","America/Chicago", - "America/Denver","America/Denver","America/Los_Angeles","America/Los_Angeles", - "America/Anchorage","America/Anchorage","America/Honolulu","America/Honolulu"), - c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST")) - timeZone <- as.character(timeZoneLibrary[tzAbbriev]) - if(length(unique(timeZone)) == 1){ - datetime <- as.POSIXct(as.character(datetime), tz = unique(timeZone)) - } else { - warning("Mixed time zone information") - for(i in seq_along(datetime)){ - datetime[i] <- as.POSIXct(as.character(datetime[i]), tz = timeZone[i]) + value <- as.numeric(xpathSApply(subChunk, "ns1:value",namespaces = chunkNS, xmlValue)) + if(length(value)!=0){ + datetime <- as.POSIXct(strptime(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS),"%Y-%m-%dT%H:%M:%S")) + tzHours <- substr(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS), + 24, + nchar(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS))) + if(mean(nchar(tzHours),rm.na=TRUE) == 6){ + tzAbbriev <- zoneAbbrievs[tzHours] + } else { + tzAbbriev <- rep(as.character(zoneAbbrievs[1]),length(datetime)) } - } - - qualifier <- as.character(xpathSApply(subChunk, "ns1:value/@qualifiers",namespaces = chunkNS)) - - valueName <- paste(methodID,pCode,statCd,sep="_") - qualName <- paste(methodID,pCode,statCd,"cd",sep="_") - valueName <- paste("X",valueName,sep="") - qualName <- paste("X",qualName,sep="") - - assign(valueName,value) - assign(qualName,qualifier) - - if(length(get(qualName))!=0){ - df <- data.frame(rep(agency,length(datetime)), - rep(site,length(datetime)), - datetime, - tzAbbriev, - get(valueName), - get(qualName), - stringsAsFactors=FALSE) - names(df) <- c("agency_cd","site_no","datetime","tz_cd",valueName,qualName) - } else { - df <- data.frame(rep(agency,length(datetime)), - rep(site,length(datetime)), - datetime, - tzAbbriev, - get(valueName),stringsAsFactors=FALSE) + timeZoneLibrary <- setNames(c("America/New_York","America/New_York","America/Chicago","America/Chicago", + "America/Denver","America/Denver","America/Los_Angeles","America/Los_Angeles", + "America/Anchorage","America/Anchorage","America/Honolulu","America/Honolulu"), + c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST")) + timeZone <- as.character(timeZoneLibrary[tzAbbriev]) + if(length(unique(timeZone)) == 1){ + datetime <- as.POSIXct(as.character(datetime), tz = unique(timeZone)) + } else { + warning("Mixed time zone information") + for(i in seq_along(datetime)){ + datetime[i] <- as.POSIXct(as.character(datetime[i]), tz = timeZone[i]) + } + } - names(df) <- c("agency_cd","site_no","datetime","tz_cd",valueName) - } - - if (1 == i & valuesIndex[1] == j){ - mergedDF <- df - } else { - similarNames <- intersect(names(mergedDF), names(df)) - mergedDF <- merge(mergedDF, df,by=similarNames,all=TRUE) -# mergedDF <- merge(mergedDF, df,by=c("agency_cd","site_no","datetime","tz_cd"),all=TRUE) + qualifier <- as.character(xpathSApply(subChunk, "ns1:value/@qualifiers",namespaces = chunkNS)) + + valueName <- paste(methodID,pCode,statCd,sep="_") + qualName <- paste(methodID,pCode,statCd,"cd",sep="_") + valueName <- paste("X",valueName,sep="") + qualName <- paste("X",qualName,sep="") + + assign(valueName,value) + assign(qualName,qualifier) + + if(length(get(qualName))!=0){ + df <- data.frame(rep(agency,length(datetime)), + rep(site,length(datetime)), + datetime, + tzAbbriev, + get(valueName), + get(qualName), + stringsAsFactors=FALSE) + + names(df) <- c("agency_cd","site_no","datetime","tz_cd",valueName,qualName) + } else { + df <- data.frame(rep(agency,length(datetime)), + rep(site,length(datetime)), + datetime, + tzAbbriev, + get(valueName),stringsAsFactors=FALSE) + + names(df) <- c("agency_cd","site_no","datetime","tz_cd",valueName) + } + + if (1 == i & valuesIndex[1] == j){ + mergedDF <- df + } else { + similarNames <- intersect(names(mergedDF), names(df)) + mergedDF <- merge(mergedDF, df,by=similarNames,all=TRUE) + # mergedDF <- merge(mergedDF, df,by=c("agency_cd","site_no","datetime","tz_cd"),all=TRUE) + } } } }