From 67a23d718915915889b2ef86a15f9bcdb8d04567 Mon Sep 17 00:00:00 2001 From: "Kevin M. Smith" <Kevin.Smith@tufts.edu> Date: Mon, 3 Nov 2014 19:03:45 -0500 Subject: [PATCH] Catch <values> without <value> error. This catches responses that return <values> without associated <value>. Example Below: url = "http://waterservices.usgs.gov/nwis/iv/?format=waterml,1.1&huc=02&startDT=2014-10-11T00:00:00.000&endDT=2014-10-11T00:30:00.000&parameterCd=00060,00065" data <- getWaterML1Data(url) WaterML Response (chunk) <ns1:timeSeries xmlns:ns1="http://www.cuahsi.org/waterML/1.1/" name="USGS:01350080:00065:00011"> <ns1:sourceInfo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="ns1:SiteInfoType"> <ns1:siteName>MANOR KILL AT WEST CONESVILLE NEAR GILBOA NY</ns1:siteName> <ns1:siteCode network="NWIS" agencyCode="USGS">01350080</ns1:siteCode> <ns1:timeZoneInfo siteUsesDaylightSavingsTime="false"> <ns1:defaultTimeZone zoneOffset="-05:00" zoneAbbreviation="EST"/> <ns1:daylightSavingsTimeZone zoneOffset="-04:00" zoneAbbreviation="EDT"/> </ns1:timeZoneInfo> <ns1:geoLocation> <ns1:geogLocation xsi:type="ns1:LatLonPointType" srs="EPSG:4326"> <ns1:latitude>42.37694444</ns1:latitude> <ns1:longitude>-74.4130556</ns1:longitude> </ns1:geogLocation> </ns1:geoLocation> <ns1:siteProperty name="siteTypeCd">ST</ns1:siteProperty> <ns1:siteProperty name="hucCd">02020005</ns1:siteProperty> <ns1:siteProperty name="stateCd">36</ns1:siteProperty> <ns1:siteProperty name="countyCd">36095</ns1:siteProperty> </ns1:sourceInfo> <ns1:variable ns1:oid="45807202"> <ns1:variableCode network="NWIS" vocabulary="NWIS:UnitValues" default="true" variableID="45807202">00065</ns1:variableCode> <ns1:variableName>Gage height, ft</ns1:variableName> <ns1:variableDescription>Gage height, feet</ns1:variableDescription> <ns1:valueType>Derived Value</ns1:valueType> <ns1:unit> <ns1:unitCode>ft</ns1:unitCode> </ns1:unit> <ns1:options> <ns1:option name="Statistic" optionCode="00011"/> </ns1:options> 
<ns1:noDataValue>-999999.0</ns1:noDataValue> </ns1:variable> <ns1:values> <ns1:method methodID="1"> <ns1:methodDescription>[backup from well]</ns1:methodDescription> </ns1:method> </ns1:values> <ns1:values> <ns1:value qualifiers="P" dateTime="2014-10-11T00:00:00.000-05:00">0.85</ns1:value> <ns1:value qualifiers="P" dateTime="2014-10-11T00:15:00.000-05:00">0.85</ns1:value> <ns1:value qualifiers="P" dateTime="2014-10-11T00:30:00.000-05:00">0.85</ns1:value> <ns1:qualifier qualifierID="0" ns1:network="NWIS" ns1:vocabulary="uv_rmk_cd"> <ns1:qualifierCode>P</ns1:qualifierCode> <ns1:qualifierDescription>Provisional data subject to revision.</ns1:qualifierDescription> </ns1:qualifier> <ns1:method methodID="12"> <ns1:methodDescription/> </ns1:method> </ns1:values> </ns1:timeSeries> --- R/getWaterML1Data.r | 128 ++++++++++++++++++++++---------------------- 1 file changed, 65 insertions(+), 63 deletions(-) diff --git a/R/getWaterML1Data.r b/R/getWaterML1Data.r index e33814f5..397e4fa2 100644 --- a/R/getWaterML1Data.r +++ b/R/getWaterML1Data.r @@ -60,20 +60,20 @@ getWaterML1Data <- function(obs_url){ chunk <- xmlDoc(timeSeries[[i]]) chunk <- xmlRoot(chunk) chunkNS <- xmlNamespaceDefinitions(chunk, simplify = TRUE) - -# site <- as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:siteProperty[@name='hucCd']", namespaces = chunkNS, xmlValue)) + + # site <- as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:siteProperty[@name='hucCd']", namespaces = chunkNS, xmlValue)) site <- as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:siteCode", namespaces = chunkNS, xmlValue)) agency <- as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:siteCode/@agencyCode", namespaces = chunkNS)) pCode <-as.character(xpathApply(chunk, "ns1:variable/ns1:variableCode", namespaces = chunkNS, xmlValue)) statCd <- as.character(xpathApply(chunk, "ns1:variable/ns1:options/ns1:option/@optionCode", namespaces = chunkNS)) - + valuesIndex <- as.numeric(which("values" == names(chunk))) zoneAbbrievs <- 
c(as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:timeZoneInfo/ns1:defaultTimeZone/@zoneAbbreviation", namespaces = chunkNS)), as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:timeZoneInfo/ns1:daylightSavingsTimeZone/@zoneAbbreviation", namespaces = chunkNS))) - + names(zoneAbbrievs) <- c(as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:timeZoneInfo/ns1:defaultTimeZone/@zoneOffset", namespaces = chunkNS)), - as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:timeZoneInfo/ns1:daylightSavingsTimeZone/@zoneOffset", namespaces = chunkNS))) + as.character(xpathApply(chunk, "ns1:sourceInfo/ns1:timeZoneInfo/ns1:daylightSavingsTimeZone/@zoneOffset", namespaces = chunkNS))) for (j in valuesIndex){ subChunk <- xmlRoot(xmlDoc(chunk[[j]])) @@ -82,67 +82,69 @@ getWaterML1Data <- function(obs_url){ methodID <- padVariable(methodID,2) - value <- as.numeric(xpathSApply(subChunk, "ns1:value",namespaces = chunkNS, xmlValue)) - datetime <- as.POSIXct(strptime(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS),"%Y-%m-%dT%H:%M:%S")) - tzHours <- substr(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS), - 24, - nchar(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS))) - if(mean(nchar(tzHours),rm.na=TRUE) == 6){ - tzAbbriev <- zoneAbbrievs[tzHours] - } else { - tzAbbriev <- rep(as.character(zoneAbbrievs[1]),length(datetime)) - } - - timeZoneLibrary <- setNames(c("America/New_York","America/New_York","America/Chicago","America/Chicago", - "America/Denver","America/Denver","America/Los_Angeles","America/Los_Angeles", - "America/Anchorage","America/Anchorage","America/Honolulu","America/Honolulu"), - c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST")) - timeZone <- as.character(timeZoneLibrary[tzAbbriev]) - if(length(unique(timeZone)) == 1){ - datetime <- as.POSIXct(as.character(datetime), tz = unique(timeZone)) - } else { - warning("Mixed time zone information") - for(i in seq_along(datetime)){ - datetime[i] 
<- as.POSIXct(as.character(datetime[i]), tz = timeZone[i]) + value <- as.numeric(xpathSApply(subChunk, "ns1:value",namespaces = chunkNS, xmlValue)) + if(length(value)!=0){ + datetime <- as.POSIXct(strptime(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS),"%Y-%m-%dT%H:%M:%S")) + tzHours <- substr(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS), + 24, + nchar(xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS))) + if(mean(nchar(tzHours),rm.na=TRUE) == 6){ + tzAbbriev <- zoneAbbrievs[tzHours] + } else { + tzAbbriev <- rep(as.character(zoneAbbrievs[1]),length(datetime)) } - } - - qualifier <- as.character(xpathSApply(subChunk, "ns1:value/@qualifiers",namespaces = chunkNS)) - - valueName <- paste(methodID,pCode,statCd,sep="_") - qualName <- paste(methodID,pCode,statCd,"cd",sep="_") - valueName <- paste("X",valueName,sep="") - qualName <- paste("X",qualName,sep="") - - assign(valueName,value) - assign(qualName,qualifier) - - if(length(get(qualName))!=0){ - df <- data.frame(rep(agency,length(datetime)), - rep(site,length(datetime)), - datetime, - tzAbbriev, - get(valueName), - get(qualName), - stringsAsFactors=FALSE) - names(df) <- c("agency_cd","site_no","datetime","tz_cd",valueName,qualName) - } else { - df <- data.frame(rep(agency,length(datetime)), - rep(site,length(datetime)), - datetime, - tzAbbriev, - get(valueName),stringsAsFactors=FALSE) + timeZoneLibrary <- setNames(c("America/New_York","America/New_York","America/Chicago","America/Chicago", + "America/Denver","America/Denver","America/Los_Angeles","America/Los_Angeles", + "America/Anchorage","America/Anchorage","America/Honolulu","America/Honolulu"), + c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST")) + timeZone <- as.character(timeZoneLibrary[tzAbbriev]) + if(length(unique(timeZone)) == 1){ + datetime <- as.POSIXct(as.character(datetime), tz = unique(timeZone)) + } else { + warning("Mixed time zone information") + for(i in 
seq_along(datetime)){ + datetime[i] <- as.POSIXct(as.character(datetime[i]), tz = timeZone[i]) + } + } - names(df) <- c("agency_cd","site_no","datetime","tz_cd",valueName) - } - - if (1 == i & valuesIndex[1] == j){ - mergedDF <- df - } else { - similarNames <- intersect(names(mergedDF), names(df)) - mergedDF <- merge(mergedDF, df,by=similarNames,all=TRUE) -# mergedDF <- merge(mergedDF, df,by=c("agency_cd","site_no","datetime","tz_cd"),all=TRUE) + qualifier <- as.character(xpathSApply(subChunk, "ns1:value/@qualifiers",namespaces = chunkNS)) + + valueName <- paste(methodID,pCode,statCd,sep="_") + qualName <- paste(methodID,pCode,statCd,"cd",sep="_") + valueName <- paste("X",valueName,sep="") + qualName <- paste("X",qualName,sep="") + + assign(valueName,value) + assign(qualName,qualifier) + + if(length(get(qualName))!=0){ + df <- data.frame(rep(agency,length(datetime)), + rep(site,length(datetime)), + datetime, + tzAbbriev, + get(valueName), + get(qualName), + stringsAsFactors=FALSE) + + names(df) <- c("agency_cd","site_no","datetime","tz_cd",valueName,qualName) + } else { + df <- data.frame(rep(agency,length(datetime)), + rep(site,length(datetime)), + datetime, + tzAbbriev, + get(valueName),stringsAsFactors=FALSE) + + names(df) <- c("agency_cd","site_no","datetime","tz_cd",valueName) + } + + if (1 == i & valuesIndex[1] == j){ + mergedDF <- df + } else { + similarNames <- intersect(names(mergedDF), names(df)) + mergedDF <- merge(mergedDF, df,by=similarNames,all=TRUE) + # mergedDF <- merge(mergedDF, df,by=c("agency_cd","site_no","datetime","tz_cd"),all=TRUE) + } } } } -- GitLab