Commit e3348ab7 authored by David Watkins's avatar David Watkins

upstream merge

Merge branch 'master' of github.com:USGS-R/dataRetrieval into NGWMN

# Conflicts:
#	DESCRIPTION
#	NAMESPACE
#	R/importNGWMN_wml2.R
#	R/readNGWMNdata.R
#	man/importNGWMN_wml2.Rd
#	man/readNGWMNdata.Rd
#	man/readNGWMNlevels.Rd
#	man/readNGWMNsites.Rd
#	man/readWQPdata.Rd
#	man/whatWQPsites.Rd
#	tests/testthat/tests_userFriendly_fxns.R
parents f9182029 12536052
@@ -3,7 +3,9 @@ vignettes/figure
appveyor.yml
.travis.yml
.gitignore
dataRetrieval.Rproj
examplePhosQuery.R
README.md
dataRetrieval-concordance.tex
^.*\.Rproj$
^\.Rproj\.user$
Package: dataRetrieval
Type: Package
Title: Retrieval Functions for USGS and EPA Hydrologic and Water Quality Data
Version: 2.6.1
Date: 2016-08-26
Version: 2.6.8
Date: 2017-01-20
Authors@R: c( person("Robert", "Hirsch", role = c("aut"),
email = "rhirsch@usgs.gov"),
person("Laura", "DeCicco", role = c("aut","cre"),
@@ -17,15 +17,13 @@ Authors@R: c( person("Robert", "Hirsch", role = c("aut"),
email = "wwatkins@usgs.gov"))
Description: Collection of functions to help retrieve U.S. Geological Survey
(USGS) and U.S. Environmental Protection Agency (EPA) water quality and
hydrology data from web services. USGS web services are discovered from the
National Water Information System (NWIS) tools. Both EPA and USGS water
quality data are obtained from the Water Quality Portal.
hydrology data from web services. USGS web services are discovered from
National Water Information System (NWIS) <https://waterservices.usgs.gov/> and <https://waterdata.usgs.gov/nwis>.
Both EPA and USGS water quality data are obtained from the Water Quality Portal <https://www.waterqualitydata.us/>.
License: CC0
Copyright: This software is in the public domain because it contains materials
that originally came from the United States Geological Survey, an agency of
the United States Department of Interior. For more information, see the
official USGS copyright policy at
http://www.usgs.gov/visual-id/credit_usgs.html#copyright
the United States Department of Interior.
Depends:
R (>= 3.0)
Imports:
@@ -45,5 +43,5 @@ Suggests:
VignetteBuilder: knitr
BuildVignettes: true
BugReports: https://github.com/USGS-R/dataRetrieval/issues
URL: https://github.com/USGS-R/dataRetrieval, http://pubs.usgs.gov/tm/04/a10/
URL: https://pubs.er.usgs.gov/publication/tm4A10
RoxygenNote: 6.0.1
# Generated by roxygen2: do not edit by hand
export(addWaterYear)
export(calcWaterYear)
export(checkWQPdates)
export(constructNWISURL)
export(constructUseURL)
@@ -32,7 +34,6 @@ export(readNWISuv)
export(readWQPdata)
export(readWQPqw)
export(renameNWISColumns)
export(retrieveFeatureOfInterest)
export(setAccess)
export(stateCd)
export(stateCdLookup)
@@ -47,17 +48,21 @@ importFrom(curl,curl_version)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(dplyr,everything)
importFrom(dplyr,full_join)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,mutate_)
importFrom(dplyr,mutate_each_)
importFrom(dplyr,rbind_all)
importFrom(dplyr,select)
importFrom(dplyr,select_)
importFrom(httr,GET)
importFrom(httr,HEAD)
importFrom(httr,POST)
importFrom(httr,RETRY)
importFrom(httr,content)
importFrom(httr,content_type)
importFrom(httr,headers)
importFrom(httr,status_code)
importFrom(httr,stop_for_status)
dataRetrieval 2.6.3
==========
* All USGS web services are being flipped from http to https. This version of dataRetrieval will attempt to access https; if that fails, it will attempt http.
dataRetrieval 2.5.12
==========
* Response to readr 1.0.0 updates
@@ -5,8 +5,4 @@ pkg.env <- new.env()
options(Access.dataRetrieval = NULL)
}
.onAttach = function(libname, pkgname){
packageStartupMessage("Recent changes to NWIS data may affect dataRetrieval output.
Please see http://help.waterdata.usgs.gov/news/061016
for more information.")
}
#' add a water year column
#'
#' Add a column to the dataRetrieval data frame with the water year. WQP
#' queries will return a water year column for the start and end dates
#' of the data.
#'
#' @param rawData the daily- or unit-values dataset retrieved from NWISweb. Must
#' have at least one of the following columns to add the new water year columns:
#' `dateTime`, `Date`, `ActivityStartDate`, or `ActivityEndDate`. The date column(s)
#' can be character, POSIXct, or Date; they cannot be numeric.
#'
#' @return data.frame with an additional integer water year column. For NWIS data
#' the new column is `waterYear`; for WQP, there will be 2 columns:
#' `ActivityStartWaterYear` and `ActivityEndWaterYear`.
#' @export
#'
#' @importFrom dplyr select
#' @importFrom dplyr everything
#' @examples
#' \dontrun{
#' dataTemp <- readNWISdata(stateCd="OH",parameterCd="00010", service="dv")
#' dataTemp <- addWaterYear(dataTemp)
#'
#' pHData <- readWQPdata(siteid="USGS-04024315",characteristicName="pH")
#' pHData <- addWaterYear(pHData)
#' }
addWaterYear <- function(rawData){
allowedDateColNames <- c("dateTime", "Date", "ActivityStartDate", "ActivityEndDate")
allowedWYColNames <- c("waterYear", "waterYear", "ActivityStartWaterYear", "ActivityEndWaterYear")
names(allowedWYColNames) <- allowedDateColNames
# only allow WY to be added if there is an appropriate date column
if(all(!allowedDateColNames %in% names(rawData))){
stop("specified date column does not exist in supplied data frame")
}
# set the name of the date column(s) to use for calculating the WY &
# if the WY column already exists, do not add another (rm that date col
# from the list that will be looped over)
dateColNames <- names(rawData)[names(rawData) %in% allowedDateColNames]
dateColNames <- dateColNames[!allowedWYColNames[dateColNames] %in% names(rawData)]
for(dateCol in dateColNames){
dateColWY <- allowedWYColNames[dateCol]
# calculate WY & add as new column
rawData[[dateColWY]] <- calcWaterYear(rawData[[dateCol]])
# move waterYear so that it always comes right after dateTime
dateCol_i <- which(names(rawData) == dateCol)
dateColWY_i <- which(names(rawData) == dateColWY)
rawData <- select(rawData, 1:dateCol_i, dateColWY_i, everything())
}
return(rawData)
}
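The column reordering above leans on dplyr::select() with positional indices plus everything(); a minimal standalone sketch of the same pattern on toy data (column and object names here are hypothetical):

library(dplyr)

# toy stand-in for an NWIS daily-values frame
df <- data.frame(agency_cd = "USGS",
                 Date = as.Date(c("2015-09-30", "2015-10-01")),
                 value = c(1.2, 3.4))
df$waterYear <- c(2015L, 2016L)    # what calcWaterYear() would return for Date
dateCol_i <- which(names(df) == "Date")
wyCol_i <- which(names(df) == "waterYear")
df <- select(df, 1:dateCol_i, wyCol_i, everything())  # waterYear now directly follows Date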
#' Extract WY from a date
#'
#' Determine the correct water year based on a calendar date.
#'
#' @param dateVec vector of dates as character ("YYYY-MM-DD"),
#' Date, or POSIXct. Numeric does not work.
#'
#' @details This function calculates a water year based on the USGS
#' definition that a water year starts on October 1 of the year before,
#' and ends on September 30. For example, water year 2015 started on
#' 2014-10-01 and ended on 2015-09-30. See the USGS definition at
#' \url{https://water.usgs.gov/nwc/explain_data.html}.
#'
#' @return numeric vector indicating the water year
#' @export
#' @examples
#' x <- seq(as.Date("2010-01-01"), as.Date("2010-12-31"), by="month")
#' waterYear <- calcWaterYear(x)
#'
calcWaterYear <- function(dateVec){
# POSIXlt years are counted from 1900, POSIXlt months start at 0
dateTimeVec <- as.POSIXlt(dateVec)
calYear <- dateTimeVec$year + 1900
calMon <- dateTimeVec$mon + 1
# when the date is NA, it should not try to add 1
whichPastOct <- calMon >= 10
whichPastOct[is.na(whichPastOct)] <- FALSE
# add one to the year if it is in October or after
waterYear <- calYear
waterYear[whichPastOct] <- calYear[whichPastOct] + 1
return(waterYear)
}
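A quick check of the October rollover and the NA guard, assuming calcWaterYear() as defined above:

calcWaterYear(c("2014-09-30", "2014-10-01", NA))
# [1] 2014 2015   NA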
#' Construct NWIS url for data retrieval
#'
#' Imports data from NWIS web service. This function gets the data from here: \url{http://nwis.waterdata.usgs.gov/nwis/qwdata}
#' A list of parameter codes can be found here: \url{http://nwis.waterdata.usgs.gov/nwis/pmcodes/}
#' A list of statistic codes can be found here: \url{http://nwis.waterdata.usgs.gov/nwis/help/?read_file=stat&format=table}
#' Imports data from NWIS web service. This function gets the data from here: \url{https://nwis.waterdata.usgs.gov/nwis/qwdata}
#' A list of parameter codes can be found here: \url{https://nwis.waterdata.usgs.gov/nwis/pmcodes/}
#' A list of statistic codes can be found here: \url{https://nwis.waterdata.usgs.gov/nwis/help/?read_file=stat&format=table}
#'
#' @param siteNumber string or vector of strings USGS site number. This is usually an 8 digit number
#' @param parameterCd string or vector of USGS parameter code. This is usually a 5 digit number.
@@ -26,7 +26,7 @@
#' provide statistics for each month and year within the range individually.
#' @param statType character Only used for statistics service requests. Type(s) of statistics to output for daily values. Default is mean, which is the only
#' option for monthly and yearly report types. See the statistics service documentation
#' at \url{http://waterservices.usgs.gov/rest/Statistics-Service.html#statType} for a full list of codes.
#' at \url{https://waterservices.usgs.gov/rest/Statistics-Service.html#statType} for a full list of codes.
#' @keywords data import USGS web service
#' @return url string
#' @export
@@ -257,9 +257,7 @@ constructNWISURL <- function(siteNumber,parameterCd="00060",startDate="",endDate
#' Construct WQP url for data retrieval
#'
#' Imports data from WQP web service. This function gets the data from here: \url{http://nwis.waterdata.usgs.gov/nwis/qwdata}
#' A list of parameter codes can be found here: \url{http://nwis.waterdata.usgs.gov/nwis/pmcodes/}
#' A list of statistic codes can be found here: \url{http://nwis.waterdata.usgs.gov/nwis/help/?read_file=stat&format=table}
#' Construct WQP url for data retrieval. This function gets the data from here: \url{https://www.waterqualitydata.us}
#'
#' @param siteNumber string or vector of strings USGS site number. This is usually an 8 digit number
#' @param parameterCd string or vector of USGS parameter code. This is usually a 5 digit number.
@@ -296,9 +294,8 @@ constructWQPURL <- function(siteNumber,parameterCd,startDate,endDate,zip=FALSE){
parameterCd <- paste(parameterCd, collapse=";")
}
baseURL <- "http://www.waterqualitydata.us/Result/search?siteid="
baseURL <- drURL("wqpData", siteid = siteNumber)
url <- paste0(baseURL,
siteNumber,
ifelse(pCodeLogic,"&pCode=","&characteristicName="),
parameterCd)
@@ -324,7 +321,7 @@ constructWQPURL <- function(siteNumber,parameterCd,startDate,endDate,zip=FALSE){
#' Construct URL for NWIS water use data service
#'
#' Reconstructs URLs to retrieve data from here: \url{http://waterdata.usgs.gov/nwis/wu}
#' Reconstructs URLs to retrieve data from here: \url{https://waterdata.usgs.gov/nwis/wu}
#'
#' @param years integer Years for data retrieval. Must be years ending in 0 or 5, or "ALL", which retrieves all available years.
#' @param stateCd could be character (full name, abbreviation, id), or numeric (id)
@@ -336,21 +333,23 @@ constructWQPURL <- function(siteNumber,parameterCd,startDate,endDate,zip=FALSE){
#' url <- constructUseURL(years=c(1990,1995),stateCd="Ohio",countyCd = c(1,3), categories = "ALL")
#'
constructUseURL <- function(years,stateCd,countyCd,categories){
baseURL <- "http://waterdata.usgs.gov/"
base2 <- "nwis/water_use?format=rdb&rdb_compression=value"
if(is.null(stateCd)){
baseURL <- paste0(baseURL,base2)
} else{
baseURL <- drURL("useNat", format="rdb", rdb_compression="value")
} else {
stateCd <- stateCdLookup(input = stateCd, outputType = "postal")
baseURL <- "https://waterdata.usgs.gov/"
base2 <- "nwis/water_use?format=rdb&rdb_compression=value"
baseURL <- paste0(baseURL,paste0(stateCd,"/"),base2)
if(!is.null(countyCd)){
if(length(countyCd) > 1) {countyCd <- paste(countyCd,collapse="%2C")}
baseURL <- paste0(baseURL,"&wu_area=county&wu_county=",countyCd)
} else{
baseURL <- paste0(baseURL,"&wu_area=State%20Total")
}
} else {
baseURL <- paste0(baseURL,"&wu_area=State%20Total")
}
}
years <- paste(years, collapse="%2C")
categories <- paste(categories, collapse = "%2C")
retURL <- paste0(baseURL,"&wu_year=",years,"&wu_category=",categories)
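For orientation, a usage sketch of the URL builders revised above (the exact strings returned depend on the endpoints drURL() resolves in the installed version):

library(dataRetrieval)

# NWIS daily-values URL for discharge (00060) at one site
dvURL <- constructNWISURL(siteNumber = "04085427", parameterCd = "00060",
                          startDate = "2012-01-01", endDate = "2012-06-30",
                          service = "dv")

# county-level water-use URL, mirroring the roxygen example above
useURL <- constructUseURL(years = c(1990, 1995), stateCd = "Ohio",
                          countyCd = c(1, 3), categories = "ALL")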
@@ -6,12 +6,14 @@
#' @param obs_url character containing the url for the retrieval
#' @param \dots information to pass to header request
#' @importFrom httr GET
#' @importFrom httr POST
#' @importFrom httr RETRY
#' @importFrom httr user_agent
#' @importFrom httr stop_for_status
#' @importFrom httr status_code
#' @importFrom httr headers
#' @importFrom httr content
#' @importFrom httr content_type
#' @importFrom curl curl_version
#' @export
#' @return raw data from web services
@@ -27,33 +29,50 @@
#' }
getWebServiceData <- function(obs_url, ...){
returnedList <- RETRY("GET",obs_url, ..., user_agent(default_ua()))
returnedList <- tryCatch({
retryGetOrPost(obs_url, ...)
}, error = function(e){
NULL
})
if(is.null(returnedList)){
message("Switching from https to http")
obs_url <- gsub("https", "http", obs_url)
returnedList <- tryCatch({
retryGetOrPost(obs_url, ...)
}, error = function(e){
NULL
})
}
if(status_code(returnedList) != 200){
message("For: ", obs_url,"\n")
stop_for_status(returnedList)
} else {
headerInfo <- headers(returnedList)
if(status_code(returnedList) != 200){
message("For: ", obs_url,"\n")
stop_for_status(returnedList)
} else {
headerInfo <- headers(returnedList)
if(headerInfo$`content-type` == "text/tab-separated-values;charset=UTF-8"){
returnedDoc <- content(returnedList, type="text",encoding = "UTF-8")
} else if (headerInfo$`content-type` == "text/html"){
txt <- readBin(returnedList$content, character())
message(txt)
return(txt)
} else {
returnedDoc <- content(returnedList,encoding = "UTF-8")
if(grepl("No sites/data found using the selection criteria specified", returnedDoc)){
message(returnedDoc)
}
if(headerInfo$`content-type` == "text/tab-separated-values;charset=UTF-8"){
returnedDoc <- content(returnedList, type="text",encoding = "UTF-8")
} else if (headerInfo$`content-type` %in%
c("application/zip", "application/zip;charset=UTF-8")) {
returnedDoc <- returnedList
} else if (headerInfo$`content-type` %in% c("text/html","text/html; charset=UTF-8") ){
txt <- readBin(returnedList$content, character())
message(txt)
return(txt)
} else {
returnedDoc <- content(returnedList,encoding = "UTF-8")
if(grepl("No sites/data found using the selection criteria specified", returnedDoc)){
message(returnedDoc)
}
}
attr(returnedDoc, "headerInfo") <- headerInfo
attr(returnedDoc, "headerInfo") <- headerInfo
return(returnedDoc)
}
return(returnedDoc)
}
}
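The https-to-http fallback above wraps the request in tryCatch() and retries the downgraded URL once; a minimal standalone sketch of that pattern using httr directly (fetch_with_fallback is hypothetical, not a package function):

library(httr)

fetch_with_fallback <- function(url) {
  resp <- tryCatch(RETRY("GET", url), error = function(e) NULL)
  if (is.null(resp)) {
    message("Switching from https to http")
    resp <- tryCatch(RETRY("GET", sub("^https", "http", url)),
                     error = function(e) NULL)
  }
  resp
}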
default_ua <- function() {
@@ -73,13 +92,26 @@ default_ua <- function() {
getQuerySummary <- function(url){
queryHEAD <- HEAD(url)
retquery <- headers(queryHEAD)
countNames <- c('total-site-count', 'nwis-site-count', 'total-result-count', 'nwis-result-count')
retquery[which(names(retquery) %in% countNames)] <- unlist(lapply(countNames, retquery = retquery,
FUN = function(c, retquery){
retquery[[c]] <- as.numeric(retquery[[c]])
return(retquery[c])
}))
retquery$date <- as.Date(retquery$date, format = "%a, %d %b %Y %H:%M:%S")
retquery[grep("-count",names(retquery))] <- as.numeric(retquery[grep("-count",names(retquery))])
if("date" %in% names(retquery)){
retquery$date <- as.Date(retquery$date, format = "%a, %d %b %Y %H:%M:%S")
}
return(retquery)
}
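In miniature, the reworked header post-processing coerces every *-count header to numeric and only parses date when it is present; a sketch on a mock header list (lapply used here for element-wise coercion):

retquery <- list(`total-site-count` = "12",
                 `nwis-result-count` = "340",
                 date = "Fri, 20 Jan 2017 10:00:00 GMT")
count_i <- grep("-count", names(retquery))
retquery[count_i] <- lapply(retquery[count_i], as.numeric)
if ("date" %in% names(retquery)) {
  retquery$date <- as.Date(retquery$date, format = "%a, %d %b %Y %H:%M:%S")
}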
retryGetOrPost <- function(url, ...) {
resp <- NULL
if (nchar(url) < 2048) {
resp <- RETRY("GET", url, ..., user_agent(default_ua()))
} else {
split <- strsplit(url, "?", fixed=TRUE)
url <- split[[1]][1]
query <- split[[1]][2]
resp <- RETRY("POST", url, ..., body = query,
content_type("application/x-www-form-urlencoded"), user_agent(default_ua()))
}
return(resp)
}
\ No newline at end of file
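retryGetOrPost() switches from GET to POST once the URL exceeds 2048 characters, sending the query string as a form-encoded body; a quick illustration of the split on a deliberately long, made-up query:

url <- paste0("https://www.waterqualitydata.us/Result/search?siteid=",
              paste(sprintf("USGS-%08d", 1:300), collapse = ";"))
nchar(url)  # well over 2048, so this would go out as a POST
split <- strsplit(url, "?", fixed = TRUE)[[1]]
base <- split[1]   # endpoint passed to RETRY("POST", ...)
query <- split[2]  # body, sent as application/x-www-form-urlencoded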
@@ -7,10 +7,11 @@
#'
#' @param obs_url character containing the url for the retrieval or a file path to the data file.
#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, Date
#' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the
#' datetimes to UTC (properly accounting for daylight savings times based on the data's provided tz_cd column).
#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla"
#' @param tz character to set timezone attribute of datetime. Default converts the datetimes to UTC
#' (properly accounting for daylight savings times based on the data's provided tz_cd column).
#' Recommended US values include "UTC","America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla".
#' For a complete list, see \url{https://en.wikipedia.org/wiki/List_of_tz_database_time_zones}
#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the function will convert the data to dates, datetimes,
#' and numerics based on a standard algorithm. If \code{FALSE}, everything is returned as character.
#' @return A data frame with the following columns:
@@ -83,16 +84,13 @@
#' fullPath <- file.path(filePath, fileName)
#' importUserRDB <- importRDB1(fullPath)
#'
importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){
importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz="UTC"){
if(tz != ""){
tz <- match.arg(tz, c("America/New_York","America/Chicago",
"America/Denver","America/Los_Angeles",
"America/Anchorage","America/Honolulu",
"America/Jamaica","America/Managua",
"America/Phoenix","America/Metlakatla","UTC"))
if(tz == ""){
tz <- "UTC"
}
tz <- match.arg(tz, OlsonNames())
if(file.exists(obs_url)){
doc <- obs_url
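The tz handling above (and the parallel changes in importWQP and importWaterML1 below) now validates against R's full Olson database rather than a hard-coded US list, so any IANA zone is accepted:

tz <- "Europe/Paris"
tz <- match.arg(tz, OlsonNames())  # errors informatively for unrecognized zones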
@@ -11,7 +11,7 @@
#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla"
#' @return retval dataframe raw data returned from the Water Quality Portal. Additionally, a POSIXct dateTime column is supplied for
#' start and end times, and converted to UTC. See \url{http://www.waterqualitydata.us/portal_userguide.jsp} for more information.
#' start and end times, and converted to UTC. See \url{https://www.waterqualitydata.us/portal_userguide/} for more information.
#' @export
#' @seealso \code{\link{readWQPdata}}, \code{\link{readWQPqw}}, \code{\link{whatWQPsites}}
#' @import utils
@@ -47,11 +47,9 @@
importWQP <- function(obs_url, zip=FALSE, tz=""){
if(tz != ""){
tz <- match.arg(tz, c("America/New_York","America/Chicago",
"America/Denver","America/Los_Angeles",
"America/Anchorage","America/Honolulu",
"America/Jamaica","America/Managua",
"America/Phoenix","America/Metlakatla"))
tz <- match.arg(tz, OlsonNames())
} else {
tz <- "UTC"
}
if(!file.exists(obs_url)){
@@ -60,11 +58,11 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
message("zip encoding access still in development")
temp <- tempfile()
temp <- paste0(temp,".zip")
doc <- GET(obs_url, user_agent(default_ua()),
write_disk(temp))
doc <- getWebServiceData(obs_url, write_disk(temp))
headerInfo <- headers(doc)
doc <- unzip(temp, exdir=tempdir())
unlink(temp)
on.exit(unlink(doc))
} else {
doc <- getWebServiceData(obs_url)
headerInfo <- attr(doc, "headerInfo")
@@ -91,10 +89,6 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
emptyReturn <- data.frame(NA)
attr(emptyReturn, "headerInfo") <- headerInfo
return(emptyReturn)
}
if(zip){
doc <- unzip(temp)
}
} else {
@@ -119,8 +113,6 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
`HUCEightDigitCode` = col_character()),
quote = "", delim = "\t"))
if(zip) unlink(doc)
if(!file.exists(obs_url)){
actualNumReturned <- nrow(retval)
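The zip branch above routes the download through getWebServiceData() with httr::write_disk(), then unzips to a temp directory and schedules cleanup; a generic sketch of that tempfile pattern (read_zipped_result and the download call are simplified stand-ins):

read_zipped_result <- function(obs_url) {
  temp <- paste0(tempfile(), ".zip")
  utils::download.file(obs_url, temp, mode = "wb")  # stand-in for the httr request
  doc <- unzip(temp, exdir = tempdir())             # paths to the extracted files
  unlink(temp)
  on.exit(unlink(doc))
  doc
}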
@@ -5,10 +5,11 @@
#'
#' @param obs_url character or raw, containing the url for the retrieval or a file path to the data file, or raw XML.
#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, Date
#' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the
#' datetimes to UTC (properly accounting for daylight savings times based on the data's provided tz_cd column).
#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla"
#' @param tz character to set timezone attribute of datetime. Default converts the datetimes to UTC
#' (properly accounting for daylight savings times based on the data's provided tz_cd column).
#' Recommended US values include "UTC","America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla".
#' For a complete list, see \url{https://en.wikipedia.org/wiki/List_of_tz_database_time_zones}
#' @return A data frame with the following columns:
#' \tabular{lll}{
#' Name \tab Type \tab Description \cr
@@ -108,7 +109,7 @@
#' importFile <- importWaterML1(fullPath,TRUE)
#'
importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
importWaterML1 <- function(obs_url,asDateTime=FALSE, tz="UTC"){
#note: obs_url is a dated name, does not have to be a url/path
raw <- FALSE
if(class(obs_url) == "character" && file.exists(obs_url)){
@@ -120,13 +121,10 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
returnedDoc <- xml_root(getWebServiceData(obs_url, encoding='gzip'))
}
if(tz != ""){ #check tz is valid if supplied
tz <- match.arg(tz, c("America/New_York","America/Chicago",
"America/Denver","America/Los_Angeles",
"America/Anchorage","America/Honolulu",
"America/Jamaica","America/Managua",
"America/Phoenix","America/Metlakatla"))
}else{tz <- "UTC"}
if(tz == ""){ #check tz is valid if supplied
tz <- "UTC"
}
tz <- match.arg(tz, OlsonNames())
timeSeries <- xml_find_all(returnedDoc, ".//ns1:timeSeries") #each parameter/site combo
@@ -156,7 +154,7 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
obsDF <- NULL
useMethodDesc <- FALSE
if(length(valParents) > 1){ useMethodDesc <- TRUE} #append the method description to colnames later
sourceInfo <- xml_children(xml_find_all(t, ".//ns1:sourceInfo"))
variable <- xml_children(xml_find_all(t, ".//ns1:variable"))
agency_cd <- xml_attr(sourceInfo[xml_name(sourceInfo)=="siteCode"],"agencyCode")
@@ -186,18 +184,18 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
obsColName <- paste(pCode,statCode,sep = "_")
obs <- xml_find_all(v, ".//ns1:value")
values <- as.numeric(xml_text(obs)) #actual observations
nObs <- length(values)
qual <- xml_attr(obs,"qualifiers")
if(all(is.na(qual))){
noQual <- TRUE
}else{noQual <- FALSE}
noQual <- all(is.na(qual))
dateTime <- xml_attr(obs,"dateTime")
if(asDateTime){
numChar <- nchar(dateTime)
dateTime <- parse_date_time(dateTime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M",
"%Y-%m-%dT%H:%M:%S","%Y-%m-%dT%H:%M:%OS",
"%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE)
"%Y-%m-%dT%H:%M:%S","%Y-%m-%dT%H:%M:%OS",
"%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE)
if(any(numChar < 20) & any(numChar > 16)){
offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0),
code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST",""),
@@ -245,15 +243,17 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
}
}else{
#need column names for joining later
obsDF <- data.frame(dateTime=character(0), tz_cd=character(0), stringsAsFactors = FALSE)
if(asDateTime){
obsDF$dateTime <- as.POSIXct(obsDF$dateTime)
attr(obsDF$dateTime, "tzone") <- tz
# but don't overwrite:
if(is.null(obsDF)){
obsDF <- data.frame(dateTime=character(0), tz_cd=character(0), stringsAsFactors = FALSE)
if(asDateTime){
obsDF$dateTime <- as.POSIXct(obsDF$dateTime)
attr(obsDF$dateTime, "tzone") <- tz
}
}
}
}
if(is.null(obsDF)){
mergedSite <- data.frame()
next
@@ -276,7 +276,7 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
#replace no data vals with NA, change attribute df
noDataVal <- as.numeric(varText$noDataValue)
if(nObs > 0){
obsDF[obsDF$values == noDataVal] <- NA
obsDF[[obsColName]][obsDF[[obsColName]] == noDataVal] <- NA
}
varText$noDataValue <- NA
@@ -332,6 +332,8 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
mergedDF <- mergedDF[c(mergedNames[-tzLoc],mergedNames[tzLoc])]
mergedDF <- arrange(mergedDF,site_no, dateTime)
names(mergedDF) <- make.names(names(mergedDF))
#attach other site info etc as attributes of mergedDF
if(!raw){
attr(mergedDF, "url") <- obs_url
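The noDataValue fix in the @@ -276,7 hunk above replaces the sentinel only within the specific observation column rather than matching across the whole data frame; in miniature (toy column name):

obsDF <- data.frame(dateTime = c("2015-01-01", "2015-01-02"),
                    X_00060_00003 = c(-999999, 42))
noDataVal <- -999999
obsColName <- "X_00060_00003"
obsDF[[obsColName]][obsDF[[obsColName]] == noDataVal] <- NA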