Skip to content
Snippets Groups Projects
Commit dbd37dfc authored by Laura A DeCicco's avatar Laura A DeCicco
Browse files

Updated import functions to accept local files.

parent e200dd82
No related branches found
No related tags found
1 merge request!39Overhaul of function names. Move some functionality to EGRET.
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#' @param qw logical, if TRUE parses as water quality data (where dates/times are in start and end times) #' @param qw logical, if TRUE parses as water quality data (where dates/times are in start and end times)
#' @return data a data frame containing columns agency, site, dateTime (converted to UTC), values, and remark codes for all requested combinations #' @return data a data frame containing columns agency, site, dateTime (converted to UTC), values, and remark codes for all requested combinations
#' @export #' @export
#' @import RCurl
#' @examples #' @examples
#' siteNumber <- "02177000" #' siteNumber <- "02177000"
#' startDate <- "2012-09-01" #' startDate <- "2012-09-01"
...@@ -28,134 +29,141 @@ ...@@ -28,134 +29,141 @@
#' c('34247','30234','32104','34220'), #' c('34247','30234','32104','34220'),
#' "2010-11-03","","qw",format="rdb") #' "2010-11-03","","qw",format="rdb")
#' qwData <- importRDB1(qwURL, qw=TRUE) #' qwData <- importRDB1(qwURL, qw=TRUE)
#' # User file:
#' filePath <- system.file("extdata", package="dataRetrievaldemo")
#' fileName <- "RDB1Example.txt"
#' fullPath <- file.path(filePath, fileName)
#' importUserRDB <- importRDB1(fullPath)
importRDB1 <- function(obs_url,asDateTime=FALSE, qw=FALSE){ importRDB1 <- function(obs_url,asDateTime=FALSE, qw=FALSE){
retval = tryCatch({ if(url.exists(obs_url)){
h <- basicHeaderGatherer() retval = tryCatch({
doc <- getURL(obs_url, headerfunction = h$update) h <- basicHeaderGatherer()
doc <- getURL(obs_url, headerfunction = h$update)
}, warning = function(w) {
message(paste("URL caused a warning:", obs_url)) if(!(as.character(h$value()["Content-Type"]) == "text/plain;charset=UTF-8" |
message(w) as.character(h$value()["Content-Type"]) == "text/plain")){
}, error = function(e) { message(paste("URL caused an error:", obs_url))
message(paste("URL does not seem to exist:", obs_url)) message("Content-Type=",h$value()["Content-Type"])
message(e) }
return(NA) doc <- textConnection(doc)
})
}, warning = function(w) {
message(paste("URL caused a warning:", obs_url))
message(w)
}, error = function(e) {
message(paste("URL does not seem to exist:", obs_url))
message(e)
return(NA)
})
} else {
doc <- obs_url
}
tmp <- read.delim(
doc,
header = TRUE,
quote="\"",
dec=".",
sep='\t',
colClasses=c('character'),
fill = TRUE,
comment.char="#")
if(as.character(h$value()["Content-Type"]) == "text/plain;charset=UTF-8" | as.character(h$value()["Content-Type"]) == "text/plain"){ dataType <- tmp[1,]
data <- tmp[-1,]
# comments <- readLines(doc)
multiSiteCorrections <- -which(as.logical(apply(data[,1:2], 1, FUN=function(x) all(x %in% as.character(dataType[,1:2])))))
tmp <- read.delim(
textConnection(doc), if(length(multiSiteCorrections) > 0){
header = TRUE, data <- data[multiSiteCorrections,]
quote="\"",
dec=".",
sep='\t',
colClasses=c('character'),
fill = TRUE,
comment.char="#")
dataType <- tmp[1,]
data <- tmp[-1,]
multiSiteCorrections <- -which(as.logical(apply(data[,1:2], 1, FUN=function(x) all(x %in% as.character(dataType[,1:2])))))
if(length(multiSiteCorrections) > 0){ findRowsWithHeaderInfo <- as.integer(apply(data[,1:2], 1, FUN = function(x) if(x[1] == names(data)[1] & x[2] == names(data)[2]) 1 else 0))
data <- data[multiSiteCorrections,] findRowsWithHeaderInfo <- which(findRowsWithHeaderInfo == 0)
data <- data[findRowsWithHeaderInfo,]
}
timeZoneLibrary <- setNames(c("America/New_York","America/New_York","America/Chicago","America/Chicago",
"America/Denver","America/Denver","America/Los_Angeles","America/Los_Angeles",
"America/Anchorage","America/Anchorage","America/Honolulu","America/Honolulu"),
c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"))
offsetLibrary <- setNames(c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10),
c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"))
data[,grep('n$', dataType)] <- suppressWarnings(sapply(data[,grep('n$', dataType)], function(x) as.numeric(x)))
if(length(grep('d$', dataType)) > 0){
if (asDateTime & !qw){
findRowsWithHeaderInfo <- as.integer(apply(data[,1:2], 1, FUN = function(x) if(x[1] == names(data)[1] & x[2] == names(data)[2]) 1 else 0)) if("tz_cd" %in% names(data)){
findRowsWithHeaderInfo <- which(findRowsWithHeaderInfo == 0) offset <- offsetLibrary[data$tz_cd]
data <- data[findRowsWithHeaderInfo,] } else {
offset <- 0
}
offset[is.na(offset)] <- 0
} data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0], "%Y-%m-%d %H:%M", tz = "UTC")
data[,regexpr('d$', dataType) > 0] <- data[,regexpr('d$', dataType) > 0] + offset*60*60
timeZoneLibrary <- setNames(c("America/New_York","America/New_York","America/Chicago","America/Chicago", data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0])
"America/Denver","America/Denver","America/Los_Angeles","America/Los_Angeles",
"America/Anchorage","America/Anchorage","America/Honolulu","America/Honolulu"), } else if (qw){
c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"))
if("sample_start_time_datum_cd" %in% names(data)){
timeZoneStartOffset <- offsetLibrary[data$sample_start_time_datum_cd]
offsetLibrary <- setNames(c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10), timeZoneStartOffset[is.na(timeZoneStartOffset)] <- 0
c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST")) } else {
timeZoneStartOffset <- 0
data[,grep('n$', dataType)] <- suppressWarnings(sapply(data[,grep('n$', dataType)], function(x) as.numeric(x))) }
if(length(grep('d$', dataType)) > 0){ if("sample_end_time_datum_cd" %in% names(data)){
if (asDateTime & !qw){ timeZoneEndOffset <- offsetLibrary[data$sample_end_time_datum_cd]
timeZoneEndOffset[is.na(timeZoneEndOffset)] <- 0
if("tz_cd" %in% names(data)){ composite <- TRUE
offset <- offsetLibrary[data$tz_cd] } else {
} else { composite <- FALSE
offset <- 0 if(any(data$sample_end_dt != "") & any(data$sample_end_dm != "")){
} if(which(data$sample_end_dt != "") == which(data$sample_end_dm != "")){
offset[is.na(offset)] <- 0 composite <- TRUE
data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0], "%Y-%m-%d %H:%M", tz = "UTC")
data[,regexpr('d$', dataType) > 0] <- data[,regexpr('d$', dataType) > 0] + offset*60*60
data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0])
} else if (qw){
if("sample_start_time_datum_cd" %in% names(data)){
timeZoneStartOffset <- offsetLibrary[data$sample_start_time_datum_cd]
timeZoneStartOffset[is.na(timeZoneStartOffset)] <- 0
} else {
timeZoneStartOffset <- 0
}
if("sample_end_time_datum_cd" %in% names(data)){
timeZoneEndOffset <- offsetLibrary[data$sample_end_time_datum_cd]
timeZoneEndOffset[is.na(timeZoneEndOffset)] <- 0
composite <- TRUE
} else {
composite <- FALSE
if(any(data$sample_end_dt != "") & any(data$sample_end_dm != "")){
if(which(data$sample_end_dt != "") == which(data$sample_end_dm != "")){
composite <- TRUE
}
}
timeZoneEndOffset <- 0
}
if("sample_dt" %in% names(data)){
if(any(data$sample_dt != "")){
suppressWarnings(data$sample_dt <- as.Date(parse_date_time(data$sample_dt, c("Ymd", "mdY"))))
} }
} }
timeZoneEndOffset <- 0
if("sample_end_dt" %in% names(data)){ }
if(any(data$sample_end_dt != "")){
suppressWarnings(data$sample_end_dt <- as.Date(parse_date_time(data$sample_end_dt, c("Ymd", "mdY")))) if("sample_dt" %in% names(data)){
} if(any(data$sample_dt != "")){
} suppressWarnings(data$sample_dt <- as.Date(parse_date_time(data$sample_dt, c("Ymd", "mdY"))))
# if(any(!is.na(timeZoneStartOffset))){
data$startDateTime <- with(data, as.POSIXct(paste(sample_dt, sample_tm),format="%Y-%m-%d %H:%M", tz = "UTC"))
data$startDateTime <- data$startDateTime + timeZoneStartOffset*60*60
data$startDateTime <- as.POSIXct(data$startDateTime)
# }
if(composite){
data$endDateTime <- with(data, as.POSIXct(paste(sample_end_dt, sample_end_tm),format="%Y-%m-%d %H:%M", tz = "UTC"))
data$endDateTime <- data$endDateTime + timeZoneEndOffset*60*60
data$endDateTime <- as.POSIXct(data$endDateTime)
} }
}
} else {
for (i in grep('d$', dataType)){ if("sample_end_dt" %in% names(data)){
if (all(data[,i] != "")){ if(any(data$sample_end_dt != "")){
data[,i] <- as.Date(data[,i]) suppressWarnings(data$sample_end_dt <- as.Date(parse_date_time(data$sample_end_dt, c("Ymd", "mdY"))))
} }
}
data$startDateTime <- with(data, as.POSIXct(paste(sample_dt, sample_tm),format="%Y-%m-%d %H:%M", tz = "UTC"))
data$startDateTime <- data$startDateTime + timeZoneStartOffset*60*60
data$startDateTime <- as.POSIXct(data$startDateTime)
if(composite){
data$endDateTime <- with(data, as.POSIXct(paste(sample_end_dt, sample_end_tm),format="%Y-%m-%d %H:%M", tz = "UTC"))
data$endDateTime <- data$endDateTime + timeZoneEndOffset*60*60
data$endDateTime <- as.POSIXct(data$endDateTime)
}
} else {
for (i in grep('d$', dataType)){
if (all(data[,i] != "")){
data[,i] <- as.Date(data[,i])
} }
} }
} }
row.names(data) <- NULL
return(data)
} else {
message(paste("URL caused an error:", obs_url))
message("Content-Type=",h$value()["Content-Type"])
} }
row.names(data) <- NULL
return(data)
} }
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#' @return mergedDF a data frame containing columns agency, site, dateTime, values, and remark codes for all requested combinations #' @return mergedDF a data frame containing columns agency, site, dateTime, values, and remark codes for all requested combinations
#' @export #' @export
#' @import XML #' @import XML
#' @import RCurl
#' @examples #' @examples
#' siteNumber <- "02177000" #' siteNumber <- "02177000"
#' startDate <- "2012-09-01" #' startDate <- "2012-09-01"
...@@ -28,27 +29,35 @@ ...@@ -28,27 +29,35 @@
#' unitDataURL <- constructNWISURL(siteNumber,property, #' unitDataURL <- constructNWISURL(siteNumber,property,
#' "2013-11-03","2013-11-03",'uv',format='xml') #' "2013-11-03","2013-11-03",'uv',format='xml')
#' unitData <- importWaterML1(unitDataURL,TRUE) #' unitData <- importWaterML1(unitDataURL,TRUE)
#' filePath <- system.file("extdata", package="dataRetrievaldemo")
#' fileName <- "WaterML1Example.xml"
#' fullPath <- file.path(filePath, fileName)
#' importUserWM1 <- importWaterML1(fullPath)
importWaterML1 <- function(obs_url,asDateTime=FALSE){ importWaterML1 <- function(obs_url,asDateTime=FALSE){
h <- basicHeaderGatherer() if(url.exists(obs_url)){
doc = tryCatch({ doc = tryCatch({
returnedDoc <- getURI(obs_url, headerfunction = h$update) h <- basicHeaderGatherer()
if(h$value()["Content-Type"] == "text/xml;charset=UTF-8"){ returnedDoc <- getURI(obs_url, headerfunction = h$update)
xmlTreeParse(returnedDoc, getDTD = FALSE, useInternalNodes = TRUE) if(h$value()["Content-Type"] == "text/xml;charset=UTF-8"){
} else { xmlTreeParse(returnedDoc, getDTD = FALSE, useInternalNodes = TRUE)
message(paste("URL caused an error:", obs_url)) } else {
message("Content-Type=",h$value()["Content-Type"]) message(paste("URL caused an error:", obs_url))
message("Content-Type=",h$value()["Content-Type"])
return(NA)
}
}, warning = function(w) {
message(paste("URL caused a warning:", obs_url))
message(w)
}, error = function(e) {
message(paste("URL does not seem to exist:", obs_url))
message(e)
return(NA) return(NA)
} })
} else {
}, warning = function(w) { doc <- xmlTreeParse(obs_url, getDTD = FALSE, useInternalNodes = TRUE)
message(paste("URL caused a warning:", obs_url)) }
message(w)
}, error = function(e) {
message(paste("URL does not seem to exist:", obs_url))
message(e)
return(NA)
})
doc <- xmlRoot(doc) doc <- xmlRoot(doc)
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#' @return mergedDF a data frame containing columns agency, site, dateTime, values, and remark codes for all requested combinations #' @return mergedDF a data frame containing columns agency, site, dateTime, values, and remark codes for all requested combinations
#' @export #' @export
#' @import XML #' @import XML
#' @import RCurl
#' @importFrom plyr rbind.fill.matrix #' @importFrom plyr rbind.fill.matrix
#' @examples #' @examples
#' baseURL <- "http://waterservices.usgs.gov/nwis/dv/?format=waterml,2.0" #' baseURL <- "http://waterservices.usgs.gov/nwis/dv/?format=waterml,2.0"
...@@ -29,29 +30,37 @@ ...@@ -29,29 +30,37 @@
#' "statCd=00003", #' "statCd=00003",
#' "parameterCd=00060",sep="&") #' "parameterCd=00060",sep="&")
#' dataReturnMulti <- importWaterML2(URLmulti) #' dataReturnMulti <- importWaterML2(URLmulti)
#' filePath <- system.file("extdata", package="dataRetrievaldemo")
#' fileName <- "WaterML2Example.xml"
#' fullPath <- file.path(filePath, fileName)
#' UserData <- importWaterML2(fullPath)
#' } #' }
importWaterML2 <- function(obs_url, asDateTime=FALSE){ importWaterML2 <- function(obs_url, asDateTime=FALSE){
h <- basicHeaderGatherer() if(url.exists(obs_url)){
doc = tryCatch({ doc = tryCatch({
returnedDoc <- getURL(obs_url, headerfunction = h$update) h <- basicHeaderGatherer()
if(h$value()["Content-Type"] == "text/xml;charset=UTF-8" | returnedDoc <- getURL(obs_url, headerfunction = h$update)
h$value()["Content-Type"] == "text/xml; subtype=gml/3.1.1;charset=UTF-8"){ if(h$value()["Content-Type"] == "text/xml;charset=UTF-8" |
xmlTreeParse(returnedDoc, getDTD = FALSE, useInternalNodes = TRUE) h$value()["Content-Type"] == "text/xml; subtype=gml/3.1.1;charset=UTF-8"){
} else { xmlTreeParse(returnedDoc, getDTD = FALSE, useInternalNodes = TRUE)
message(paste("URL caused an error:", obs_url)) } else {
message("Content-Type=",h$value()["Content-Type"]) message(paste("URL caused an error:", obs_url))
message("Content-Type=",h$value()["Content-Type"])
return(NA)
}
}, warning = function(w) {
message(paste("URL caused a warning:", obs_url))
message(w)
}, error = function(e) {
message(paste("URL does not seem to exist:", obs_url))
message(e)
return(NA) return(NA)
} })
} else {
}, warning = function(w) { doc <- xmlTreeParse(obs_url, getDTD = FALSE, useInternalNodes = TRUE)
message(paste("URL caused a warning:", obs_url)) }
message(w)
}, error = function(e) {
message(paste("URL does not seem to exist:", obs_url))
message(e)
return(NA)
})
doc <- xmlRoot(doc) doc <- xmlRoot(doc)
......
...@@ -34,13 +34,13 @@ Web service retrieval functions: ...@@ -34,13 +34,13 @@ Web service retrieval functions:
Moving `EGRETdemo` specific functions to `EGRETdemo` (version 2.0.0 and greater): Moving `EGRETdemo` specific functions to `EGRETdemo` (version 2.0.0 and greater):
|Information Source | Site Query | Meta Data | Data | |Information Source | Meta Data | Data |
| -------------| -------------| ------------- |:-------------| | -------------| -------------| ------------- |:-------------|
|NWIS | | `getNWISInfo` | `getNWISSample` | |NWIS | `getNWISInfo` | `getNWISSample` |
| | | | `getNWISDaily` | | | | `getNWISDaily` |
| Water Quality Portal | | `getWQPInfo`| `getWQPSample` | | Water Quality Portal | `getWQPInfo`| `getWQPSample` |
| User-supplied files | | `getUserInfo` | `getUserDaily`| | User-supplied files | `getUserInfo` | `getUserDaily`|
| | | | `getUserSample` | | | | `getUserSample` |
......
This diff is collapsed.
...@@ -38,5 +38,10 @@ qwURL <- constructNWISURL(c('04024430','04024000'), ...@@ -38,5 +38,10 @@ qwURL <- constructNWISURL(c('04024430','04024000'),
c('34247','30234','32104','34220'), c('34247','30234','32104','34220'),
"2010-11-03","","qw",format="rdb") "2010-11-03","","qw",format="rdb")
qwData <- importRDB1(qwURL, qw=TRUE) qwData <- importRDB1(qwURL, qw=TRUE)
# User file:
filePath <- system.file("extdata", package="dataRetrievaldemo")
fileName <- "RDB1Example.txt"
fullPath <- file.path(filePath, fileName)
importUserRDB <- importRDB1(fullPath)
} }
...@@ -37,5 +37,9 @@ groundWater <- importWaterML1(groundwaterExampleURL) ...@@ -37,5 +37,9 @@ groundWater <- importWaterML1(groundwaterExampleURL)
unitDataURL <- constructNWISURL(siteNumber,property, unitDataURL <- constructNWISURL(siteNumber,property,
"2013-11-03","2013-11-03",'uv',format='xml') "2013-11-03","2013-11-03",'uv',format='xml')
unitData <- importWaterML1(unitDataURL,TRUE) unitData <- importWaterML1(unitDataURL,TRUE)
filePath <- system.file("extdata", package="dataRetrievaldemo")
fileName <- "WaterML1Example.xml"
fullPath <- file.path(filePath, fileName)
importUserWM1 <- importWaterML1(fullPath)
} }
...@@ -37,6 +37,10 @@ URLmulti <- paste(baseURL, ...@@ -37,6 +37,10 @@ URLmulti <- paste(baseURL,
"statCd=00003", "statCd=00003",
"parameterCd=00060",sep="&") "parameterCd=00060",sep="&")
dataReturnMulti <- importWaterML2(URLmulti) dataReturnMulti <- importWaterML2(URLmulti)
filePath <- system.file("extdata", package="dataRetrievaldemo")
fileName <- "WaterML2Example.xml"
fullPath <- file.path(filePath, fileName)
UserData <- importWaterML2(fullPath)
} }
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment