Commit bef00ff8 authored by Manuela's avatar Manuela

Adjustment for multiple sites, documentation of functions and new datasets

parent 0ee4b95a
load("~/PRSim-devel/data/runoff_multi_sites.rda")
data<- data_list
data<- runoff_multi_sites
prsim.wave <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
marginal=c("kappa","empirical"), n_par=4, n_wave, marginalpar=TRUE,
marginal=c("kappa","empirical"), n_par=4, n_wave=100, marginalpar=TRUE,
GoFtest=NULL, out_dir, verbose=TRUE, suppWarn=FALSE, ...){
### function for backtransformation of continuous wavelet transform
......@@ -67,9 +67,18 @@ prsim.wave <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
### remove February 29
data[[l]] <- data[[l]][format(data[[l]]$timestamp, "%m %d") != "02 29",]
### remove incomplete years
if(which(format(data[[l]]$timestamp,format='%j')=='001')[1]>1){
data[[l]] <- data[[l]][-c(1:(which(format(data[[l]]$timestamp,format='%j')=='001')[1]-1)),]
}
if ((nrow(data[[l]]) %% 365)>0) stop("No missing values allowed. Some days are missing.")
### replace missing data by mean values
if(length(which(is.na(data[[l]]$timestamp)))>0){
### replace days with missing data
data[[l]][which(is.na(data[[l]]$timestamp)),]$Qobs <- mean(data[[l]]$Qobs,na.rm=T)
}
### generate a day index
# data[[l]]$index <- rep(c(1:365), times=length(unique(data[[l]]$YYYY)))
data[[l]]$index <- as.numeric(format(data[[l]]$timestamp,format='%j'))
......@@ -88,11 +97,17 @@ prsim.wave <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
}
### fitting of kappa distribution to all stations for which simulations are to be derived
par_day_list <- marginal_list<- list()
for(l in 1:length(data)){
### daily fitting of Kappa distribution
### fit the parameters of the Kappa distribution for each day separately.
### To enable a sufficient sample size by using daily values in moving window around day i (i.e., reduce uncertainty due to fitting)
### TO RESOLVE
### data[[l]]$index is somehow overwritten
if(marginal=="kappa"){
marginal_list[[l]] <- 'kappa'
p_vals <- numeric(365)
par_day <- matrix(0, nrow=365, ncol=4)
# density_kap <- list()
......@@ -156,24 +171,27 @@ prsim.wave <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
}
}
### Treatment for the case when Kappa distribution can not be fitted
### a) parameters can't be fitted for any of the days
if(length(which(is.na(par_day)))==365){
if(length(which(is.na(par_day[,1])))==365){
### use empirical distribution instead
marginal<-'empirical'
marginal_list[[l]]<-'empirical'
} else{
### b) parameters can be fitted for some days
### replace NA entries by values estimated for subsequent day
if(length(which(is.na(par_day)))>0){
indices <- rev(which(is.na(par_day)))
if(length(which(is.na(par_day[,1])))>0){
indices <- rev(which(is.na(par_day[,1])))
for(i in 1:length(indices)){
par_day[[indices[i]]] <- par_day[[indices[i]+1]]
par_day[indices[i],] <- par_day[indices[i]+1,]
}
}
}
}
### use either a predefined distribution in R or define own function
if(marginal!="kappa" & marginal!="empirical"){
marginal_list[[l]] <- marginal
p_vals <- numeric(365)
par_day <- matrix(0, nrow=365, ncol=n_par)
for(d in c(1:365)){
......@@ -185,7 +203,7 @@ prsim.wave <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
### define days within window
ids <- c(before,after)
### determine values in window around day i
data_window <- data$Qobs[which(data$index%in%ids)]
data_window <- data[[l]]$Qobs[which(data[[l]]$index%in%ids)]
theta <- CDF_fit(xdat=data_window, ...)
### goodness of fit test
......@@ -204,13 +222,13 @@ prsim.wave <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
par_day[d,] <- theta
}
}
}
par_day_list[[l]] <- par_day
}
### replace NA values by mean if necessary: otherwise, problems with transformation
for(l in 1:length(data)){
if(length(which(is.na(data[[l]]$Qobs)))>0){
data[[l]]$Qobs[which(is.na(data[[l]]$Qobs))] <- mean(data[[l]]$Qobs,na.rm=T)
}
# if(length(which(is.na(data[[l]]$Qobs)))>0){
# data[[l]]$Qobs[which(is.na(data[[l]]$Qobs))] <- mean(data[[l]]$Qobs,na.rm=T)
# }
### center_data: substract mean from values
data[[l]]$norm <- data[[l]]$Qobs-mean(data[[l]]$Qobs,na.rm=T)
......@@ -322,14 +340,14 @@ prsim.wave <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
data_day <- data[[l]][which(data[[l]]$index%in%c(d)),]
### use kappa distribution for backtransformation
if(marginal=="kappa"){
colnames(par_day) <- names(kap_par)
if(marginal_list[[l]]=="kappa"){
colnames(par_day_list[[l]]) <- names(kap_par)
### use monthly Kappa distribution for backtransformation
### simulate random sample of size n from Kappa disribution
data_day$kappa <- rand.kappa(length(data_day$Qobs),
xi=par_day[d,"xi"],alfa=par_day[d,"alfa"],
k=par_day[d,"k"],h=par_day[d,"h"])
xi=par_day_list[[l]][d,"xi"],alfa=par_day_list[[l]][d,"alfa"],
k=par_day_list[[l]][d,"k"],h=par_day_list[[l]][d,"h"])
data_day$rank <- rank(data_day$kappa)
......@@ -353,7 +371,7 @@ prsim.wave <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
}
}
### use empirical distribution for backtransformation
if(marginal=="empirical"){
if(marginal_list[[l]]=="empirical"){
data_day$rank <- rank(data_day$Qobs)
data_new$rank <- rank(data_new$seasonal)
data_new$rank[which(data[[l]]$index%in%c(d))] <- rank(data_new[which(data[[l]]$index%in%c(d)),]$seasonal)
......@@ -365,10 +383,10 @@ prsim.wave <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
}
### use any predefined distribution for backtransformation
if(marginal!="kappa" & marginal!="empirical"){
if(marginal_list[[l]]!="kappa" & marginal_list[[l]]!="empirical"){
### use monthly distribution for backtransformation
### simulate random sample of size n from disribution
data_day$cdf <- rCDF(n=length(data_day$Qobs), par_day[d,])
data_day$cdf <- rCDF(n=length(data_day$Qobs), par_day_list[[l]][d,])
data_day$rank <- rank(data_day$cdf)
data_new$rank <- rank(data_new$seasonal)
......@@ -403,14 +421,16 @@ prsim.wave <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
if(marginal != "empirical"){
if (marginalpar) { # also return intermediate results
# return(list(simulation=data_stoch, pars=par_day, p_val=p_vals))
save(file=paste(l,'_stoch_sim.rda',sep=''),list(simulation=data_stoch, pars=par_day, p_val=p_vals))
out_list <- list(simulation=data_stoch, pars=par_day, p_val=p_vals)
save(file=paste(l,'_stoch_sim.rda',sep=''),out_list)
} else {
# return(list(simulation=data_stoch, pars=NULL, p_val=p_vals))
save(file=paste(l,'_stoch_sim.rda',sep=''),list(simulation=data_stoch, pars=NULL, p_val=p_vals))
out_list <- list(simulation=data_stoch, pars=NULL, p_val=p_vals)
save(file=paste(l,'_stoch_sim.rda',sep=''),out_list)
}
}else{
# return(list(simulation=data_stoch))
save(file=paste(l,'_stoch_sim.rda',sep=''),list(simulation=data_stoch))
save(file=paste(l,'_stoch_sim.rda',sep=''),simulation=data_stoch)
}
### to to next station
}
......
# PRSim-devel
The R package `PRSim` provides a simulation framework to simulate streamflow time series with similar main characteristics as observed data. These characteristics include the distribution of daily streamflow values and their temporal correlation as expressed by short- and long-range dependence. The approach is based on the randomization of the phases of the Fourier transform. We further use the flexible four-parameter Kappa distribution, which allows for the extrapolation to yet unobserved low and high flows.
The R package `PRSim` provides a simulation framework to simulate streamflow time series with similar main characteristics as observed data. These characteristics include the distribution of daily streamflow values and their temporal correlation as expressed by short- and long-range dependence. The approach is based on the randomization of the phases of the Fourier transform and the randomization of the phases of the wavelet transform. We further use the flexible four-parameter Kappa distribution, which allows for the extrapolation to yet unobserved low and high flows.
This is the development repository. A recent stable version is posted on https://CRAN.R-project.org/package=PRSim
......@@ -13,9 +13,12 @@
The DESCRIPTION file:
\packageDESCRIPTION{PRSim}
\packageIndices{PRSim}
Contains two functions for the stochastic simulation of continuous discharge time series: prsim and prsim.wave both using phase randomization. prsim is based on the Fourier transform while prsim.wave uses the wavelet transform.
Simulation in the frequency domain is based on the randomization of the phases of the Fourier transform. We here combine phase randomization simulation with the flexible, four-parameter Kappa distribution, which allows for the extrapolation to yet unobserved low and high flows. The
simulation approach consists of eight steps: 1) fitting of theoretical Kappa distribution, 2) normalization and deseasonalization, 3) Fourier transformation, 4) Fourier phases computation, 5) random phase generation, 6) inverse Fourier transformation, 7) back transformation, and 8) simulation.
prsim: Simulation in the frequency domain is based on the randomization of the phases of the Fourier transform. We here combine phase randomization simulation with the flexible, four-parameter kappa distribution, which allows for the extrapolation to yet unobserved low and high flows. Alternative distributions or the empirical distribution can be used instead. The
simulation approach consists of eight steps: (1) fitting of theoretical Kappa distribution, (2) normalization and deseasonalization, (3) Fourier transformation, (4) Fourier phases computation, (5) random phase generation, (6) inverse Fourier transformation, (7) back transformation, and (8) simulation.
prsim.wave: Simulation in the frequency domain based on the randomization of the phases of the continuous wavelet transform. We combine phase randomization with the flexible, four-parameter kappa distribution. Alternative theoretical distributions or the empirical distribution can be used instead. The simulation procedure consists of five steps: (1) Derivation of random phases from a white noise time series, (2) Fitting of kappa distribution, (3) Wavelet transform, (4) Inverse wavelet transform, and (5) Transformation to the kappa distribution (or the distribution of choice).
}
\author{
\packageAuthor{PRSim}
......@@ -23,7 +26,7 @@ simulation approach consists of eight steps: 1) fitting of theoretical Kappa dis
Maintainer: \packageMaintainer{PRSim}
}
\references{
Brunner, Bardossy, Furrer (2019) Technical note: Stochastic simulation of streamflow time series using phase randomization. Submitted.
Brunner, M. I., A. Bárdossy, and R. Furrer (2019). Technical note: Stochastic simulation of streamflow time series using phase randomization. Hydrology and Earth System Sciences, 23, 3175-3187, doi:10.5194/hess-23-3175-2019
}
\keyword{ package }
\examples{
......
......@@ -44,7 +44,7 @@ When using the kappa distribution, the AD test can for certain values of the par
\item{p_val}{A vector containing the p-values of \code{ks.test} or \code{ad.test} applied to the daily detrended data (if \code{GoFtest} is not NULL)}
}
\references{
Brunner, Bardossy, Furrer (2019) Technical note: Stochastic simulation of streamflow time series using phase randomization. Under review. \url{https://www.hydrol-earth-syst-sci-discuss.net/hess-2019-142/}.
Brunner, M. I., A. Bárdossy, and R. Furrer (2019). Technical note: Stochastic simulation of streamflow time series using phase randomization. Hydrology and Earth System Sciences, 23, 3175-3187, doi:10.5194/hess-23-3175-2019.
}
\author{
Manuela Brunner
......
\name{pRsim.wave}
\alias{PRsim.wave}
\alias{prsim.wave}
\alias{prsim_wave}
\title{Simulate for multiple stations}
\description{
Applies the wavelet-based simulation algorithm to multiple sites (single site possible as well)
}
\usage{
prsim.wave(data, station_id="Qobs", number_sim=1, win_h_length=15,
marginal=c("kappa","empirical"), n_par=4, n_wave=100, marginalpar=TRUE,
GoFtest=NULL, out_dir, verbose=TRUE, suppWarn=FALSE, ...)
}
\arguments{
\item{data}{list of data frames. One list entry, i.e. data frame, corresponds to one station. Data frames contain the time indications and runoff of one station. See \sQuote{Details}.}
\item{station_id}{identifies the station in case several time series are present in \code{data}. See \sQuote{Details}.}
\item{number_sim}{number of simulations to be carried out.}
\item{win_h_length}{(half-)length of moving window size.}
\item{marginal}{marginal distribution to be used for the backtransformation. Can be either \code{"kappa"}, \code{"empirical"}, or any type of CDF (see \sQuote{Details}). \code{"kappa"} uses the four-parameter kappa distribution for backtransformation, \code{"empirical"} uses the empirical distribution. CDF allows for specifying any distribution \sQuote{Examples}.}
\item{n_par}{number of parameters of the marginal distribution used}
\item{GoFtest}{If (non-null) a GoF test for daily data should be performed: \code{"KS"} performs a Kolmogorof-Smirnov test, and \code{"AD"} performs an Anderson-Darling test. see \sQuote{Details})}
\item{verbose}{logical. Should progress be reported?}
\item{marginalpar}{logical. Should the estimated parameters of the distribution used be returned?}
\item{n_wave}{number of scales to be considered in the continuous wavelet transform.}
\item{out_dir}{path of output directory where the stochastic simulations for the stations considered are saved.}
\item{suppWarn}{logical. See \sQuote{Details}.}
\item{...}{any other argument passed to the sub-function specifying the cdf for fitting. See \sQuote{Details} and \sQuote{Examples}. }
}
\details{
Time can be given with three columns named \code{"YYYY"}, \code{"MM"}, \code{"DD"}, or as in POSIXct format \code{YYYY-MM-DD}.
All leap days (Feb 29th) will be omitted from the analysis, but no missing observations are allowed.
Stations are identified by list index.
The function \code{homtest::par.kappa} might issue quite a few warnings of type \code{In fn(par, ...) : value out of range in 'gammafn'}. The argument \code{suppWarn} allows to silence warnings for the specific function call via \code{suppressWarnings()}. Of course, a subsequent check via \code{warnings()} is recommended.
Alternative distributions can be specified by providing three functions: (1) a function fitting the parameters of a distributions and providing a vector of these parameters as output (CDF_fit), (2) a function simulating random numbers from this distribution (rCDF), and (3) a function specifying the distribution (pCDF). See \sQuote{Examples} for the generalized beta for the second kind and for the Generalized Extreme Values (GEV) distribution.
When using the kappa distribution, the AD test can for certain values of the parameter h not be performed.
}
\value{A list with elements
\item{simulation}{A data frame with time information, observations, and
\code{number_sim} columns containing the simulated runoff.}
\item{pars}{A matrix containing the estimated parameters of the marginal distribution (if \code{marginalpar}).}
\item{p_val}{A vector containing the p-values of \code{ks.test} or \code{ad.test} applied to the daily detrended data (if \code{GoFtest} is not NULL)}
}
\references{
Brunner and Gilleland to be submitted
}
\author{
Manuela Brunner
}
\seealso{
\code{ks.test}
}
\examples{
data(runoff_multi_sites)
prsim.wave(runoff_multi_sites, "Qobs", 1, suppWarn=TRUE, out_dir='~/PRSim-devel/data/simulations_multi_sites')
# warnings() # as a follow-up to `suppWarn=TRUE`
## Specifying particular CDFs:
## (1) example with the Generalized Extreme Value (GEV) distribution
require("evd")
require("ismev")
rGEV <- function(n, theta) rgev(n, theta[1], theta[2], theta[3])
pGEV <- function(x, theta) pgev(x, theta[1], theta[2], theta[3])
GEV_fit <- function( xdat, ...) gev.fit( xdat, ...)$mle
\dontrun{ # The following call requires 5 seconds to execute
prsim.wave(runoff_multi_sites, "Qobs", 1,
marginal="GEV", n_par=3, verbose=FALSE, marginalpar=FALSE,
show=FALSE, out_dir='dir_PRsim_wave_data/simulations_multi_sites') # Supress 'gev.fit' output.
}
## (2) example with generalized Beta distribution of the second kind
require( "GB2")
rGB2 <- function(n, theta) rgb2(n, theta[1], theta[2], theta[3], theta[4])
pGB2 <- function(x, theta) pgb2(x, theta[1], theta[2], theta[3], theta[4])
GB2_fit <- function( xdat, ...) ml.gb2( xdat, ...)$opt1$par
\dontrun{ # The following call requires half minute or so to execute. Some warnings are issued
prsim.wave(runoff_multi_sites, "Qobs", 1, suppWarn=TRUE,
marginal="GB2", out_dir='dir_PRsim_wave_data/simulations_multi_sites')
}
}
\keyword{ts}
\name{runoff_multi_sites}
\alias{runoff multi sites}
\docType{data}
\title{
Sample runoff of four catchments with a similar discharge regime
}
\description{
Observed runoff data from four USGS sites.
}
\usage{data("runoff_multi_sites")}
\format{
A list of four data frames (one list per station) of the following 4 variables.
\describe{
\item{\code{YYYY}}{a numeric vector, year}
\item{\code{MM}}{a numeric vector, month}
\item{\code{DD}}{a numeric vector, day}
\item{\code{Qobs}}{a numeric vector, observed runoff}
}
}
\details{
The data contains runoff for four USGS gages: (i) Calawah River near Forks, WA (USGS 12043000), (ii) NF Stillaguamish River near Arlington, WA (USGS 12167000), (iii) Nehalem River near Foss, OR (USGS 14301000), and (iv) Steamboat Creek near Glide, OR (USGS 14316700).
}
\source{
The actual discharge data were downloaded from
\url{https://waterdata.usgs.gov/nwis}.
}
\references{
Brunner and Gilleland to be submitted.
}
\examples{
data(runoff_multi_sites)
str(runoff_multi_sites)
runoff_multi_sites[[1]]$timestamp <- paste(runoff_multi_sites[[1]]$YYYY, runoff_multi_sites[[1]]$MM, runoff_multi_sites[[1]]$DD, sep=" ")
runoff_multi_sites[[1]]$timestamp <- as.POSIXct(strptime(runoff_multi_sites[[1]]$timestamp,format="\%Y \%m \%d", tz="GMT"))
plot(runoff_multi_sites[[1]]$timestamp[1:1000], runoff_multi_sites[[1]]$Qobs[1:1000], type="l",
xlab="Time [d]", ylab=expression(paste("Discharge [m"^3,"/s]")))
}
\keyword{datasets}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment