Commits (3)
@@ -17,7 +17,7 @@ Description: Provides a simulation framework to simulate streamflow time series
streamflow values and their temporal correlation as expressed by short- and long-range
dependence. The approach is based on the randomization of the phases of the Fourier
transform. We further use the flexible four-parameter Kappa distribution, which allows
for the extrapolation to yet unobserved low and high flows. Alternatively, the empirical
or any other distribution can be used. A detailed description of
the simulation approach and an application example can be found
in https://www.hydrol-earth-syst-sci-discuss.net/hess-2019-142/.
URL: https://git.math.uzh.ch/reinhard.furrer/PRSim-devel
...
importFrom("homtest", "Lmoments", "rand.kappa", "par.kappa", "F.kappa") importFrom("homtest", "Lmoments", "rand.kappa", "par.kappa", "F.kappa")
importFrom("stats", "rnorm", "runif", "fft", "ks.test") importFrom("stats", "rnorm", "runif", "fft", "ks.test")
<<<<<<< HEAD
importFrom("graphics", "hist")
=======
importFrom("goftest", "ad.test") importFrom("goftest", "ad.test")
>>>>>>> f0241bb562c45952cb5f9f249dbd9b2585c469b0
export("prsim") export("prsim")
### marginal: "kappa", "empirical", or the name of a user-supplied distribution (see examples)
prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
                  marginal="kappa", n_par=4,
                  verbose=TRUE, marginalpar=TRUE, suppWarn=FALSE, GoFtest=NULL, ...){
  ifft <- function (x) fft(x, inverse = TRUE)/length(x)
@@ -45,6 +45,8 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
  data$timestamp <- as.POSIXct(strptime(tmp, format="%Y %m %d", tz="GMT"))
}
### test whether r_CDF, p_CDF, and CDF_fit are properly defined.
### @Reinhard: could you please implement these tests?
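### A minimal sketch of such a check (assumption: the user-supplied hooks follow the naming
### convention r_<CDF>, p_<CDF>, <CDF>_fit used further below):
# if(!(marginal %in% c("kappa","empirical"))){
#   needed <- c(paste0("r_", CDF), paste0("p_", CDF), paste0(CDF, "_fit"))
#   found  <- sapply(needed, exists, mode="function")
#   if(any(!found)) stop("Please define the function(s): ", paste(needed[!found], collapse=", "))
# }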
### remove February 29
data <- data[format(data$timestamp, "%m %d") != "02 29",]
@@ -95,8 +97,8 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
### To enable a sufficient sample size by using daily values in moving window around day i (i.e., reduce uncertainty due to fitting)
if(marginal=="kappa"){
  p_vals <- numeric(365)
  par_day <- matrix(0, nrow=365, ncol=4)
  # density_kap <- list()
  ### define window length
  win_length <- c(1:win_h_length)
  for(d in c(1:365)){
@@ -119,7 +121,7 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
if(length(test)>1){
  kap_par <- test
  par_day[d,] <- unlist(kap_par)
  ### define vector of quantiles
  quant <- sort(data_window)
  thresh <- kap_par$xi + kap_par$alfa*(1 - kap_par$h^(-kap_par$k))/kap_par$k
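  ### (note, assumption) for h > 0 this corresponds to the lower bound of the support of the
  ### fitted kappa distribution, i.e. the smallest value the fitted cdf can produce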
@@ -134,35 +136,77 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
  # density_kap[[d]] <- density(data_kap)
  # hist(data_window)
  # hist(data_kap,add=T,col="red")
  if (!is.null(GoFtest) && tolower(GoFtest)=="ks")
    p_vals[d] <- ks.test(data_window, data_kap)$p.value ### kappa distribution not rejected at alpha=0.05
  if (!is.null(GoFtest) && tolower(GoFtest)=="ad"){
    ### ad.test() can fail for some parameter combinations; fall back to NA in that case
    if(length(try(ad.test(data_window, F.kappa, xi=kap_par$xi, alfa=kap_par$alfa, k=kap_par$k, h=kap_par$h)))==1){
      p_vals[d] <- NA
    }else{
      p_vals[d] <- ad.test(data_window, F.kappa, xi=kap_par$xi, alfa=kap_par$alfa, k=kap_par$k, h=kap_par$h)$p.value
    }
  }
} else{
  if(d==1){
    p_vals[d] <- NA
    par_day[d,] <- NA
  }else{
    p_vals[d] <- p_vals[d-1]
    par_day[d,] <- par_day[d-1,]
  }
}
}
###qu: check if subsequent NAs work properly.
### replace NA entries by values of subsequent day
if(length(which(is.na(par_day[,1])))>0){
  indices <- rev(which(is.na(par_day[,1])))
  for(i in 1:length(indices)){
    par_day[indices[i],] <- par_day[indices[i]+1,]
  }
}
}
# which(unlist(p_val_list)<0.05) ### non-rejected for most days, difficulty with winter months (relation to hp production?)
### use either a predefined distribution in R or define own function
if(!(marginal %in% c("kappa","empirical"))){
  p_vals <- numeric(365)
  par_day <- matrix(0, nrow=365, ncol=n_par)
  for(d in c(1:365)){
    ### define window length
    win_length <- c(1:win_h_length)
    ### define start and end of window
    before <- data$index[d+365-win_length]
    after <- data$index[d+365+win_length-1]
    ### define days within window
    ids <- c(before,after)
    ### determine values in window around day i
    data_window <- data$Qobs[which(data$index%in%ids)]
    ### fit the specified distribution to the data in the window
    # fitdistr(x=data_window,dgengamma.stacy,start=list("scale"=1,"k"=6,"d"=2)) ### package VGAM: fitting does not work
    ### use the user-supplied fitting function <CDF>_fit (see examples)
    # theta <- CDF.fit(xdat=data_window)
    theta <- match.fun(paste(CDF,"_fit",sep=""))(xdat=data_window)
    # do.call(paste(CDF,".fit",sep=""),list(xdat=data_window))
    ### goodness of fit test
    # data_random <- rCDF(n=length(data_window),theta=theta)
    data_random <- match.fun(paste("r_",CDF,sep=""))(n=length(data_window),theta=theta)
    # density_gengam[[d]] <- density(data_gengam)
    hist(data_window)
    hist(data_random,add=T,col="red")
    if (!is.null(GoFtest) && tolower(GoFtest)=="ks"){
      p_vals[d] <- ks.test(data_window,data_random)$p.value
    }
    if (!is.null(GoFtest) && tolower(GoFtest)=="ad"){
      p_vals[d] <- ad.test(data_window,match.fun(paste("p_",CDF,sep="")),theta)$p.value
    }
    ### store parameters
    par_day[d,] <- theta
  }
}
### Deseasonalization was not found to be necessary. Use normalized data directly.
data$des <- data$norm
@@ -263,13 +307,13 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
### use kappa distribution for backtransformation
if(marginal=="kappa"){
  colnames(par_day) <- names(kap_par)
  ### use monthly Kappa distribution for backtransformation
  ### simulate random sample of size n from Kappa distribution
  data_day$kappa <- rand.kappa(length(data_day$Qobs),
                               xi=par_day[d,"xi"], alfa=par_day[d,"alfa"],
                               k=par_day[d,"k"], h=par_day[d,"h"])
# if(marginal=="wakeby"){ # if(marginal=="wakeby"){
# ### use Wakeby distribution for backtrasformation # ### use Wakeby distribution for backtrasformation
@@ -309,6 +353,25 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
  data_new$simulated_seasonal[which(data_new$index%in%c(d))] <- data_ordered$Qobs[data_new$rank[which(data$index%in%c(d))]]
  # }
}
### use any predefined distribution for backtransformation
if(!(marginal %in% c("kappa","empirical"))){
  ### use monthly distribution for backtransformation
  ### simulate random sample of size n from the fitted distribution
  data_day$cdf <- match.fun(paste("r_",CDF,sep=""))(n=length(data_day$Qobs), par_day[d,])
  data_day$rank <- rank(data_day$cdf)
  data_new$rank <- rank(data_new$seasonal)
  # hist(data_day$Qobs)
  # hist(data_day$cdf,add=T,col="blue")
  # data_day$rank <- rank(data_day$cdf)
  data_new$rank[which(data$index%in%c(d))] <- rank(data_new[which(data$index%in%c(d)),]$seasonal)
  ### derive corresponding values from the fitted distribution
  ### identify value corresponding to rank in the simulated marginal sample
  data_ordered <- data_day[order(data_day$rank),]
  data_new$simulated_seasonal[which(data_new$index%in%c(d))] <- data_ordered$cdf[data_new$rank[which(data$index%in%c(d))]]
}
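### toy illustration (sketch, not part of the algorithm): the rank-based backtransformation
### keeps the ranks of the phase-randomized series and replaces its values by a sample
### drawn from the fitted marginal distribution
# x_sim  <- rnorm(10)              # stand-in for the phase-randomized values of day d
# x_marg <- sort(rgamma(10, 2))    # stand-in for the sample simulated from the fitted marginal
# x_back <- x_marg[rank(x_sim)]    # backtransformed values: marginal of x_marg, ranks of x_sim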
} # end for loop
data_sim[[r]] <- data_new$simulated_seasonal
@@ -325,6 +388,13 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
  data_sim)
if (is.null(GoFtest)) {
  p_vals <- NULL
}
if(marginal=="kappa"){
  if (marginalpar) { # also return intermediate results
    return(list( simulation=data_stoch, pars=par_day, p_val=p_vals))
@@ -335,6 +405,6 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
    return(list( simulation=data_stoch, pars=NULL, p_val=p_vals))
  }
}else{
  return(list(simulation=data_stoch))
}
}
rCDF <- function(n, theta) {
  rnorm(n, theta[1], sqrt(theta[2]))
}
### gev.fit() and the portpirie example data are provided by package 'ismev'
CDF.fit <- function(xdat, ...) { gev.fit(xdat, ..., show=FALSE)$mle }
# data(portpirie)
# gev.fit(portpirie[,2])
\ No newline at end of file
@@ -2,8 +2,40 @@
data(runoff)
out <- prsim(data=runoff, number_sim=1, marginal="empirical")
out <- prsim(data=runoff, number_sim=1, marginal="kappa", GoFtest="KS")
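## (sketch) with GoFtest set, out$p_val holds one goodness-of-fit p-value per calendar day
# p_val <- out$p_val
# sum(p_val < 0.05, na.rm=TRUE)   # days on which the fitted kappa distribution is rejected at the 5% level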
### GEV distribution
### rgev()/pgev() are assumed to come from a package such as 'evd'; gev.fit() is from 'ismev'
r_gev <- function(n, theta){
  rgev(n, theta[1], theta[2], theta[3])
}
p_gev <- function(x, theta){
  pgev(x, theta[1], theta[2], theta[3])
}
gev_fit <- function( xdat, ...) {
  gev.fit( xdat, ...)$mle
}
### generalized Beta distribution of the second kind
### rgb2(), pgb2(), and mlfit.gb2() are provided by package 'GB2'
r_gb2 <- function(n, theta){
  rgb2(n, theta[1], theta[2], theta[3], theta[4])
}
p_gb2 <- function(x, theta){
  pgb2(x, theta[1], theta[2], theta[3], theta[4])
}
gb2_fit <- function( xdat, ...) {
  mlfit.gb2( xdat, ...)[[2]]$par
}
### @ Reinhard: This is not very elegant yet. CDF needs to be specified before giving it to the function.
### I guess this can be alternatively resolved by using a different argument for marginal. Any ideas?
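### One possible alternative (sketch only, not implemented): let `marginal` itself carry the
### distribution name and build the helper names inside prsim(), e.g.
###   fit_fun <- match.fun(paste0(marginal, "_fit"))
###   r_fun   <- match.fun(paste0("r_", marginal))
###   p_fun   <- match.fun(paste0("p_", marginal))
### so that no global `CDF` object would be needed.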
CDF <-"gev"
out <- prsim(data=runoff, number_sim=1, marginal=CDF,GoFtest = "KS",n_par=3)
CDF <-"gb2"
out <- prsim(data=runoff, number_sim=1, marginal="gb2",GoFtest = "KS",n_par=4)
sim <- out$simulation
# p_val <- out$p_val
par(mai=c(.9,.9,.1,.1))
plot(sim$timestamp[1:1000], sim$Qobs[1:1000], type="l",
     xlab="Time [d]", ylab=expression(paste("Discharge [m"^3,"/s]")))
...
@@ -6,19 +6,22 @@
Applies the algorithm to a single station
}
\usage{
prsim(data, station_id="Qobs", number_sim=1, win_h_length=15,
      marginal="kappa", n_par=4,
      verbose=TRUE, marginalpar=TRUE, suppWarn=FALSE, GoFtest=NULL, ...)
}
\arguments{
  \item{data}{data frame containing the time indications and runoff of at least one station. See \sQuote{Details}.}
  \item{station_id}{identifies the station in case several runoffs are present in \code{data}. See \sQuote{Details}.}
  \item{number_sim}{number of simulations to be carried out.}
  \item{win_h_length}{(half-)length of moving window size.}
  \item{marginal}{marginal distribution to be used for the backtransformation. Can be \code{"kappa"}, \code{"empirical"}, or the name of any other distribution. \code{"kappa"} uses the four-parameter kappa distribution, \code{"empirical"} uses the empirical distribution. Any other name requires user-defined fitting and sampling functions; see \sQuote{Details} and \sQuote{Examples}.}
  \item{n_par}{number of parameters of the marginal distribution used.}
  \item{verbose}{logical. Should progress be reported?}
  \item{marginalpar}{logical. Should the estimated parameters of the distribution used be returned?}
  \item{suppWarn}{logical. See \sQuote{Details}.}
  \item{GoFtest}{\code{NULL}, \code{"KS"}, or \code{"AD"}. Should a goodness-of-fit test for daily data be performed and the p-values be returned? \code{NULL} performs no test, \code{"KS"} performs a Kolmogorov-Smirnov test, and \code{"AD"} performs an Anderson-Darling test.}
  \item{...}{any other argument passed to the sub-function specifying the cdf for fitting and drawing. See \sQuote{Details} and \sQuote{Examples}.}
}
@@ -65,20 +68,33 @@ out <- prsim( runoff[ runoff$YYYY<1980, ], "Qobs", 1, suppWarn=TRUE)
# warnings() # as a follow-up to `suppWarn=TRUE`
## Specifying particular CDF:
## example with the generalized Beta distribution of the second kind
require( "GB2")
CDF <- "gb2"
r_gb2 <- function(n, theta){
  rgb2(n, theta[1], theta[2], theta[3], theta[4])
}
p_gb2 <- function(x, theta){
  pgb2(x, theta[1], theta[2], theta[3], theta[4])
}
gb2_fit <- function( xdat, ...) {
  mlfit.gb2( xdat, ...)[[2]]$par
}
out <- prsim( runoff[ runoff$YYYY<1980, ], "Qobs", 1, suppWarn=TRUE,
              marginal=CDF, n_par=4, GoFtest="KS")
## example with the Generalized Extreme Value (GEV) distribution
## rgev()/pgev() are assumed to come from a package such as 'evd'; gev.fit() is from 'ismev'
CDF <- "gev"
r_gev <- function(n, theta){
  rgev(n, theta[1], theta[2], theta[3])
}
p_gev <- function(x, theta){
  pgev(x, theta[1], theta[2], theta[3])
}
gev_fit <- function( xdat, ...) {
  gev.fit( xdat, ...)$mle
}
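## (sketch, mirroring the demo) run the simulation with the user-defined GEV marginal
# out <- prsim( runoff[ runoff$YYYY<1980, ], "Qobs", 1, suppWarn=TRUE,
#               marginal=CDF, n_par=3, GoFtest="KS")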
}
\keyword{ts}